1 package Text::Tradition::Collation;
4 use Encode qw( decode_utf8 );
8 use IPC::Run qw( run binary );
10 use Text::Tradition::Collation::Data;
11 use Text::Tradition::Collation::Reading;
12 use Text::Tradition::Collation::RelationshipStore;
13 use Text::Tradition::Error;
14 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
16 use XML::LibXML::XPathContext;
20 isa => 'Text::Tradition::Collation::Data',
58 isa => 'Text::Tradition',
59 writer => '_set_tradition',
65 Text::Tradition::Collation - a software model for a text collation
70 my $t = Text::Tradition->new(
71 'name' => 'this is a text',
73 'file' => '/path/to/tei_parallel_seg_file.xml' );
75 my $c = $t->collation;
76 my @readings = $c->readings;
77 my @paths = $c->paths;
78 my @relationships = $c->relationships;
80 my $svg_variant_graph = $t->collation->as_svg();
84 Text::Tradition is a library for representation and analysis of collated
85 texts, particularly medieval ones. The Collation is the central feature of
86 a Tradition, where the text, its sequence of readings, and its relationships
87 between readings are actually kept.
93 The constructor. Takes a hash or hashref of the following arguments:
97 =item * tradition - The Text::Tradition object to which the collation
100 =item * linear - Whether the collation should be linear; that is, whether
101 transposed readings should be treated as two linked readings rather than one,
102 and therefore whether the collation graph is acyclic. Defaults to true.
104 =item * baselabel - The default label for the path taken by a base text
105 (if any). Defaults to 'base text'.
107 =item * wit_list_separator - The string to join a list of witnesses for
108 purposes of making labels in display graphs. Defaults to ', '.
110 =item * ac_label - The extra label to tack onto a witness sigil when
111 representing another layer of path for the given witness - that is, when
112 a text has more than one possible reading due to scribal corrections or
113 the like. Defaults to ' (a.c.)'.
115 =item * wordsep - The string used to separate words in the original text.
126 =head2 wit_list_separator
134 Simple accessors for collation attributes.
138 The meta-reading at the start of every witness path.
142 The meta-reading at the end of every witness path.
146 Returns all Reading objects in the graph.
148 =head2 reading( $id )
150 Returns the Reading object corresponding to the given ID.
152 =head2 add_reading( $reading_args )
154 Adds a new reading object to the collation.
155 See L<Text::Tradition::Collation::Reading> for the available arguments.
157 =head2 del_reading( $object_or_id )
159 Removes the given reading from the collation, implicitly removing its
160 paths and relationships.
162 =head2 has_reading( $id )
164 Predicate to see whether a given reading ID is in the graph.
166 =head2 reading_witnesses( $object_or_id )
168 Returns a list of sigils whose witnesses contain the reading.
172 Returns all reading paths within the document - that is, all edges in the
173 collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
175 =head2 add_path( $source, $target, $sigil )
177 Links the given readings in the collation in sequence, under the given witness
178 sigil. The readings may be specified by object or ID.
180 =head2 del_path( $source, $target, $sigil )
182 Links the given readings in the collation in sequence, under the given witness
183 sigil. The readings may be specified by object or ID.
185 =head2 has_path( $source, $target );
187 Returns true if the two readings are linked in sequence in any witness.
188 The readings may be specified by object or ID.
192 Returns all Relationship objects in the collation.
194 =head2 add_relationship( $reading, $other_reading, $options )
196 Adds a new relationship of the type given in $options between the two readings,
197 which may be specified by object or ID. Returns a value of ( $status, @vectors)
198 where $status is true on success, and @vectors is a list of relationship edges
199 that were ultimately added.
200 See L<Text::Tradition::Collation::Relationship> for the available options.
205 my ( $class, @args ) = @_;
206 my %args = @args == 1 ? %{ $args[0] } : @args;
207 # TODO determine these from the Moose::Meta object
208 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
209 linear wordsep start end cached_table _graphcalc_done);
211 for my $attr (@delegate_attrs) {
212 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
214 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
220 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
221 $self->_set_start( $self->add_reading(
222 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
223 $self->_set_end( $self->add_reading(
224 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
227 sub register_relationship_type {
229 my %args = @_ == 1 ? %{$_[0]} : @_;
230 if( $self->relations->has_type( $args{name} ) ) {
231 throw( 'Relationship type ' . $args{name} . ' already registered' );
233 $self->relations->add_type( %args );
236 sub get_relationship_type {
237 my( $self, $name ) = @_;
238 return $self->relations->has_type( $name )
239 ? $self->relations->type( $name ) : undef;
242 ### Reading construct/destruct functions
245 my( $self, $reading ) = @_;
246 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
247 my %args = %$reading;
248 if( $args{'init'} ) {
249 # If we are initializing an empty collation, don't assume that we
250 # have set a tradition.
251 delete $args{'init'};
252 } elsif( $self->tradition->can('language') && $self->tradition->has_language
253 && !exists $args{'language'} ) {
254 $args{'language'} = $self->tradition->language;
256 $reading = Text::Tradition::Collation::Reading->new(
257 'collation' => $self,
260 # First check to see if a reading with this ID exists.
261 if( $self->reading( $reading->id ) ) {
262 throw( "Collation already has a reading with id " . $reading->id );
264 $self->_graphcalc_done(0);
265 $self->_add_reading( $reading->id => $reading );
266 # Once the reading has been added, put it in both graphs.
267 $self->sequence->add_vertex( $reading->id );
268 $self->relations->add_reading( $reading->id );
272 around del_reading => sub {
277 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
280 # Remove the reading from the graphs.
281 $self->_graphcalc_done(0);
282 $self->_clear_cache; # Explicitly clear caches to GC the reading
283 $self->sequence->delete_vertex( $arg );
284 $self->relations->delete_reading( $arg );
287 $self->$orig( $arg );
290 =head2 merge_readings( $main, $second, $concatenate, $with_str )
292 Merges the $second reading into the $main one. If $concatenate is true, then
293 the merged node will carry the text of both readings, concatenated with either
294 $with_str (if specified) or a sensible default (the empty string if the
295 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
297 The first two arguments may be either readings or reading IDs.
303 my $cxfile = 't/data/Collatex-16.xml';
304 my $t = Text::Tradition->new(
306 'input' => 'CollateX',
309 my $c = $t->collation;
311 my $rno = scalar $c->readings;
312 # Split n21 ('unto') for testing purposes
313 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
314 my $old_r = $c->reading( 'n21' );
315 $old_r->alter_text( 'to' );
316 $c->del_path( 'n20', 'n21', 'A' );
317 $c->add_path( 'n20', 'n21p0', 'A' );
318 $c->add_path( 'n21p0', 'n21', 'A' );
319 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
321 ok( $c->reading( 'n21p0' ), "New reading exists" );
322 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
324 # Combine n3 and n4 ( with his )
325 $c->merge_readings( 'n3', 'n4', 1 );
326 ok( !$c->reading('n4'), "Reading n4 is gone" );
327 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
329 # Collapse n9 and n10 ( rood / root )
330 $c->merge_readings( 'n9', 'n10' );
331 ok( !$c->reading('n10'), "Reading n10 is gone" );
332 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
334 # Combine n21 and n21p0
335 my $remaining = $c->reading('n21');
336 $remaining ||= $c->reading('n22'); # one of these should still exist
337 $c->merge_readings( 'n21p0', $remaining, 1 );
338 ok( !$c->reading('n21'), "Reading $remaining is gone" );
339 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
349 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
350 my $mergemeta = $kept_obj->is_meta;
351 throw( "Cannot merge meta and non-meta reading" )
352 unless ( $mergemeta && $del_obj->is_meta )
353 || ( !$mergemeta && !$del_obj->is_meta );
355 throw( "Cannot merge with start or end node" )
356 if( $kept_obj eq $self->start || $kept_obj eq $self->end
357 || $del_obj eq $self->start || $del_obj eq $self->end );
358 throw( "Cannot combine text of meta readings" ) if $combine;
360 # We only need the IDs for adding paths to the graph, not the reading
361 # objects themselves.
362 my $kept = $kept_obj->id;
363 my $deleted = $del_obj->id;
364 $self->_graphcalc_done(0);
366 # The kept reading should inherit the paths and the relationships
367 # of the deleted reading.
368 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
369 my @vector = ( $kept );
370 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
371 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
372 next if $vector[0] eq $vector[1]; # Don't add a self loop
373 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
374 $self->sequence->add_edge( @vector );
375 my $fwits = $self->sequence->get_edge_attributes( @vector );
376 @wits{keys %$fwits} = values %$fwits;
377 $self->sequence->set_edge_attributes( @vector, \%wits );
379 $self->relations->merge_readings( $kept, $deleted, $combine );
381 # Do the deletion deed.
383 # Combine the text of the readings
384 my $joinstr = $combine_char;
385 unless( defined $joinstr ) {
386 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
387 $joinstr = $self->wordsep unless defined $joinstr;
389 $kept_obj->_combine( $del_obj, $joinstr );
391 $self->del_reading( $deleted );
394 =head2 merge_related( @relationship_types )
396 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
398 WARNING: This operation cannot be undone.
407 my $t = Text::Tradition->new(
410 'file' => 't/data/legendfrag.xml',
412 my $c = $t->collation;
415 map { $rdg_ids{$_} = 1 } $c->readings;
416 $c->merge_related( 'orthographic' );
417 is( scalar( $c->readings ), keys( %rdg_ids ) - 8,
418 "Successfully collapsed orthographic variation" );
419 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.1 /;
420 foreach my $rid ( keys %rdg_ids ) {
421 my $exp = $rdg_ids{$rid};
422 is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
423 ( $exp ? "retained" : "removed" ) );
425 ok( $c->linear, "Graph is still linear" );
427 $c->calculate_ranks; # This should succeed
428 ok( 1, "Can still calculate ranks on the new graph" );
430 ok( 0, "Rank calculation on merged graph failed: $@" );
433 # Now add some transpositions
434 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
435 $c->merge_related( 'transposition' );
436 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
437 "Transposed relationship is merged away" );
438 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
439 ok( !$c->linear, "Graph is no longer linear" );
441 $c->calculate_ranks; # This should fail
442 ok( 0, "Rank calculation happened on nonlinear graph?!" );
443 } catch ( Text::Tradition::Error $e ) {
444 is( $e->message, 'Cannot calculate ranks on a non-linear graph',
445 "Rank calculation on merged graph threw an error" );
454 # TODO: there should be a way to display merged without affecting the underlying data!
459 map { $reltypehash{$_} = 1 } @_;
461 # Set up the filter for finding related readings
463 exists $reltypehash{$_[0]->type};
467 # Go through all readings looking for related ones
468 foreach my $r ( $self->readings ) {
469 next unless $self->reading( "$r" ); # might have been deleted meanwhile
470 my @related = $self->related_readings( $r, $filter );
472 push( @related, $r );
474 scalar $b->witnesses <=> scalar $a->witnesses
476 my $keep = shift @related;
477 foreach my $delr ( @related ) {
479 unless( $self->get_relationship( $keep, $delr )->colocated );
480 $self->merge_readings( $keep, $delr );
484 $self->linear( $linear );
487 =head2 compress_readings
489 Where possible in the graph, compresses plain sequences of readings into a
490 single reading. The sequences must consist of readings with no
491 relationships to other readings, with only a single witness path between
492 them and no other witness paths from either that would skip the other. The
493 readings must also not be marked as nonsense or bad grammar.
495 WARNING: This operation cannot be undone.
499 sub compress_readings {
501 # Anywhere in the graph that there is a reading that joins only to a single
502 # successor, and neither of these have any relationships, just join the two
504 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
505 # Now look for readings that can be joined to their successors.
506 next unless $rdg->is_combinable;
508 while( $self->sequence->successors( $rdg ) == 1 ) {
509 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
510 throw( "Infinite loop" ) if $seen{$next->id};
511 $seen{$next->id} = 1;
512 last if $self->sequence->predecessors( $next ) > 1;
513 last unless $next->is_combinable;
514 say "Joining readings $rdg and $next";
515 $self->merge_readings( $rdg, $next, 1 );
518 # Make sure we haven't screwed anything up
519 foreach my $wit ( $self->tradition->witnesses ) {
520 my $pathtext = $self->path_text( $wit->sigil );
521 my $origtext = join( ' ', @{$wit->text} );
522 throw( "Text differs for witness " . $wit->sigil )
523 unless $pathtext eq $origtext;
524 if( $wit->is_layered ) {
525 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
526 $origtext = join( ' ', @{$wit->layertext} );
527 throw( "Ante-corr text differs for witness " . $wit->sigil )
528 unless $pathtext eq $origtext;
532 $self->relations->rebuild_equivalence();
533 $self->calculate_ranks();
536 # Helper function for manipulating the graph.
537 sub _stringify_args {
538 my( $self, $first, $second, @args ) = @_;
540 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
541 $second = $second->id
542 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
543 return( $first, $second, @args );
546 # Helper function for manipulating the graph.
547 sub _objectify_args {
548 my( $self, $first, $second, $arg ) = @_;
549 $first = $self->reading( $first )
550 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
551 $second = $self->reading( $second )
552 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
553 return( $first, $second, $arg );
556 =head2 duplicate_reading( $reading, @witlist )
558 Split the given reading into two, so that the new reading is in the path for
559 the witnesses given in @witlist. If the result is that certain non-colocated
560 relationships (e.g. transpositions) are no longer valid, these will be removed.
561 Returns the newly-created reading.
565 use Test::More::UTF8;
569 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
570 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
571 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
573 my $sc = $st->collation;
575 ok( $sc->reading('n131'), "Tradition has the affected reading" );
576 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
577 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
579 # Detach the erroneously collated reading
580 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
581 ok( $newr, "New reading was created" );
582 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
583 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
584 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
585 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
586 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
588 # Check that the bad transposition is gone
589 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
590 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
592 # The collation should not be fixed
593 my @pairs = $sc->identical_readings();
594 is( scalar @pairs, 0, "Not re-collated yet" );
596 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
597 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
598 is( scalar @pairs, 3, "Found three more identical readings" );
599 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
600 $sc->flatten_ranks();
601 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
603 # Check that we can't "duplicate" a reading with no wits or with all wits
605 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
606 ok( 0, "Reading duplication without witnesses throws an error" );
607 } catch( Text::Tradition::Error $e ) {
608 like( $e->message, qr/Must specify one or more witnesses/,
609 "Reading duplication without witnesses throws the expected error" );
611 ok( 0, "Reading duplication without witnesses threw the wrong error" );
615 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
616 ok( 0, "Reading duplication with all witnesses throws an error" );
617 } catch( Text::Tradition::Error $e ) {
618 like( $e->message, qr/Cannot join all witnesses/,
619 "Reading duplication with all witnesses throws the expected error" );
621 ok( 0, "Reading duplication with all witnesses threw the wrong error" );
628 sub duplicate_reading {
629 my( $self, $r, @wits ) = @_;
630 # Check that we are not doing anything unwise.
631 throw( "Must specify one or more witnesses for the duplicated reading" )
633 unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
634 $r = $self->reading( $r );
636 throw( "Cannot duplicate a meta-reading" )
638 throw( "Cannot join all witnesses to the new reading" )
639 if scalar( @wits ) == scalar( $r->witnesses );
641 # Get all the reading attributes and duplicate them.
642 my $rmeta = Text::Tradition::Collation::Reading->meta;
644 foreach my $attr( $rmeta->get_all_attributes ) {
645 next if $attr->name =~ /^_/;
646 my $acc = $attr->get_read_method;
647 if( !$acc && $attr->has_applied_traits ) {
648 my $tr = $attr->applied_traits;
649 if( $tr->[0] =~ /::(Array|Hash)$/ ) {
651 my %methods = reverse %{$attr->handles};
652 $acc = $methods{elements};
653 $args{$attr->name} = $which eq 'Array'
654 ? [ $r->$acc ] : { $r->$acc };
657 $args{$attr->name} = $r->$acc if $acc;
660 # By definition the new reading will no longer be common.
661 $args{is_common} = 0;
662 # The new reading also needs its own ID.
663 $args{id} = $self->_generate_dup_id( $r->id );
665 # Try to make the new reading.
666 my $newr = $self->add_reading( \%args );
667 # The old reading is also no longer common.
670 # For each of the witnesses, dissociate from the old reading and
671 # associate with the new.
672 foreach my $wit ( @wits ) {
673 my $prior = $self->prior_reading( $r, $wit );
674 my $next = $self->next_reading( $r, $wit );
675 $self->del_path( $prior, $r, $wit );
676 $self->add_path( $prior, $newr, $wit );
677 $self->del_path( $r, $next, $wit );
678 $self->add_path( $newr, $next, $wit );
681 # If the graph is ranked, we need to look for relationships that are now
682 # invalid (i.e. 'non-colocation' types that might now be colocated) and
683 # remove them. If not, we can skip it.
686 my @deleted_relations;
687 if( $self->end->has_rank ) {
688 # Find the point where we can stop checking
689 $succ = $self->common_successor( $r, $newr );
691 # Hash the existing ranks
692 foreach my $rdg ( $self->readings ) {
693 $rrk{$rdg->id} = $rdg->rank;
695 # Calculate the new ranks
696 $self->calculate_ranks();
698 # Check for invalid non-colocated relationships among changed-rank readings
699 # from where the ranks start changing up to $succ
700 my $lastrank = $succ->rank;
701 foreach my $rdg ( $self->readings ) {
702 next if $rdg->rank > $lastrank;
703 next if $rdg->rank == $rrk{$rdg->id};
704 my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
705 next unless @noncolo;
706 foreach my $nc ( @noncolo ) {
707 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
708 push( @deleted_relations, [ $rdg->id, $nc->id ] );
713 return ( $newr, @deleted_relations );
716 sub _generate_dup_id {
717 my( $self, $rid ) = @_;
722 if( $self->has_reading( $newid ) ) {
735 # We only need the IDs for adding paths to the graph, not the reading
736 # objects themselves.
737 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
739 $self->_graphcalc_done(0);
740 # Connect the readings
741 unless( $self->sequence->has_edge( $source, $target ) ) {
742 $self->sequence->add_edge( $source, $target );
743 $self->relations->add_equivalence_edge( $source, $target );
745 # Note the witness in question
746 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
752 if( ref( $_[0] ) eq 'ARRAY' ) {
759 # We only need the IDs for removing paths from the graph, not the reading
760 # objects themselves.
761 my( $source, $target, $wit ) = $self->_stringify_args( @args );
763 $self->_graphcalc_done(0);
764 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
765 $self->sequence->delete_edge_attribute( $source, $target, $wit );
767 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
768 $self->sequence->delete_edge( $source, $target );
769 $self->relations->delete_equivalence_edge( $source, $target );
774 # Extra graph-alike utility
777 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
778 return undef unless $self->sequence->has_edge( $source, $target );
779 return $self->sequence->has_edge_attribute( $source, $target, $wit );
782 =head2 clear_witness( @sigil_list )
784 Clear the given witnesses out of the collation entirely, removing references
785 to them in paths, and removing readings that belong only to them. Should only
786 be called via $tradition->del_witness.
791 my( $self, @sigils ) = @_;
793 $self->_graphcalc_done(0);
794 # Clear the witness(es) out of the paths
795 foreach my $e ( $self->paths ) {
796 foreach my $sig ( @sigils ) {
797 $self->del_path( $e, $sig );
801 # Clear out the newly unused readings
802 foreach my $r ( $self->readings ) {
803 unless( $self->reading_witnesses( $r ) ) {
804 $self->del_reading( $r );
809 sub add_relationship {
811 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
812 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
813 foreach my $v ( @vectors ) {
814 next unless $self->get_relationship( $v )->colocated;
815 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
816 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
817 $self->_graphcalc_done(0);
825 around qw/ get_relationship del_relationship / => sub {
829 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
832 my @stringargs = $self->_stringify_args( @args );
833 $self->$orig( @stringargs );
836 =head2 reading_witnesses( $reading )
838 Return a list of sigils corresponding to the witnesses in which the reading appears.
842 sub reading_witnesses {
843 my( $self, $reading ) = @_;
844 # We need only check either the incoming or the outgoing edges; I have
845 # arbitrarily chosen "incoming". Thus, special-case the start node.
846 if( $reading eq $self->start ) {
847 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
850 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
851 my $wits = $self->sequence->get_edge_attributes( @$e );
852 @all_witnesses{ keys %$wits } = 1;
854 my $acstr = $self->ac_label;
855 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
856 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
858 return keys %all_witnesses;
861 =head1 OUTPUT METHODS
863 =head2 as_svg( \%options )
865 Returns an SVG string that represents the graph, via as_dot and graphviz.
866 See as_dot for a list of options. Must have GraphViz (dot) installed to run.
871 my( $self, $opts ) = @_;
872 throw( "Need GraphViz installed to output SVG" )
873 unless File::Which::which( 'dot' );
874 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
875 $self->calculate_ranks()
876 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
877 my @cmd = qw/dot -Tsvg/;
879 my $dotfile = File::Temp->new();
881 # $dotfile->unlink_on_destroy(0);
882 binmode $dotfile, ':utf8';
883 print $dotfile $self->as_dot( $opts );
884 push( @cmd, $dotfile->filename );
885 run( \@cmd, ">", binary(), \$svg );
886 $svg = decode_utf8( $svg );
891 =head2 as_dot( \%options )
893 Returns a string that is the collation graph expressed in dot
894 (i.e. GraphViz) format. Options include:
909 my( $self, $opts ) = @_;
910 my $startrank = $opts->{'from'} if $opts;
911 my $endrank = $opts->{'to'} if $opts;
912 my $color_common = $opts->{'color_common'} if $opts;
913 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
914 && $self->end->rank > 100;
915 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
917 # Check the arguments
919 return if $endrank && $startrank > $endrank;
920 return if $startrank > $self->end->rank;
922 if( defined $endrank ) {
923 return if $endrank < 0;
924 $endrank = undef if $endrank == $self->end->rank;
927 my $graph_name = $self->tradition->name;
928 $graph_name =~ s/[^\w\s]//g;
929 $graph_name = join( '_', split( /\s+/, $graph_name ) );
937 'fillcolor' => 'white',
942 'arrowhead' => 'open',
943 'color' => '#000000',
944 'fontcolor' => '#000000',
947 my $dot = sprintf( "digraph %s {\n", $graph_name );
948 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
949 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
951 # Output substitute start/end readings if necessary
953 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
956 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
958 if( $STRAIGHTENHACK ) {
960 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
961 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
962 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
964 my %used; # Keep track of the readings that actually appear in the graph
965 # Sort the readings by rank if we have ranks; this speeds layout.
966 my @all_readings = $self->end->has_rank
967 ? sort { $a->rank <=> $b->rank } $self->readings
969 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
970 foreach my $reading ( @all_readings ) {
971 # Only output readings within our rank range.
972 next if $startrank && $reading->rank < $startrank;
973 next if $endrank && $reading->rank > $endrank;
974 $used{$reading->id} = 1;
975 # Need not output nodes without separate labels
976 next if $reading->id eq $reading->text;
978 my $label = $reading->text;
979 $label .= '-' if $reading->join_next;
980 $label = "-$label" if $reading->join_prior;
981 $label =~ s/\"/\\\"/g;
982 $rattrs->{'label'} = $label;
983 $rattrs->{'id'} = $reading->id;
984 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
985 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
988 # Add the real edges. Need to weight one edge per rank jump, in a
990 # my $weighted = $self->_add_edge_weights;
991 my @edges = $self->paths;
992 my( %substart, %subend );
993 foreach my $edge ( @edges ) {
994 # Do we need to output this edge?
995 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
996 my $label = $self->_path_display_label( $opts,
997 $self->path_witnesses( $edge ) );
998 my $variables = { %edge_attrs, 'label' => $label };
1000 # Account for the rank gap if necessary
1001 my $rank0 = $self->reading( $edge->[0] )->rank
1002 if $self->reading( $edge->[0] )->has_rank;
1003 my $rank1 = $self->reading( $edge->[1] )->rank
1004 if $self->reading( $edge->[1] )->has_rank;
1005 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1006 $variables->{'minlen'} = $rank1 - $rank0;
1009 # Add the calculated edge weights
1010 # if( exists $weighted->{$edge->[0]}
1011 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
1012 # # $variables->{'color'} = 'red';
1013 # $variables->{'weight'} = 3.0;
1016 # EXPERIMENTAL: make edge width reflect no. of witnesses
1017 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1018 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1020 my $varopts = _dot_attr_string( $variables );
1021 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1022 $edge->[0], $edge->[1], $varopts );
1023 } elsif( $used{$edge->[0]} ) {
1024 $subend{$edge->[0]} = $edge->[1];
1025 } elsif( $used{$edge->[1]} ) {
1026 $substart{$edge->[1]} = $edge->[0];
1030 # If we are asked to, add relationship links
1031 if( exists $opts->{show_relations} ) {
1032 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1033 if( $filter eq 'transposition' ) {
1034 $filter =~ qr/^transposition$/;
1036 foreach my $redge ( $self->relationships ) {
1037 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1038 if( $filter ne 'all' ) {
1039 my $rel = $self->get_relationship( $redge );
1040 next unless $rel->type =~ /$filter/;
1042 arrowhead => 'none',
1044 constraint => 'false',
1045 label => uc( substr( $rel->type, 0, 4 ) ),
1048 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1049 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1055 # Add substitute start and end edges if necessary
1056 foreach my $node ( keys %substart ) {
1057 my $witstr = $self->_path_display_label( $opts,
1058 $self->path_witnesses( $substart{$node}, $node ) );
1059 my $variables = { %edge_attrs, 'label' => $witstr };
1060 my $nrdg = $self->reading( $node );
1061 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1062 # Substart is actually one lower than $startrank
1063 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1065 my $varopts = _dot_attr_string( $variables );
1066 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1068 foreach my $node ( keys %subend ) {
1069 my $witstr = $self->_path_display_label( $opts,
1070 $self->path_witnesses( $node, $subend{$node} ) );
1071 my $variables = { %edge_attrs, 'label' => $witstr };
1072 my $varopts = _dot_attr_string( $variables );
1073 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1076 if( $STRAIGHTENHACK ) {
1077 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1078 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1085 sub _dot_attr_string {
1088 foreach my $k ( sort keys %$hash ) {
1089 my $v = $hash->{$k};
1090 push( @attrs, $k.'="'.$v.'"' );
1092 return( '[ ' . join( ', ', @attrs ) . ' ]' );
1095 sub _add_edge_weights {
1097 # Walk the graph from START to END, choosing the successor node with
1098 # the largest number of witness paths each time.
1100 my $curr = $self->start->id;
1101 my $ranked = $self->end->has_rank;
1102 while( $curr ne $self->end->id ) {
1103 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
1104 my @succ = sort { $self->path_witnesses( $curr, $a )
1105 <=> $self->path_witnesses( $curr, $b ) }
1106 $self->sequence->successors( $curr );
1107 my $next = pop @succ;
1108 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
1109 # Try to avoid lacunae in the weighted path.
1111 ( $self->reading( $next )->is_lacuna ||
1112 $nextrank - $rank > 1 ) ){
1115 $weighted->{$curr} = $next;
1121 =head2 path_witnesses( $edge )
1123 Returns the list of sigils whose witnesses are associated with the given edge.
1124 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1128 sub path_witnesses {
1129 my( $self, @edge ) = @_;
1130 # If edge is an arrayref, cope.
1131 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1132 my $e = shift @edge;
1135 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1139 # Helper function. Make a display label for the given witnesses, showing a.c.
1140 # witnesses only where the main witness is not also in the list.
1141 sub _path_display_label {
1145 map { $wits{$_} = 1 } @_;
1147 # If an a.c. wit is listed, remove it if the main wit is also listed.
1148 # Otherwise keep it for explicit listing.
1149 my $aclabel = $self->ac_label;
1151 foreach my $w ( sort keys %wits ) {
1152 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1153 if( exists $wits{$1} ) {
1156 push( @disp_ac, $w );
1161 if( $opts->{'explicit_wits'} ) {
1162 return join( ', ', sort keys %wits );
1164 # See if we are in a majority situation.
1165 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1166 $maj = $maj > 5 ? $maj : 5;
1167 if( scalar keys %wits > $maj ) {
1168 unshift( @disp_ac, 'majority' );
1169 return join( ', ', @disp_ac );
1171 return join( ', ', sort keys %wits );
1176 =head2 readings_at_rank( $rank )
1178 Returns a list of readings at a given rank, taken from the alignment table.
1182 sub readings_at_rank {
1183 my( $self, $rank ) = @_;
1184 my $table = $self->alignment_table;
1185 # Table rank is real rank - 1.
1186 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1188 foreach my $e ( @elements ) {
1189 next unless ref( $e ) eq 'HASH';
1190 next unless exists $e->{'t'};
1191 $readings{$e->{'t'}->id} = $e->{'t'};
1193 return values %readings;
1198 Returns a GraphML representation of the collation. The GraphML will contain
1199 two graphs. The first expresses the attributes of the readings and the witness
1200 paths that link them; the second expresses the relationships that link the
1201 readings. This is the native transfer format for a tradition.
1205 use Text::Tradition;
1211 my $datafile = 't/data/florilegium_tei_ps.xml';
1212 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1214 'file' => $datafile,
1217 ok( $tradition, "Got a tradition object" );
1218 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1219 ok( $tradition->collation, "Tradition has a collation" );
1221 my $c = $tradition->collation;
1222 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1223 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1224 is( scalar $c->relationships, 0, "Collation has all relationships" );
1226 # Add a few relationships
1227 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1228 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1229 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
1231 # Now write it to GraphML and parse it again.
1233 my $graphml = $c->as_graphml;
1234 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1235 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1236 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1237 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1239 # Now add a stemma, write to GraphML, and look at the output.
1241 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1242 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1243 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1244 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1245 $graphml = $c->as_graphml;
1246 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1253 ## TODO MOVE this to Tradition.pm and modularize it better
1255 my( $self, $options ) = @_;
1256 $self->calculate_ranks unless $self->_graphcalc_done;
1258 my $start = $options->{'from'}
1259 ? $self->reading( $options->{'from'} ) : $self->start;
1260 my $end = $options->{'to'}
1261 ? $self->reading( $options->{'to'} ) : $self->end;
1262 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1263 throw( 'Start node must be before end node' );
1265 # The readings need to be ranked for this to work.
1266 $start = $self->start unless $start->has_rank;
1267 $end = $self->end unless $end->has_rank;
1269 unless( $start eq $self->start ) {
1270 $rankoffset = $start->rank - 1;
1275 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1276 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1277 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1278 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1280 # Create the document and root node
1281 require XML::LibXML;
1282 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1283 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1284 $graphml->setDocumentElement( $root );
1285 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1286 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1288 # List of attribute types to save on our objects and their corresponding
1293 'Bool' => 'boolean',
1294 'ReadingID' => 'string',
1295 'RelationshipType' => 'string',
1296 'RelationshipScope' => 'string',
1299 # Add the data keys for the graph. Include an extra key 'version' for the
1300 # GraphML output version.
1301 my %graph_data_keys;
1303 my %graph_attributes = ( 'version' => 'string' );
1304 # Graph attributes include those of Tradition and those of Collation.
1306 # TODO Use meta introspection method from duplicate_reading to do this
1307 # instead of naming custom keys.
1308 my $tmeta = $self->tradition->meta;
1309 my $cmeta = $self->meta;
1310 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1311 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1312 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1313 next if $attr->name =~ /^_/;
1314 next unless $save_types{$attr->type_constraint->name};
1315 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1317 # Extra custom keys for complex objects that should be saved in some form.
1318 # The subroutine should return a string, or undef/empty.
1319 if( $tmeta->has_method('stemmata') ) {
1320 $graph_attributes{'stemmata'} = sub {
1322 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1323 $self->tradition->stemmata;
1324 join( "\n", @stemstrs );
1328 if( $tmeta->has_method('user') ) {
1329 $graph_attributes{'user'} = sub {
1330 $self->tradition->user ? $self->tradition->user->id : undef
1334 foreach my $datum ( sort keys %graph_attributes ) {
1335 $graph_data_keys{$datum} = 'dg'.$gdi++;
1336 my $key = $root->addNewChild( $graphml_ns, 'key' );
1337 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1338 : $graph_attributes{$datum};
1339 $key->setAttribute( 'attr.name', $datum );
1340 $key->setAttribute( 'attr.type', $dtype );
1341 $key->setAttribute( 'for', 'graph' );
1342 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1345 # Add the data keys for reading nodes
1346 my %reading_attributes;
1347 my $rmeta = Text::Tradition::Collation::Reading->meta;
1348 foreach my $attr( $rmeta->get_all_attributes ) {
1349 next if $attr->name =~ /^_/;
1350 next unless $save_types{$attr->type_constraint->name};
1351 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1353 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1354 # Extra custom key for the reading morphology
1355 $reading_attributes{'lexemes'} = 'string';
1360 foreach my $datum ( sort keys %reading_attributes ) {
1361 $node_data_keys{$datum} = 'dn'.$ndi++;
1362 my $key = $root->addNewChild( $graphml_ns, 'key' );
1363 $key->setAttribute( 'attr.name', $datum );
1364 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1365 $key->setAttribute( 'for', 'node' );
1366 $key->setAttribute( 'id', $node_data_keys{$datum} );
1369 # Add the data keys for edges, that is, paths and relationships. Path
1370 # data does not come from a Moose class so is here manually.
1373 my %edge_attributes = (
1374 witness => 'string', # ID/label for a path
1375 extra => 'boolean', # Path key
1377 my @path_attributes = keys %edge_attributes; # track our manual additions
1378 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1379 foreach my $attr( $pmeta->get_all_attributes ) {
1380 next if $attr->name =~ /^_/;
1381 next unless $save_types{$attr->type_constraint->name};
1382 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1384 foreach my $datum ( sort keys %edge_attributes ) {
1385 $edge_data_keys{$datum} = 'de'.$edi++;
1386 my $key = $root->addNewChild( $graphml_ns, 'key' );
1387 $key->setAttribute( 'attr.name', $datum );
1388 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1389 $key->setAttribute( 'for', 'edge' );
1390 $key->setAttribute( 'id', $edge_data_keys{$datum} );
1393 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1394 my $xmlidname = $self->tradition->name;
1395 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1396 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1397 $xmlidname = '_'.$xmlidname;
1399 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1400 $sgraph->setAttribute( 'edgedefault', 'directed' );
1401 $sgraph->setAttribute( 'id', $xmlidname );
1402 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1403 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1404 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1405 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1406 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1408 # Tradition/collation attribute data
1409 foreach my $datum ( keys %graph_attributes ) {
1411 if( $datum eq 'version' ) {
1413 } elsif( ref( $graph_attributes{$datum} ) ) {
1414 my $sub = $graph_attributes{$datum};
1416 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1417 $value = $self->tradition->$datum;
1419 $value = $self->$datum;
1421 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1426 # Add our readings to the graph
1427 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1428 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1429 ( $n->rank < $start->rank || $n->rank > $end->rank );
1430 $use_readings{$n->id} = 1;
1431 # Add to the main graph
1432 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1433 my $node_xmlid = 'n' . $node_ctr++;
1434 $node_hash{ $n->id } = $node_xmlid;
1435 $node_el->setAttribute( 'id', $node_xmlid );
1436 foreach my $d ( keys %reading_attributes ) {
1438 # Custom serialization
1439 if( $d eq 'lexemes' ) {
1440 # If nval is a true value, we have lexemes so we need to
1441 # serialize them. Otherwise set nval to undef so that the
1442 # key is excluded from this reading.
1443 $nval = $nval ? $n->_serialize_lexemes : undef;
1444 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1447 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1448 # Adjust the ranks within the subgraph.
1449 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1450 : $nval - $rankoffset;
1452 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1457 # Add the path edges to the sequence graph
1459 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1460 # We add an edge in the graphml for every witness in $e.
1461 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1462 my @edge_wits = sort $self->path_witnesses( $e );
1463 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1464 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1465 # Skip any path from start to end; that witness is not in the subgraph.
1466 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1467 foreach my $wit ( @edge_wits ) {
1468 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1469 $node_hash{ $e->[0] },
1470 $node_hash{ $e->[1] } );
1471 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1472 $edge_el->setAttribute( 'source', $from );
1473 $edge_el->setAttribute( 'target', $to );
1474 $edge_el->setAttribute( 'id', $id );
1476 # It's a witness path, so add the witness
1478 my $key = $edge_data_keys{'witness'};
1479 # Is this an ante-corr witness?
1480 my $aclabel = $self->ac_label;
1481 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1482 # Keep the base witness
1484 # ...and record that this is an 'extra' reading path
1485 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1487 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1491 # Report the actual number of nodes and edges that went in
1492 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1493 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1495 # Add the relationship graph to the XML
1496 map { delete $edge_data_keys{$_} } @path_attributes;
1497 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1498 $node_data_keys{'id'}, \%edge_data_keys );
1500 # Save and return the thing
1501 my $result = decode_utf8( $graphml->toString(1) );
1505 sub _add_graphml_data {
1506 my( $el, $key, $value ) = @_;
1507 return unless defined $value;
1508 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1509 $data_el->setAttribute( 'key', $key );
1510 $data_el->appendText( $value );
1515 Returns a CSV alignment table representation of the collation graph, one
1516 row per witness (or witness uncorrected.)
1520 Returns a tab-separated alignment table representation of the collation graph,
1521 one row per witness (or witness uncorrected.)
1525 use Text::Tradition;
1533 my $datafile = 't/data/florilegium_tei_ps.xml';
1534 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1536 'file' => $datafile,
1539 my $c = $tradition->collation;
1540 # Export the thing to CSV
1541 my $csvstr = $c->as_csv();
1543 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1544 my @lines = split(/\n/, $csvstr );
1545 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1546 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1547 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1548 ok( @q_ac, "Found a layered witness" );
1550 my $t2 = Text::Tradition->new( input => 'Tabular',
1554 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1555 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1557 # Now do it with TSV
1558 my $tsvstr = $c->as_tsv();
1559 my $t3 = Text::Tradition->new( input => 'Tabular',
1563 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1564 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1566 my $table = $c->alignment_table;
1567 my $noaccsv = $c->as_csv({ noac => 1 });
1568 my @noaclines = split(/\n/, $noaccsv );
1569 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1570 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1571 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1573 my $safecsv = $c->as_csv({ safe_ac => 1});
1574 my @safelines = split(/\n/, $safecsv );
1575 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1576 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1577 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1578 ok( @q_ac, "Found a sanitized layered witness" );
1579 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1586 my( $self, $opts ) = @_;
1587 my $table = $self->alignment_table( $opts );
1588 my $csv_options = { binary => 1, quote_null => 0 };
1589 $csv_options->{'sep_char'} = $opts->{fieldsep};
1590 if( $opts->{fieldsep} eq "\t" ) {
1591 # If it is really tab separated, nothing is an escape char.
1592 $csv_options->{'quote_char'} = undef;
1593 $csv_options->{'escape_char'} = '';
1595 my $csv = Text::CSV->new( $csv_options );
1597 # Make the header row
1598 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1599 push( @result, $csv->string );
1600 # Make the rest of the rows
1601 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1602 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1603 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1604 $csv->combine( @row );
1605 push( @result, $csv->string );
1607 return join( "\n", @result );
1612 my $opts = shift || {};
1613 $opts->{fieldsep} = ',';
1614 return $self->_tabular( $opts );
1619 my $opts = shift || {};
1620 $opts->{fieldsep} = "\t";
1621 return $self->_tabular( $opts );
1624 =head2 alignment_table
1626 Return a reference to an alignment table, in a slightly enhanced CollateX
1627 format which looks like this:
1629 $table = { alignment => [ { witness => "SIGIL",
1630 tokens => [ { t => "TEXT" }, ... ] },
1631 { witness => "SIG2",
1632 tokens => [ { t => "TEXT" }, ... ] },
1634 length => TEXTLEN };
1638 sub alignment_table {
1639 my( $self, $opts ) = @_;
1640 if( $self->has_cached_table ) {
1641 return $self->cached_table
1642 unless $opts->{noac} || $opts->{safe_ac};
1645 # Make sure we can do this
1646 throw( "Need a linear graph in order to make an alignment table" )
1647 unless $self->linear;
1648 $self->calculate_ranks()
1649 unless $self->_graphcalc_done && $self->end->has_rank;
1651 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1652 my @all_pos = ( 1 .. $self->end->rank - 1 );
1653 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1654 # say STDERR "Making witness row(s) for " . $wit->sigil;
1655 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1656 my @row = _make_witness_row( \@wit_path, \@all_pos );
1657 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1658 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1659 push( @{$table->{'alignment'}}, $witobj );
1660 if( $wit->is_layered && !$opts->{noac} ) {
1661 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1662 $wit->sigil.$self->ac_label );
1663 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1664 my $witlabel = $opts->{safe_ac}
1665 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1666 my $witacobj = { 'witness' => $witlabel,
1667 'tokens' => \@ac_row };
1668 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1669 push( @{$table->{'alignment'}}, $witacobj );
1672 unless( $opts->{noac} || $opts->{safe_ac} ) {
1673 $self->cached_table( $table );
1678 sub _make_witness_row {
1679 my( $path, $positions ) = @_;
1681 map { $char_hash{$_} = undef } @$positions;
1683 foreach my $rdg ( @$path ) {
1684 say STDERR "rank " . $rdg->rank if $debug;
1685 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1686 $char_hash{$rdg->rank} = { 't' => $rdg };
1688 my @row = map { $char_hash{$_} } @$positions;
1689 # Fill in lacuna markers for undef spots in the row
1690 my $last_el = shift @row;
1691 my @filled_row = ( $last_el );
1692 foreach my $el ( @row ) {
1693 # If we are using node reference, make the lacuna node appear many times
1694 # in the table. If not, use the lacuna tag.
1695 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1698 push( @filled_row, $el );
1705 =head1 NAVIGATION METHODS
1707 =head2 reading_sequence( $first, $last, $sigil, $backup )
1709 Returns the ordered list of readings, starting with $first and ending
1710 with $last, for the witness given in $sigil. If a $backup sigil is
1711 specified (e.g. when walking a layered witness), it will be used wherever
1712 no $sigil path exists. If there is a base text reading, that will be
1713 used wherever no path exists for $sigil or $backup.
1717 # TODO Think about returning some lazy-eval iterator.
1718 # TODO Get rid of backup; we should know from what witness is whether we need it.
1720 sub reading_sequence {
1721 my( $self, $start, $end, $witness ) = @_;
1723 $witness = $self->baselabel unless $witness;
1724 my @readings = ( $start );
1727 while( $n && $n->id ne $end->id ) {
1728 if( exists( $seen{$n->id} ) ) {
1729 throw( "Detected loop for $witness at " . $n->id );
1733 my $next = $self->next_reading( $n, $witness );
1735 throw( "Did not find any path for $witness from reading " . $n->id );
1737 push( @readings, $next );
1740 # Check that the last reading is our end reading.
1741 my $last = $readings[$#readings];
1742 throw( "Last reading found from " . $start->text .
1743 " for witness $witness is not the end!" ) # TODO do we get this far?
1744 unless $last->id eq $end->id;
1749 =head2 next_reading( $reading, $sigil );
1751 Returns the reading that follows the given reading along the given witness
1757 # Return the successor via the corresponding path.
1759 my $answer = $self->_find_linked_reading( 'next', @_ );
1760 return undef unless $answer;
1761 return $self->reading( $answer );
1764 =head2 prior_reading( $reading, $sigil )
1766 Returns the reading that precedes the given reading along the given witness
1772 # Return the predecessor via the corresponding path.
1774 my $answer = $self->_find_linked_reading( 'prior', @_ );
1775 return $self->reading( $answer );
1778 sub _find_linked_reading {
1779 my( $self, $direction, $node, $path ) = @_;
1781 # Get a backup if we are dealing with a layered witness
1783 my $aclabel = $self->ac_label;
1784 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1788 my @linked_paths = $direction eq 'next'
1789 ? $self->sequence->edges_from( $node )
1790 : $self->sequence->edges_to( $node );
1791 return undef unless scalar( @linked_paths );
1793 # We have to find the linked path that contains all of the
1794 # witnesses supplied in $path.
1795 my( @path_wits, @alt_path_wits );
1796 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1797 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1800 foreach my $le ( @linked_paths ) {
1801 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1804 my @le_wits = sort $self->path_witnesses( $le );
1805 if( _is_within( \@path_wits, \@le_wits ) ) {
1806 # This is the right path.
1807 return $direction eq 'next' ? $le->[1] : $le->[0];
1808 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1812 # Got this far? Return the alternate path if it exists.
1813 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1816 # Got this far? Return the base path if it exists.
1817 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1820 # Got this far? We have no appropriate path.
1821 warn "Could not find $direction node from " . $node->id
1822 . " along path $path";
1828 my( $set1, $set2 ) = @_;
1829 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1830 foreach my $el ( @$set1 ) {
1831 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1836 # Return the string that joins together a list of witnesses for
1837 # display on a single path.
1838 sub _witnesses_of_label {
1839 my( $self, $label ) = @_;
1840 my $regex = $self->wit_list_separator;
1841 my @answer = split( /\Q$regex\E/, $label );
1845 =head2 common_readings
1847 Returns the list of common readings in the graph (i.e. those readings that are
1848 shared by all non-lacunose witnesses.)
1852 sub common_readings {
1854 my @common = grep { $_->is_common } $self->readings;
1858 =head2 path_text( $sigil, [, $start, $end ] )
1860 Returns the text of a witness (plus its backup, if we are using a layer)
1861 as stored in the collation. The text is returned as a string, where the
1862 individual readings are joined with spaces and the meta-readings (e.g.
1863 lacunae) are omitted. Optional specification of $start and $end allows
1864 the generation of a subset of the witness text.
1869 my( $self, $wit, $start, $end ) = @_;
1870 $start = $self->start unless $start;
1871 $end = $self->end unless $end;
1872 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1875 foreach my $r ( @path ) {
1876 unless ( $r->join_prior || !$last || $last->join_next ) {
1879 $pathtext .= $r->text;
1885 =head1 INITIALIZATION METHODS
1887 These are mostly for use by parsers.
1889 =head2 make_witness_path( $witness )
1891 Link the array of readings contained in $witness->path (and in
1892 $witness->uncorrected_path if it exists) into collation paths.
1893 Clear out the arrays when finished.
1895 =head2 make_witness_paths
1897 Call make_witness_path for all witnesses in the tradition.
1901 # For use when a collation is constructed from a base text and an apparatus.
1902 # We have the sequences of readings and just need to add path edges.
1903 # When we are done, clear out the witness path attributes, as they are no
1905 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1907 sub make_witness_paths {
1909 foreach my $wit ( $self->tradition->witnesses ) {
1910 # say STDERR "Making path for " . $wit->sigil;
1911 $self->make_witness_path( $wit );
1915 sub make_witness_path {
1916 my( $self, $wit ) = @_;
1917 my @chain = @{$wit->path};
1918 my $sig = $wit->sigil;
1919 # Add start and end if necessary
1920 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1921 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1922 foreach my $idx ( 0 .. $#chain-1 ) {
1923 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1925 if( $wit->is_layered ) {
1926 @chain = @{$wit->uncorrected_path};
1927 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1928 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1929 foreach my $idx( 0 .. $#chain-1 ) {
1930 my $source = $chain[$idx];
1931 my $target = $chain[$idx+1];
1932 $self->add_path( $source, $target, $sig.$self->ac_label )
1933 unless $self->has_path( $source, $target, $sig );
1937 $wit->clear_uncorrected_path;
1940 =head2 calculate_ranks
1942 Calculate the reading ranks (that is, their aligned positions relative
1943 to each other) for the graph. This can only be called on linear collations.
1947 use Text::Tradition;
1949 my $cxfile = 't/data/Collatex-16.xml';
1950 my $t = Text::Tradition->new(
1952 'input' => 'CollateX',
1955 my $c = $t->collation;
1958 my $table = $c->alignment_table;
1959 ok( $c->has_cached_table, "Alignment table was cached" );
1960 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1961 $c->calculate_ranks;
1962 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1963 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
1964 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
1965 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
1966 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
1972 sub calculate_ranks {
1974 # Save the existing ranks, in case we need to invalidate the cached SVG.
1975 throw( "Cannot calculate ranks on a non-linear graph" )
1976 unless $self->linear;
1978 map { $existing_ranks{$_} = $_->rank } $self->readings;
1980 # Do the rankings based on the relationship equivalence graph, starting
1981 # with the start node.
1982 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1984 # Transfer our rankings from the topological graph to the real one.
1985 foreach my $r ( $self->readings ) {
1986 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1987 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
1989 # Die. Find the last rank we calculated.
1990 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1991 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
1993 my $last = pop @all_defined;
1994 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1997 # Do we need to invalidate the cached data?
1998 if( $self->has_cached_table ) {
1999 foreach my $r ( $self->readings ) {
2000 next if defined( $existing_ranks{$r} )
2001 && $existing_ranks{$r} == $r->rank;
2002 # Something has changed, so clear the cache
2003 $self->_clear_cache;
2004 # ...and recalculate the common readings.
2005 $self->calculate_common_readings();
2009 # The graph calculation information is now up to date.
2010 $self->_graphcalc_done(1);
2015 $self->wipe_table if $self->has_cached_table;
2019 =head2 flatten_ranks
2021 A convenience method for parsing collation data. Searches the graph for readings
2022 with the same text at the same rank, and merges any that are found.
2027 my ( $self, %args ) = shift;
2028 my %unique_rank_rdg;
2030 foreach my $p ( $self->identical_readings( %args ) ) {
2031 # say STDERR "Combining readings at same rank: @$p";
2033 $self->merge_readings( @$p );
2034 # TODO see if this now makes a common point.
2036 # If we merged readings, the ranks are still fine but the alignment
2037 # table is wrong. Wipe it.
2038 $self->wipe_table() if $changed;
2041 =head2 identical_readings
2042 =head2 identical_readings( start => $startnode, end => $endnode )
2043 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2045 Goes through the graph identifying all pairs of readings that appear to be
2046 identical, and therefore able to be merged into a single reading. Returns the
2047 relevant identical pairs. Can be restricted to run over only a part of the
2048 graph, specified either by node or by rank.
2052 sub identical_readings {
2053 my ( $self, %args ) = @_;
2054 # Find where we should start and end.
2055 my $startrank = $args{startrank} || 0;
2056 if( $args{start} ) {
2057 throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2058 && $self->reading( $args{start} )->has_rank;
2059 $startrank = $self->reading( $args{start} )->rank;
2061 my $endrank = $args{endrank} || $self->end->rank;
2063 throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2064 && $self->reading( $args{end} )->has_rank;
2065 $endrank = $self->reading( $args{end} )->rank;
2068 # Make sure the ranks are correct.
2069 unless( $self->_graphcalc_done ) {
2070 $self->calculate_ranks;
2072 # Go through the readings looking for duplicates.
2073 my %unique_rank_rdg;
2075 foreach my $rdg ( $self->readings ) {
2076 next unless $rdg->has_rank;
2077 my $rk = $rdg->rank;
2078 next if $rk > $endrank || $rk < $startrank;
2079 my $key = $rk . "||" . $rdg->text;
2080 if( exists $unique_rank_rdg{$key} ) {
2081 # Make sure they don't have different grammatical forms
2082 my $ur = $unique_rank_rdg{$key};
2083 if( $rdg->is_identical( $ur ) ) {
2084 push( @pairs, [ $ur, $rdg ] );
2087 $unique_rank_rdg{$key} = $rdg;
2095 =head2 calculate_common_readings
2097 Goes through the graph identifying the readings that appear in every witness
2098 (apart from those with lacunae at that spot.) Marks them as common and returns
2103 use Text::Tradition;
2105 my $cxfile = 't/data/Collatex-16.xml';
2106 my $t = Text::Tradition->new(
2108 'input' => 'CollateX',
2111 my $c = $t->collation;
2113 my @common = $c->calculate_common_readings();
2114 is( scalar @common, 8, "Found correct number of common readings" );
2115 my @marked = sort $c->common_readings();
2116 is( scalar @common, 8, "All common readings got marked as such" );
2117 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2118 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2124 sub calculate_common_readings {
2127 map { $_->is_common( 0 ) } $self->readings;
2128 # Implicitly calls calculate_ranks
2129 my $table = $self->alignment_table;
2130 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2131 my @row = map { $_->{'tokens'}->[$idx]
2132 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2133 @{$table->{'alignment'}};
2135 foreach my $r ( @row ) {
2137 $hash{$r->id} = $r unless $r->is_meta;
2139 $hash{'UNDEF'} = $r;
2142 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2143 my( $r ) = values %hash;
2145 push( @common, $r );
2151 =head2 text_from_paths
2153 Calculate the text array for all witnesses from the path, for later consistency
2154 checking. Only to be used if there is no non-graph-based way to know the
2159 sub text_from_paths {
2161 foreach my $wit ( $self->tradition->witnesses ) {
2162 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2164 foreach my $r ( @readings ) {
2165 next if $r->is_meta;
2166 push( @text, $r->text );
2168 $wit->text( \@text );
2169 if( $wit->is_layered ) {
2170 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2171 $wit->sigil.$self->ac_label );
2173 foreach my $r ( @ucrdgs ) {
2174 next if $r->is_meta;
2175 push( @uctext, $r->text );
2177 $wit->layertext( \@uctext );
2182 =head1 UTILITY FUNCTIONS
2184 =head2 common_predecessor( $reading_a, $reading_b )
2186 Find the last reading that occurs in sequence before both the given readings.
2187 At the very least this should be $self->start.
2189 =head2 common_successor( $reading_a, $reading_b )
2191 Find the first reading that occurs in sequence after both the given readings.
2192 At the very least this should be $self->end.
2196 use Text::Tradition;
2198 my $cxfile = 't/data/Collatex-16.xml';
2199 my $t = Text::Tradition->new(
2201 'input' => 'CollateX',
2204 my $c = $t->collation;
2206 is( $c->common_predecessor( 'n24', 'n23' )->id,
2207 'n20', "Found correct common predecessor" );
2208 is( $c->common_successor( 'n24', 'n23' )->id,
2209 '__END__', "Found correct common successor" );
2211 is( $c->common_predecessor( 'n19', 'n17' )->id,
2212 'n16', "Found correct common predecessor for readings on same path" );
2213 is( $c->common_successor( 'n21', 'n10' )->id,
2214 '__END__', "Found correct common successor for readings on same path" );
2220 ## Return the closest reading that is a predecessor of both the given readings.
2221 sub common_predecessor {
2223 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2224 return $self->_common_in_path( $r1, $r2, 'predecessors' );
2227 sub common_successor {
2229 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2230 return $self->_common_in_path( $r1, $r2, 'successors' );
2234 # TODO think about how to do this without ranks...
2235 sub _common_in_path {
2236 my( $self, $r1, $r2, $dir ) = @_;
2237 my $iter = $self->end->rank;
2239 my @last_r1 = ( $r1 );
2240 my @last_r2 = ( $r2 );
2241 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2243 # say STDERR "Finding common $dir for $r1, $r2";
2244 while( !@candidates ) {
2245 last unless $iter--; # Avoid looping infinitely
2246 # Iterate separately down the graph from r1 and r2
2247 my( @new_lc1, @new_lc2 );
2248 foreach my $lc ( @last_r1 ) {
2249 foreach my $p ( $lc->$dir ) {
2250 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2251 # say STDERR "Path candidate $p from $lc";
2252 push( @candidates, $p );
2253 } elsif( !$all_seen{$p->id} ) {
2254 $all_seen{$p->id} = 'r1';
2255 push( @new_lc1, $p );
2259 foreach my $lc ( @last_r2 ) {
2260 foreach my $p ( $lc->$dir ) {
2261 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2262 # say STDERR "Path candidate $p from $lc";
2263 push( @candidates, $p );
2264 } elsif( !$all_seen{$p->id} ) {
2265 $all_seen{$p->id} = 'r2';
2266 push( @new_lc2, $p );
2270 @last_r1 = @new_lc1;
2271 @last_r2 = @new_lc2;
2273 my @answer = sort { $a->rank <=> $b->rank } @candidates;
2274 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2278 Text::Tradition::Error->throw(
2279 'ident' => 'Collation error',
2285 __PACKAGE__->meta->make_immutable;
2291 =item * Rework XML serialization in a more modular way
2297 This package is free software and is provided "as is" without express
2298 or implied warranty. You can redistribute it and/or modify it under
2299 the same terms as Perl itself.
2303 Tara L Andrews E<lt>aurum@cpan.orgE<gt>