1 package Text::Tradition::Collation;
4 use Encode qw( decode_utf8 );
8 use IPC::Run qw( run binary );
10 use Text::Tradition::Collation::Data;
11 use Text::Tradition::Collation::Reading;
12 use Text::Tradition::Collation::RelationshipStore;
13 use Text::Tradition::Error;
14 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
16 use XML::LibXML::XPathContext;
20 isa => 'Text::Tradition::Collation::Data',
58 isa => 'Text::Tradition',
59 writer => '_set_tradition',
65 Text::Tradition::Collation - a software model for a text collation
70 my $t = Text::Tradition->new(
71 'name' => 'this is a text',
73 'file' => '/path/to/tei_parallel_seg_file.xml' );
75 my $c = $t->collation;
76 my @readings = $c->readings;
77 my @paths = $c->paths;
78 my @relationships = $c->relationships;
80 my $svg_variant_graph = $t->collation->as_svg();
84 Text::Tradition is a library for representation and analysis of collated
85 texts, particularly medieval ones. The Collation is the central feature of
86 a Tradition, where the text, its sequence of readings, and its relationships
87 between readings are actually kept.
93 The constructor. Takes a hash or hashref of the following arguments:
97 =item * tradition - The Text::Tradition object to which the collation
100 =item * linear - Whether the collation should be linear; that is, whether
101 transposed readings should be treated as two linked readings rather than one,
102 and therefore whether the collation graph is acyclic. Defaults to true.
104 =item * baselabel - The default label for the path taken by a base text
105 (if any). Defaults to 'base text'.
107 =item * wit_list_separator - The string to join a list of witnesses for
108 purposes of making labels in display graphs. Defaults to ', '.
110 =item * ac_label - The extra label to tack onto a witness sigil when
111 representing another layer of path for the given witness - that is, when
112 a text has more than one possible reading due to scribal corrections or
113 the like. Defaults to ' (a.c.)'.
115 =item * wordsep - The string used to separate words in the original text.
126 =head2 wit_list_separator
134 Simple accessors for collation attributes.
138 The meta-reading at the start of every witness path.
142 The meta-reading at the end of every witness path.
146 Returns all Reading objects in the graph.
148 =head2 reading( $id )
150 Returns the Reading object corresponding to the given ID.
152 =head2 add_reading( $reading_args )
154 Adds a new reading object to the collation.
155 See L<Text::Tradition::Collation::Reading> for the available arguments.
157 =head2 del_reading( $object_or_id )
159 Removes the given reading from the collation, implicitly removing its
160 paths and relationships.
162 =head2 has_reading( $id )
164 Predicate to see whether a given reading ID is in the graph.
166 =head2 reading_witnesses( $object_or_id )
168 Returns a list of sigils whose witnesses contain the reading.
172 Returns all reading paths within the document - that is, all edges in the
173 collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
175 =head2 add_path( $source, $target, $sigil )
177 Links the given readings in the collation in sequence, under the given witness
178 sigil. The readings may be specified by object or ID.
180 =head2 del_path( $source, $target, $sigil )
182 Links the given readings in the collation in sequence, under the given witness
183 sigil. The readings may be specified by object or ID.
185 =head2 has_path( $source, $target );
187 Returns true if the two readings are linked in sequence in any witness.
188 The readings may be specified by object or ID.
192 Returns all Relationship objects in the collation.
194 =head2 add_relationship( $reading, $other_reading, $options )
196 Adds a new relationship of the type given in $options between the two readings,
197 which may be specified by object or ID. Returns a value of ( $status, @vectors)
198 where $status is true on success, and @vectors is a list of relationship edges
199 that were ultimately added.
200 See L<Text::Tradition::Collation::Relationship> for the available options.
205 my ( $class, @args ) = @_;
206 my %args = @args == 1 ? %{ $args[0] } : @args;
207 # TODO determine these from the Moose::Meta object
208 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
209 linear wordsep start end cached_table _graphcalc_done);
211 for my $attr (@delegate_attrs) {
212 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
214 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
220 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
221 $self->_set_start( $self->add_reading(
222 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
223 $self->_set_end( $self->add_reading(
224 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
227 sub register_relationship_type {
229 my %args = @_ == 1 ? %{$_[0]} : @_;
230 if( $self->relations->has_type( $args{name} ) ) {
231 throw( 'Relationship type ' . $args{name} . ' already registered' );
233 $self->relations->add_type( %args );
236 sub get_relationship_type {
237 my( $self, $name ) = @_;
238 return $self->relations->has_type( $name )
239 ? $self->relations->type( $name ) : undef;
242 ### Reading construct/destruct functions
245 my( $self, $reading ) = @_;
246 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
247 my %args = %$reading;
248 if( $args{'init'} ) {
249 # If we are initializing an empty collation, don't assume that we
250 # have set a tradition.
251 delete $args{'init'};
252 } elsif( $self->tradition->can('language') && $self->tradition->has_language
253 && !exists $args{'language'} ) {
254 $args{'language'} = $self->tradition->language;
256 $reading = Text::Tradition::Collation::Reading->new(
257 'collation' => $self,
260 # First check to see if a reading with this ID exists.
261 if( $self->reading( $reading->id ) ) {
262 throw( "Collation already has a reading with id " . $reading->id );
264 $self->_graphcalc_done(0);
265 $self->_add_reading( $reading->id => $reading );
266 # Once the reading has been added, put it in both graphs.
267 $self->sequence->add_vertex( $reading->id );
268 $self->relations->add_reading( $reading->id );
272 around del_reading => sub {
277 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
280 # Remove the reading from the graphs.
281 $self->_graphcalc_done(0);
282 $self->_clear_cache; # Explicitly clear caches to GC the reading
283 $self->sequence->delete_vertex( $arg );
284 $self->relations->delete_reading( $arg );
287 $self->$orig( $arg );
290 =head2 merge_readings( $main, $second, $concatenate, $with_str )
292 Merges the $second reading into the $main one. If $concatenate is true, then
293 the merged node will carry the text of both readings, concatenated with either
294 $with_str (if specified) or a sensible default (the empty string if the
295 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
297 The first two arguments may be either readings or reading IDs.
303 my $cxfile = 't/data/Collatex-16.xml';
304 my $t = Text::Tradition->new(
306 'input' => 'CollateX',
309 my $c = $t->collation;
311 my $rno = scalar $c->readings;
312 # Split n21 ('unto') for testing purposes
313 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
314 my $old_r = $c->reading( 'n21' );
315 $old_r->alter_text( 'to' );
316 $c->del_path( 'n20', 'n21', 'A' );
317 $c->add_path( 'n20', 'n21p0', 'A' );
318 $c->add_path( 'n21p0', 'n21', 'A' );
319 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
321 ok( $c->reading( 'n21p0' ), "New reading exists" );
322 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
324 # Combine n3 and n4 ( with his )
325 $c->merge_readings( 'n3', 'n4', 1 );
326 ok( !$c->reading('n4'), "Reading n4 is gone" );
327 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
329 # Collapse n9 and n10 ( rood / root )
330 $c->merge_readings( 'n9', 'n10' );
331 ok( !$c->reading('n10'), "Reading n10 is gone" );
332 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
334 # Combine n21 and n21p0
335 my $remaining = $c->reading('n21');
336 $remaining ||= $c->reading('n22'); # one of these should still exist
337 $c->merge_readings( 'n21p0', $remaining, 1 );
338 ok( !$c->reading('n21'), "Reading $remaining is gone" );
339 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
349 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
350 my $mergemeta = $kept_obj->is_meta;
351 throw( "Cannot merge meta and non-meta reading" )
352 unless ( $mergemeta && $del_obj->is_meta )
353 || ( !$mergemeta && !$del_obj->is_meta );
355 throw( "Cannot merge with start or end node" )
356 if( $kept_obj eq $self->start || $kept_obj eq $self->end
357 || $del_obj eq $self->start || $del_obj eq $self->end );
358 throw( "Cannot combine text of meta readings" ) if $combine;
360 # We only need the IDs for adding paths to the graph, not the reading
361 # objects themselves.
362 my $kept = $kept_obj->id;
363 my $deleted = $del_obj->id;
364 $self->_graphcalc_done(0);
366 # The kept reading should inherit the paths and the relationships
367 # of the deleted reading.
368 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
369 my @vector = ( $kept );
370 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
371 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
372 next if $vector[0] eq $vector[1]; # Don't add a self loop
373 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
374 $self->sequence->add_edge( @vector );
375 my $fwits = $self->sequence->get_edge_attributes( @vector );
376 @wits{keys %$fwits} = values %$fwits;
377 $self->sequence->set_edge_attributes( @vector, \%wits );
379 $self->relations->merge_readings( $kept, $deleted, $combine );
381 # Do the deletion deed.
383 # Combine the text of the readings
384 my $joinstr = $combine_char;
385 unless( defined $joinstr ) {
386 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
387 $joinstr = $self->wordsep unless defined $joinstr;
389 $kept_obj->_combine( $del_obj, $joinstr );
391 $self->del_reading( $deleted );
394 =head2 merge_related( @relationship_types )
396 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
398 WARNING: This operation cannot be undone.
407 my $t = Text::Tradition->new(
410 'file' => 't/data/legendfrag.xml',
412 my $c = $t->collation;
415 map { $rdg_ids{$_} = 1 } $c->readings;
416 $c->merge_related( 'orthographic' );
417 is( scalar( $c->readings ), keys( %rdg_ids ) - 8,
418 "Successfully collapsed orthographic variation" );
419 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.1 /;
420 foreach my $rid ( keys %rdg_ids ) {
421 my $exp = $rdg_ids{$rid};
422 is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
423 ( $exp ? "retained" : "removed" ) );
425 ok( $c->linear, "Graph is still linear" );
427 $c->calculate_ranks; # This should succeed
428 ok( 1, "Can still calculate ranks on the new graph" );
430 ok( 0, "Rank calculation on merged graph failed: $@" );
433 # Now add some transpositions
434 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
435 $c->merge_related( 'transposition' );
436 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
437 "Transposed relationship is merged away" );
438 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
439 ok( !$c->linear, "Graph is no longer linear" );
441 $c->calculate_ranks; # This should fail
442 ok( 0, "Rank calculation happened on nonlinear graph?!" );
443 } catch ( Text::Tradition::Error $e ) {
444 is( $e->message, 'Cannot calculate ranks on a non-linear graph',
445 "Rank calculation on merged graph threw an error" );
454 # TODO: there should be a way to display merged without affecting the underlying data!
459 map { $reltypehash{$_} = 1 } @_;
461 # Set up the filter for finding related readings
463 exists $reltypehash{$_[0]->type};
467 # Go through all readings looking for related ones
468 foreach my $r ( $self->readings ) {
469 next unless $self->reading( "$r" ); # might have been deleted meanwhile
470 my @related = $self->related_readings( $r, $filter );
472 push( @related, $r );
474 scalar $b->witnesses <=> scalar $a->witnesses
476 my $keep = shift @related;
477 foreach my $delr ( @related ) {
479 unless( $self->get_relationship( $keep, $delr )->colocated );
480 $self->merge_readings( $keep, $delr );
484 $self->linear( $linear );
487 =head2 compress_readings
489 Where possible in the graph, compresses plain sequences of readings into a
490 single reading. The sequences must consist of readings with no
491 relationships to other readings, with only a single witness path between
492 them and no other witness paths from either that would skip the other. The
493 readings must also not be marked as nonsense or bad grammar.
495 WARNING: This operation cannot be undone.
499 sub compress_readings {
501 # Anywhere in the graph that there is a reading that joins only to a single
502 # successor, and neither of these have any relationships, just join the two
504 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
505 # Now look for readings that can be joined to their successors.
506 next unless $rdg->is_combinable;
508 while( $self->sequence->successors( $rdg ) == 1 ) {
509 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
510 throw( "Infinite loop" ) if $seen{$next->id};
511 $seen{$next->id} = 1;
512 last if $self->sequence->predecessors( $next ) > 1;
513 last unless $next->is_combinable;
514 say "Joining readings $rdg and $next";
515 $self->merge_readings( $rdg, $next, 1 );
518 # Make sure we haven't screwed anything up
519 foreach my $wit ( $self->tradition->witnesses ) {
520 my $pathtext = $self->path_text( $wit->sigil );
521 my $origtext = join( ' ', @{$wit->text} );
522 throw( "Text differs for witness " . $wit->sigil )
523 unless $pathtext eq $origtext;
524 if( $wit->is_layered ) {
525 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
526 $origtext = join( ' ', @{$wit->layertext} );
527 throw( "Ante-corr text differs for witness " . $wit->sigil )
528 unless $pathtext eq $origtext;
532 $self->relations->rebuild_equivalence();
533 $self->calculate_ranks();
536 # Helper function for manipulating the graph.
537 sub _stringify_args {
538 my( $self, $first, $second, @args ) = @_;
540 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
541 $second = $second->id
542 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
543 return( $first, $second, @args );
546 # Helper function for manipulating the graph.
547 sub _objectify_args {
548 my( $self, $first, $second, $arg ) = @_;
549 $first = $self->reading( $first )
550 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
551 $second = $self->reading( $second )
552 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
553 return( $first, $second, $arg );
556 =head2 duplicate_reading( $reading, @witlist )
558 Split the given reading into two, so that the new reading is in the path for
559 the witnesses given in @witlist. If the result is that certain non-colocated
560 relationships (e.g. transpositions) are no longer valid, these will be removed.
561 Returns the newly-created reading.
565 use Test::More::UTF8;
569 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
570 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
571 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
573 my $sc = $st->collation;
575 ok( $sc->reading('n131'), "Tradition has the affected reading" );
576 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
577 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
579 # Detach the erroneously collated reading
580 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
581 ok( $newr, "New reading was created" );
582 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
583 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
584 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
585 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
586 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
588 # Check that the bad transposition is gone
589 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
590 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
592 # The collation should not be fixed
593 my @pairs = $sc->identical_readings();
594 is( scalar @pairs, 0, "Not re-collated yet" );
596 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
597 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
598 is( scalar @pairs, 3, "Found three more identical readings" );
599 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
600 $sc->flatten_ranks();
601 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
603 # Check that we can't "duplicate" a reading with no wits or with all wits
605 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
606 ok( 0, "Reading duplication without witnesses throws an error" );
607 } catch( Text::Tradition::Error $e ) {
608 like( $e->message, qr/Must specify one or more witnesses/,
609 "Reading duplication without witnesses throws the expected error" );
611 ok( 0, "Reading duplication without witnesses threw the wrong error" );
615 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
616 ok( 0, "Reading duplication with all witnesses throws an error" );
617 } catch( Text::Tradition::Error $e ) {
618 like( $e->message, qr/Cannot join all witnesses/,
619 "Reading duplication with all witnesses throws the expected error" );
621 ok( 0, "Reading duplication with all witnesses threw the wrong error" );
628 sub duplicate_reading {
629 my( $self, $r, @wits ) = @_;
630 # Check that we are not doing anything unwise.
631 throw( "Must specify one or more witnesses for the duplicated reading" )
633 unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
634 $r = $self->reading( $r );
636 throw( "Cannot duplicate a meta-reading" )
638 throw( "Cannot join all witnesses to the new reading" )
639 if scalar( @wits ) == scalar( $r->witnesses );
641 # Get all the reading attributes and duplicate them.
642 my $rmeta = Text::Tradition::Collation::Reading->meta;
644 foreach my $attr( $rmeta->get_all_attributes ) {
645 next if $attr->name =~ /^_/;
646 my $acc = $attr->get_read_method;
647 if( !$acc && $attr->has_applied_traits ) {
648 my $tr = $attr->applied_traits;
649 if( $tr->[0] =~ /::(Array|Hash)$/ ) {
651 my %methods = reverse %{$attr->handles};
652 $acc = $methods{elements};
653 $args{$attr->name} = $which eq 'Array'
654 ? [ $r->$acc ] : { $r->$acc };
657 $args{$attr->name} = $r->$acc if $acc;
660 # By definition the new reading will no longer be common.
661 $args{is_common} = 0;
662 # The new reading also needs its own ID.
663 $args{id} = $self->_generate_dup_id( $r->id );
665 # Try to make the new reading.
666 my $newr = $self->add_reading( \%args );
667 # The old reading is also no longer common.
670 # For each of the witnesses, dissociate from the old reading and
671 # associate with the new.
672 foreach my $wit ( @wits ) {
673 my $prior = $self->prior_reading( $r, $wit );
674 my $next = $self->next_reading( $r, $wit );
675 $self->del_path( $prior, $r, $wit );
676 $self->add_path( $prior, $newr, $wit );
677 $self->del_path( $r, $next, $wit );
678 $self->add_path( $newr, $next, $wit );
681 # If the graph is ranked, we need to look for relationships that are now
682 # invalid (i.e. 'non-colocation' types that might now be colocated) and
683 # remove them. If not, we can skip it.
686 my @deleted_relations;
687 if( $self->end->has_rank ) {
688 # Find the point where we can stop checking
689 $succ = $self->common_successor( $r, $newr );
691 # Hash the existing ranks
692 foreach my $rdg ( $self->readings ) {
693 $rrk{$rdg->id} = $rdg->rank;
695 # Calculate the new ranks
696 $self->calculate_ranks();
698 # Check for invalid non-colocated relationships among changed-rank readings
699 # from where the ranks start changing up to $succ
700 my $lastrank = $succ->rank;
701 foreach my $rdg ( $self->readings ) {
702 next if $rdg->rank > $lastrank;
703 next if $rdg->rank == $rrk{$rdg->id};
704 my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
705 next unless @noncolo;
706 foreach my $nc ( @noncolo ) {
707 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
708 push( @deleted_relations, [ $rdg->id, $nc->id ] );
713 return ( $newr, @deleted_relations );
716 sub _generate_dup_id {
717 my( $self, $rid ) = @_;
722 if( $self->has_reading( $newid ) ) {
735 # We only need the IDs for adding paths to the graph, not the reading
736 # objects themselves.
737 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
739 $self->_graphcalc_done(0);
740 # Connect the readings
741 unless( $self->sequence->has_edge( $source, $target ) ) {
742 $self->sequence->add_edge( $source, $target );
743 $self->relations->add_equivalence_edge( $source, $target );
745 # Note the witness in question
746 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
752 if( ref( $_[0] ) eq 'ARRAY' ) {
759 # We only need the IDs for removing paths from the graph, not the reading
760 # objects themselves.
761 my( $source, $target, $wit ) = $self->_stringify_args( @args );
763 $self->_graphcalc_done(0);
764 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
765 $self->sequence->delete_edge_attribute( $source, $target, $wit );
767 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
768 $self->sequence->delete_edge( $source, $target );
769 $self->relations->delete_equivalence_edge( $source, $target );
774 # Extra graph-alike utility
777 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
778 return undef unless $self->sequence->has_edge( $source, $target );
779 return $self->sequence->has_edge_attribute( $source, $target, $wit );
782 =head2 clear_witness( @sigil_list )
784 Clear the given witnesses out of the collation entirely, removing references
785 to them in paths, and removing readings that belong only to them. Should only
786 be called via $tradition->del_witness.
791 my( $self, @sigils ) = @_;
793 $self->_graphcalc_done(0);
794 # Clear the witness(es) out of the paths
795 foreach my $e ( $self->paths ) {
796 foreach my $sig ( @sigils ) {
797 $self->del_path( $e, $sig );
801 # Clear out the newly unused readings
802 foreach my $r ( $self->readings ) {
803 unless( $self->reading_witnesses( $r ) ) {
804 $self->del_reading( $r );
809 sub add_relationship {
811 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
812 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
813 foreach my $v ( @vectors ) {
814 next unless $self->get_relationship( $v )->colocated;
815 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
816 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
817 $self->_graphcalc_done(0);
825 around qw/ get_relationship del_relationship / => sub {
829 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
832 my @stringargs = $self->_stringify_args( @args );
833 $self->$orig( @stringargs );
836 =head2 reading_witnesses( $reading )
838 Return a list of sigils corresponding to the witnesses in which the reading appears.
842 sub reading_witnesses {
843 my( $self, $reading ) = @_;
844 # We need only check either the incoming or the outgoing edges; I have
845 # arbitrarily chosen "incoming". Thus, special-case the start node.
846 if( $reading eq $self->start ) {
847 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
850 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
851 my $wits = $self->sequence->get_edge_attributes( @$e );
852 @all_witnesses{ keys %$wits } = 1;
854 my $acstr = $self->ac_label;
855 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
856 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
858 return keys %all_witnesses;
861 =head1 OUTPUT METHODS
863 =head2 as_svg( \%options )
865 Returns an SVG string that represents the graph, via as_dot and graphviz.
866 See as_dot for a list of options. Must have GraphViz (dot) installed to run.
871 my( $self, $opts ) = @_;
872 throw( "Need GraphViz installed to output SVG" )
873 unless File::Which::which( 'dot' );
874 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
875 $self->calculate_ranks()
876 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
877 my @cmd = qw/dot -Tsvg/;
879 my $dotfile = File::Temp->new();
881 # $dotfile->unlink_on_destroy(0);
882 binmode $dotfile, ':utf8';
883 print $dotfile $self->as_dot( $opts );
884 push( @cmd, $dotfile->filename );
885 run( \@cmd, ">", binary(), \$svg );
886 $svg = decode_utf8( $svg );
891 =head2 as_dot( \%options )
893 Returns a string that is the collation graph expressed in dot
894 (i.e. GraphViz) format. Options include:
909 my( $self, $opts ) = @_;
910 my $startrank = $opts->{'from'} if $opts;
911 my $endrank = $opts->{'to'} if $opts;
912 my $color_common = $opts->{'color_common'} if $opts;
913 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
914 && $self->end->rank > 100;
915 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
917 # Check the arguments
919 return if $endrank && $startrank > $endrank;
920 return if $startrank > $self->end->rank;
922 if( defined $endrank ) {
923 return if $endrank < 0;
924 $endrank = undef if $endrank == $self->end->rank;
927 my $graph_name = $self->tradition->name;
928 $graph_name =~ s/[^\w\s]//g;
929 $graph_name = join( '_', split( /\s+/, $graph_name ) );
937 'fillcolor' => 'white',
942 'arrowhead' => 'open',
943 'color' => '#000000',
944 'fontcolor' => '#000000',
947 my $dot = sprintf( "digraph %s {\n", $graph_name );
948 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
949 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
951 # Output substitute start/end readings if necessary
953 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
956 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
958 if( $STRAIGHTENHACK ) {
960 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
961 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
962 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
964 my %used; # Keep track of the readings that actually appear in the graph
965 # Sort the readings by rank if we have ranks; this speeds layout.
966 my @all_readings = $self->end->has_rank
967 ? sort { $a->rank <=> $b->rank } $self->readings
969 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
970 foreach my $reading ( @all_readings ) {
971 # Only output readings within our rank range.
972 next if $startrank && $reading->rank < $startrank;
973 next if $endrank && $reading->rank > $endrank;
974 $used{$reading->id} = 1;
975 # Need not output nodes without separate labels
976 next if $reading->id eq $reading->text;
978 my $label = $reading->text;
979 $label .= '-' if $reading->join_next;
980 $label = "-$label" if $reading->join_prior;
981 $label =~ s/\"/\\\"/g;
982 $rattrs->{'label'} = $label;
983 $rattrs->{'id'} = $reading->id;
984 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
985 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
988 # Add the real edges. Need to weight one edge per rank jump, in a
990 # my $weighted = $self->_add_edge_weights;
991 my @edges = $self->paths;
992 my( %substart, %subend );
993 foreach my $edge ( @edges ) {
994 # Do we need to output this edge?
995 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
996 my $label = $self->_path_display_label( $opts,
997 $self->path_witnesses( $edge ) );
998 my $variables = { %edge_attrs, 'label' => $label };
1000 # Account for the rank gap if necessary
1001 my $rank0 = $self->reading( $edge->[0] )->rank
1002 if $self->reading( $edge->[0] )->has_rank;
1003 my $rank1 = $self->reading( $edge->[1] )->rank
1004 if $self->reading( $edge->[1] )->has_rank;
1005 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1006 $variables->{'minlen'} = $rank1 - $rank0;
1009 # Add the calculated edge weights
1010 # if( exists $weighted->{$edge->[0]}
1011 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
1012 # # $variables->{'color'} = 'red';
1013 # $variables->{'weight'} = 3.0;
1016 # EXPERIMENTAL: make edge width reflect no. of witnesses
1017 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1018 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1020 my $varopts = _dot_attr_string( $variables );
1021 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1022 $edge->[0], $edge->[1], $varopts );
1023 } elsif( $used{$edge->[0]} ) {
1024 $subend{$edge->[0]} = $edge->[1];
1025 } elsif( $used{$edge->[1]} ) {
1026 $substart{$edge->[1]} = $edge->[0];
1030 # If we are asked to, add relationship links
1031 if( exists $opts->{show_relations} ) {
1032 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1033 if( $filter eq 'transposition' ) {
1034 $filter =~ qr/^transposition$/;
1037 my @types = sort( map { $_->name } $self->relations->types );
1038 if( exists $opts->{graphcolors} ) {
1039 foreach my $tdx ( 0 .. $#types ) {
1040 $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1043 map { $typecolors{$_} = '#FFA14F' } @types;
1045 foreach my $redge ( $self->relationships ) {
1046 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1047 my $rel = $self->get_relationship( $redge );
1048 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1050 arrowhead => 'none',
1051 color => $typecolors{$rel->type},
1052 constraint => 'false',
1055 unless( exists $opts->{graphcolors} ) {
1056 $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1058 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1059 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1064 # Add substitute start and end edges if necessary
1065 foreach my $node ( keys %substart ) {
1066 my $witstr = $self->_path_display_label( $opts,
1067 $self->path_witnesses( $substart{$node}, $node ) );
1068 my $variables = { %edge_attrs, 'label' => $witstr };
1069 my $nrdg = $self->reading( $node );
1070 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1071 # Substart is actually one lower than $startrank
1072 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1074 my $varopts = _dot_attr_string( $variables );
1075 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1077 foreach my $node ( keys %subend ) {
1078 my $witstr = $self->_path_display_label( $opts,
1079 $self->path_witnesses( $node, $subend{$node} ) );
1080 my $variables = { %edge_attrs, 'label' => $witstr };
1081 my $varopts = _dot_attr_string( $variables );
1082 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1085 if( $STRAIGHTENHACK ) {
1086 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1087 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1094 sub _dot_attr_string {
1097 foreach my $k ( sort keys %$hash ) {
1098 my $v = $hash->{$k};
1099 push( @attrs, $k.'="'.$v.'"' );
1101 return( '[ ' . join( ', ', @attrs ) . ' ]' );
1104 sub _add_edge_weights {
1106 # Walk the graph from START to END, choosing the successor node with
1107 # the largest number of witness paths each time.
1109 my $curr = $self->start->id;
1110 my $ranked = $self->end->has_rank;
1111 while( $curr ne $self->end->id ) {
1112 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
1113 my @succ = sort { $self->path_witnesses( $curr, $a )
1114 <=> $self->path_witnesses( $curr, $b ) }
1115 $self->sequence->successors( $curr );
1116 my $next = pop @succ;
1117 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
1118 # Try to avoid lacunae in the weighted path.
1120 ( $self->reading( $next )->is_lacuna ||
1121 $nextrank - $rank > 1 ) ){
1124 $weighted->{$curr} = $next;
1130 =head2 path_witnesses( $edge )
1132 Returns the list of sigils whose witnesses are associated with the given edge.
1133 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1137 sub path_witnesses {
1138 my( $self, @edge ) = @_;
1139 # If edge is an arrayref, cope.
1140 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1141 my $e = shift @edge;
1144 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1148 # Helper function. Make a display label for the given witnesses, showing a.c.
1149 # witnesses only where the main witness is not also in the list.
1150 sub _path_display_label {
1154 map { $wits{$_} = 1 } @_;
1156 # If an a.c. wit is listed, remove it if the main wit is also listed.
1157 # Otherwise keep it for explicit listing.
1158 my $aclabel = $self->ac_label;
1160 foreach my $w ( sort keys %wits ) {
1161 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1162 if( exists $wits{$1} ) {
1165 push( @disp_ac, $w );
1170 if( $opts->{'explicit_wits'} ) {
1171 return join( ', ', sort keys %wits );
1173 # See if we are in a majority situation.
1174 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1175 $maj = $maj > 5 ? $maj : 5;
1176 if( scalar keys %wits > $maj ) {
1177 unshift( @disp_ac, 'majority' );
1178 return join( ', ', @disp_ac );
1180 return join( ', ', sort keys %wits );
1185 =head2 readings_at_rank( $rank )
1187 Returns a list of readings at a given rank, taken from the alignment table.
1191 sub readings_at_rank {
1192 my( $self, $rank ) = @_;
1193 my $table = $self->alignment_table;
1194 # Table rank is real rank - 1.
1195 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1197 foreach my $e ( @elements ) {
1198 next unless ref( $e ) eq 'HASH';
1199 next unless exists $e->{'t'};
1200 $readings{$e->{'t'}->id} = $e->{'t'};
1202 return values %readings;
1207 Returns a GraphML representation of the collation. The GraphML will contain
1208 two graphs. The first expresses the attributes of the readings and the witness
1209 paths that link them; the second expresses the relationships that link the
1210 readings. This is the native transfer format for a tradition.
1214 use Text::Tradition;
1220 my $datafile = 't/data/florilegium_tei_ps.xml';
1221 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1223 'file' => $datafile,
1226 ok( $tradition, "Got a tradition object" );
1227 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1228 ok( $tradition->collation, "Tradition has a collation" );
1230 my $c = $tradition->collation;
1231 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1232 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1233 is( scalar $c->relationships, 0, "Collation has all relationships" );
1235 # Add a few relationships
1236 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1237 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1238 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
1240 # Now write it to GraphML and parse it again.
1242 my $graphml = $c->as_graphml;
1243 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1244 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1245 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1246 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1248 # Now add a stemma, write to GraphML, and look at the output.
1250 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1251 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1252 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1253 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1254 $graphml = $c->as_graphml;
1255 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1262 ## TODO MOVE this to Tradition.pm and modularize it better
1264 my( $self, $options ) = @_;
1265 $self->calculate_ranks unless $self->_graphcalc_done;
1267 my $start = $options->{'from'}
1268 ? $self->reading( $options->{'from'} ) : $self->start;
1269 my $end = $options->{'to'}
1270 ? $self->reading( $options->{'to'} ) : $self->end;
1271 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1272 throw( 'Start node must be before end node' );
1274 # The readings need to be ranked for this to work.
1275 $start = $self->start unless $start->has_rank;
1276 $end = $self->end unless $end->has_rank;
1278 unless( $start eq $self->start ) {
1279 $rankoffset = $start->rank - 1;
1284 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1285 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1286 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1287 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1289 # Create the document and root node
1290 require XML::LibXML;
1291 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1292 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1293 $graphml->setDocumentElement( $root );
1294 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1295 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1297 # List of attribute types to save on our objects and their corresponding
1302 'Bool' => 'boolean',
1303 'ReadingID' => 'string',
1304 'RelationshipType' => 'string',
1305 'RelationshipScope' => 'string',
1308 # Add the data keys for the graph. Include an extra key 'version' for the
1309 # GraphML output version.
1310 my %graph_data_keys;
1312 my %graph_attributes = ( 'version' => 'string' );
1313 # Graph attributes include those of Tradition and those of Collation.
1315 # TODO Use meta introspection method from duplicate_reading to do this
1316 # instead of naming custom keys.
1317 my $tmeta = $self->tradition->meta;
1318 my $cmeta = $self->meta;
1319 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1320 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1321 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1322 next if $attr->name =~ /^_/;
1323 next unless $save_types{$attr->type_constraint->name};
1324 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1326 # Extra custom keys for complex objects that should be saved in some form.
1327 # The subroutine should return a string, or undef/empty.
1328 if( $tmeta->has_method('stemmata') ) {
1329 $graph_attributes{'stemmata'} = sub {
1331 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1332 $self->tradition->stemmata;
1333 join( "\n", @stemstrs );
1337 if( $tmeta->has_method('user') ) {
1338 $graph_attributes{'user'} = sub {
1339 $self->tradition->user ? $self->tradition->user->id : undef
1343 foreach my $datum ( sort keys %graph_attributes ) {
1344 $graph_data_keys{$datum} = 'dg'.$gdi++;
1345 my $key = $root->addNewChild( $graphml_ns, 'key' );
1346 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1347 : $graph_attributes{$datum};
1348 $key->setAttribute( 'attr.name', $datum );
1349 $key->setAttribute( 'attr.type', $dtype );
1350 $key->setAttribute( 'for', 'graph' );
1351 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1354 # Add the data keys for reading nodes
1355 my %reading_attributes;
1356 my $rmeta = Text::Tradition::Collation::Reading->meta;
1357 foreach my $attr( $rmeta->get_all_attributes ) {
1358 next if $attr->name =~ /^_/;
1359 next unless $save_types{$attr->type_constraint->name};
1360 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1362 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1363 # Extra custom key for the reading morphology
1364 $reading_attributes{'lexemes'} = 'string';
1369 foreach my $datum ( sort keys %reading_attributes ) {
1370 $node_data_keys{$datum} = 'dn'.$ndi++;
1371 my $key = $root->addNewChild( $graphml_ns, 'key' );
1372 $key->setAttribute( 'attr.name', $datum );
1373 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1374 $key->setAttribute( 'for', 'node' );
1375 $key->setAttribute( 'id', $node_data_keys{$datum} );
1378 # Add the data keys for edges, that is, paths and relationships. Path
1379 # data does not come from a Moose class so is here manually.
1382 my %edge_attributes = (
1383 witness => 'string', # ID/label for a path
1384 extra => 'boolean', # Path key
1386 my @path_attributes = keys %edge_attributes; # track our manual additions
1387 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1388 foreach my $attr( $pmeta->get_all_attributes ) {
1389 next if $attr->name =~ /^_/;
1390 next unless $save_types{$attr->type_constraint->name};
1391 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1393 foreach my $datum ( sort keys %edge_attributes ) {
1394 $edge_data_keys{$datum} = 'de'.$edi++;
1395 my $key = $root->addNewChild( $graphml_ns, 'key' );
1396 $key->setAttribute( 'attr.name', $datum );
1397 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1398 $key->setAttribute( 'for', 'edge' );
1399 $key->setAttribute( 'id', $edge_data_keys{$datum} );
1402 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1403 my $xmlidname = $self->tradition->name;
1404 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1405 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1406 $xmlidname = '_'.$xmlidname;
1408 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1409 $sgraph->setAttribute( 'edgedefault', 'directed' );
1410 $sgraph->setAttribute( 'id', $xmlidname );
1411 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1412 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1413 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1414 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1415 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1417 # Tradition/collation attribute data
1418 foreach my $datum ( keys %graph_attributes ) {
1420 if( $datum eq 'version' ) {
1422 } elsif( ref( $graph_attributes{$datum} ) ) {
1423 my $sub = $graph_attributes{$datum};
1425 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1426 $value = $self->tradition->$datum;
1428 $value = $self->$datum;
1430 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1435 # Add our readings to the graph
1436 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1437 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1438 ( $n->rank < $start->rank || $n->rank > $end->rank );
1439 $use_readings{$n->id} = 1;
1440 # Add to the main graph
1441 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1442 my $node_xmlid = 'n' . $node_ctr++;
1443 $node_hash{ $n->id } = $node_xmlid;
1444 $node_el->setAttribute( 'id', $node_xmlid );
1445 foreach my $d ( keys %reading_attributes ) {
1447 # Custom serialization
1448 if( $d eq 'lexemes' ) {
1449 # If nval is a true value, we have lexemes so we need to
1450 # serialize them. Otherwise set nval to undef so that the
1451 # key is excluded from this reading.
1452 $nval = $nval ? $n->_serialize_lexemes : undef;
1453 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1456 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1457 # Adjust the ranks within the subgraph.
1458 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1459 : $nval - $rankoffset;
1461 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1466 # Add the path edges to the sequence graph
1468 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1469 # We add an edge in the graphml for every witness in $e.
1470 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1471 my @edge_wits = sort $self->path_witnesses( $e );
1472 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1473 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1474 # Skip any path from start to end; that witness is not in the subgraph.
1475 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1476 foreach my $wit ( @edge_wits ) {
1477 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1478 $node_hash{ $e->[0] },
1479 $node_hash{ $e->[1] } );
1480 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1481 $edge_el->setAttribute( 'source', $from );
1482 $edge_el->setAttribute( 'target', $to );
1483 $edge_el->setAttribute( 'id', $id );
1485 # It's a witness path, so add the witness
1487 my $key = $edge_data_keys{'witness'};
1488 # Is this an ante-corr witness?
1489 my $aclabel = $self->ac_label;
1490 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1491 # Keep the base witness
1493 # ...and record that this is an 'extra' reading path
1494 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1496 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1500 # Report the actual number of nodes and edges that went in
1501 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1502 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1504 # Add the relationship graph to the XML
1505 map { delete $edge_data_keys{$_} } @path_attributes;
1506 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1507 $node_data_keys{'id'}, \%edge_data_keys );
1509 # Save and return the thing
1510 my $result = decode_utf8( $graphml->toString(1) );
1514 sub _add_graphml_data {
1515 my( $el, $key, $value ) = @_;
1516 return unless defined $value;
1517 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1518 $data_el->setAttribute( 'key', $key );
1519 $data_el->appendText( $value );
1524 Returns a CSV alignment table representation of the collation graph, one
1525 row per witness (or witness uncorrected.)
1529 Returns a tab-separated alignment table representation of the collation graph,
1530 one row per witness (or witness uncorrected.)
1534 use Text::Tradition;
1542 my $datafile = 't/data/florilegium_tei_ps.xml';
1543 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1545 'file' => $datafile,
1548 my $c = $tradition->collation;
1549 # Export the thing to CSV
1550 my $csvstr = $c->as_csv();
1552 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1553 my @lines = split(/\n/, $csvstr );
1554 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1555 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1556 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1557 ok( @q_ac, "Found a layered witness" );
1559 my $t2 = Text::Tradition->new( input => 'Tabular',
1563 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1564 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1566 # Now do it with TSV
1567 my $tsvstr = $c->as_tsv();
1568 my $t3 = Text::Tradition->new( input => 'Tabular',
1572 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1573 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1575 my $table = $c->alignment_table;
1576 my $noaccsv = $c->as_csv({ noac => 1 });
1577 my @noaclines = split(/\n/, $noaccsv );
1578 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1579 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1580 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1582 my $safecsv = $c->as_csv({ safe_ac => 1});
1583 my @safelines = split(/\n/, $safecsv );
1584 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1585 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1586 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1587 ok( @q_ac, "Found a sanitized layered witness" );
1588 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1595 my( $self, $opts ) = @_;
1596 my $table = $self->alignment_table( $opts );
1597 my $csv_options = { binary => 1, quote_null => 0 };
1598 $csv_options->{'sep_char'} = $opts->{fieldsep};
1599 if( $opts->{fieldsep} eq "\t" ) {
1600 # If it is really tab separated, nothing is an escape char.
1601 $csv_options->{'quote_char'} = undef;
1602 $csv_options->{'escape_char'} = '';
1604 my $csv = Text::CSV->new( $csv_options );
1606 # Make the header row
1607 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1608 push( @result, $csv->string );
1609 # Make the rest of the rows
1610 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1611 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1612 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1613 $csv->combine( @row );
1614 push( @result, $csv->string );
1616 return join( "\n", @result );
1621 my $opts = shift || {};
1622 $opts->{fieldsep} = ',';
1623 return $self->_tabular( $opts );
1628 my $opts = shift || {};
1629 $opts->{fieldsep} = "\t";
1630 return $self->_tabular( $opts );
1633 =head2 alignment_table
1635 Return a reference to an alignment table, in a slightly enhanced CollateX
1636 format which looks like this:
1638 $table = { alignment => [ { witness => "SIGIL",
1639 tokens => [ { t => "TEXT" }, ... ] },
1640 { witness => "SIG2",
1641 tokens => [ { t => "TEXT" }, ... ] },
1643 length => TEXTLEN };
1647 sub alignment_table {
1648 my( $self, $opts ) = @_;
1649 if( $self->has_cached_table ) {
1650 return $self->cached_table
1651 unless $opts->{noac} || $opts->{safe_ac};
1654 # Make sure we can do this
1655 throw( "Need a linear graph in order to make an alignment table" )
1656 unless $self->linear;
1657 $self->calculate_ranks()
1658 unless $self->_graphcalc_done && $self->end->has_rank;
1660 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1661 my @all_pos = ( 1 .. $self->end->rank - 1 );
1662 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1663 # say STDERR "Making witness row(s) for " . $wit->sigil;
1664 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1665 my @row = _make_witness_row( \@wit_path, \@all_pos );
1666 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1667 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1668 push( @{$table->{'alignment'}}, $witobj );
1669 if( $wit->is_layered && !$opts->{noac} ) {
1670 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1671 $wit->sigil.$self->ac_label );
1672 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1673 my $witlabel = $opts->{safe_ac}
1674 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1675 my $witacobj = { 'witness' => $witlabel,
1676 'tokens' => \@ac_row };
1677 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1678 push( @{$table->{'alignment'}}, $witacobj );
1681 unless( $opts->{noac} || $opts->{safe_ac} ) {
1682 $self->cached_table( $table );
1687 sub _make_witness_row {
1688 my( $path, $positions ) = @_;
1690 map { $char_hash{$_} = undef } @$positions;
1692 foreach my $rdg ( @$path ) {
1693 say STDERR "rank " . $rdg->rank if $debug;
1694 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1695 $char_hash{$rdg->rank} = { 't' => $rdg };
1697 my @row = map { $char_hash{$_} } @$positions;
1698 # Fill in lacuna markers for undef spots in the row
1699 my $last_el = shift @row;
1700 my @filled_row = ( $last_el );
1701 foreach my $el ( @row ) {
1702 # If we are using node reference, make the lacuna node appear many times
1703 # in the table. If not, use the lacuna tag.
1704 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1707 push( @filled_row, $el );
1714 =head1 NAVIGATION METHODS
1716 =head2 reading_sequence( $first, $last, $sigil, $backup )
1718 Returns the ordered list of readings, starting with $first and ending
1719 with $last, for the witness given in $sigil. If a $backup sigil is
1720 specified (e.g. when walking a layered witness), it will be used wherever
1721 no $sigil path exists. If there is a base text reading, that will be
1722 used wherever no path exists for $sigil or $backup.
1726 # TODO Think about returning some lazy-eval iterator.
1727 # TODO Get rid of backup; we should know from what witness is whether we need it.
1729 sub reading_sequence {
1730 my( $self, $start, $end, $witness ) = @_;
1732 $witness = $self->baselabel unless $witness;
1733 my @readings = ( $start );
1736 while( $n && $n->id ne $end->id ) {
1737 if( exists( $seen{$n->id} ) ) {
1738 throw( "Detected loop for $witness at " . $n->id );
1742 my $next = $self->next_reading( $n, $witness );
1744 throw( "Did not find any path for $witness from reading " . $n->id );
1746 push( @readings, $next );
1749 # Check that the last reading is our end reading.
1750 my $last = $readings[$#readings];
1751 throw( "Last reading found from " . $start->text .
1752 " for witness $witness is not the end!" ) # TODO do we get this far?
1753 unless $last->id eq $end->id;
1758 =head2 next_reading( $reading, $sigil );
1760 Returns the reading that follows the given reading along the given witness
1766 # Return the successor via the corresponding path.
1768 my $answer = $self->_find_linked_reading( 'next', @_ );
1769 return undef unless $answer;
1770 return $self->reading( $answer );
1773 =head2 prior_reading( $reading, $sigil )
1775 Returns the reading that precedes the given reading along the given witness
1781 # Return the predecessor via the corresponding path.
1783 my $answer = $self->_find_linked_reading( 'prior', @_ );
1784 return $self->reading( $answer );
1787 sub _find_linked_reading {
1788 my( $self, $direction, $node, $path ) = @_;
1790 # Get a backup if we are dealing with a layered witness
1792 my $aclabel = $self->ac_label;
1793 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1797 my @linked_paths = $direction eq 'next'
1798 ? $self->sequence->edges_from( $node )
1799 : $self->sequence->edges_to( $node );
1800 return undef unless scalar( @linked_paths );
1802 # We have to find the linked path that contains all of the
1803 # witnesses supplied in $path.
1804 my( @path_wits, @alt_path_wits );
1805 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1806 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1809 foreach my $le ( @linked_paths ) {
1810 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1813 my @le_wits = sort $self->path_witnesses( $le );
1814 if( _is_within( \@path_wits, \@le_wits ) ) {
1815 # This is the right path.
1816 return $direction eq 'next' ? $le->[1] : $le->[0];
1817 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1821 # Got this far? Return the alternate path if it exists.
1822 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1825 # Got this far? Return the base path if it exists.
1826 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1829 # Got this far? We have no appropriate path.
1830 warn "Could not find $direction node from " . $node->id
1831 . " along path $path";
1837 my( $set1, $set2 ) = @_;
1838 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1839 foreach my $el ( @$set1 ) {
1840 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1845 # Return the string that joins together a list of witnesses for
1846 # display on a single path.
1847 sub _witnesses_of_label {
1848 my( $self, $label ) = @_;
1849 my $regex = $self->wit_list_separator;
1850 my @answer = split( /\Q$regex\E/, $label );
1854 =head2 common_readings
1856 Returns the list of common readings in the graph (i.e. those readings that are
1857 shared by all non-lacunose witnesses.)
1861 sub common_readings {
1863 my @common = grep { $_->is_common } $self->readings;
1867 =head2 path_text( $sigil, [, $start, $end ] )
1869 Returns the text of a witness (plus its backup, if we are using a layer)
1870 as stored in the collation. The text is returned as a string, where the
1871 individual readings are joined with spaces and the meta-readings (e.g.
1872 lacunae) are omitted. Optional specification of $start and $end allows
1873 the generation of a subset of the witness text.
1878 my( $self, $wit, $start, $end ) = @_;
1879 $start = $self->start unless $start;
1880 $end = $self->end unless $end;
1881 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1884 foreach my $r ( @path ) {
1885 unless ( $r->join_prior || !$last || $last->join_next ) {
1888 $pathtext .= $r->text;
1894 =head1 INITIALIZATION METHODS
1896 These are mostly for use by parsers.
1898 =head2 make_witness_path( $witness )
1900 Link the array of readings contained in $witness->path (and in
1901 $witness->uncorrected_path if it exists) into collation paths.
1902 Clear out the arrays when finished.
1904 =head2 make_witness_paths
1906 Call make_witness_path for all witnesses in the tradition.
1910 # For use when a collation is constructed from a base text and an apparatus.
1911 # We have the sequences of readings and just need to add path edges.
1912 # When we are done, clear out the witness path attributes, as they are no
1914 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1916 sub make_witness_paths {
1918 foreach my $wit ( $self->tradition->witnesses ) {
1919 # say STDERR "Making path for " . $wit->sigil;
1920 $self->make_witness_path( $wit );
1924 sub make_witness_path {
1925 my( $self, $wit ) = @_;
1926 my @chain = @{$wit->path};
1927 my $sig = $wit->sigil;
1928 # Add start and end if necessary
1929 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1930 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1931 foreach my $idx ( 0 .. $#chain-1 ) {
1932 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1934 if( $wit->is_layered ) {
1935 @chain = @{$wit->uncorrected_path};
1936 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1937 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1938 foreach my $idx( 0 .. $#chain-1 ) {
1939 my $source = $chain[$idx];
1940 my $target = $chain[$idx+1];
1941 $self->add_path( $source, $target, $sig.$self->ac_label )
1942 unless $self->has_path( $source, $target, $sig );
1946 $wit->clear_uncorrected_path;
1949 =head2 calculate_ranks
1951 Calculate the reading ranks (that is, their aligned positions relative
1952 to each other) for the graph. This can only be called on linear collations.
1956 use Text::Tradition;
1958 my $cxfile = 't/data/Collatex-16.xml';
1959 my $t = Text::Tradition->new(
1961 'input' => 'CollateX',
1964 my $c = $t->collation;
1967 my $table = $c->alignment_table;
1968 ok( $c->has_cached_table, "Alignment table was cached" );
1969 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1970 $c->calculate_ranks;
1971 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1972 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
1973 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
1974 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
1975 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
1981 sub calculate_ranks {
1983 # Save the existing ranks, in case we need to invalidate the cached SVG.
1984 throw( "Cannot calculate ranks on a non-linear graph" )
1985 unless $self->linear;
1987 map { $existing_ranks{$_} = $_->rank } $self->readings;
1989 # Do the rankings based on the relationship equivalence graph, starting
1990 # with the start node.
1991 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1993 # Transfer our rankings from the topological graph to the real one.
1994 foreach my $r ( $self->readings ) {
1995 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1996 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
1998 # Die. Find the last rank we calculated.
1999 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2000 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2002 my $last = pop @all_defined;
2003 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2006 # Do we need to invalidate the cached data?
2007 if( $self->has_cached_table ) {
2008 foreach my $r ( $self->readings ) {
2009 next if defined( $existing_ranks{$r} )
2010 && $existing_ranks{$r} == $r->rank;
2011 # Something has changed, so clear the cache
2012 $self->_clear_cache;
2013 # ...and recalculate the common readings.
2014 $self->calculate_common_readings();
2018 # The graph calculation information is now up to date.
2019 $self->_graphcalc_done(1);
2024 $self->wipe_table if $self->has_cached_table;
2028 =head2 flatten_ranks
2030 A convenience method for parsing collation data. Searches the graph for readings
2031 with the same text at the same rank, and merges any that are found.
2036 my ( $self, %args ) = shift;
2037 my %unique_rank_rdg;
2039 foreach my $p ( $self->identical_readings( %args ) ) {
2040 # say STDERR "Combining readings at same rank: @$p";
2042 $self->merge_readings( @$p );
2043 # TODO see if this now makes a common point.
2045 # If we merged readings, the ranks are still fine but the alignment
2046 # table is wrong. Wipe it.
2047 $self->wipe_table() if $changed;
2050 =head2 identical_readings
2051 =head2 identical_readings( start => $startnode, end => $endnode )
2052 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2054 Goes through the graph identifying all pairs of readings that appear to be
2055 identical, and therefore able to be merged into a single reading. Returns the
2056 relevant identical pairs. Can be restricted to run over only a part of the
2057 graph, specified either by node or by rank.
2061 sub identical_readings {
2062 my ( $self, %args ) = @_;
2063 # Find where we should start and end.
2064 my $startrank = $args{startrank} || 0;
2065 if( $args{start} ) {
2066 throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2067 && $self->reading( $args{start} )->has_rank;
2068 $startrank = $self->reading( $args{start} )->rank;
2070 my $endrank = $args{endrank} || $self->end->rank;
2072 throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2073 && $self->reading( $args{end} )->has_rank;
2074 $endrank = $self->reading( $args{end} )->rank;
2077 # Make sure the ranks are correct.
2078 unless( $self->_graphcalc_done ) {
2079 $self->calculate_ranks;
2081 # Go through the readings looking for duplicates.
2082 my %unique_rank_rdg;
2084 foreach my $rdg ( $self->readings ) {
2085 next unless $rdg->has_rank;
2086 my $rk = $rdg->rank;
2087 next if $rk > $endrank || $rk < $startrank;
2088 my $key = $rk . "||" . $rdg->text;
2089 if( exists $unique_rank_rdg{$key} ) {
2090 # Make sure they don't have different grammatical forms
2091 my $ur = $unique_rank_rdg{$key};
2092 if( $rdg->is_identical( $ur ) ) {
2093 push( @pairs, [ $ur, $rdg ] );
2096 $unique_rank_rdg{$key} = $rdg;
2104 =head2 calculate_common_readings
2106 Goes through the graph identifying the readings that appear in every witness
2107 (apart from those with lacunae at that spot.) Marks them as common and returns
2112 use Text::Tradition;
2114 my $cxfile = 't/data/Collatex-16.xml';
2115 my $t = Text::Tradition->new(
2117 'input' => 'CollateX',
2120 my $c = $t->collation;
2122 my @common = $c->calculate_common_readings();
2123 is( scalar @common, 8, "Found correct number of common readings" );
2124 my @marked = sort $c->common_readings();
2125 is( scalar @common, 8, "All common readings got marked as such" );
2126 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2127 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2133 sub calculate_common_readings {
2136 map { $_->is_common( 0 ) } $self->readings;
2137 # Implicitly calls calculate_ranks
2138 my $table = $self->alignment_table;
2139 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2140 my @row = map { $_->{'tokens'}->[$idx]
2141 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2142 @{$table->{'alignment'}};
2144 foreach my $r ( @row ) {
2146 $hash{$r->id} = $r unless $r->is_meta;
2148 $hash{'UNDEF'} = $r;
2151 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2152 my( $r ) = values %hash;
2154 push( @common, $r );
2160 =head2 text_from_paths
2162 Calculate the text array for all witnesses from the path, for later consistency
2163 checking. Only to be used if there is no non-graph-based way to know the
2168 sub text_from_paths {
2170 foreach my $wit ( $self->tradition->witnesses ) {
2171 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2173 foreach my $r ( @readings ) {
2174 next if $r->is_meta;
2175 push( @text, $r->text );
2177 $wit->text( \@text );
2178 if( $wit->is_layered ) {
2179 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2180 $wit->sigil.$self->ac_label );
2182 foreach my $r ( @ucrdgs ) {
2183 next if $r->is_meta;
2184 push( @uctext, $r->text );
2186 $wit->layertext( \@uctext );
2191 =head1 UTILITY FUNCTIONS
2193 =head2 common_predecessor( $reading_a, $reading_b )
2195 Find the last reading that occurs in sequence before both the given readings.
2196 At the very least this should be $self->start.
2198 =head2 common_successor( $reading_a, $reading_b )
2200 Find the first reading that occurs in sequence after both the given readings.
2201 At the very least this should be $self->end.
2205 use Text::Tradition;
2207 my $cxfile = 't/data/Collatex-16.xml';
2208 my $t = Text::Tradition->new(
2210 'input' => 'CollateX',
2213 my $c = $t->collation;
2215 is( $c->common_predecessor( 'n24', 'n23' )->id,
2216 'n20', "Found correct common predecessor" );
2217 is( $c->common_successor( 'n24', 'n23' )->id,
2218 '__END__', "Found correct common successor" );
2220 is( $c->common_predecessor( 'n19', 'n17' )->id,
2221 'n16', "Found correct common predecessor for readings on same path" );
2222 is( $c->common_successor( 'n21', 'n10' )->id,
2223 '__END__', "Found correct common successor for readings on same path" );
2229 ## Return the closest reading that is a predecessor of both the given readings.
2230 sub common_predecessor {
2232 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2233 return $self->_common_in_path( $r1, $r2, 'predecessors' );
2236 sub common_successor {
2238 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2239 return $self->_common_in_path( $r1, $r2, 'successors' );
2243 # TODO think about how to do this without ranks...
2244 sub _common_in_path {
2245 my( $self, $r1, $r2, $dir ) = @_;
2246 my $iter = $self->end->rank;
2248 my @last_r1 = ( $r1 );
2249 my @last_r2 = ( $r2 );
2250 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2252 # say STDERR "Finding common $dir for $r1, $r2";
2253 while( !@candidates ) {
2254 last unless $iter--; # Avoid looping infinitely
2255 # Iterate separately down the graph from r1 and r2
2256 my( @new_lc1, @new_lc2 );
2257 foreach my $lc ( @last_r1 ) {
2258 foreach my $p ( $lc->$dir ) {
2259 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2260 # say STDERR "Path candidate $p from $lc";
2261 push( @candidates, $p );
2262 } elsif( !$all_seen{$p->id} ) {
2263 $all_seen{$p->id} = 'r1';
2264 push( @new_lc1, $p );
2268 foreach my $lc ( @last_r2 ) {
2269 foreach my $p ( $lc->$dir ) {
2270 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2271 # say STDERR "Path candidate $p from $lc";
2272 push( @candidates, $p );
2273 } elsif( !$all_seen{$p->id} ) {
2274 $all_seen{$p->id} = 'r2';
2275 push( @new_lc2, $p );
2279 @last_r1 = @new_lc1;
2280 @last_r2 = @new_lc2;
2282 my @answer = sort { $a->rank <=> $b->rank } @candidates;
2283 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2287 Text::Tradition::Error->throw(
2288 'ident' => 'Collation error',
2294 __PACKAGE__->meta->make_immutable;
2300 =item * Rework XML serialization in a more modular way
2306 This package is free software and is provided "as is" without express
2307 or implied warranty. You can redistribute it and/or modify it under
2308 the same terms as Perl itself.
2312 Tara L Andrews E<lt>aurum@cpan.orgE<gt>