1 package Text::Tradition::Collation;
4 use Encode qw( decode_utf8 );
8 use IPC::Run qw( run binary );
10 use Text::Tradition::Collation::Data;
11 use Text::Tradition::Collation::Reading;
12 use Text::Tradition::Collation::RelationshipStore;
13 use Text::Tradition::Error;
14 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
16 use XML::LibXML::XPathContext;
20 isa => 'Text::Tradition::Collation::Data',
59 isa => 'Text::Tradition',
60 writer => '_set_tradition',
68 Text::Tradition::Collation - a software model for a text collation
73 my $t = Text::Tradition->new(
74 'name' => 'this is a text',
76 'file' => '/path/to/tei_parallel_seg_file.xml' );
78 my $c = $t->collation;
79 my @readings = $c->readings;
80 my @paths = $c->paths;
81 my @relationships = $c->relationships;
83 my $svg_variant_graph = $t->collation->as_svg();
87 Text::Tradition is a library for representation and analysis of collated
88 texts, particularly medieval ones. The Collation is the central feature of
89 a Tradition, where the text, its sequence of readings, and its relationships
90 between readings are actually kept.
96 The constructor. Takes a hash or hashref of the following arguments:
100 =item * tradition - The Text::Tradition object to which the collation
103 =item * linear - Whether the collation should be linear; that is, whether
104 transposed readings should be treated as two linked readings rather than one,
105 and therefore whether the collation graph is acyclic. Defaults to true.
107 =item * baselabel - The default label for the path taken by a base text
108 (if any). Defaults to 'base text'.
110 =item * wit_list_separator - The string to join a list of witnesses for
111 purposes of making labels in display graphs. Defaults to ', '.
113 =item * ac_label - The extra label to tack onto a witness sigil when
114 representing another layer of path for the given witness - that is, when
115 a text has more than one possible reading due to scribal corrections or
116 the like. Defaults to ' (a.c.)'.
118 =item * wordsep - The string used to separate words in the original text.
129 =head2 wit_list_separator
137 Simple accessors for collation attributes.
141 The meta-reading at the start of every witness path.
145 The meta-reading at the end of every witness path.
149 Returns all Reading objects in the graph.
151 =head2 reading( $id )
153 Returns the Reading object corresponding to the given ID.
155 =head2 add_reading( $reading_args )
157 Adds a new reading object to the collation.
158 See L<Text::Tradition::Collation::Reading> for the available arguments.
160 =head2 del_reading( $object_or_id )
162 Removes the given reading from the collation, implicitly removing its
163 paths and relationships.
165 =head2 has_reading( $id )
167 Predicate to see whether a given reading ID is in the graph.
169 =head2 reading_witnesses( $object_or_id )
171 Returns a list of sigils whose witnesses contain the reading.
175 Returns all reading paths within the document - that is, all edges in the
176 collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
178 =head2 add_path( $source, $target, $sigil )
180 Links the given readings in the collation in sequence, under the given witness
181 sigil. The readings may be specified by object or ID.
183 =head2 del_path( $source, $target, $sigil )
185 Links the given readings in the collation in sequence, under the given witness
186 sigil. The readings may be specified by object or ID.
188 =head2 has_path( $source, $target );
190 Returns true if the two readings are linked in sequence in any witness.
191 The readings may be specified by object or ID.
195 Returns all Relationship objects in the collation.
197 =head2 add_relationship( $reading, $other_reading, $options )
199 Adds a new relationship of the type given in $options between the two readings,
200 which may be specified by object or ID. Returns a value of ( $status, @vectors)
201 where $status is true on success, and @vectors is a list of relationship edges
202 that were ultimately added.
203 See L<Text::Tradition::Collation::Relationship> for the available options.
208 my ( $class, @args ) = @_;
209 my %args = @args == 1 ? %{ $args[0] } : @args;
210 # TODO determine these from the Moose::Meta object
211 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
212 linear wordsep start end cached_table _graphcalc_done);
214 for my $attr (@delegate_attrs) {
215 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
217 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
223 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
224 $self->_set_start( $self->add_reading(
225 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
226 $self->_set_end( $self->add_reading(
227 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
230 =head2 register_relationship_type( %relationship_definition )
232 Add a relationship type definition to this collation. The argument can be either a
233 hash or a hashref, defining the properties of the relationship. For relationship types
234 and their properties, see L<Text::Tradition::Collation::RelationshipType>.
236 =head2 get_relationship_type( $relationship_name )
238 Retrieve the RelationshipType object for the relationship with the given name.
242 sub register_relationship_type {
244 my %args = @_ == 1 ? %{$_[0]} : @_;
245 if( $self->relations->has_type( $args{name} ) ) {
246 throw( 'Relationship type ' . $args{name} . ' already registered' );
248 $self->relations->add_type( %args );
251 sub get_relationship_type {
252 my( $self, $name ) = @_;
253 return $self->relations->has_type( $name )
254 ? $self->relations->type( $name ) : undef;
257 ### Reading construct/destruct functions
260 my( $self, $reading ) = @_;
261 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
262 my %args = %$reading;
263 if( $args{'init'} ) {
264 # If we are initializing an empty collation, don't assume that we
265 # have set a tradition.
266 delete $args{'init'};
267 } elsif( $self->tradition->can('language') && $self->tradition->has_language
268 && !exists $args{'language'} ) {
269 $args{'language'} = $self->tradition->language;
271 $reading = Text::Tradition::Collation::Reading->new(
272 'collation' => $self,
275 # First check to see if a reading with this ID exists.
276 if( $self->reading( $reading->id ) ) {
277 throw( "Collation already has a reading with id " . $reading->id );
279 $self->_graphcalc_done(0);
280 $self->_add_reading( $reading->id => $reading );
281 # Once the reading has been added, put it in both graphs.
282 $self->sequence->add_vertex( $reading->id );
283 $self->relations->add_reading( $reading->id );
287 around del_reading => sub {
292 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
295 # Remove the reading from the graphs.
296 $self->_graphcalc_done(0);
297 $self->_clear_cache; # Explicitly clear caches to GC the reading
298 $self->sequence->delete_vertex( $arg );
299 $self->relations->delete_reading( $arg );
302 $self->$orig( $arg );
305 =head2 merge_readings( $main, $second, $concatenate, $with_str )
307 Merges the $second reading into the $main one. If $concatenate is true, then
308 the merged node will carry the text of both readings, concatenated with either
309 $with_str (if specified) or a sensible default (the empty string if the
310 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
312 The first two arguments may be either readings or reading IDs.
319 my $cxfile = 't/data/Collatex-16.xml';
320 my $t = Text::Tradition->new(
322 'input' => 'CollateX',
325 my $c = $t->collation;
327 my $rno = scalar $c->readings;
328 # Split n21 ('unto') for testing purposes
329 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
330 my $old_r = $c->reading( 'n21' );
331 $old_r->alter_text( 'to' );
332 $c->del_path( 'n20', 'n21', 'A' );
333 $c->add_path( 'n20', 'n21p0', 'A' );
334 $c->add_path( 'n21p0', 'n21', 'A' );
335 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
337 ok( $c->reading( 'n21p0' ), "New reading exists" );
338 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
340 # Combine n3 and n4 ( with his )
341 $c->merge_readings( 'n3', 'n4', 1 );
342 ok( !$c->reading('n4'), "Reading n4 is gone" );
343 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
345 # Collapse n9 and n10 ( rood / root )
346 $c->merge_readings( 'n9', 'n10' );
347 ok( !$c->reading('n10'), "Reading n10 is gone" );
348 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
350 # Try to combine n21 and n21p0. This should break.
351 my $remaining = $c->reading('n21');
352 $remaining ||= $c->reading('n22'); # one of these should still exist
354 $c->merge_readings( 'n21p0', $remaining, 1 );
355 ok( 0, "Bad reading merge changed the graph" );
356 } catch( Text::Tradition::Error $e ) {
357 like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
359 ok( 0, "Unexpected error on bad reading merge: $@" );
363 $c->calculate_ranks();
364 ok( 1, "Graph is still evidently whole" );
365 } catch( Text::Tradition::Error $e ) {
366 ok( 0, "Caught a rank exception: " . $e->message );
377 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
378 my $mergemeta = $kept_obj->is_meta;
379 throw( "Cannot merge meta and non-meta reading" )
380 unless ( $mergemeta && $del_obj->is_meta )
381 || ( !$mergemeta && !$del_obj->is_meta );
383 throw( "Cannot merge with start or end node" )
384 if( $kept_obj eq $self->start || $kept_obj eq $self->end
385 || $del_obj eq $self->start || $del_obj eq $self->end );
386 throw( "Cannot combine text of meta readings" ) if $combine;
388 # We can only merge readings in a linear graph if:
389 # - they are contiguous with only one edge between them, OR
390 # - they are at equivalent ranks in the graph.
391 if( $self->linear ) {
392 my @delpred = $del_obj->predecessors;
393 my @keptsuc = $kept_obj->successors;
394 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
395 && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
396 my( $is_ok, $msg ) = $self->relations->relationship_valid(
397 $kept_obj, $del_obj, 'collated' );
399 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
404 # We only need the IDs for adding paths to the graph, not the reading
405 # objects themselves.
406 my $kept = $kept_obj->id;
407 my $deleted = $del_obj->id;
408 $self->_graphcalc_done(0);
410 # The kept reading should inherit the paths and the relationships
411 # of the deleted reading.
412 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
413 my @vector = ( $kept );
414 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
415 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
416 next if $vector[0] eq $vector[1]; # Don't add a self loop
417 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
418 $self->sequence->add_edge( @vector );
419 my $fwits = $self->sequence->get_edge_attributes( @vector );
420 @wits{keys %$fwits} = values %$fwits;
421 $self->sequence->set_edge_attributes( @vector, \%wits );
423 $self->relations->merge_readings( $kept, $deleted, $combine );
425 # Do the deletion deed.
427 # Combine the text of the readings
428 my $joinstr = $combine_char;
429 unless( defined $joinstr ) {
430 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
431 $joinstr = $self->wordsep unless defined $joinstr;
433 $kept_obj->_combine( $del_obj, $joinstr );
435 $self->del_reading( $deleted );
438 =head2 merge_related( @relationship_types )
440 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
442 WARNING: This operation cannot be undone.
454 $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
455 } [qr/Cannot set relationship on a meta reading/],
456 "Got expected relationship drop warning on parse";
458 my $c = $t->collation;
461 map { $rdg_ids{$_} = 1 } $c->readings;
462 $c->merge_related( 'orthographic' );
463 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
464 "Successfully collapsed orthographic variation" );
465 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
466 foreach my $rid ( keys %rdg_ids ) {
467 my $exp = $rdg_ids{$rid};
468 is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
469 ( $exp ? "retained" : "removed" ) );
471 ok( $c->linear, "Graph is still linear" );
473 $c->calculate_ranks; # This should succeed
474 ok( 1, "Can still calculate ranks on the new graph" );
476 ok( 0, "Rank calculation on merged graph failed: $@" );
479 # Now add some transpositions
480 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
481 $c->merge_related( 'transposition' );
482 is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
483 "Transposed relationship is merged away" );
484 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
485 ok( !$c->linear, "Graph is no longer linear" );
487 $c->calculate_ranks; # This should fail
488 ok( 0, "Rank calculation happened on nonlinear graph?!" );
489 } catch ( Text::Tradition::Error $e ) {
490 is( $e->message, 'Cannot calculate ranks on a non-linear graph',
491 "Rank calculation on merged graph threw an error" );
498 # TODO: there should be a way to display merged without affecting the underlying data!
503 map { $reltypehash{$_} = 1 } @_;
505 # Set up the filter for finding related readings
507 exists $reltypehash{$_[0]->type};
510 # Go through all readings looking for related ones
511 foreach my $r ( $self->readings ) {
512 next unless $self->reading( "$r" ); # might have been deleted meanwhile
513 while( my @related = $self->related_readings( $r, $filter ) ) {
514 push( @related, $r );
516 scalar $b->witnesses <=> scalar $a->witnesses
518 my $keep = shift @related;
519 foreach my $delr ( @related ) {
521 unless( $self->get_relationship( $keep, $delr )->colocated );
522 $self->merge_readings( $keep, $delr );
528 =head2 compress_readings
530 Where possible in the graph, compresses plain sequences of readings into a
531 single reading. The sequences must consist of readings with no
532 relationships to other readings, with only a single witness path between
533 them and no other witness paths from either that would skip the other. The
534 readings must also not be marked as nonsense or bad grammar.
536 WARNING: This operation cannot be undone.
540 sub compress_readings {
542 # Anywhere in the graph that there is a reading that joins only to a single
543 # successor, and neither of these have any relationships, just join the two
545 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
546 # Now look for readings that can be joined to their successors.
547 next unless $rdg->is_combinable;
549 while( $self->sequence->successors( $rdg ) == 1 ) {
550 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
551 throw( "Infinite loop" ) if $seen{$next->id};
552 $seen{$next->id} = 1;
553 last if $self->sequence->predecessors( $next ) > 1;
554 last unless $next->is_combinable;
555 say "Joining readings $rdg and $next";
556 $self->merge_readings( $rdg, $next, 1 );
559 # Make sure we haven't screwed anything up
560 foreach my $wit ( $self->tradition->witnesses ) {
561 my $pathtext = $self->path_text( $wit->sigil );
562 my $origtext = join( ' ', @{$wit->text} );
563 throw( "Text differs for witness " . $wit->sigil )
564 unless $pathtext eq $origtext;
565 if( $wit->is_layered ) {
566 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
567 $origtext = join( ' ', @{$wit->layertext} );
568 throw( "Ante-corr text differs for witness " . $wit->sigil )
569 unless $pathtext eq $origtext;
573 $self->relations->rebuild_equivalence();
574 $self->calculate_ranks();
577 # Helper function for manipulating the graph.
578 sub _stringify_args {
579 my( $self, $first, $second, @args ) = @_;
581 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
582 $second = $second->id
583 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
584 return( $first, $second, @args );
587 # Helper function for manipulating the graph.
588 sub _objectify_args {
589 my( $self, $first, $second, $arg ) = @_;
590 $first = $self->reading( $first )
591 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
592 $second = $self->reading( $second )
593 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
594 return( $first, $second, $arg );
597 =head2 duplicate_reading( $reading, @witlist )
599 Split the given reading into two, so that the new reading is in the path for
600 the witnesses given in @witlist. If the result is that certain non-colocated
601 relationships (e.g. transpositions) are no longer valid, these will be removed.
602 Returns the newly-created reading.
606 use Test::More::UTF8;
610 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
611 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
612 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
614 my $sc = $st->collation;
616 ok( $sc->reading('n131'), "Tradition has the affected reading" );
617 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
618 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
620 # Detach the erroneously collated reading
621 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
622 ok( $newr, "New reading was created" );
623 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
624 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
625 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
626 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
627 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
629 # Check that the bad transposition is gone
630 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
631 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
633 # The collation should not be fixed
634 my @pairs = $sc->identical_readings();
635 is( scalar @pairs, 0, "Not re-collated yet" );
637 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
638 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
639 is( scalar @pairs, 3, "Found three more identical readings" );
640 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
641 $sc->flatten_ranks();
642 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
644 # Check that we can't "duplicate" a reading with no wits or with all wits
646 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
647 ok( 0, "Reading duplication without witnesses throws an error" );
648 } catch( Text::Tradition::Error $e ) {
649 like( $e->message, qr/Must specify one or more witnesses/,
650 "Reading duplication without witnesses throws the expected error" );
652 ok( 0, "Reading duplication without witnesses threw the wrong error" );
656 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
657 ok( 0, "Reading duplication with all witnesses throws an error" );
658 } catch( Text::Tradition::Error $e ) {
659 like( $e->message, qr/Cannot join all witnesses/,
660 "Reading duplication with all witnesses throws the expected error" );
662 ok( 0, "Reading duplication with all witnesses threw the wrong error" );
666 $sc->calculate_ranks();
667 ok( 1, "Graph is still evidently whole" );
668 } catch( Text::Tradition::Error $e ) {
669 ok( 0, "Caught a rank exception: " . $e->message );
676 sub duplicate_reading {
677 my( $self, $r, @wits ) = @_;
678 # Check that we are not doing anything unwise.
679 throw( "Must specify one or more witnesses for the duplicated reading" )
681 unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
682 $r = $self->reading( $r );
684 throw( "Cannot duplicate a meta-reading" )
686 throw( "Cannot join all witnesses to the new reading" )
687 if scalar( @wits ) == scalar( $r->witnesses );
689 # Get all the reading attributes and duplicate them.
690 my $rmeta = Text::Tradition::Collation::Reading->meta;
692 foreach my $attr( $rmeta->get_all_attributes ) {
693 next if $attr->name =~ /^_/;
694 my $acc = $attr->get_read_method;
695 if( !$acc && $attr->has_applied_traits ) {
696 my $tr = $attr->applied_traits;
697 if( $tr->[0] =~ /::(Array|Hash)$/ ) {
699 my %methods = reverse %{$attr->handles};
700 $acc = $methods{elements};
701 $args{$attr->name} = $which eq 'Array'
702 ? [ $r->$acc ] : { $r->$acc };
705 $args{$attr->name} = $r->$acc if $acc;
708 # By definition the new reading will no longer be common.
709 $args{is_common} = 0;
710 # The new reading also needs its own ID.
711 $args{id} = $self->_generate_dup_id( $r->id );
713 # Try to make the new reading.
714 my $newr = $self->add_reading( \%args );
715 # The old reading is also no longer common.
718 # For each of the witnesses, dissociate from the old reading and
719 # associate with the new.
720 foreach my $wit ( @wits ) {
721 my $prior = $self->prior_reading( $r, $wit );
722 my $next = $self->next_reading( $r, $wit );
723 $self->del_path( $prior, $r, $wit );
724 $self->add_path( $prior, $newr, $wit );
725 $self->del_path( $r, $next, $wit );
726 $self->add_path( $newr, $next, $wit );
729 # If the graph is ranked, we need to look for relationships that are now
730 # invalid (i.e. 'non-colocation' types that might now be colocated) and
731 # remove them. If not, we can skip it.
734 my @deleted_relations;
735 if( $self->end->has_rank ) {
736 # Find the point where we can stop checking
737 $succ = $self->common_successor( $r, $newr );
739 # Hash the existing ranks
740 foreach my $rdg ( $self->readings ) {
741 $rrk{$rdg->id} = $rdg->rank;
743 # Calculate the new ranks
744 $self->calculate_ranks();
746 # Check for invalid non-colocated relationships among changed-rank readings
747 # from where the ranks start changing up to $succ
748 my $lastrank = $succ->rank;
749 foreach my $rdg ( $self->readings ) {
750 next if $rdg->rank > $lastrank;
751 next if $rdg->rank == $rrk{$rdg->id};
752 my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
753 next unless @noncolo;
754 foreach my $nc ( @noncolo ) {
755 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
756 push( @deleted_relations, [ $rdg->id, $nc->id ] );
761 return ( $newr, @deleted_relations );
764 sub _generate_dup_id {
765 my( $self, $rid ) = @_;
770 if( $self->has_reading( $newid ) ) {
783 # We only need the IDs for adding paths to the graph, not the reading
784 # objects themselves.
785 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
787 $self->_graphcalc_done(0);
788 # Connect the readings
789 unless( $self->sequence->has_edge( $source, $target ) ) {
790 $self->sequence->add_edge( $source, $target );
791 $self->relations->add_equivalence_edge( $source, $target );
793 # Note the witness in question
794 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
800 if( ref( $_[0] ) eq 'ARRAY' ) {
807 # We only need the IDs for removing paths from the graph, not the reading
808 # objects themselves.
809 my( $source, $target, $wit ) = $self->_stringify_args( @args );
811 $self->_graphcalc_done(0);
812 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
813 $self->sequence->delete_edge_attribute( $source, $target, $wit );
815 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
816 $self->sequence->delete_edge( $source, $target );
817 $self->relations->delete_equivalence_edge( $source, $target );
822 # Extra graph-alike utility
825 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
826 return undef unless $self->sequence->has_edge( $source, $target );
827 return $self->sequence->has_edge_attribute( $source, $target, $wit );
830 =head2 clear_witness( @sigil_list )
832 Clear the given witnesses out of the collation entirely, removing references
833 to them in paths, and removing readings that belong only to them. Should only
834 be called via $tradition->del_witness.
839 my( $self, @sigils ) = @_;
841 $self->_graphcalc_done(0);
842 # Clear the witness(es) out of the paths
843 foreach my $e ( $self->paths ) {
844 foreach my $sig ( @sigils ) {
845 $self->del_path( $e, $sig );
849 # Clear out the newly unused readings
850 foreach my $r ( $self->readings ) {
851 unless( $self->reading_witnesses( $r ) ) {
852 $self->del_reading( $r );
857 sub add_relationship {
859 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
860 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
861 foreach my $v ( @vectors ) {
862 next unless $self->get_relationship( $v )->colocated;
863 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
864 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
865 $self->_graphcalc_done(0);
873 around qw/ get_relationship del_relationship / => sub {
877 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
880 my @stringargs = $self->_stringify_args( @args );
881 $self->$orig( @stringargs );
884 =head2 reading_witnesses( $reading )
886 Return a list of sigils corresponding to the witnesses in which the reading appears.
890 sub reading_witnesses {
891 my( $self, $reading ) = @_;
892 # We need only check either the incoming or the outgoing edges; I have
893 # arbitrarily chosen "incoming". Thus, special-case the start node.
894 if( $reading eq $self->start ) {
895 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
898 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
899 my $wits = $self->sequence->get_edge_attributes( @$e );
900 @all_witnesses{ keys %$wits } = 1;
902 my $acstr = $self->ac_label;
903 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
904 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
906 return keys %all_witnesses;
909 =head1 OUTPUT METHODS
911 =head2 as_svg( \%options )
913 Returns an SVG string that represents the graph, via as_dot and graphviz.
914 See as_dot for a list of options. Must have GraphViz (dot) installed to run.
919 my( $self, $opts ) = @_;
920 throw( "Need GraphViz installed to output SVG" )
921 unless File::Which::which( 'dot' );
922 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
923 $self->calculate_ranks()
924 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
925 my @cmd = qw/dot -Tsvg/;
927 my $dotfile = File::Temp->new();
929 # $dotfile->unlink_on_destroy(0);
930 binmode $dotfile, ':utf8';
931 print $dotfile $self->as_dot( $opts );
932 push( @cmd, $dotfile->filename );
933 run( \@cmd, ">", binary(), \$svg );
934 $svg = decode_utf8( $svg );
939 =head2 as_dot( \%options )
941 Returns a string that is the collation graph expressed in dot
942 (i.e. GraphViz) format. Options include:
957 my( $self, $opts ) = @_;
958 my $startrank = $opts->{'from'} if $opts;
959 my $endrank = $opts->{'to'} if $opts;
960 my $color_common = $opts->{'color_common'} if $opts;
961 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
962 && $self->end->rank > 100;
963 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
965 # Check the arguments
967 return if $endrank && $startrank > $endrank;
968 return if $startrank > $self->end->rank;
970 if( defined $endrank ) {
971 return if $endrank < 0;
972 $endrank = undef if $endrank == $self->end->rank;
975 my $graph_name = $self->tradition->name;
976 $graph_name =~ s/[^\w\s]//g;
977 $graph_name = join( '_', split( /\s+/, $graph_name ) );
985 'fillcolor' => 'white',
990 'arrowhead' => 'open',
991 'color' => '#000000',
992 'fontcolor' => '#000000',
995 my $dot = sprintf( "digraph %s {\n", $graph_name );
996 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
997 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
999 # Output substitute start/end readings if necessary
1001 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1004 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1006 if( $STRAIGHTENHACK ) {
1008 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1009 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1010 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1012 my %used; # Keep track of the readings that actually appear in the graph
1013 # Sort the readings by rank if we have ranks; this speeds layout.
1014 my @all_readings = $self->end->has_rank
1015 ? sort { $a->rank <=> $b->rank } $self->readings
1017 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
1018 foreach my $reading ( @all_readings ) {
1019 # Only output readings within our rank range.
1020 next if $startrank && $reading->rank < $startrank;
1021 next if $endrank && $reading->rank > $endrank;
1022 $used{$reading->id} = 1;
1023 # Need not output nodes without separate labels
1024 next if $reading->id eq $reading->text;
1026 my $label = $reading->text;
1027 unless( $label =~ /^[[:punct:]]+$/ ) {
1028 $label .= '-' if $reading->join_next;
1029 $label = "-$label" if $reading->join_prior;
1031 $label =~ s/\"/\\\"/g;
1032 $rattrs->{'label'} = $label;
1033 $rattrs->{'id'} = $reading->id;
1034 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1035 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1038 # Add the real edges. Need to weight one edge per rank jump, in a
1040 # my $weighted = $self->_add_edge_weights;
1041 my @edges = $self->paths;
1042 my( %substart, %subend );
1043 foreach my $edge ( @edges ) {
1044 # Do we need to output this edge?
1045 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1046 my $label = $self->_path_display_label( $opts,
1047 $self->path_witnesses( $edge ) );
1048 my $variables = { %edge_attrs, 'label' => $label };
1050 # Account for the rank gap if necessary
1051 my $rank0 = $self->reading( $edge->[0] )->rank
1052 if $self->reading( $edge->[0] )->has_rank;
1053 my $rank1 = $self->reading( $edge->[1] )->rank
1054 if $self->reading( $edge->[1] )->has_rank;
1055 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1056 $variables->{'minlen'} = $rank1 - $rank0;
1059 # Add the calculated edge weights
1060 # if( exists $weighted->{$edge->[0]}
1061 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
1062 # # $variables->{'color'} = 'red';
1063 # $variables->{'weight'} = 3.0;
1066 # EXPERIMENTAL: make edge width reflect no. of witnesses
1067 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1068 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1070 my $varopts = _dot_attr_string( $variables );
1071 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1072 $edge->[0], $edge->[1], $varopts );
1073 } elsif( $used{$edge->[0]} ) {
1074 $subend{$edge->[0]} = $edge->[1];
1075 } elsif( $used{$edge->[1]} ) {
1076 $substart{$edge->[1]} = $edge->[0];
1080 # If we are asked to, add relationship links
1081 if( exists $opts->{show_relations} ) {
1082 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1083 if( $filter eq 'transposition' ) {
1084 $filter =~ qr/^transposition$/;
1087 my @types = sort( map { $_->name } $self->relations->types );
1088 if( exists $opts->{graphcolors} ) {
1089 foreach my $tdx ( 0 .. $#types ) {
1090 $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1093 map { $typecolors{$_} = '#FFA14F' } @types;
1095 foreach my $redge ( $self->relationships ) {
1096 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1097 my $rel = $self->get_relationship( $redge );
1098 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1100 arrowhead => 'none',
1101 color => $typecolors{$rel->type},
1102 constraint => 'false',
1105 unless( exists $opts->{graphcolors} ) {
1106 $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1108 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1109 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1114 # Add substitute start and end edges if necessary
1115 foreach my $node ( keys %substart ) {
1116 my $witstr = $self->_path_display_label( $opts,
1117 $self->path_witnesses( $substart{$node}, $node ) );
1118 my $variables = { %edge_attrs, 'label' => $witstr };
1119 my $nrdg = $self->reading( $node );
1120 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1121 # Substart is actually one lower than $startrank
1122 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1124 my $varopts = _dot_attr_string( $variables );
1125 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1127 foreach my $node ( keys %subend ) {
1128 my $witstr = $self->_path_display_label( $opts,
1129 $self->path_witnesses( $node, $subend{$node} ) );
1130 my $variables = { %edge_attrs, 'label' => $witstr };
1131 my $varopts = _dot_attr_string( $variables );
1132 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1135 if( $STRAIGHTENHACK ) {
1136 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1137 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1144 sub _dot_attr_string {
1147 foreach my $k ( sort keys %$hash ) {
1148 my $v = $hash->{$k};
1149 push( @attrs, $k.'="'.$v.'"' );
1151 return( '[ ' . join( ', ', @attrs ) . ' ]' );
1154 sub _add_edge_weights {
1156 # Walk the graph from START to END, choosing the successor node with
1157 # the largest number of witness paths each time.
1159 my $curr = $self->start->id;
1160 my $ranked = $self->end->has_rank;
1161 while( $curr ne $self->end->id ) {
1162 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
1163 my @succ = sort { $self->path_witnesses( $curr, $a )
1164 <=> $self->path_witnesses( $curr, $b ) }
1165 $self->sequence->successors( $curr );
1166 my $next = pop @succ;
1167 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
1168 # Try to avoid lacunae in the weighted path.
1170 ( $self->reading( $next )->is_lacuna ||
1171 $nextrank - $rank > 1 ) ){
1174 $weighted->{$curr} = $next;
1180 =head2 path_witnesses( $edge )
1182 Returns the list of sigils whose witnesses are associated with the given edge.
1183 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1187 sub path_witnesses {
1188 my( $self, @edge ) = @_;
1189 # If edge is an arrayref, cope.
1190 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1191 my $e = shift @edge;
1194 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1198 # Helper function. Make a display label for the given witnesses, showing a.c.
1199 # witnesses only where the main witness is not also in the list.
1200 sub _path_display_label {
1204 map { $wits{$_} = 1 } @_;
1206 # If an a.c. wit is listed, remove it if the main wit is also listed.
1207 # Otherwise keep it for explicit listing.
1208 my $aclabel = $self->ac_label;
1210 foreach my $w ( sort keys %wits ) {
1211 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1212 if( exists $wits{$1} ) {
1215 push( @disp_ac, $w );
1220 if( $opts->{'explicit_wits'} ) {
1221 return join( ', ', sort keys %wits );
1223 # See if we are in a majority situation.
1224 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1225 $maj = $maj > 5 ? $maj : 5;
1226 if( scalar keys %wits > $maj ) {
1227 unshift( @disp_ac, 'majority' );
1228 return join( ', ', @disp_ac );
1230 return join( ', ', sort keys %wits );
1235 =head2 readings_at_rank( $rank )
1237 Returns a list of readings at a given rank, taken from the alignment table.
1241 sub readings_at_rank {
1242 my( $self, $rank ) = @_;
1243 my $table = $self->alignment_table;
1244 # Table rank is real rank - 1.
1245 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1247 foreach my $e ( @elements ) {
1248 next unless ref( $e ) eq 'HASH';
1249 next unless exists $e->{'t'};
1250 $readings{$e->{'t'}->id} = $e->{'t'};
1252 return values %readings;
1257 Returns a GraphML representation of the collation. The GraphML will contain
1258 two graphs. The first expresses the attributes of the readings and the witness
1259 paths that link them; the second expresses the relationships that link the
1260 readings. This is the native transfer format for a tradition.
1264 use Text::Tradition;
1270 my $datafile = 't/data/florilegium_tei_ps.xml';
1271 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1273 'file' => $datafile,
1276 ok( $tradition, "Got a tradition object" );
1277 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1278 ok( $tradition->collation, "Tradition has a collation" );
1280 my $c = $tradition->collation;
1281 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1282 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1283 is( scalar $c->relationships, 0, "Collation has all relationships" );
1285 # Add a few relationships
1286 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1287 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1288 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1289 'is_significant' => 'yes' } );
1291 # Now write it to GraphML and parse it again.
1293 my $graphml = $c->as_graphml;
1294 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1295 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1296 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1297 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1298 my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1299 is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1301 # Now add a stemma, write to GraphML, and look at the output.
1303 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1304 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1305 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1306 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1307 $graphml = $c->as_graphml;
1308 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1315 ## TODO MOVE this to Tradition.pm and modularize it better
1317 my( $self, $options ) = @_;
1318 $self->calculate_ranks unless $self->_graphcalc_done;
1320 my $start = $options->{'from'}
1321 ? $self->reading( $options->{'from'} ) : $self->start;
1322 my $end = $options->{'to'}
1323 ? $self->reading( $options->{'to'} ) : $self->end;
1324 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1325 throw( 'Start node must be before end node' );
1327 # The readings need to be ranked for this to work.
1328 $start = $self->start unless $start->has_rank;
1329 $end = $self->end unless $end->has_rank;
1331 unless( $start eq $self->start ) {
1332 $rankoffset = $start->rank - 1;
1337 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1338 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1339 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1340 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1342 # Create the document and root node
1343 require XML::LibXML;
1344 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1345 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1346 $graphml->setDocumentElement( $root );
1347 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1348 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1350 # List of attribute types to save on our objects and their corresponding
1355 'Bool' => 'boolean',
1356 'ReadingID' => 'string',
1357 'RelationshipType' => 'string',
1358 'RelationshipScope' => 'string',
1359 'Ternary' => 'string',
1362 # Add the data keys for the graph. Include an extra key 'version' for the
1363 # GraphML output version.
1364 my %graph_data_keys;
1366 my %graph_attributes = ( 'version' => 'string' );
1367 # Graph attributes include those of Tradition and those of Collation.
1369 # TODO Use meta introspection method from duplicate_reading to do this
1370 # instead of naming custom keys.
1371 my $tmeta = $self->tradition->meta;
1372 my $cmeta = $self->meta;
1373 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1374 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1375 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1376 next if $attr->name =~ /^_/;
1377 next unless $save_types{$attr->type_constraint->name};
1378 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1380 # Extra custom keys for complex objects that should be saved in some form.
1381 # The subroutine should return a string, or undef/empty.
1382 if( $tmeta->has_method('stemmata') ) {
1383 $graph_attributes{'stemmata'} = sub {
1385 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1386 $self->tradition->stemmata;
1387 join( "\n", @stemstrs );
1391 if( $tmeta->has_method('user') ) {
1392 $graph_attributes{'user'} = sub {
1393 $self->tradition->user ? $self->tradition->user->id : undef
1397 foreach my $datum ( sort keys %graph_attributes ) {
1398 $graph_data_keys{$datum} = 'dg'.$gdi++;
1399 my $key = $root->addNewChild( $graphml_ns, 'key' );
1400 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1401 : $graph_attributes{$datum};
1402 $key->setAttribute( 'attr.name', $datum );
1403 $key->setAttribute( 'attr.type', $dtype );
1404 $key->setAttribute( 'for', 'graph' );
1405 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1408 # Add the data keys for reading nodes
1409 my %reading_attributes;
1410 my $rmeta = Text::Tradition::Collation::Reading->meta;
1411 foreach my $attr( $rmeta->get_all_attributes ) {
1412 next if $attr->name =~ /^_/;
1413 next unless $save_types{$attr->type_constraint->name};
1414 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1416 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1417 # Extra custom key for the reading morphology
1418 $reading_attributes{'lexemes'} = 'string';
1423 foreach my $datum ( sort keys %reading_attributes ) {
1424 $node_data_keys{$datum} = 'dn'.$ndi++;
1425 my $key = $root->addNewChild( $graphml_ns, 'key' );
1426 $key->setAttribute( 'attr.name', $datum );
1427 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1428 $key->setAttribute( 'for', 'node' );
1429 $key->setAttribute( 'id', $node_data_keys{$datum} );
1432 # Add the data keys for edges, that is, paths and relationships. Path
1433 # data does not come from a Moose class so is here manually.
1436 my %edge_attributes = (
1437 witness => 'string', # ID/label for a path
1438 extra => 'boolean', # Path key
1440 my @path_attributes = keys %edge_attributes; # track our manual additions
1441 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1442 foreach my $attr( $pmeta->get_all_attributes ) {
1443 next if $attr->name =~ /^_/;
1444 next unless $save_types{$attr->type_constraint->name};
1445 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1447 foreach my $datum ( sort keys %edge_attributes ) {
1448 $edge_data_keys{$datum} = 'de'.$edi++;
1449 my $key = $root->addNewChild( $graphml_ns, 'key' );
1450 $key->setAttribute( 'attr.name', $datum );
1451 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1452 $key->setAttribute( 'for', 'edge' );
1453 $key->setAttribute( 'id', $edge_data_keys{$datum} );
1456 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1457 my $xmlidname = $self->tradition->name;
1458 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1459 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1460 $xmlidname = '_'.$xmlidname;
1462 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1463 $sgraph->setAttribute( 'edgedefault', 'directed' );
1464 $sgraph->setAttribute( 'id', $xmlidname );
1465 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1466 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1467 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1468 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1469 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1471 # Tradition/collation attribute data
1472 foreach my $datum ( keys %graph_attributes ) {
1474 if( $datum eq 'version' ) {
1476 } elsif( ref( $graph_attributes{$datum} ) ) {
1477 my $sub = $graph_attributes{$datum};
1479 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1480 $value = $self->tradition->$datum;
1482 $value = $self->$datum;
1484 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1489 # Add our readings to the graph
1490 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1491 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1492 ( $n->rank < $start->rank || $n->rank > $end->rank );
1493 $use_readings{$n->id} = 1;
1494 # Add to the main graph
1495 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1496 my $node_xmlid = 'n' . $node_ctr++;
1497 $node_hash{ $n->id } = $node_xmlid;
1498 $node_el->setAttribute( 'id', $node_xmlid );
1499 foreach my $d ( keys %reading_attributes ) {
1501 # Custom serialization
1502 if( $d eq 'lexemes' ) {
1503 # If nval is a true value, we have lexemes so we need to
1504 # serialize them. Otherwise set nval to undef so that the
1505 # key is excluded from this reading.
1506 $nval = $nval ? $n->_serialize_lexemes : undef;
1507 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1510 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1511 # Adjust the ranks within the subgraph.
1512 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1513 : $nval - $rankoffset;
1515 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1520 # Add the path edges to the sequence graph
1522 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1523 # We add an edge in the graphml for every witness in $e.
1524 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1525 my @edge_wits = sort $self->path_witnesses( $e );
1526 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1527 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1528 # Skip any path from start to end; that witness is not in the subgraph.
1529 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1530 foreach my $wit ( @edge_wits ) {
1531 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1532 $node_hash{ $e->[0] },
1533 $node_hash{ $e->[1] } );
1534 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1535 $edge_el->setAttribute( 'source', $from );
1536 $edge_el->setAttribute( 'target', $to );
1537 $edge_el->setAttribute( 'id', $id );
1539 # It's a witness path, so add the witness
1541 my $key = $edge_data_keys{'witness'};
1542 # Is this an ante-corr witness?
1543 my $aclabel = $self->ac_label;
1544 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1545 # Keep the base witness
1547 # ...and record that this is an 'extra' reading path
1548 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1550 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1554 # Report the actual number of nodes and edges that went in
1555 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1556 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1558 # Add the relationship graph to the XML
1559 map { delete $edge_data_keys{$_} } @path_attributes;
1560 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1561 $node_data_keys{'id'}, \%edge_data_keys );
1563 # Save and return the thing
1564 my $result = decode_utf8( $graphml->toString(1) );
1568 sub _add_graphml_data {
1569 my( $el, $key, $value ) = @_;
1570 return unless defined $value;
1571 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1572 $data_el->setAttribute( 'key', $key );
1573 $data_el->appendText( $value );
1578 Returns a CSV alignment table representation of the collation graph, one
1579 row per witness (or witness uncorrected.)
1583 Returns a tab-separated alignment table representation of the collation graph,
1584 one row per witness (or witness uncorrected.)
1588 use Text::Tradition;
1596 my $datafile = 't/data/florilegium_tei_ps.xml';
1597 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1599 'file' => $datafile,
1602 my $c = $tradition->collation;
1603 # Export the thing to CSV
1604 my $csvstr = $c->as_csv();
1606 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1607 my @lines = split(/\n/, $csvstr );
1608 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1609 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1610 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1611 ok( @q_ac, "Found a layered witness" );
1613 my $t2 = Text::Tradition->new( input => 'Tabular',
1617 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1618 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1620 # Now do it with TSV
1621 my $tsvstr = $c->as_tsv();
1622 my $t3 = Text::Tradition->new( input => 'Tabular',
1626 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1627 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1629 my $table = $c->alignment_table;
1630 my $noaccsv = $c->as_csv({ noac => 1 });
1631 my @noaclines = split(/\n/, $noaccsv );
1632 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1633 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1634 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1636 my $safecsv = $c->as_csv({ safe_ac => 1});
1637 my @safelines = split(/\n/, $safecsv );
1638 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1639 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1640 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1641 ok( @q_ac, "Found a sanitized layered witness" );
1642 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1649 my( $self, $opts ) = @_;
1650 my $table = $self->alignment_table( $opts );
1651 my $csv_options = { binary => 1, quote_null => 0 };
1652 $csv_options->{'sep_char'} = $opts->{fieldsep};
1653 if( $opts->{fieldsep} eq "\t" ) {
1654 # If it is really tab separated, nothing is an escape char.
1655 $csv_options->{'quote_char'} = undef;
1656 $csv_options->{'escape_char'} = '';
1658 my $csv = Text::CSV->new( $csv_options );
1660 # Make the header row
1661 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1662 push( @result, $csv->string );
1663 # Make the rest of the rows
1664 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1665 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1666 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1667 $csv->combine( @row );
1668 push( @result, $csv->string );
1670 return join( "\n", @result );
1675 my $opts = shift || {};
1676 $opts->{fieldsep} = ',';
1677 return $self->_tabular( $opts );
1682 my $opts = shift || {};
1683 $opts->{fieldsep} = "\t";
1684 return $self->_tabular( $opts );
1687 =head2 alignment_table
1689 Return a reference to an alignment table, in a slightly enhanced CollateX
1690 format which looks like this:
1692 $table = { alignment => [ { witness => "SIGIL",
1693 tokens => [ { t => "TEXT" }, ... ] },
1694 { witness => "SIG2",
1695 tokens => [ { t => "TEXT" }, ... ] },
1697 length => TEXTLEN };
1701 sub alignment_table {
1702 my( $self, $opts ) = @_;
1703 if( $self->has_cached_table ) {
1704 return $self->cached_table
1705 unless $opts->{noac} || $opts->{safe_ac};
1708 # Make sure we can do this
1709 throw( "Need a linear graph in order to make an alignment table" )
1710 unless $self->linear;
1711 $self->calculate_ranks()
1712 unless $self->_graphcalc_done && $self->end->has_rank;
1714 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1715 my @all_pos = ( 1 .. $self->end->rank - 1 );
1716 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1717 # say STDERR "Making witness row(s) for " . $wit->sigil;
1718 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1719 my @row = _make_witness_row( \@wit_path, \@all_pos );
1720 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1721 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1722 push( @{$table->{'alignment'}}, $witobj );
1723 if( $wit->is_layered && !$opts->{noac} ) {
1724 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1725 $wit->sigil.$self->ac_label );
1726 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1727 my $witlabel = $opts->{safe_ac}
1728 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1729 my $witacobj = { 'witness' => $witlabel,
1730 'tokens' => \@ac_row };
1731 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1732 push( @{$table->{'alignment'}}, $witacobj );
1735 unless( $opts->{noac} || $opts->{safe_ac} ) {
1736 $self->cached_table( $table );
1741 sub _make_witness_row {
1742 my( $path, $positions ) = @_;
1744 map { $char_hash{$_} = undef } @$positions;
1746 foreach my $rdg ( @$path ) {
1747 say STDERR "rank " . $rdg->rank if $debug;
1748 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1749 $char_hash{$rdg->rank} = { 't' => $rdg };
1751 my @row = map { $char_hash{$_} } @$positions;
1752 # Fill in lacuna markers for undef spots in the row
1753 my $last_el = shift @row;
1754 my @filled_row = ( $last_el );
1755 foreach my $el ( @row ) {
1756 # If we are using node reference, make the lacuna node appear many times
1757 # in the table. If not, use the lacuna tag.
1758 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1761 push( @filled_row, $el );
1768 =head1 NAVIGATION METHODS
1770 =head2 reading_sequence( $first, $last, $sigil, $backup )
1772 Returns the ordered list of readings, starting with $first and ending
1773 with $last, for the witness given in $sigil. If a $backup sigil is
1774 specified (e.g. when walking a layered witness), it will be used wherever
1775 no $sigil path exists. If there is a base text reading, that will be
1776 used wherever no path exists for $sigil or $backup.
1780 # TODO Think about returning some lazy-eval iterator.
1781 # TODO Get rid of backup; we should know from what witness is whether we need it.
1783 sub reading_sequence {
1784 my( $self, $start, $end, $witness ) = @_;
1786 $witness = $self->baselabel unless $witness;
1787 my @readings = ( $start );
1790 while( $n && $n->id ne $end->id ) {
1791 if( exists( $seen{$n->id} ) ) {
1792 throw( "Detected loop for $witness at " . $n->id );
1796 my $next = $self->next_reading( $n, $witness );
1798 throw( "Did not find any path for $witness from reading " . $n->id );
1800 push( @readings, $next );
1803 # Check that the last reading is our end reading.
1804 my $last = $readings[$#readings];
1805 throw( "Last reading found from " . $start->text .
1806 " for witness $witness is not the end!" ) # TODO do we get this far?
1807 unless $last->id eq $end->id;
1812 =head2 next_reading( $reading, $sigil );
1814 Returns the reading that follows the given reading along the given witness
1820 # Return the successor via the corresponding path.
1822 my $answer = $self->_find_linked_reading( 'next', @_ );
1823 return undef unless $answer;
1824 return $self->reading( $answer );
1827 =head2 prior_reading( $reading, $sigil )
1829 Returns the reading that precedes the given reading along the given witness
1835 # Return the predecessor via the corresponding path.
1837 my $answer = $self->_find_linked_reading( 'prior', @_ );
1838 return $self->reading( $answer );
1841 sub _find_linked_reading {
1842 my( $self, $direction, $node, $path ) = @_;
1844 # Get a backup if we are dealing with a layered witness
1846 my $aclabel = $self->ac_label;
1847 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1851 my @linked_paths = $direction eq 'next'
1852 ? $self->sequence->edges_from( $node )
1853 : $self->sequence->edges_to( $node );
1854 return undef unless scalar( @linked_paths );
1856 # We have to find the linked path that contains all of the
1857 # witnesses supplied in $path.
1858 my( @path_wits, @alt_path_wits );
1859 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1860 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1863 foreach my $le ( @linked_paths ) {
1864 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1867 my @le_wits = sort $self->path_witnesses( $le );
1868 if( _is_within( \@path_wits, \@le_wits ) ) {
1869 # This is the right path.
1870 return $direction eq 'next' ? $le->[1] : $le->[0];
1871 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1875 # Got this far? Return the alternate path if it exists.
1876 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1879 # Got this far? Return the base path if it exists.
1880 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1883 # Got this far? We have no appropriate path.
1884 warn "Could not find $direction node from " . $node->id
1885 . " along path $path";
1891 my( $set1, $set2 ) = @_;
1892 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1893 foreach my $el ( @$set1 ) {
1894 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1899 # Return the string that joins together a list of witnesses for
1900 # display on a single path.
1901 sub _witnesses_of_label {
1902 my( $self, $label ) = @_;
1903 my $regex = $self->wit_list_separator;
1904 my @answer = split( /\Q$regex\E/, $label );
1908 =head2 common_readings
1910 Returns the list of common readings in the graph (i.e. those readings that are
1911 shared by all non-lacunose witnesses.)
1915 sub common_readings {
1917 my @common = grep { $_->is_common } $self->readings;
1921 =head2 path_text( $sigil, [, $start, $end ] )
1923 Returns the text of a witness (plus its backup, if we are using a layer)
1924 as stored in the collation. The text is returned as a string, where the
1925 individual readings are joined with spaces and the meta-readings (e.g.
1926 lacunae) are omitted. Optional specification of $start and $end allows
1927 the generation of a subset of the witness text.
1932 my( $self, $wit, $start, $end ) = @_;
1933 $start = $self->start unless $start;
1934 $end = $self->end unless $end;
1935 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1938 foreach my $r ( @path ) {
1939 unless ( $r->join_prior || !$last || $last->join_next ) {
1942 $pathtext .= $r->text;
1948 =head1 INITIALIZATION METHODS
1950 These are mostly for use by parsers.
1952 =head2 make_witness_path( $witness )
1954 Link the array of readings contained in $witness->path (and in
1955 $witness->uncorrected_path if it exists) into collation paths.
1956 Clear out the arrays when finished.
1958 =head2 make_witness_paths
1960 Call make_witness_path for all witnesses in the tradition.
1964 # For use when a collation is constructed from a base text and an apparatus.
1965 # We have the sequences of readings and just need to add path edges.
1966 # When we are done, clear out the witness path attributes, as they are no
1968 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1970 sub make_witness_paths {
1972 foreach my $wit ( $self->tradition->witnesses ) {
1973 # say STDERR "Making path for " . $wit->sigil;
1974 $self->make_witness_path( $wit );
1978 sub make_witness_path {
1979 my( $self, $wit ) = @_;
1980 my @chain = @{$wit->path};
1981 my $sig = $wit->sigil;
1982 # Add start and end if necessary
1983 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1984 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1985 foreach my $idx ( 0 .. $#chain-1 ) {
1986 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1988 if( $wit->is_layered ) {
1989 @chain = @{$wit->uncorrected_path};
1990 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1991 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1992 foreach my $idx( 0 .. $#chain-1 ) {
1993 my $source = $chain[$idx];
1994 my $target = $chain[$idx+1];
1995 $self->add_path( $source, $target, $sig.$self->ac_label )
1996 unless $self->has_path( $source, $target, $sig );
2000 $wit->clear_uncorrected_path;
2003 =head2 calculate_ranks
2005 Calculate the reading ranks (that is, their aligned positions relative
2006 to each other) for the graph. This can only be called on linear collations.
2010 use Text::Tradition;
2012 my $cxfile = 't/data/Collatex-16.xml';
2013 my $t = Text::Tradition->new(
2015 'input' => 'CollateX',
2018 my $c = $t->collation;
2021 my $table = $c->alignment_table;
2022 ok( $c->has_cached_table, "Alignment table was cached" );
2023 is( $c->alignment_table, $table, "Cached table returned upon second call" );
2024 $c->calculate_ranks;
2025 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2026 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2027 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2028 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2029 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2035 sub calculate_ranks {
2037 # Save the existing ranks, in case we need to invalidate the cached SVG.
2038 throw( "Cannot calculate ranks on a non-linear graph" )
2039 unless $self->linear;
2041 map { $existing_ranks{$_} = $_->rank } $self->readings;
2043 # Do the rankings based on the relationship equivalence graph, starting
2044 # with the start node.
2045 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2047 # Transfer our rankings from the topological graph to the real one.
2048 foreach my $r ( $self->readings ) {
2049 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2050 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2052 # Die. Find the last rank we calculated.
2053 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2054 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2056 my $last = pop @all_defined;
2057 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2060 # Do we need to invalidate the cached data?
2061 if( $self->has_cached_table ) {
2062 foreach my $r ( $self->readings ) {
2063 next if defined( $existing_ranks{$r} )
2064 && $existing_ranks{$r} == $r->rank;
2065 # Something has changed, so clear the cache
2066 $self->_clear_cache;
2067 # ...and recalculate the common readings.
2068 $self->calculate_common_readings();
2072 # The graph calculation information is now up to date.
2073 $self->_graphcalc_done(1);
2078 $self->wipe_table if $self->has_cached_table;
2082 =head2 flatten_ranks
2084 A convenience method for parsing collation data. Searches the graph for readings
2085 with the same text at the same rank, and merges any that are found.
2090 my ( $self, %args ) = shift;
2091 my %unique_rank_rdg;
2093 foreach my $p ( $self->identical_readings( %args ) ) {
2094 # say STDERR "Combining readings at same rank: @$p";
2096 $self->merge_readings( @$p );
2097 # TODO see if this now makes a common point.
2099 # If we merged readings, the ranks are still fine but the alignment
2100 # table is wrong. Wipe it.
2101 $self->wipe_table() if $changed;
2104 =head2 identical_readings
2105 =head2 identical_readings( start => $startnode, end => $endnode )
2106 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2108 Goes through the graph identifying all pairs of readings that appear to be
2109 identical, and therefore able to be merged into a single reading. Returns the
2110 relevant identical pairs. Can be restricted to run over only a part of the
2111 graph, specified either by node or by rank.
2115 sub identical_readings {
2116 my ( $self, %args ) = @_;
2117 # Find where we should start and end.
2118 my $startrank = $args{startrank} || 0;
2119 if( $args{start} ) {
2120 throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2121 && $self->reading( $args{start} )->has_rank;
2122 $startrank = $self->reading( $args{start} )->rank;
2124 my $endrank = $args{endrank} || $self->end->rank;
2126 throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2127 && $self->reading( $args{end} )->has_rank;
2128 $endrank = $self->reading( $args{end} )->rank;
2131 # Make sure the ranks are correct.
2132 unless( $self->_graphcalc_done ) {
2133 $self->calculate_ranks;
2135 # Go through the readings looking for duplicates.
2136 my %unique_rank_rdg;
2138 foreach my $rdg ( $self->readings ) {
2139 next unless $rdg->has_rank;
2140 my $rk = $rdg->rank;
2141 next if $rk > $endrank || $rk < $startrank;
2142 my $key = $rk . "||" . $rdg->text;
2143 if( exists $unique_rank_rdg{$key} ) {
2144 # Make sure they don't have different grammatical forms
2145 my $ur = $unique_rank_rdg{$key};
2146 if( $rdg->is_identical( $ur ) ) {
2147 push( @pairs, [ $ur, $rdg ] );
2150 $unique_rank_rdg{$key} = $rdg;
2158 =head2 calculate_common_readings
2160 Goes through the graph identifying the readings that appear in every witness
2161 (apart from those with lacunae at that spot.) Marks them as common and returns
2166 use Text::Tradition;
2168 my $cxfile = 't/data/Collatex-16.xml';
2169 my $t = Text::Tradition->new(
2171 'input' => 'CollateX',
2174 my $c = $t->collation;
2176 my @common = $c->calculate_common_readings();
2177 is( scalar @common, 8, "Found correct number of common readings" );
2178 my @marked = sort $c->common_readings();
2179 is( scalar @common, 8, "All common readings got marked as such" );
2180 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2181 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2187 sub calculate_common_readings {
2190 map { $_->is_common( 0 ) } $self->readings;
2191 # Implicitly calls calculate_ranks
2192 my $table = $self->alignment_table;
2193 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2194 my @row = map { $_->{'tokens'}->[$idx]
2195 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2196 @{$table->{'alignment'}};
2198 foreach my $r ( @row ) {
2200 $hash{$r->id} = $r unless $r->is_meta;
2202 $hash{'UNDEF'} = $r;
2205 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2206 my( $r ) = values %hash;
2208 push( @common, $r );
2214 =head2 text_from_paths
2216 Calculate the text array for all witnesses from the path, for later consistency
2217 checking. Only to be used if there is no non-graph-based way to know the
2222 sub text_from_paths {
2224 foreach my $wit ( $self->tradition->witnesses ) {
2225 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2227 foreach my $r ( @readings ) {
2228 next if $r->is_meta;
2229 push( @text, $r->text );
2231 $wit->text( \@text );
2232 if( $wit->is_layered ) {
2233 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2234 $wit->sigil.$self->ac_label );
2236 foreach my $r ( @ucrdgs ) {
2237 next if $r->is_meta;
2238 push( @uctext, $r->text );
2240 $wit->layertext( \@uctext );
2245 =head1 UTILITY FUNCTIONS
2247 =head2 common_predecessor( $reading_a, $reading_b )
2249 Find the last reading that occurs in sequence before both the given readings.
2250 At the very least this should be $self->start.
2252 =head2 common_successor( $reading_a, $reading_b )
2254 Find the first reading that occurs in sequence after both the given readings.
2255 At the very least this should be $self->end.
2259 use Text::Tradition;
2261 my $cxfile = 't/data/Collatex-16.xml';
2262 my $t = Text::Tradition->new(
2264 'input' => 'CollateX',
2267 my $c = $t->collation;
2269 is( $c->common_predecessor( 'n24', 'n23' )->id,
2270 'n20', "Found correct common predecessor" );
2271 is( $c->common_successor( 'n24', 'n23' )->id,
2272 '__END__', "Found correct common successor" );
2274 is( $c->common_predecessor( 'n19', 'n17' )->id,
2275 'n16', "Found correct common predecessor for readings on same path" );
2276 is( $c->common_successor( 'n21', 'n10' )->id,
2277 '__END__', "Found correct common successor for readings on same path" );
2283 ## Return the closest reading that is a predecessor of both the given readings.
2284 sub common_predecessor {
2286 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2287 return $self->_common_in_path( $r1, $r2, 'predecessors' );
2290 sub common_successor {
2292 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2293 return $self->_common_in_path( $r1, $r2, 'successors' );
2297 # TODO think about how to do this without ranks...
2298 sub _common_in_path {
2299 my( $self, $r1, $r2, $dir ) = @_;
2300 my $iter = $self->end->rank;
2302 my @last_r1 = ( $r1 );
2303 my @last_r2 = ( $r2 );
2304 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2306 # say STDERR "Finding common $dir for $r1, $r2";
2307 while( !@candidates ) {
2308 last unless $iter--; # Avoid looping infinitely
2309 # Iterate separately down the graph from r1 and r2
2310 my( @new_lc1, @new_lc2 );
2311 foreach my $lc ( @last_r1 ) {
2312 foreach my $p ( $lc->$dir ) {
2313 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2314 # say STDERR "Path candidate $p from $lc";
2315 push( @candidates, $p );
2316 } elsif( !$all_seen{$p->id} ) {
2317 $all_seen{$p->id} = 'r1';
2318 push( @new_lc1, $p );
2322 foreach my $lc ( @last_r2 ) {
2323 foreach my $p ( $lc->$dir ) {
2324 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2325 # say STDERR "Path candidate $p from $lc";
2326 push( @candidates, $p );
2327 } elsif( !$all_seen{$p->id} ) {
2328 $all_seen{$p->id} = 'r2';
2329 push( @new_lc2, $p );
2333 @last_r1 = @new_lc1;
2334 @last_r2 = @new_lc2;
2336 my @answer = sort { $a->rank <=> $b->rank } @candidates;
2337 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2341 Text::Tradition::Error->throw(
2342 'ident' => 'Collation error',
2348 __PACKAGE__->meta->make_immutable;
2354 =item * Rework XML serialization in a more modular way
2360 This package is free software and is provided "as is" without express
2361 or implied warranty. You can redistribute it and/or modify it under
2362 the same terms as Perl itself.
2366 Tara L Andrews E<lt>aurum@cpan.orgE<gt>