1 package Text::Tradition::Collation;
4 use Encode qw( decode_utf8 );
8 use IPC::Run qw( run binary );
10 use Text::Tradition::Collation::Data;
11 use Text::Tradition::Collation::Reading;
12 use Text::Tradition::Collation::RelationshipStore;
13 use Text::Tradition::Error;
14 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
16 use XML::LibXML::XPathContext;
20 isa => 'Text::Tradition::Collation::Data',
59 isa => 'Text::Tradition',
60 writer => '_set_tradition',
68 Text::Tradition::Collation - a software model for a text collation
73 my $t = Text::Tradition->new(
74 'name' => 'this is a text',
76 'file' => '/path/to/tei_parallel_seg_file.xml' );
78 my $c = $t->collation;
79 my @readings = $c->readings;
80 my @paths = $c->paths;
81 my @relationships = $c->relationships;
83 my $svg_variant_graph = $t->collation->as_svg();
87 Text::Tradition is a library for representation and analysis of collated
88 texts, particularly medieval ones. The Collation is the central feature of
89 a Tradition, where the text, its sequence of readings, and its relationships
90 between readings are actually kept.
96 The constructor. Takes a hash or hashref of the following arguments:
100 =item * tradition - The Text::Tradition object to which the collation
103 =item * linear - Whether the collation should be linear; that is, whether
104 transposed readings should be treated as two linked readings rather than one,
105 and therefore whether the collation graph is acyclic. Defaults to true.
107 =item * baselabel - The default label for the path taken by a base text
108 (if any). Defaults to 'base text'.
110 =item * wit_list_separator - The string to join a list of witnesses for
111 purposes of making labels in display graphs. Defaults to ', '.
113 =item * ac_label - The extra label to tack onto a witness sigil when
114 representing another layer of path for the given witness - that is, when
115 a text has more than one possible reading due to scribal corrections or
116 the like. Defaults to ' (a.c.)'.
118 =item * wordsep - The string used to separate words in the original text.
129 =head2 wit_list_separator
137 Simple accessors for collation attributes.
141 The meta-reading at the start of every witness path.
145 The meta-reading at the end of every witness path.
149 Returns all Reading objects in the graph.
151 =head2 reading( $id )
153 Returns the Reading object corresponding to the given ID.
155 =head2 add_reading( $reading_args )
157 Adds a new reading object to the collation.
158 See L<Text::Tradition::Collation::Reading> for the available arguments.
160 =head2 del_reading( $object_or_id )
162 Removes the given reading from the collation, implicitly removing its
163 paths and relationships.
165 =head2 has_reading( $id )
167 Predicate to see whether a given reading ID is in the graph.
169 =head2 reading_witnesses( $object_or_id )
171 Returns a list of sigils whose witnesses contain the reading.
175 Returns all reading paths within the document - that is, all edges in the
176 collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
178 =head2 add_path( $source, $target, $sigil )
180 Links the given readings in the collation in sequence, under the given witness
181 sigil. The readings may be specified by object or ID.
183 =head2 del_path( $source, $target, $sigil )
185 Links the given readings in the collation in sequence, under the given witness
186 sigil. The readings may be specified by object or ID.
188 =head2 has_path( $source, $target );
190 Returns true if the two readings are linked in sequence in any witness.
191 The readings may be specified by object or ID.
195 Returns all Relationship objects in the collation.
197 =head2 add_relationship( $reading, $other_reading, $options )
199 Adds a new relationship of the type given in $options between the two readings,
200 which may be specified by object or ID. Returns a value of ( $status, @vectors)
201 where $status is true on success, and @vectors is a list of relationship edges
202 that were ultimately added.
203 See L<Text::Tradition::Collation::Relationship> for the available options.
208 my ( $class, @args ) = @_;
209 my %args = @args == 1 ? %{ $args[0] } : @args;
210 # TODO determine these from the Moose::Meta object
211 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
212 linear wordsep start end cached_table _graphcalc_done);
214 for my $attr (@delegate_attrs) {
215 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
217 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
223 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
224 $self->_set_start( $self->add_reading(
225 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
226 $self->_set_end( $self->add_reading(
227 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
230 =head2 register_relationship_type( %relationship_definition )
232 Add a relationship type definition to this collation. The argument can be either a
233 hash or a hashref, defining the properties of the relationship. For relationship types
234 and their properties, see L<Text::Tradition::Collation::RelationshipType>.
236 =head2 get_relationship_type( $relationship_name )
238 Retrieve the RelationshipType object for the relationship with the given name.
242 sub register_relationship_type {
244 my %args = @_ == 1 ? %{$_[0]} : @_;
245 if( $self->relations->has_type( $args{name} ) ) {
246 throw( 'Relationship type ' . $args{name} . ' already registered' );
248 $self->relations->add_type( %args );
251 sub get_relationship_type {
252 my( $self, $name ) = @_;
253 return $self->relations->has_type( $name )
254 ? $self->relations->type( $name ) : undef;
257 ### Reading construct/destruct functions
260 my( $self, $reading ) = @_;
261 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
262 my %args = %$reading;
263 if( $args{'init'} ) {
264 # If we are initializing an empty collation, don't assume that we
265 # have set a tradition.
266 delete $args{'init'};
267 } elsif( $self->tradition->can('language') && $self->tradition->has_language
268 && !exists $args{'language'} ) {
269 $args{'language'} = $self->tradition->language;
271 $reading = Text::Tradition::Collation::Reading->new(
272 'collation' => $self,
275 # First check to see if a reading with this ID exists.
276 if( $self->reading( $reading->id ) ) {
277 throw( "Collation already has a reading with id " . $reading->id );
279 $self->_graphcalc_done(0);
280 $self->_add_reading( $reading->id => $reading );
281 # Once the reading has been added, put it in both graphs.
282 $self->sequence->add_vertex( $reading->id );
283 $self->relations->add_reading( $reading->id );
287 around del_reading => sub {
292 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
295 # Remove the reading from the graphs.
296 $self->_graphcalc_done(0);
297 $self->_clear_cache; # Explicitly clear caches to GC the reading
298 $self->sequence->delete_vertex( $arg );
299 $self->relations->delete_reading( $arg );
302 $self->$orig( $arg );
305 =head2 merge_readings( $main, $second, $concatenate, $with_str )
307 Merges the $second reading into the $main one. If $concatenate is true, then
308 the merged node will carry the text of both readings, concatenated with either
309 $with_str (if specified) or a sensible default (the empty string if the
310 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
312 The first two arguments may be either readings or reading IDs.
319 my $cxfile = 't/data/Collatex-16.xml';
320 my $t = Text::Tradition->new(
322 'input' => 'CollateX',
325 my $c = $t->collation;
327 my $rno = scalar $c->readings;
328 # Split n21 ('unto') for testing purposes
329 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
330 my $old_r = $c->reading( 'n21' );
331 $old_r->alter_text( 'to' );
332 $c->del_path( 'n20', 'n21', 'A' );
333 $c->add_path( 'n20', 'n21p0', 'A' );
334 $c->add_path( 'n21p0', 'n21', 'A' );
335 $c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
337 ok( $c->reading( 'n21p0' ), "New reading exists" );
338 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
340 # Combine n3 and n4 ( with his )
341 $c->merge_readings( 'n3', 'n4', 1 );
342 ok( !$c->reading('n4'), "Reading n4 is gone" );
343 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
345 # Collapse n9 and n10 ( rood / root )
346 $c->merge_readings( 'n9', 'n10' );
347 ok( !$c->reading('n10'), "Reading n10 is gone" );
348 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
350 # Try to combine n21 and n21p0. This should break.
351 my $remaining = $c->reading('n21');
352 $remaining ||= $c->reading('n22'); # one of these should still exist
354 $c->merge_readings( 'n21p0', $remaining, 1 );
355 ok( 0, "Bad reading merge changed the graph" );
356 } catch( Text::Tradition::Error $e ) {
357 like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
359 ok( 0, "Unexpected error on bad reading merge: $@" );
363 $c->calculate_ranks();
364 ok( 1, "Graph is still evidently whole" );
365 } catch( Text::Tradition::Error $e ) {
366 ok( 0, "Caught a rank exception: " . $e->message );
377 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
378 my $mergemeta = $kept_obj->is_meta;
379 throw( "Cannot merge meta and non-meta reading" )
380 unless ( $mergemeta && $del_obj->is_meta )
381 || ( !$mergemeta && !$del_obj->is_meta );
383 throw( "Cannot merge with start or end node" )
384 if( $kept_obj eq $self->start || $kept_obj eq $self->end
385 || $del_obj eq $self->start || $del_obj eq $self->end );
386 throw( "Cannot combine text of meta readings" ) if $combine;
388 # We can only merge readings in a linear graph if:
389 # - they are contiguous with only one edge between them, OR
390 # - they are at equivalent ranks in the graph.
391 if( $self->linear ) {
392 my @delpred = $del_obj->predecessors;
393 my @keptsuc = $kept_obj->successors;
394 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
395 && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
396 my( $is_ok, $msg ) = $self->relations->relationship_valid(
397 $kept_obj, $del_obj, 'collated' );
399 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
404 # We only need the IDs for adding paths to the graph, not the reading
405 # objects themselves.
406 my $kept = $kept_obj->id;
407 my $deleted = $del_obj->id;
408 $self->_graphcalc_done(0);
410 # The kept reading should inherit the paths and the relationships
411 # of the deleted reading.
412 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
413 my @vector = ( $kept );
414 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
415 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
416 next if $vector[0] eq $vector[1]; # Don't add a self loop
417 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
418 $self->sequence->add_edge( @vector );
419 my $fwits = $self->sequence->get_edge_attributes( @vector );
420 @wits{keys %$fwits} = values %$fwits;
421 $self->sequence->set_edge_attributes( @vector, \%wits );
423 $self->relations->merge_readings( $kept, $deleted, $combine );
425 # Do the deletion deed.
427 # Combine the text of the readings
428 my $joinstr = $combine_char;
429 unless( defined $joinstr ) {
430 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
431 $joinstr = $self->wordsep unless defined $joinstr;
433 $kept_obj->_combine( $del_obj, $joinstr );
435 $self->del_reading( $deleted );
438 =head2 merge_related( @relationship_types )
440 Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
442 WARNING: This operation cannot be undone.
454 $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
455 } [qr/Cannot set relationship on a meta reading/],
456 "Got expected relationship drop warning on parse";
458 my $c = $t->collation;
461 map { $rdg_ids{$_} = 1 } $c->readings;
462 $c->merge_related( 'orthographic' );
463 is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
464 "Successfully collapsed orthographic variation" );
465 map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
466 foreach my $rid ( keys %rdg_ids ) {
467 my $exp = $rdg_ids{$rid};
468 is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
469 ( $exp ? "retained" : "removed" ) );
471 ok( $c->linear, "Graph is still linear" );
473 $c->calculate_ranks; # This should succeed
474 ok( 1, "Can still calculate ranks on the new graph" );
476 ok( 0, "Rank calculation on merged graph failed: $@" );
479 # Now add some transpositions
480 $c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
481 $c->merge_related( 'transposition' );
482 is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
483 "Transposed relationship is merged away" );
484 ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
485 ok( !$c->linear, "Graph is no longer linear" );
487 $c->calculate_ranks; # This should fail
488 ok( 0, "Rank calculation happened on nonlinear graph?!" );
489 } catch ( Text::Tradition::Error $e ) {
490 is( $e->message, 'Cannot calculate ranks on a non-linear graph',
491 "Rank calculation on merged graph threw an error" );
498 # TODO: there should be a way to display merged without affecting the underlying data!
503 map { $reltypehash{$_} = 1 } @_;
505 # Set up the filter for finding related readings
507 exists $reltypehash{$_[0]->type};
510 # Go through all readings looking for related ones
511 foreach my $r ( $self->readings ) {
512 next unless $self->reading( "$r" ); # might have been deleted meanwhile
513 while( my @related = $self->related_readings( $r, $filter ) ) {
514 push( @related, $r );
516 scalar $b->witnesses <=> scalar $a->witnesses
518 my $keep = shift @related;
519 foreach my $delr ( @related ) {
521 unless( $self->get_relationship( $keep, $delr )->colocated );
522 $self->merge_readings( $keep, $delr );
528 =head2 compress_readings
530 Where possible in the graph, compresses plain sequences of readings into a
531 single reading. The sequences must consist of readings with no
532 relationships to other readings, with only a single witness path between
533 them and no other witness paths from either that would skip the other. The
534 readings must also not be marked as nonsense or bad grammar.
536 WARNING: This operation cannot be undone.
540 sub compress_readings {
542 # Anywhere in the graph that there is a reading that joins only to a single
543 # successor, and neither of these have any relationships, just join the two
545 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
546 # Now look for readings that can be joined to their successors.
547 next unless $rdg->is_combinable;
549 while( $self->sequence->successors( $rdg ) == 1 ) {
550 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
551 throw( "Infinite loop" ) if $seen{$next->id};
552 $seen{$next->id} = 1;
553 last if $self->sequence->predecessors( $next ) > 1;
554 last unless $next->is_combinable;
555 say "Joining readings $rdg and $next";
556 $self->merge_readings( $rdg, $next, 1 );
559 # Make sure we haven't screwed anything up
560 foreach my $wit ( $self->tradition->witnesses ) {
561 my $pathtext = $self->path_text( $wit->sigil );
562 my $origtext = join( ' ', @{$wit->text} );
563 throw( "Text differs for witness " . $wit->sigil )
564 unless $pathtext eq $origtext;
565 if( $wit->is_layered ) {
566 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
567 $origtext = join( ' ', @{$wit->layertext} );
568 throw( "Ante-corr text differs for witness " . $wit->sigil )
569 unless $pathtext eq $origtext;
573 $self->relations->rebuild_equivalence();
574 $self->calculate_ranks();
577 # Helper function for manipulating the graph.
578 sub _stringify_args {
579 my( $self, $first, $second, @args ) = @_;
581 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
582 $second = $second->id
583 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
584 return( $first, $second, @args );
587 # Helper function for manipulating the graph.
588 sub _objectify_args {
589 my( $self, $first, $second, $arg ) = @_;
590 $first = $self->reading( $first )
591 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
592 $second = $self->reading( $second )
593 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
594 return( $first, $second, $arg );
597 =head2 duplicate_reading( $reading, @witlist )
599 Split the given reading into two, so that the new reading is in the path for
600 the witnesses given in @witlist. If the result is that certain non-colocated
601 relationships (e.g. transpositions) are no longer valid, these will be removed.
602 Returns the newly-created reading.
606 use Test::More::UTF8;
610 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
611 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
612 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
614 my $sc = $st->collation;
616 ok( $sc->reading('n131'), "Tradition has the affected reading" );
617 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
618 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
620 # Detach the erroneously collated reading
621 my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
622 ok( $newr, "New reading was created" );
623 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
624 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
625 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
626 my $csucc = $sc->common_successor( 'n131', 'n131_0' );
627 is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
629 # Check that the bad transposition is gone
630 is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
631 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
633 # The collation should not be fixed
634 my @pairs = $sc->identical_readings();
635 is( scalar @pairs, 0, "Not re-collated yet" );
637 ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
638 @pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
639 is( scalar @pairs, 3, "Found three more identical readings" );
640 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
641 $sc->flatten_ranks();
642 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
644 # Check that we can't "duplicate" a reading with no wits or with all wits
646 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
647 ok( 0, "Reading duplication without witnesses throws an error" );
648 } catch( Text::Tradition::Error $e ) {
649 like( $e->message, qr/Must specify one or more witnesses/,
650 "Reading duplication without witnesses throws the expected error" );
652 ok( 0, "Reading duplication without witnesses threw the wrong error" );
656 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
657 ok( 0, "Reading duplication with all witnesses throws an error" );
658 } catch( Text::Tradition::Error $e ) {
659 like( $e->message, qr/Cannot join all witnesses/,
660 "Reading duplication with all witnesses throws the expected error" );
662 ok( 0, "Reading duplication with all witnesses threw the wrong error" );
666 $sc->calculate_ranks();
667 ok( 1, "Graph is still evidently whole" );
668 } catch( Text::Tradition::Error $e ) {
669 ok( 0, "Caught a rank exception: " . $e->message );
676 sub duplicate_reading {
677 my( $self, $r, @wits ) = @_;
678 # Check that we are not doing anything unwise.
679 throw( "Must specify one or more witnesses for the duplicated reading" )
681 unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
682 $r = $self->reading( $r );
684 throw( "Cannot duplicate a meta-reading" )
686 throw( "Cannot join all witnesses to the new reading" )
687 if scalar( @wits ) == scalar( $r->witnesses );
689 # Get all the reading attributes and duplicate them.
690 my $rmeta = Text::Tradition::Collation::Reading->meta;
692 foreach my $attr( $rmeta->get_all_attributes ) {
693 next if $attr->name =~ /^_/;
694 my $acc = $attr->get_read_method;
695 if( !$acc && $attr->has_applied_traits ) {
696 my $tr = $attr->applied_traits;
697 if( $tr->[0] =~ /::(Array|Hash)$/ ) {
699 my %methods = reverse %{$attr->handles};
700 $acc = $methods{elements};
701 $args{$attr->name} = $which eq 'Array'
702 ? [ $r->$acc ] : { $r->$acc };
705 $args{$attr->name} = $r->$acc if $acc;
708 # By definition the new reading will no longer be common.
709 $args{is_common} = 0;
710 # The new reading also needs its own ID.
711 $args{id} = $self->_generate_dup_id( $r->id );
713 # Try to make the new reading.
714 my $newr = $self->add_reading( \%args );
715 # The old reading is also no longer common.
718 # For each of the witnesses, dissociate from the old reading and
719 # associate with the new.
720 foreach my $wit ( @wits ) {
721 my $prior = $self->prior_reading( $r, $wit );
722 my $next = $self->next_reading( $r, $wit );
723 $self->del_path( $prior, $r, $wit );
724 $self->add_path( $prior, $newr, $wit );
725 $self->del_path( $r, $next, $wit );
726 $self->add_path( $newr, $next, $wit );
729 # If the graph is ranked, we need to look for relationships that are now
730 # invalid (i.e. 'non-colocation' types that might now be colocated) and
731 # remove them. If not, we can skip it.
734 my @deleted_relations;
735 if( $self->end->has_rank ) {
736 # Find the point where we can stop checking
737 $succ = $self->common_successor( $r, $newr );
739 # Hash the existing ranks
740 foreach my $rdg ( $self->readings ) {
741 $rrk{$rdg->id} = $rdg->rank;
743 # Calculate the new ranks
744 $self->calculate_ranks();
746 # Check for invalid non-colocated relationships among changed-rank readings
747 # from where the ranks start changing up to $succ
748 my $lastrank = $succ->rank;
749 foreach my $rdg ( $self->readings ) {
750 next if $rdg->rank > $lastrank;
751 next if $rdg->rank == $rrk{$rdg->id};
752 my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
753 next unless @noncolo;
754 foreach my $nc ( @noncolo ) {
755 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
756 push( @deleted_relations, [ $rdg->id, $nc->id ] );
761 return ( $newr, @deleted_relations );
764 sub _generate_dup_id {
765 my( $self, $rid ) = @_;
770 if( $self->has_reading( $newid ) ) {
783 # We only need the IDs for adding paths to the graph, not the reading
784 # objects themselves.
785 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
787 $self->_graphcalc_done(0);
788 # Connect the readings
789 unless( $self->sequence->has_edge( $source, $target ) ) {
790 $self->sequence->add_edge( $source, $target );
791 $self->relations->add_equivalence_edge( $source, $target );
793 # Note the witness in question
794 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
800 if( ref( $_[0] ) eq 'ARRAY' ) {
807 # We only need the IDs for removing paths from the graph, not the reading
808 # objects themselves.
809 my( $source, $target, $wit ) = $self->_stringify_args( @args );
811 $self->_graphcalc_done(0);
812 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
813 $self->sequence->delete_edge_attribute( $source, $target, $wit );
815 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
816 $self->sequence->delete_edge( $source, $target );
817 $self->relations->delete_equivalence_edge( $source, $target );
822 # Extra graph-alike utility
825 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
826 return undef unless $self->sequence->has_edge( $source, $target );
827 return $self->sequence->has_edge_attribute( $source, $target, $wit );
830 =head2 clear_witness( @sigil_list )
832 Clear the given witnesses out of the collation entirely, removing references
833 to them in paths, and removing readings that belong only to them. Should only
834 be called via $tradition->del_witness.
839 my( $self, @sigils ) = @_;
841 $self->_graphcalc_done(0);
842 # Clear the witness(es) out of the paths
843 foreach my $e ( $self->paths ) {
844 foreach my $sig ( @sigils ) {
845 $self->del_path( $e, $sig );
849 # Clear out the newly unused readings
850 foreach my $r ( $self->readings ) {
851 unless( $self->reading_witnesses( $r ) ) {
852 $self->del_reading( $r );
857 sub add_relationship {
859 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
860 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
861 foreach my $v ( @vectors ) {
862 next unless $self->get_relationship( $v )->colocated;
863 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
864 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
865 $self->_graphcalc_done(0);
873 around qw/ get_relationship del_relationship / => sub {
877 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
880 my @stringargs = $self->_stringify_args( @args );
881 $self->$orig( @stringargs );
884 =head2 reading_witnesses( $reading )
886 Return a list of sigils corresponding to the witnesses in which the reading appears.
890 sub reading_witnesses {
891 my( $self, $reading ) = @_;
892 # We need only check either the incoming or the outgoing edges; I have
893 # arbitrarily chosen "incoming". Thus, special-case the start node.
894 if( $reading eq $self->start ) {
895 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
898 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
899 my $wits = $self->sequence->get_edge_attributes( @$e );
900 @all_witnesses{ keys %$wits } = 1;
902 my $acstr = $self->ac_label;
903 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
904 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
906 return keys %all_witnesses;
909 =head1 OUTPUT METHODS
911 =head2 as_svg( \%options )
913 Returns an SVG string that represents the graph, via as_dot and graphviz.
914 See as_dot for a list of options. Must have GraphViz (dot) installed to run.
919 my( $self, $opts ) = @_;
920 throw( "Need GraphViz installed to output SVG" )
921 unless File::Which::which( 'dot' );
922 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
923 $self->calculate_ranks()
924 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
925 my @cmd = qw/dot -Tsvg/;
927 my $dotfile = File::Temp->new();
929 # $dotfile->unlink_on_destroy(0);
930 binmode $dotfile, ':utf8';
931 print $dotfile $self->as_dot( $opts );
932 push( @cmd, $dotfile->filename );
933 run( \@cmd, ">", binary(), \$svg );
934 $svg = decode_utf8( $svg );
939 =head2 as_dot( \%options )
941 Returns a string that is the collation graph expressed in dot
942 (i.e. GraphViz) format. Options include:
957 my( $self, $opts ) = @_;
958 my $startrank = $opts->{'from'} if $opts;
959 my $endrank = $opts->{'to'} if $opts;
960 my $color_common = $opts->{'color_common'} if $opts;
961 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
962 && $self->end->rank > 100;
963 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
965 # Check the arguments
967 return if $endrank && $startrank > $endrank;
968 return if $startrank > $self->end->rank;
970 if( defined $endrank ) {
971 return if $endrank < 0;
972 $endrank = undef if $endrank == $self->end->rank;
975 my $graph_name = $self->tradition->name;
976 $graph_name =~ s/[^\w\s]//g;
977 $graph_name = join( '_', split( /\s+/, $graph_name ) );
985 'fillcolor' => 'white',
990 'arrowhead' => 'open',
991 'color' => '#000000',
992 'fontcolor' => '#000000',
995 my $dot = sprintf( "digraph %s {\n", $graph_name );
996 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
997 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
999 # Output substitute start/end readings if necessary
1001 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
1004 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
1006 if( $STRAIGHTENHACK ) {
1008 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1009 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
1010 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1012 my %used; # Keep track of the readings that actually appear in the graph
1013 # Sort the readings by rank if we have ranks; this speeds layout.
1014 my @all_readings = $self->end->has_rank
1015 ? sort { $a->rank <=> $b->rank } $self->readings
1017 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
1018 foreach my $reading ( @all_readings ) {
1019 # Only output readings within our rank range.
1020 next if $startrank && $reading->rank < $startrank;
1021 next if $endrank && $reading->rank > $endrank;
1022 $used{$reading->id} = 1;
1023 # Need not output nodes without separate labels
1024 next if $reading->id eq $reading->text;
1026 my $label = $reading->text;
1027 unless( $label =~ /^[[:punct:]]+$/ ) {
1028 $label .= '-' if $reading->join_next;
1029 $label = "-$label" if $reading->join_prior;
1031 $label =~ s/\"/\\\"/g;
1032 $rattrs->{'label'} = $label;
1033 $rattrs->{'id'} = $reading->id;
1034 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
1035 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
1038 # Add the real edges. Need to weight one edge per rank jump, in a
1040 # my $weighted = $self->_add_edge_weights;
1041 my @edges = $self->paths;
1042 my( %substart, %subend );
1043 foreach my $edge ( @edges ) {
1044 # Do we need to output this edge?
1045 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
1046 my $label = $self->_path_display_label( $opts,
1047 $self->path_witnesses( $edge ) );
1048 my $variables = { %edge_attrs, 'label' => $label };
1050 # Account for the rank gap if necessary
1051 my $rank0 = $self->reading( $edge->[0] )->rank
1052 if $self->reading( $edge->[0] )->has_rank;
1053 my $rank1 = $self->reading( $edge->[1] )->rank
1054 if $self->reading( $edge->[1] )->has_rank;
1055 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1056 $variables->{'minlen'} = $rank1 - $rank0;
1059 # Add the calculated edge weights
1060 # if( exists $weighted->{$edge->[0]}
1061 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
1062 # # $variables->{'color'} = 'red';
1063 # $variables->{'weight'} = 3.0;
1066 # EXPERIMENTAL: make edge width reflect no. of witnesses
1067 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1068 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1070 my $varopts = _dot_attr_string( $variables );
1071 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1072 $edge->[0], $edge->[1], $varopts );
1073 } elsif( $used{$edge->[0]} ) {
1074 $subend{$edge->[0]} = $edge->[1];
1075 } elsif( $used{$edge->[1]} ) {
1076 $substart{$edge->[1]} = $edge->[0];
1080 # If we are asked to, add relationship links
1081 if( exists $opts->{show_relations} ) {
1082 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1083 if( $filter eq 'transposition' ) {
1084 $filter =~ qr/^transposition$/;
1087 my @types = sort( map { $_->name } $self->relations->types );
1088 if( exists $opts->{graphcolors} ) {
1089 foreach my $tdx ( 0 .. $#types ) {
1090 $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1093 map { $typecolors{$_} = '#FFA14F' } @types;
1095 foreach my $redge ( $self->relationships ) {
1096 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1097 my $rel = $self->get_relationship( $redge );
1098 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1100 arrowhead => 'none',
1101 color => $typecolors{$rel->type},
1102 constraint => 'false',
1105 unless( exists $opts->{graphcolors} ) {
1106 $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
1108 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1109 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1114 # Add substitute start and end edges if necessary
1115 foreach my $node ( keys %substart ) {
1116 my $witstr = $self->_path_display_label( $opts,
1117 $self->path_witnesses( $substart{$node}, $node ) );
1118 my $variables = { %edge_attrs, 'label' => $witstr };
1119 my $nrdg = $self->reading( $node );
1120 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1121 # Substart is actually one lower than $startrank
1122 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1124 my $varopts = _dot_attr_string( $variables );
1125 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
1127 foreach my $node ( keys %subend ) {
1128 my $witstr = $self->_path_display_label( $opts,
1129 $self->path_witnesses( $node, $subend{$node} ) );
1130 my $variables = { %edge_attrs, 'label' => $witstr };
1131 my $varopts = _dot_attr_string( $variables );
1132 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
1135 if( $STRAIGHTENHACK ) {
1136 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1137 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
1144 sub _dot_attr_string {
1147 foreach my $k ( sort keys %$hash ) {
1148 my $v = $hash->{$k};
1149 push( @attrs, $k.'="'.$v.'"' );
1151 return( '[ ' . join( ', ', @attrs ) . ' ]' );
1154 sub _add_edge_weights {
1156 # Walk the graph from START to END, choosing the successor node with
1157 # the largest number of witness paths each time.
1159 my $curr = $self->start->id;
1160 my $ranked = $self->end->has_rank;
1161 while( $curr ne $self->end->id ) {
1162 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
1163 my @succ = sort { $self->path_witnesses( $curr, $a )
1164 <=> $self->path_witnesses( $curr, $b ) }
1165 $self->sequence->successors( $curr );
1166 my $next = pop @succ;
1167 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
1168 # Try to avoid lacunae in the weighted path.
1170 ( $self->reading( $next )->is_lacuna ||
1171 $nextrank - $rank > 1 ) ){
1174 $weighted->{$curr} = $next;
1180 =head2 path_witnesses( $edge )
1182 Returns the list of sigils whose witnesses are associated with the given edge.
1183 The edge can be passed as either an array or an arrayref of ( $source, $target ).
1187 sub path_witnesses {
1188 my( $self, @edge ) = @_;
1189 # If edge is an arrayref, cope.
1190 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1191 my $e = shift @edge;
1194 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1198 # Helper function. Make a display label for the given witnesses, showing a.c.
1199 # witnesses only where the main witness is not also in the list.
1200 sub _path_display_label {
1204 map { $wits{$_} = 1 } @_;
1206 # If an a.c. wit is listed, remove it if the main wit is also listed.
1207 # Otherwise keep it for explicit listing.
1208 my $aclabel = $self->ac_label;
1210 foreach my $w ( sort keys %wits ) {
1211 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1212 if( exists $wits{$1} ) {
1215 push( @disp_ac, $w );
1220 if( $opts->{'explicit_wits'} ) {
1221 return join( ', ', sort keys %wits );
1223 # See if we are in a majority situation.
1224 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1225 $maj = $maj > 5 ? $maj : 5;
1226 if( scalar keys %wits > $maj ) {
1227 unshift( @disp_ac, 'majority' );
1228 return join( ', ', @disp_ac );
1230 return join( ', ', sort keys %wits );
1235 =head2 readings_at_rank( $rank )
1237 Returns a list of readings at a given rank, taken from the alignment table.
1241 sub readings_at_rank {
1242 my( $self, $rank ) = @_;
1243 my $table = $self->alignment_table;
1244 # Table rank is real rank - 1.
1245 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1247 foreach my $e ( @elements ) {
1248 next unless ref( $e ) eq 'HASH';
1249 next unless exists $e->{'t'};
1250 $readings{$e->{'t'}->id} = $e->{'t'};
1252 return values %readings;
1257 Returns a GraphML representation of the collation. The GraphML will contain
1258 two graphs. The first expresses the attributes of the readings and the witness
1259 paths that link them; the second expresses the relationships that link the
1260 readings. This is the native transfer format for a tradition.
1264 use Text::Tradition;
1270 my $datafile = 't/data/florilegium_tei_ps.xml';
1271 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1273 'file' => $datafile,
1276 ok( $tradition, "Got a tradition object" );
1277 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1278 ok( $tradition->collation, "Tradition has a collation" );
1280 my $c = $tradition->collation;
1281 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1282 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1283 is( scalar $c->relationships, 0, "Collation has all relationships" );
1285 # Add a few relationships
1286 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1287 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1288 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1289 'is_significant' => 'yes' } );
1291 # Now write it to GraphML and parse it again.
1293 my $graphml = $c->as_graphml;
1294 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1295 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1296 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1297 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1298 my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1299 is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
1301 # Now add a stemma, write to GraphML, and look at the output.
1303 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1304 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1305 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1306 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1307 $graphml = $c->as_graphml;
1308 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1315 ## TODO MOVE this to Tradition.pm and modularize it better
1317 my( $self, $options ) = @_;
1318 $self->calculate_ranks unless $self->_graphcalc_done;
1320 my $start = $options->{'from'}
1321 ? $self->reading( $options->{'from'} ) : $self->start;
1322 my $end = $options->{'to'}
1323 ? $self->reading( $options->{'to'} ) : $self->end;
1324 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1325 throw( 'Start node must be before end node' );
1327 # The readings need to be ranked for this to work.
1328 $start = $self->start unless $start->has_rank;
1329 $end = $self->end unless $end->has_rank;
1331 unless( $start eq $self->start ) {
1332 $rankoffset = $start->rank - 1;
1337 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1338 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1339 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1340 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1342 # Create the document and root node
1343 require XML::LibXML;
1344 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1345 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1346 $graphml->setDocumentElement( $root );
1347 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1348 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1350 # List of attribute types to save on our objects and their corresponding
1355 'Bool' => 'boolean',
1356 'ReadingID' => 'string',
1357 'RelationshipType' => 'string',
1358 'RelationshipScope' => 'string',
1359 'Ternary' => 'string',
1362 # Add the data keys for the graph. Include an extra key 'version' for the
1363 # GraphML output version.
1364 my %graph_data_keys;
1366 my %graph_attributes = ( 'version' => 'string' );
1367 # Graph attributes include those of Tradition and those of Collation.
1369 # TODO Use meta introspection method from duplicate_reading to do this
1370 # instead of naming custom keys.
1371 my $tmeta = $self->tradition->meta;
1372 my $cmeta = $self->meta;
1373 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1374 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1375 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1376 next if $attr->name =~ /^_/;
1377 next unless $save_types{$attr->type_constraint->name};
1378 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1380 # Extra custom keys for complex objects that should be saved in some form.
1381 # The subroutine should return a string, or undef/empty.
1382 if( $tmeta->has_method('stemmata') ) {
1383 $graph_attributes{'stemmata'} = sub {
1385 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1386 $self->tradition->stemmata;
1387 join( "\n", @stemstrs );
1391 if( $tmeta->has_method('user') ) {
1392 $graph_attributes{'user'} = sub {
1393 $self->tradition->user ? $self->tradition->user->id : undef
1397 foreach my $datum ( sort keys %graph_attributes ) {
1398 $graph_data_keys{$datum} = 'dg'.$gdi++;
1399 my $key = $root->addNewChild( $graphml_ns, 'key' );
1400 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1401 : $graph_attributes{$datum};
1402 $key->setAttribute( 'attr.name', $datum );
1403 $key->setAttribute( 'attr.type', $dtype );
1404 $key->setAttribute( 'for', 'graph' );
1405 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1408 # Add the data keys for reading nodes
1409 my %reading_attributes;
1410 my $rmeta = Text::Tradition::Collation::Reading->meta;
1411 foreach my $attr( $rmeta->get_all_attributes ) {
1412 next if $attr->name =~ /^_/;
1413 next unless $save_types{$attr->type_constraint->name};
1414 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1416 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1417 # Extra custom key for the reading morphology
1418 $reading_attributes{'lexemes'} = 'string';
1423 foreach my $datum ( sort keys %reading_attributes ) {
1424 $node_data_keys{$datum} = 'dn'.$ndi++;
1425 my $key = $root->addNewChild( $graphml_ns, 'key' );
1426 $key->setAttribute( 'attr.name', $datum );
1427 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1428 $key->setAttribute( 'for', 'node' );
1429 $key->setAttribute( 'id', $node_data_keys{$datum} );
1432 # Add the data keys for edges, that is, paths and relationships. Path
1433 # data does not come from a Moose class so is here manually.
1436 my %edge_attributes = (
1437 witness => 'string', # ID/label for a path
1438 extra => 'boolean', # Path key
1440 my @path_attributes = keys %edge_attributes; # track our manual additions
1441 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1442 foreach my $attr( $pmeta->get_all_attributes ) {
1443 next if $attr->name =~ /^_/;
1444 next unless $save_types{$attr->type_constraint->name};
1445 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1447 foreach my $datum ( sort keys %edge_attributes ) {
1448 $edge_data_keys{$datum} = 'de'.$edi++;
1449 my $key = $root->addNewChild( $graphml_ns, 'key' );
1450 $key->setAttribute( 'attr.name', $datum );
1451 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1452 $key->setAttribute( 'for', 'edge' );
1453 $key->setAttribute( 'id', $edge_data_keys{$datum} );
1456 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1457 my $xmlidname = $self->tradition->name;
1458 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1459 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1460 $xmlidname = '_'.$xmlidname;
1462 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1463 $sgraph->setAttribute( 'edgedefault', 'directed' );
1464 $sgraph->setAttribute( 'id', $xmlidname );
1465 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1466 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1467 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1468 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1469 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1471 # Tradition/collation attribute data
1472 foreach my $datum ( keys %graph_attributes ) {
1474 if( $datum eq 'version' ) {
1476 } elsif( ref( $graph_attributes{$datum} ) ) {
1477 my $sub = $graph_attributes{$datum};
1479 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1480 $value = $self->tradition->$datum;
1482 $value = $self->$datum;
1484 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1489 # Add our readings to the graph
1490 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1491 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1492 ( $n->rank < $start->rank || $n->rank > $end->rank );
1493 $use_readings{$n->id} = 1;
1494 # Add to the main graph
1495 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1496 my $node_xmlid = 'n' . $node_ctr++;
1497 $node_hash{ $n->id } = $node_xmlid;
1498 $node_el->setAttribute( 'id', $node_xmlid );
1499 foreach my $d ( keys %reading_attributes ) {
1501 # Custom serialization
1502 if( $d eq 'lexemes' ) {
1503 # If nval is a true value, we have lexemes so we need to
1504 # serialize them. Otherwise set nval to undef so that the
1505 # key is excluded from this reading.
1506 $nval = $nval ? $n->_serialize_lexemes : undef;
1507 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1510 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1511 # Adjust the ranks within the subgraph.
1512 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1513 : $nval - $rankoffset;
1515 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1520 # Add the path edges to the sequence graph
1522 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1523 # We add an edge in the graphml for every witness in $e.
1524 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1525 my @edge_wits = sort $self->path_witnesses( $e );
1526 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1527 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1528 # Skip any path from start to end; that witness is not in the subgraph.
1529 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1530 foreach my $wit ( @edge_wits ) {
1531 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1532 $node_hash{ $e->[0] },
1533 $node_hash{ $e->[1] } );
1534 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1535 $edge_el->setAttribute( 'source', $from );
1536 $edge_el->setAttribute( 'target', $to );
1537 $edge_el->setAttribute( 'id', $id );
1539 # It's a witness path, so add the witness
1541 my $key = $edge_data_keys{'witness'};
1542 # Is this an ante-corr witness?
1543 my $aclabel = $self->ac_label;
1544 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1545 # Keep the base witness
1547 # ...and record that this is an 'extra' reading path
1548 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1550 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1554 # Report the actual number of nodes and edges that went in
1555 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1556 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1558 # Add the relationship graph to the XML
1559 map { delete $edge_data_keys{$_} } @path_attributes;
1560 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1561 $node_data_keys{'id'}, \%edge_data_keys );
1563 # Save and return the thing
1564 my $result = decode_utf8( $graphml->toString(1) );
1568 sub _add_graphml_data {
1569 my( $el, $key, $value ) = @_;
1570 return unless defined $value;
1571 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1572 $data_el->setAttribute( 'key', $key );
1573 $data_el->appendText( $value );
1578 Returns a CSV alignment table representation of the collation graph, one
1579 row per witness (or witness uncorrected.)
1583 Returns a tab-separated alignment table representation of the collation graph,
1584 one row per witness (or witness uncorrected.)
1588 use Text::Tradition;
1596 my $datafile = 't/data/florilegium_tei_ps.xml';
1597 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1599 'file' => $datafile,
1602 my $c = $tradition->collation;
1603 # Export the thing to CSV
1604 my $csvstr = $c->as_csv();
1606 my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1607 my @lines = split(/\n/, $csvstr );
1608 ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1609 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1610 my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1611 ok( @q_ac, "Found a layered witness" );
1613 my $t2 = Text::Tradition->new( input => 'Tabular',
1617 is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1618 is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1620 # Now do it with TSV
1621 my $tsvstr = $c->as_tsv();
1622 my $t3 = Text::Tradition->new( input => 'Tabular',
1626 is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1627 is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1629 my $table = $c->alignment_table;
1630 my $noaccsv = $c->as_csv({ noac => 1 });
1631 my @noaclines = split(/\n/, $noaccsv );
1632 ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1633 is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
1634 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1636 my $safecsv = $c->as_csv({ safe_ac => 1});
1637 my @safelines = split(/\n/, $safecsv );
1638 ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1639 is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1640 @q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1641 ok( @q_ac, "Found a sanitized layered witness" );
1642 is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
1644 # Test relationship collapse
1645 $c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1646 $c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1648 my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1649 my $t4 = Text::Tradition->new( input => 'Tabular',
1651 string => $mergedtsv,
1653 is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1654 is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1661 my( $self, $opts ) = @_;
1662 my $table = $self->alignment_table( $opts );
1663 my $csv_options = { binary => 1, quote_null => 0 };
1664 $csv_options->{'sep_char'} = $opts->{fieldsep};
1665 if( $opts->{fieldsep} eq "\t" ) {
1666 # If it is really tab separated, nothing is an escape char.
1667 $csv_options->{'quote_char'} = undef;
1668 $csv_options->{'escape_char'} = '';
1670 my $csv = Text::CSV->new( $csv_options );
1672 # Make the header row
1673 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1674 push( @result, $csv->string );
1675 # Make the rest of the rows
1676 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1677 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1678 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1679 # Quick and dirty collapse of requested relationship types
1680 if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1681 # Now substitute the reading in the relevant index of @row
1682 # for its merge-related reading
1685 my $thisr = shift @rowobjs;
1687 next if exists $substitutes{$thisr->{t}->text};
1688 # Make sure we don't have A <-> B substitutions.
1689 $substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1690 foreach my $thatr ( @rowobjs ) {
1692 next if exists $substitutes{$thatr->{t}->text};
1693 my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1695 next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1696 # If we have got this far then we need to merge them.
1697 $substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1700 @row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1702 $csv->combine( @row );
1703 push( @result, $csv->string );
1705 return join( "\n", @result );
1710 my $opts = shift || {};
1711 $opts->{fieldsep} = ',';
1712 return $self->_tabular( $opts );
1717 my $opts = shift || {};
1718 $opts->{fieldsep} = "\t";
1719 return $self->_tabular( $opts );
1722 =head2 alignment_table
1724 Return a reference to an alignment table, in a slightly enhanced CollateX
1725 format which looks like this:
1727 $table = { alignment => [ { witness => "SIGIL",
1728 tokens => [ { t => "TEXT" }, ... ] },
1729 { witness => "SIG2",
1730 tokens => [ { t => "TEXT" }, ... ] },
1732 length => TEXTLEN };
1736 sub alignment_table {
1737 my( $self, $opts ) = @_;
1738 if( $self->has_cached_table ) {
1739 return $self->cached_table
1740 unless $opts->{noac} || $opts->{safe_ac};
1743 # Make sure we can do this
1744 throw( "Need a linear graph in order to make an alignment table" )
1745 unless $self->linear;
1746 $self->calculate_ranks()
1747 unless $self->_graphcalc_done && $self->end->has_rank;
1749 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1750 my @all_pos = ( 1 .. $self->end->rank - 1 );
1751 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1752 # say STDERR "Making witness row(s) for " . $wit->sigil;
1753 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1754 my @row = _make_witness_row( \@wit_path, \@all_pos );
1755 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1756 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1757 push( @{$table->{'alignment'}}, $witobj );
1758 if( $wit->is_layered && !$opts->{noac} ) {
1759 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1760 $wit->sigil.$self->ac_label );
1761 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1762 my $witlabel = $opts->{safe_ac}
1763 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1764 my $witacobj = { 'witness' => $witlabel,
1765 'tokens' => \@ac_row };
1766 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1767 push( @{$table->{'alignment'}}, $witacobj );
1770 unless( $opts->{noac} || $opts->{safe_ac} ) {
1771 $self->cached_table( $table );
1776 sub _make_witness_row {
1777 my( $path, $positions ) = @_;
1779 map { $char_hash{$_} = undef } @$positions;
1781 foreach my $rdg ( @$path ) {
1782 say STDERR "rank " . $rdg->rank if $debug;
1783 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1784 $char_hash{$rdg->rank} = { 't' => $rdg };
1786 my @row = map { $char_hash{$_} } @$positions;
1787 # Fill in lacuna markers for undef spots in the row
1788 my $last_el = shift @row;
1789 my @filled_row = ( $last_el );
1790 foreach my $el ( @row ) {
1791 # If we are using node reference, make the lacuna node appear many times
1792 # in the table. If not, use the lacuna tag.
1793 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1796 push( @filled_row, $el );
1803 =head1 NAVIGATION METHODS
1805 =head2 reading_sequence( $first, $last, $sigil, $backup )
1807 Returns the ordered list of readings, starting with $first and ending
1808 with $last, for the witness given in $sigil. If a $backup sigil is
1809 specified (e.g. when walking a layered witness), it will be used wherever
1810 no $sigil path exists. If there is a base text reading, that will be
1811 used wherever no path exists for $sigil or $backup.
1815 # TODO Think about returning some lazy-eval iterator.
1816 # TODO Get rid of backup; we should know from what witness is whether we need it.
1818 sub reading_sequence {
1819 my( $self, $start, $end, $witness ) = @_;
1821 $witness = $self->baselabel unless $witness;
1822 my @readings = ( $start );
1825 while( $n && $n->id ne $end->id ) {
1826 if( exists( $seen{$n->id} ) ) {
1827 throw( "Detected loop for $witness at " . $n->id );
1831 my $next = $self->next_reading( $n, $witness );
1833 throw( "Did not find any path for $witness from reading " . $n->id );
1835 push( @readings, $next );
1838 # Check that the last reading is our end reading.
1839 my $last = $readings[$#readings];
1840 throw( "Last reading found from " . $start->text .
1841 " for witness $witness is not the end!" ) # TODO do we get this far?
1842 unless $last->id eq $end->id;
1847 =head2 next_reading( $reading, $sigil );
1849 Returns the reading that follows the given reading along the given witness
1855 # Return the successor via the corresponding path.
1857 my $answer = $self->_find_linked_reading( 'next', @_ );
1858 return undef unless $answer;
1859 return $self->reading( $answer );
1862 =head2 prior_reading( $reading, $sigil )
1864 Returns the reading that precedes the given reading along the given witness
1870 # Return the predecessor via the corresponding path.
1872 my $answer = $self->_find_linked_reading( 'prior', @_ );
1873 return $self->reading( $answer );
1876 sub _find_linked_reading {
1877 my( $self, $direction, $node, $path ) = @_;
1879 # Get a backup if we are dealing with a layered witness
1881 my $aclabel = $self->ac_label;
1882 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1886 my @linked_paths = $direction eq 'next'
1887 ? $self->sequence->edges_from( $node )
1888 : $self->sequence->edges_to( $node );
1889 return undef unless scalar( @linked_paths );
1891 # We have to find the linked path that contains all of the
1892 # witnesses supplied in $path.
1893 my( @path_wits, @alt_path_wits );
1894 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1895 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1898 foreach my $le ( @linked_paths ) {
1899 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1902 my @le_wits = sort $self->path_witnesses( $le );
1903 if( _is_within( \@path_wits, \@le_wits ) ) {
1904 # This is the right path.
1905 return $direction eq 'next' ? $le->[1] : $le->[0];
1906 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1910 # Got this far? Return the alternate path if it exists.
1911 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1914 # Got this far? Return the base path if it exists.
1915 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1918 # Got this far? We have no appropriate path.
1919 warn "Could not find $direction node from " . $node->id
1920 . " along path $path";
1926 my( $set1, $set2 ) = @_;
1927 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1928 foreach my $el ( @$set1 ) {
1929 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1934 # Return the string that joins together a list of witnesses for
1935 # display on a single path.
1936 sub _witnesses_of_label {
1937 my( $self, $label ) = @_;
1938 my $regex = $self->wit_list_separator;
1939 my @answer = split( /\Q$regex\E/, $label );
1943 =head2 common_readings
1945 Returns the list of common readings in the graph (i.e. those readings that are
1946 shared by all non-lacunose witnesses.)
1950 sub common_readings {
1952 my @common = grep { $_->is_common } $self->readings;
1956 =head2 path_text( $sigil, [, $start, $end ] )
1958 Returns the text of a witness (plus its backup, if we are using a layer)
1959 as stored in the collation. The text is returned as a string, where the
1960 individual readings are joined with spaces and the meta-readings (e.g.
1961 lacunae) are omitted. Optional specification of $start and $end allows
1962 the generation of a subset of the witness text.
1967 my( $self, $wit, $start, $end ) = @_;
1968 $start = $self->start unless $start;
1969 $end = $self->end unless $end;
1970 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1973 foreach my $r ( @path ) {
1974 unless ( $r->join_prior || !$last || $last->join_next ) {
1977 $pathtext .= $r->text;
1983 =head1 INITIALIZATION METHODS
1985 These are mostly for use by parsers.
1987 =head2 make_witness_path( $witness )
1989 Link the array of readings contained in $witness->path (and in
1990 $witness->uncorrected_path if it exists) into collation paths.
1991 Clear out the arrays when finished.
1993 =head2 make_witness_paths
1995 Call make_witness_path for all witnesses in the tradition.
1999 # For use when a collation is constructed from a base text and an apparatus.
2000 # We have the sequences of readings and just need to add path edges.
2001 # When we are done, clear out the witness path attributes, as they are no
2003 # TODO Find a way to replace the witness path attributes with encapsulated functions?
2005 sub make_witness_paths {
2007 foreach my $wit ( $self->tradition->witnesses ) {
2008 # say STDERR "Making path for " . $wit->sigil;
2009 $self->make_witness_path( $wit );
2013 sub make_witness_path {
2014 my( $self, $wit ) = @_;
2015 my @chain = @{$wit->path};
2016 my $sig = $wit->sigil;
2017 # Add start and end if necessary
2018 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2019 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2020 foreach my $idx ( 0 .. $#chain-1 ) {
2021 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
2023 if( $wit->is_layered ) {
2024 @chain = @{$wit->uncorrected_path};
2025 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2026 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
2027 foreach my $idx( 0 .. $#chain-1 ) {
2028 my $source = $chain[$idx];
2029 my $target = $chain[$idx+1];
2030 $self->add_path( $source, $target, $sig.$self->ac_label )
2031 unless $self->has_path( $source, $target, $sig );
2035 $wit->clear_uncorrected_path;
2038 =head2 calculate_ranks
2040 Calculate the reading ranks (that is, their aligned positions relative
2041 to each other) for the graph. This can only be called on linear collations.
2045 use Text::Tradition;
2047 my $cxfile = 't/data/Collatex-16.xml';
2048 my $t = Text::Tradition->new(
2050 'input' => 'CollateX',
2053 my $c = $t->collation;
2056 my $table = $c->alignment_table;
2057 ok( $c->has_cached_table, "Alignment table was cached" );
2058 is( $c->alignment_table, $table, "Cached table returned upon second call" );
2059 $c->calculate_ranks;
2060 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
2061 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2062 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2063 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2064 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
2070 sub calculate_ranks {
2072 # Save the existing ranks, in case we need to invalidate the cached SVG.
2073 throw( "Cannot calculate ranks on a non-linear graph" )
2074 unless $self->linear;
2076 map { $existing_ranks{$_} = $_->rank } $self->readings;
2078 # Do the rankings based on the relationship equivalence graph, starting
2079 # with the start node.
2080 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2082 # Transfer our rankings from the topological graph to the real one.
2083 foreach my $r ( $self->readings ) {
2084 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
2085 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
2087 # Die. Find the last rank we calculated.
2088 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2089 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
2091 my $last = pop @all_defined;
2092 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
2095 # Do we need to invalidate the cached data?
2096 if( $self->has_cached_table ) {
2097 foreach my $r ( $self->readings ) {
2098 next if defined( $existing_ranks{$r} )
2099 && $existing_ranks{$r} == $r->rank;
2100 # Something has changed, so clear the cache
2101 $self->_clear_cache;
2102 # ...and recalculate the common readings.
2103 $self->calculate_common_readings();
2107 # The graph calculation information is now up to date.
2108 $self->_graphcalc_done(1);
2113 $self->wipe_table if $self->has_cached_table;
2117 =head2 flatten_ranks
2119 A convenience method for parsing collation data. Searches the graph for readings
2120 with the same text at the same rank, and merges any that are found.
2125 my ( $self, %args ) = shift;
2126 my %unique_rank_rdg;
2128 foreach my $p ( $self->identical_readings( %args ) ) {
2129 # say STDERR "Combining readings at same rank: @$p";
2131 $self->merge_readings( @$p );
2132 # TODO see if this now makes a common point.
2134 # If we merged readings, the ranks are still fine but the alignment
2135 # table is wrong. Wipe it.
2136 $self->wipe_table() if $changed;
2139 =head2 identical_readings
2140 =head2 identical_readings( start => $startnode, end => $endnode )
2141 =head2 identical_readings( startrank => $startrank, endrank => $endrank )
2143 Goes through the graph identifying all pairs of readings that appear to be
2144 identical, and therefore able to be merged into a single reading. Returns the
2145 relevant identical pairs. Can be restricted to run over only a part of the
2146 graph, specified either by node or by rank.
2150 sub identical_readings {
2151 my ( $self, %args ) = @_;
2152 # Find where we should start and end.
2153 my $startrank = $args{startrank} || 0;
2154 if( $args{start} ) {
2155 throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2156 && $self->reading( $args{start} )->has_rank;
2157 $startrank = $self->reading( $args{start} )->rank;
2159 my $endrank = $args{endrank} || $self->end->rank;
2161 throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2162 && $self->reading( $args{end} )->has_rank;
2163 $endrank = $self->reading( $args{end} )->rank;
2166 # Make sure the ranks are correct.
2167 unless( $self->_graphcalc_done ) {
2168 $self->calculate_ranks;
2170 # Go through the readings looking for duplicates.
2171 my %unique_rank_rdg;
2173 foreach my $rdg ( $self->readings ) {
2174 next unless $rdg->has_rank;
2175 my $rk = $rdg->rank;
2176 next if $rk > $endrank || $rk < $startrank;
2177 my $key = $rk . "||" . $rdg->text;
2178 if( exists $unique_rank_rdg{$key} ) {
2179 # Make sure they don't have different grammatical forms
2180 my $ur = $unique_rank_rdg{$key};
2181 if( $rdg->is_identical( $ur ) ) {
2182 push( @pairs, [ $ur, $rdg ] );
2185 $unique_rank_rdg{$key} = $rdg;
2193 =head2 calculate_common_readings
2195 Goes through the graph identifying the readings that appear in every witness
2196 (apart from those with lacunae at that spot.) Marks them as common and returns
2201 use Text::Tradition;
2203 my $cxfile = 't/data/Collatex-16.xml';
2204 my $t = Text::Tradition->new(
2206 'input' => 'CollateX',
2209 my $c = $t->collation;
2211 my @common = $c->calculate_common_readings();
2212 is( scalar @common, 8, "Found correct number of common readings" );
2213 my @marked = sort $c->common_readings();
2214 is( scalar @common, 8, "All common readings got marked as such" );
2215 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
2216 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2222 sub calculate_common_readings {
2225 map { $_->is_common( 0 ) } $self->readings;
2226 # Implicitly calls calculate_ranks
2227 my $table = $self->alignment_table;
2228 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
2229 my @row = map { $_->{'tokens'}->[$idx]
2230 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2231 @{$table->{'alignment'}};
2233 foreach my $r ( @row ) {
2235 $hash{$r->id} = $r unless $r->is_meta;
2237 $hash{'UNDEF'} = $r;
2240 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2241 my( $r ) = values %hash;
2243 push( @common, $r );
2249 =head2 text_from_paths
2251 Calculate the text array for all witnesses from the path, for later consistency
2252 checking. Only to be used if there is no non-graph-based way to know the
2257 sub text_from_paths {
2259 foreach my $wit ( $self->tradition->witnesses ) {
2260 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2262 foreach my $r ( @readings ) {
2263 next if $r->is_meta;
2264 push( @text, $r->text );
2266 $wit->text( \@text );
2267 if( $wit->is_layered ) {
2268 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2269 $wit->sigil.$self->ac_label );
2271 foreach my $r ( @ucrdgs ) {
2272 next if $r->is_meta;
2273 push( @uctext, $r->text );
2275 $wit->layertext( \@uctext );
2280 =head1 UTILITY FUNCTIONS
2282 =head2 common_predecessor( $reading_a, $reading_b )
2284 Find the last reading that occurs in sequence before both the given readings.
2285 At the very least this should be $self->start.
2287 =head2 common_successor( $reading_a, $reading_b )
2289 Find the first reading that occurs in sequence after both the given readings.
2290 At the very least this should be $self->end.
2294 use Text::Tradition;
2296 my $cxfile = 't/data/Collatex-16.xml';
2297 my $t = Text::Tradition->new(
2299 'input' => 'CollateX',
2302 my $c = $t->collation;
2304 is( $c->common_predecessor( 'n24', 'n23' )->id,
2305 'n20', "Found correct common predecessor" );
2306 is( $c->common_successor( 'n24', 'n23' )->id,
2307 '__END__', "Found correct common successor" );
2309 is( $c->common_predecessor( 'n19', 'n17' )->id,
2310 'n16', "Found correct common predecessor for readings on same path" );
2311 is( $c->common_successor( 'n21', 'n10' )->id,
2312 '__END__', "Found correct common successor for readings on same path" );
2318 ## Return the closest reading that is a predecessor of both the given readings.
2319 sub common_predecessor {
2321 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2322 return $self->_common_in_path( $r1, $r2, 'predecessors' );
2325 sub common_successor {
2327 my( $r1, $r2 ) = $self->_objectify_args( @_ );
2328 return $self->_common_in_path( $r1, $r2, 'successors' );
2332 # TODO think about how to do this without ranks...
2333 sub _common_in_path {
2334 my( $self, $r1, $r2, $dir ) = @_;
2335 my $iter = $self->end->rank;
2337 my @last_r1 = ( $r1 );
2338 my @last_r2 = ( $r2 );
2339 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
2341 # say STDERR "Finding common $dir for $r1, $r2";
2342 while( !@candidates ) {
2343 last unless $iter--; # Avoid looping infinitely
2344 # Iterate separately down the graph from r1 and r2
2345 my( @new_lc1, @new_lc2 );
2346 foreach my $lc ( @last_r1 ) {
2347 foreach my $p ( $lc->$dir ) {
2348 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
2349 # say STDERR "Path candidate $p from $lc";
2350 push( @candidates, $p );
2351 } elsif( !$all_seen{$p->id} ) {
2352 $all_seen{$p->id} = 'r1';
2353 push( @new_lc1, $p );
2357 foreach my $lc ( @last_r2 ) {
2358 foreach my $p ( $lc->$dir ) {
2359 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
2360 # say STDERR "Path candidate $p from $lc";
2361 push( @candidates, $p );
2362 } elsif( !$all_seen{$p->id} ) {
2363 $all_seen{$p->id} = 'r2';
2364 push( @new_lc2, $p );
2368 @last_r1 = @new_lc1;
2369 @last_r2 = @new_lc2;
2371 my @answer = sort { $a->rank <=> $b->rank } @candidates;
2372 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2376 Text::Tradition::Error->throw(
2377 'ident' => 'Collation error',
2383 __PACKAGE__->meta->make_immutable;
2389 =item * Rework XML serialization in a more modular way
2395 This package is free software and is provided "as is" without express
2396 or implied warranty. You can redistribute it and/or modify it under
2397 the same terms as Perl itself.
2401 Tara L Andrews E<lt>aurum@cpan.orgE<gt>