1 package Text::Tradition::Collation;
4 use Encode qw( decode_utf8 );
8 use IPC::Run qw( run binary );
10 use Text::Tradition::Collation::Data;
11 use Text::Tradition::Collation::Reading;
12 use Text::Tradition::Collation::RelationshipStore;
13 use Text::Tradition::Error;
14 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
16 use XML::LibXML::XPathContext;
20 isa => 'Text::Tradition::Collation::Data',
58 isa => 'Text::Tradition',
59 writer => '_set_tradition',
65 Text::Tradition::Collation - a software model for a text collation
70 my $t = Text::Tradition->new(
71 'name' => 'this is a text',
73 'file' => '/path/to/tei_parallel_seg_file.xml' );
75 my $c = $t->collation;
76 my @readings = $c->readings;
77 my @paths = $c->paths;
78 my @relationships = $c->relationships;
80 my $svg_variant_graph = $t->collation->as_svg();
84 Text::Tradition is a library for representation and analysis of collated
85 texts, particularly medieval ones. The Collation is the central feature of
86 a Tradition, where the text, its sequence of readings, and its relationships
87 between readings are actually kept.
93 The constructor. Takes a hash or hashref of the following arguments:
97 =item * tradition - The Text::Tradition object to which the collation
100 =item * linear - Whether the collation should be linear; that is, whether
101 transposed readings should be treated as two linked readings rather than one,
102 and therefore whether the collation graph is acyclic. Defaults to true.
104 =item * baselabel - The default label for the path taken by a base text
105 (if any). Defaults to 'base text'.
107 =item * wit_list_separator - The string to join a list of witnesses for
108 purposes of making labels in display graphs. Defaults to ', '.
110 =item * ac_label - The extra label to tack onto a witness sigil when
111 representing another layer of path for the given witness - that is, when
112 a text has more than one possible reading due to scribal corrections or
113 the like. Defaults to ' (a.c.)'.
115 =item * wordsep - The string used to separate words in the original text.
126 =head2 wit_list_separator
134 Simple accessors for collation attributes.
138 The meta-reading at the start of every witness path.
142 The meta-reading at the end of every witness path.
146 Returns all Reading objects in the graph.
148 =head2 reading( $id )
150 Returns the Reading object corresponding to the given ID.
152 =head2 add_reading( $reading_args )
154 Adds a new reading object to the collation.
155 See L<Text::Tradition::Collation::Reading> for the available arguments.
157 =head2 del_reading( $object_or_id )
159 Removes the given reading from the collation, implicitly removing its
160 paths and relationships.
162 =head2 merge_readings( $main, $second, $concatenate, $with_str )
164 Merges the $second reading into the $main one. If $concatenate is true, then
165 the merged node will carry the text of both readings, concatenated with either
166 $with_str (if specified) or a sensible default (the empty string if the
167 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
169 The first two arguments may be either readings or reading IDs.
171 =head2 has_reading( $id )
173 Predicate to see whether a given reading ID is in the graph.
175 =head2 reading_witnesses( $object_or_id )
177 Returns a list of sigils whose witnesses contain the reading.
181 Returns all reading paths within the document - that is, all edges in the
182 collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
184 =head2 add_path( $source, $target, $sigil )
186 Links the given readings in the collation in sequence, under the given witness
187 sigil. The readings may be specified by object or ID.
189 =head2 del_path( $source, $target, $sigil )
191 Links the given readings in the collation in sequence, under the given witness
192 sigil. The readings may be specified by object or ID.
194 =head2 has_path( $source, $target );
196 Returns true if the two readings are linked in sequence in any witness.
197 The readings may be specified by object or ID.
201 Returns all Relationship objects in the collation.
203 =head2 add_relationship( $reading, $other_reading, $options )
205 Adds a new relationship of the type given in $options between the two readings,
206 which may be specified by object or ID. Returns a value of ( $status, @vectors)
207 where $status is true on success, and @vectors is a list of relationship edges
208 that were ultimately added.
209 See L<Text::Tradition::Collation::Relationship> for the available options.
214 my ( $class, @args ) = @_;
215 my %args = @args == 1 ? %{ $args[0] } : @args;
216 # TODO determine these from the Moose::Meta object
217 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
218 linear wordsep start end cached_table _graphcalc_done);
220 for my $attr (@delegate_attrs) {
221 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
223 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
229 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
230 $self->_set_start( $self->add_reading(
231 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
232 $self->_set_end( $self->add_reading(
233 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
236 sub register_relationship_type {
238 my %args = @_ == 1 ? %{$_[0]} : @_;
239 if( $self->relations->has_type( $args{name} ) ) {
240 throw( 'Relationship type ' . $args{name} . ' already registered' );
242 $self->relations->add_type( %args );
245 ### Reading construct/destruct functions
248 my( $self, $reading ) = @_;
249 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
250 my %args = %$reading;
251 if( $args{'init'} ) {
252 # If we are initializing an empty collation, don't assume that we
253 # have set a tradition.
254 delete $args{'init'};
255 } elsif( $self->tradition->can('language') && $self->tradition->has_language
256 && !exists $args{'language'} ) {
257 $args{'language'} = $self->tradition->language;
259 $reading = Text::Tradition::Collation::Reading->new(
260 'collation' => $self,
263 # First check to see if a reading with this ID exists.
264 if( $self->reading( $reading->id ) ) {
265 throw( "Collation already has a reading with id " . $reading->id );
267 $self->_graphcalc_done(0);
268 $self->_add_reading( $reading->id => $reading );
269 # Once the reading has been added, put it in both graphs.
270 $self->sequence->add_vertex( $reading->id );
271 $self->relations->add_reading( $reading->id );
275 around del_reading => sub {
280 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
283 # Remove the reading from the graphs.
284 $self->_graphcalc_done(0);
285 $self->_clear_cache; # Explicitly clear caches to GC the reading
286 $self->sequence->delete_vertex( $arg );
287 $self->relations->delete_reading( $arg );
290 $self->$orig( $arg );
297 my $cxfile = 't/data/Collatex-16.xml';
298 my $t = Text::Tradition->new(
300 'input' => 'CollateX',
303 my $c = $t->collation;
305 my $rno = scalar $c->readings;
306 # Split n21 for testing purposes
307 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
308 my $old_r = $c->reading( 'n21' );
309 $old_r->alter_text( 'to' );
310 $c->del_path( 'n20', 'n21', 'A' );
311 $c->add_path( 'n20', 'n21p0', 'A' );
312 $c->add_path( 'n21p0', 'n21', 'A' );
314 ok( $c->reading( 'n21p0' ), "New reading exists" );
315 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
317 # Combine n3 and n4 ( with his )
318 $c->merge_readings( 'n3', 'n4', 1 );
319 ok( !$c->reading('n4'), "Reading n4 is gone" );
320 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
322 # Collapse n9 and n10 ( rood / root )
323 $c->merge_readings( 'n9', 'n10' );
324 ok( !$c->reading('n10'), "Reading n10 is gone" );
325 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
327 # Combine n21 and n21p0
328 my $remaining = $c->reading('n21');
329 $remaining ||= $c->reading('n22'); # one of these should still exist
330 $c->merge_readings( 'n21p0', $remaining, 1 );
331 ok( !$c->reading('n21'), "Reading $remaining is gone" );
332 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
342 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
343 my $mergemeta = $kept_obj->is_meta;
344 throw( "Cannot merge meta and non-meta reading" )
345 unless ( $mergemeta && $del_obj->is_meta )
346 || ( !$mergemeta && !$del_obj->is_meta );
348 throw( "Cannot merge with start or end node" )
349 if( $kept_obj eq $self->start || $kept_obj eq $self->end
350 || $del_obj eq $self->start || $del_obj eq $self->end );
351 throw( "Cannot combine text of meta readings" ) if $combine;
353 # We only need the IDs for adding paths to the graph, not the reading
354 # objects themselves.
355 my $kept = $kept_obj->id;
356 my $deleted = $del_obj->id;
357 $self->_graphcalc_done(0);
359 # The kept reading should inherit the paths and the relationships
360 # of the deleted reading.
361 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
362 my @vector = ( $kept );
363 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
364 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
365 next if $vector[0] eq $vector[1]; # Don't add a self loop
366 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
367 $self->sequence->add_edge( @vector );
368 my $fwits = $self->sequence->get_edge_attributes( @vector );
369 @wits{keys %$fwits} = values %$fwits;
370 $self->sequence->set_edge_attributes( @vector, \%wits );
372 $self->relations->merge_readings( $kept, $deleted, $combine );
374 # Do the deletion deed.
376 # Combine the text of the readings
377 my $joinstr = $combine_char;
378 unless( defined $joinstr ) {
379 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
380 $joinstr = $self->wordsep unless defined $joinstr;
382 $kept_obj->_combine( $del_obj, $joinstr );
384 $self->del_reading( $deleted );
387 =head2 compress_readings
389 Where possible in the graph, compresses plain sequences of readings into a
390 single reading. The sequences must consist of readings with no
391 relationships to other readings, with only a single witness path between
392 them and no other witness paths from either that would skip the other. The
393 readings must also not be marked as nonsense or bad grammar.
395 WARNING: This operation cannot be undone.
399 sub compress_readings {
401 # Anywhere in the graph that there is a reading that joins only to a single
402 # successor, and neither of these have any relationships, just join the two
404 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
405 # Now look for readings that can be joined to their successors.
406 next unless $rdg->is_combinable;
408 while( $self->sequence->successors( $rdg ) == 1 ) {
409 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
410 throw( "Infinite loop" ) if $seen{$next->id};
411 $seen{$next->id} = 1;
412 last if $self->sequence->predecessors( $next ) > 1;
413 last unless $next->is_combinable;
414 say "Joining readings $rdg and $next";
415 $self->merge_readings( $rdg, $next, 1 );
418 # Make sure we haven't screwed anything up
419 foreach my $wit ( $self->tradition->witnesses ) {
420 my $pathtext = $self->path_text( $wit->sigil );
421 my $origtext = join( ' ', @{$wit->text} );
422 throw( "Text differs for witness " . $wit->sigil )
423 unless $pathtext eq $origtext;
424 if( $wit->is_layered ) {
425 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
426 $origtext = join( ' ', @{$wit->layertext} );
427 throw( "Ante-corr text differs for witness " . $wit->sigil )
428 unless $pathtext eq $origtext;
432 $self->relations->rebuild_equivalence();
433 $self->calculate_ranks();
436 # Helper function for manipulating the graph.
437 sub _stringify_args {
438 my( $self, $first, $second, @args ) = @_;
440 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
441 $second = $second->id
442 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
443 return( $first, $second, @args );
446 # Helper function for manipulating the graph.
447 sub _objectify_args {
448 my( $self, $first, $second, $arg ) = @_;
449 $first = $self->reading( $first )
450 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
451 $second = $self->reading( $second )
452 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
453 return( $first, $second, $arg );
456 =head2 duplicate_reading( $reading, @witlist )
458 Split the given reading into two, so that the new reading is in the path for
459 the witnesses given in @witlist. If the result is that certain non-colocated
460 relationships (e.g. transpositions) are no longer valid, these will be removed.
461 Returns the newly-created reading.
467 my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
468 is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
469 ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
471 my $sc = $st->collation;
473 ok( $sc->reading('n131'), "Tradition has the affected reading" );
474 is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
475 is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
477 # Detach the erroneously collated reading
478 my $newr = $sc->duplicate_reading( 'n131', 'Ba96' );
479 ok( $newr, "New reading was created" );
480 ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
481 is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
482 is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
484 # Check that the bad transposition is gone
485 is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
488 ok( $sc->add_relationship( 'n124', 'n131_0', { type => 'collated', scope => 'local' } ),
489 "Collated the readings correctly" );
490 $sc->calculate_ranks();
491 $sc->flatten_ranks();
492 is( $sc->end->rank, 11, "The ranks shifted appropriately" );
493 is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
499 sub duplicate_reading {
500 my( $self, $r, @wits ) = @_;
501 # Add the new reading, duplicating $r.
502 unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
503 $r = $self->reading( $r );
505 throw( "Cannot duplicate a meta-reading" )
508 # Get all the reading attributes and duplicate them.
509 my $rmeta = Text::Tradition::Collation::Reading->meta;
511 foreach my $attr( $rmeta->get_all_attributes ) {
512 next if $attr->name =~ /^_/;
513 my $acc = $attr->get_read_method;
514 if( !$acc && $attr->has_applied_traits ) {
515 my $tr = $attr->applied_traits;
516 if( $tr->[0] =~ /::(Array|Hash)$/ ) {
518 my %methods = reverse %{$attr->handles};
519 $acc = $methods{elements};
520 $args{$attr->name} = $which eq 'Array'
521 ? [ $r->$acc ] : { $r->$acc };
524 $args{$attr->name} = $r->$acc if $acc;
527 # By definition the new reading will no longer be common.
528 $args{is_common} = 0;
529 # The new reading also needs its own ID.
530 $args{id} = $self->_generate_dup_id( $r->id );
532 # Try to make the new reading.
533 my $newr = $self->add_reading( \%args );
534 # The old reading is also no longer common.
537 # For each of the witnesses, dissociate from the old reading and
538 # associate with the new.
539 foreach my $wit ( @wits ) {
540 my $prior = $self->prior_reading( $r, $wit );
541 my $next = $self->next_reading( $r, $wit );
542 $self->del_path( $prior, $r, $wit );
543 $self->add_path( $prior, $newr, $wit );
544 $self->del_path( $r, $next, $wit );
545 $self->add_path( $newr, $next, $wit );
548 # Hash the reading ranks and find the closest common successor to our
552 if( $self->end->has_rank ) {
553 $succ = $self->common_successor( $r, $newr );
554 foreach my $rdg ( $self->readings ) {
555 $rrk{$rdg->id} = $rdg->rank;
559 # Rebuild the equivalence graph and calculate the new ranks
560 $self->relations->rebuild_equivalence();
561 $self->calculate_ranks();
563 # Check for invalid non-colocated relationships among changed-rank readings
564 # from where the ranks start changing up to $succ
565 if( $self->end->has_rank ) {
566 my $lastrank = $succ->rank;
567 foreach my $rdg ( $self->readings ) {
568 next if $rdg->rank > $lastrank;
569 next if $rdg->rank == $rrk{$rdg->id};
570 my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
571 next unless @noncolo;
572 foreach my $nc ( @noncolo ) {
573 $self->relations->verify_or_delete( $rdg, $nc );
580 sub _generate_dup_id {
581 my( $self, $rid ) = @_;
586 if( $self->has_reading( $newid ) ) {
599 # We only need the IDs for adding paths to the graph, not the reading
600 # objects themselves.
601 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
603 $self->_graphcalc_done(0);
604 # Connect the readings
605 unless( $self->sequence->has_edge( $source, $target ) ) {
606 $self->sequence->add_edge( $source, $target );
607 $self->relations->add_equivalence_edge( $source, $target );
609 # Note the witness in question
610 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
616 if( ref( $_[0] ) eq 'ARRAY' ) {
623 # We only need the IDs for removing paths from the graph, not the reading
624 # objects themselves.
625 my( $source, $target, $wit ) = $self->_stringify_args( @args );
627 $self->_graphcalc_done(0);
628 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
629 $self->sequence->delete_edge_attribute( $source, $target, $wit );
631 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
632 $self->sequence->delete_edge( $source, $target );
633 $self->relations->delete_equivalence_edge( $source, $target );
638 # Extra graph-alike utility
641 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
642 return undef unless $self->sequence->has_edge( $source, $target );
643 return $self->sequence->has_edge_attribute( $source, $target, $wit );
646 =head2 clear_witness( @sigil_list )
648 Clear the given witnesses out of the collation entirely, removing references
649 to them in paths, and removing readings that belong only to them. Should only
650 be called via $tradition->del_witness.
655 my( $self, @sigils ) = @_;
657 $self->_graphcalc_done(0);
658 # Clear the witness(es) out of the paths
659 foreach my $e ( $self->paths ) {
660 foreach my $sig ( @sigils ) {
661 $self->del_path( $e, $sig );
665 # Clear out the newly unused readings
666 foreach my $r ( $self->readings ) {
667 unless( $self->reading_witnesses( $r ) ) {
668 $self->del_reading( $r );
673 sub add_relationship {
675 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
676 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
677 foreach my $v ( @vectors ) {
678 next unless $self->get_relationship( $v )->colocated;
679 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
680 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
681 $self->_graphcalc_done(0);
689 around qw/ get_relationship del_relationship / => sub {
693 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
696 my( $source, $target ) = $self->_stringify_args( @args );
697 $self->$orig( $source, $target );
700 =head2 reading_witnesses( $reading )
702 Return a list of sigils corresponding to the witnesses in which the reading appears.
706 sub reading_witnesses {
707 my( $self, $reading ) = @_;
708 # We need only check either the incoming or the outgoing edges; I have
709 # arbitrarily chosen "incoming". Thus, special-case the start node.
710 if( $reading eq $self->start ) {
711 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
714 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
715 my $wits = $self->sequence->get_edge_attributes( @$e );
716 @all_witnesses{ keys %$wits } = 1;
718 my $acstr = $self->ac_label;
719 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
720 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
722 return keys %all_witnesses;
725 =head1 OUTPUT METHODS
727 =head2 as_svg( \%options )
729 Returns an SVG string that represents the graph, via as_dot and graphviz.
730 See as_dot for a list of options. Must have GraphViz (dot) installed to run.
735 my( $self, $opts ) = @_;
736 throw( "Need GraphViz installed to output SVG" )
737 unless File::Which::which( 'dot' );
738 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
739 $self->calculate_ranks()
740 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
741 my @cmd = qw/dot -Tsvg/;
743 my $dotfile = File::Temp->new();
745 # $dotfile->unlink_on_destroy(0);
746 binmode $dotfile, ':utf8';
747 print $dotfile $self->as_dot( $opts );
748 push( @cmd, $dotfile->filename );
749 run( \@cmd, ">", binary(), \$svg );
750 $svg = decode_utf8( $svg );
755 =head2 as_dot( \%options )
757 Returns a string that is the collation graph expressed in dot
758 (i.e. GraphViz) format. Options include:
773 my( $self, $opts ) = @_;
774 my $startrank = $opts->{'from'} if $opts;
775 my $endrank = $opts->{'to'} if $opts;
776 my $color_common = $opts->{'color_common'} if $opts;
777 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
778 && $self->end->rank > 100;
779 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
781 # Check the arguments
783 return if $endrank && $startrank > $endrank;
784 return if $startrank > $self->end->rank;
786 if( defined $endrank ) {
787 return if $endrank < 0;
788 $endrank = undef if $endrank == $self->end->rank;
791 my $graph_name = $self->tradition->name;
792 $graph_name =~ s/[^\w\s]//g;
793 $graph_name = join( '_', split( /\s+/, $graph_name ) );
801 'fillcolor' => 'white',
806 'arrowhead' => 'open',
807 'color' => '#000000',
808 'fontcolor' => '#000000',
811 my $dot = sprintf( "digraph %s {\n", $graph_name );
812 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
813 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
815 # Output substitute start/end readings if necessary
817 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
820 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
822 if( $STRAIGHTENHACK ) {
824 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
825 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
826 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
828 my %used; # Keep track of the readings that actually appear in the graph
829 # Sort the readings by rank if we have ranks; this speeds layout.
830 my @all_readings = $self->end->has_rank
831 ? sort { $a->rank <=> $b->rank } $self->readings
833 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
834 foreach my $reading ( @all_readings ) {
835 # Only output readings within our rank range.
836 next if $startrank && $reading->rank < $startrank;
837 next if $endrank && $reading->rank > $endrank;
838 $used{$reading->id} = 1;
839 # Need not output nodes without separate labels
840 next if $reading->id eq $reading->text;
842 my $label = $reading->text;
843 $label .= '-' if $reading->join_next;
844 $label = "-$label" if $reading->join_prior;
845 $label =~ s/\"/\\\"/g;
846 $rattrs->{'label'} = $label;
847 $rattrs->{'id'} = $reading->id;
848 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
849 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
852 # Add the real edges. Need to weight one edge per rank jump, in a
854 # my $weighted = $self->_add_edge_weights;
855 my @edges = $self->paths;
856 my( %substart, %subend );
857 foreach my $edge ( @edges ) {
858 # Do we need to output this edge?
859 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
860 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
861 my $variables = { %edge_attrs, 'label' => $label };
863 # Account for the rank gap if necessary
864 my $rank0 = $self->reading( $edge->[0] )->rank
865 if $self->reading( $edge->[0] )->has_rank;
866 my $rank1 = $self->reading( $edge->[1] )->rank
867 if $self->reading( $edge->[1] )->has_rank;
868 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
869 $variables->{'minlen'} = $rank1 - $rank0;
872 # Add the calculated edge weights
873 # if( exists $weighted->{$edge->[0]}
874 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
875 # # $variables->{'color'} = 'red';
876 # $variables->{'weight'} = 3.0;
879 # EXPERIMENTAL: make edge width reflect no. of witnesses
880 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
881 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
883 my $varopts = _dot_attr_string( $variables );
884 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
885 $edge->[0], $edge->[1], $varopts );
886 } elsif( $used{$edge->[0]} ) {
887 $subend{$edge->[0]} = $edge->[1];
888 } elsif( $used{$edge->[1]} ) {
889 $substart{$edge->[1]} = $edge->[0];
893 # If we are asked to, add relationship links
894 if( exists $opts->{show_relations} ) {
895 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
896 if( $filter eq 'transposition' ) {
897 $filter =~ qr/^transposition$/;
899 foreach my $redge ( $self->relationships ) {
900 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
901 if( $filter ne 'all' ) {
902 my $rel = $self->get_relationship( $redge );
903 next unless $rel->type =~ /$filter/;
907 constraint => 'false',
908 label => uc( substr( $rel->type, 0, 4 ) ),
911 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
912 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
918 # Add substitute start and end edges if necessary
919 foreach my $node ( keys %substart ) {
920 my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
921 my $variables = { %edge_attrs, 'label' => $witstr };
922 my $nrdg = $self->reading( $node );
923 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
924 # Substart is actually one lower than $startrank
925 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
927 my $varopts = _dot_attr_string( $variables );
928 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
930 foreach my $node ( keys %subend ) {
931 my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
932 my $variables = { %edge_attrs, 'label' => $witstr };
933 my $varopts = _dot_attr_string( $variables );
934 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
937 if( $STRAIGHTENHACK ) {
938 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
939 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
946 sub _dot_attr_string {
949 foreach my $k ( sort keys %$hash ) {
951 push( @attrs, $k.'="'.$v.'"' );
953 return( '[ ' . join( ', ', @attrs ) . ' ]' );
956 sub _add_edge_weights {
958 # Walk the graph from START to END, choosing the successor node with
959 # the largest number of witness paths each time.
961 my $curr = $self->start->id;
962 my $ranked = $self->end->has_rank;
963 while( $curr ne $self->end->id ) {
964 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
965 my @succ = sort { $self->path_witnesses( $curr, $a )
966 <=> $self->path_witnesses( $curr, $b ) }
967 $self->sequence->successors( $curr );
968 my $next = pop @succ;
969 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
970 # Try to avoid lacunae in the weighted path.
972 ( $self->reading( $next )->is_lacuna ||
973 $nextrank - $rank > 1 ) ){
976 $weighted->{$curr} = $next;
982 =head2 path_witnesses( $edge )
984 Returns the list of sigils whose witnesses are associated with the given edge.
985 The edge can be passed as either an array or an arrayref of ( $source, $target ).
990 my( $self, @edge ) = @_;
991 # If edge is an arrayref, cope.
992 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
996 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
1000 # Helper function. Make a display label for the given witnesses, showing a.c.
1001 # witnesses only where the main witness is not also in the list.
1002 sub _path_display_label {
1005 map { $wits{$_} = 1 } @_;
1007 # If an a.c. wit is listed, remove it if the main wit is also listed.
1008 # Otherwise keep it for explicit listing.
1009 my $aclabel = $self->ac_label;
1011 foreach my $w ( sort keys %wits ) {
1012 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1013 if( exists $wits{$1} ) {
1016 push( @disp_ac, $w );
1021 # See if we are in a majority situation.
1022 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1023 $maj = $maj > 5 ? $maj : 5;
1024 if( scalar keys %wits > $maj ) {
1025 unshift( @disp_ac, 'majority' );
1026 return join( ', ', @disp_ac );
1028 return join( ', ', sort keys %wits );
1032 =head2 readings_at_rank( $rank )
1034 Returns a list of readings at a given rank, taken from the alignment table.
1038 sub readings_at_rank {
1039 my( $self, $rank ) = @_;
1040 my $table = $self->alignment_table;
1041 # Table rank is real rank - 1.
1042 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1044 foreach my $e ( @elements ) {
1045 next unless ref( $e ) eq 'HASH';
1046 next unless exists $e->{'t'};
1047 $readings{$e->{'t'}->id} = $e->{'t'};
1049 return values %readings;
1054 Returns a GraphML representation of the collation. The GraphML will contain
1055 two graphs. The first expresses the attributes of the readings and the witness
1056 paths that link them; the second expresses the relationships that link the
1057 readings. This is the native transfer format for a tradition.
1061 use Text::Tradition;
1067 my $datafile = 't/data/florilegium_tei_ps.xml';
1068 my $tradition = Text::Tradition->new( 'input' => 'TEI',
1070 'file' => $datafile,
1073 ok( $tradition, "Got a tradition object" );
1074 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1075 ok( $tradition->collation, "Tradition has a collation" );
1077 my $c = $tradition->collation;
1078 is( scalar $c->readings, $READINGS, "Collation has all readings" );
1079 is( scalar $c->paths, $PATHS, "Collation has all paths" );
1080 is( scalar $c->relationships, 0, "Collation has all relationships" );
1082 # Add a few relationships
1083 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1084 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1085 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
1087 # Now write it to GraphML and parse it again.
1089 my $graphml = $c->as_graphml;
1090 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1091 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1092 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1093 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1095 # Now add a stemma, write to GraphML, and look at the output.
1097 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
1098 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1099 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1100 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1101 $graphml = $c->as_graphml;
1102 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1109 ## TODO MOVE this to Tradition.pm and modularize it better
1111 my( $self, $options ) = @_;
1112 $self->calculate_ranks unless $self->_graphcalc_done;
1114 my $start = $options->{'from'}
1115 ? $self->reading( $options->{'from'} ) : $self->start;
1116 my $end = $options->{'to'}
1117 ? $self->reading( $options->{'to'} ) : $self->end;
1118 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1119 throw( 'Start node must be before end node' );
1121 # The readings need to be ranked for this to work.
1122 $start = $self->start unless $start->has_rank;
1123 $end = $self->end unless $end->has_rank;
1125 unless( $start eq $self->start ) {
1126 $rankoffset = $start->rank - 1;
1131 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1132 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1133 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
1134 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
1136 # Create the document and root node
1137 require XML::LibXML;
1138 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1139 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1140 $graphml->setDocumentElement( $root );
1141 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1142 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1144 # List of attribute types to save on our objects and their corresponding
1149 'Bool' => 'boolean',
1150 'ReadingID' => 'string',
1151 'RelationshipType' => 'string',
1152 'RelationshipScope' => 'string',
1155 # Add the data keys for the graph. Include an extra key 'version' for the
1156 # GraphML output version.
1157 my %graph_data_keys;
1159 my %graph_attributes = ( 'version' => 'string' );
1160 # Graph attributes include those of Tradition and those of Collation.
1162 # TODO Use meta introspection method from duplicate_reading to do this
1163 # instead of naming custom keys.
1164 my $tmeta = $self->tradition->meta;
1165 my $cmeta = $self->meta;
1166 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1167 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1168 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1169 next if $attr->name =~ /^_/;
1170 next unless $save_types{$attr->type_constraint->name};
1171 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1173 # Extra custom keys for complex objects that should be saved in some form.
1174 # The subroutine should return a string, or undef/empty.
1175 if( $tmeta->has_method('stemmata') ) {
1176 $graph_attributes{'stemmata'} = sub {
1178 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1179 $self->tradition->stemmata;
1180 join( "\n", @stemstrs );
1184 if( $tmeta->has_method('user') ) {
1185 $graph_attributes{'user'} = sub {
1186 $self->tradition->user ? $self->tradition->user->id : undef
1190 foreach my $datum ( sort keys %graph_attributes ) {
1191 $graph_data_keys{$datum} = 'dg'.$gdi++;
1192 my $key = $root->addNewChild( $graphml_ns, 'key' );
1193 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1194 : $graph_attributes{$datum};
1195 $key->setAttribute( 'attr.name', $datum );
1196 $key->setAttribute( 'attr.type', $dtype );
1197 $key->setAttribute( 'for', 'graph' );
1198 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1201 # Add the data keys for reading nodes
1202 my %reading_attributes;
1203 my $rmeta = Text::Tradition::Collation::Reading->meta;
1204 foreach my $attr( $rmeta->get_all_attributes ) {
1205 next if $attr->name =~ /^_/;
1206 next unless $save_types{$attr->type_constraint->name};
1207 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1209 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1210 # Extra custom key for the reading morphology
1211 $reading_attributes{'lexemes'} = 'string';
1216 foreach my $datum ( sort keys %reading_attributes ) {
1217 $node_data_keys{$datum} = 'dn'.$ndi++;
1218 my $key = $root->addNewChild( $graphml_ns, 'key' );
1219 $key->setAttribute( 'attr.name', $datum );
1220 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1221 $key->setAttribute( 'for', 'node' );
1222 $key->setAttribute( 'id', $node_data_keys{$datum} );
1225 # Add the data keys for edges, that is, paths and relationships. Path
1226 # data does not come from a Moose class so is here manually.
1229 my %edge_attributes = (
1230 witness => 'string', # ID/label for a path
1231 extra => 'boolean', # Path key
1233 my @path_attributes = keys %edge_attributes; # track our manual additions
1234 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1235 foreach my $attr( $pmeta->get_all_attributes ) {
1236 next if $attr->name =~ /^_/;
1237 next unless $save_types{$attr->type_constraint->name};
1238 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1240 foreach my $datum ( sort keys %edge_attributes ) {
1241 $edge_data_keys{$datum} = 'de'.$edi++;
1242 my $key = $root->addNewChild( $graphml_ns, 'key' );
1243 $key->setAttribute( 'attr.name', $datum );
1244 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1245 $key->setAttribute( 'for', 'edge' );
1246 $key->setAttribute( 'id', $edge_data_keys{$datum} );
1249 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1250 my $xmlidname = $self->tradition->name;
1251 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1252 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1253 $xmlidname = '_'.$xmlidname;
1255 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1256 $sgraph->setAttribute( 'edgedefault', 'directed' );
1257 $sgraph->setAttribute( 'id', $xmlidname );
1258 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1259 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1260 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1261 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1262 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1264 # Tradition/collation attribute data
1265 foreach my $datum ( keys %graph_attributes ) {
1267 if( $datum eq 'version' ) {
1269 } elsif( ref( $graph_attributes{$datum} ) ) {
1270 my $sub = $graph_attributes{$datum};
1272 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1273 $value = $self->tradition->$datum;
1275 $value = $self->$datum;
1277 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1282 # Add our readings to the graph
1283 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1284 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1285 ( $n->rank < $start->rank || $n->rank > $end->rank );
1286 $use_readings{$n->id} = 1;
1287 # Add to the main graph
1288 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1289 my $node_xmlid = 'n' . $node_ctr++;
1290 $node_hash{ $n->id } = $node_xmlid;
1291 $node_el->setAttribute( 'id', $node_xmlid );
1292 foreach my $d ( keys %reading_attributes ) {
1294 # Custom serialization
1295 if( $d eq 'lexemes' ) {
1296 # If nval is a true value, we have lexemes so we need to
1297 # serialize them. Otherwise set nval to undef so that the
1298 # key is excluded from this reading.
1299 $nval = $nval ? $n->_serialize_lexemes : undef;
1300 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1303 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1304 # Adjust the ranks within the subgraph.
1305 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1306 : $nval - $rankoffset;
1308 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1313 # Add the path edges to the sequence graph
1315 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1316 # We add an edge in the graphml for every witness in $e.
1317 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1318 my @edge_wits = sort $self->path_witnesses( $e );
1319 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1320 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1321 # Skip any path from start to end; that witness is not in the subgraph.
1322 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1323 foreach my $wit ( @edge_wits ) {
1324 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1325 $node_hash{ $e->[0] },
1326 $node_hash{ $e->[1] } );
1327 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1328 $edge_el->setAttribute( 'source', $from );
1329 $edge_el->setAttribute( 'target', $to );
1330 $edge_el->setAttribute( 'id', $id );
1332 # It's a witness path, so add the witness
1334 my $key = $edge_data_keys{'witness'};
1335 # Is this an ante-corr witness?
1336 my $aclabel = $self->ac_label;
1337 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1338 # Keep the base witness
1340 # ...and record that this is an 'extra' reading path
1341 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1343 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1347 # Report the actual number of nodes and edges that went in
1348 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1349 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1351 # Add the relationship graph to the XML
1352 map { delete $edge_data_keys{$_} } @path_attributes;
1353 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1354 $node_data_keys{'id'}, \%edge_data_keys );
1356 # Save and return the thing
1357 my $result = decode_utf8( $graphml->toString(1) );
1361 sub _add_graphml_data {
1362 my( $el, $key, $value ) = @_;
1363 return unless defined $value;
1364 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1365 $data_el->setAttribute( 'key', $key );
1366 $data_el->appendText( $value );
1371 Returns a CSV alignment table representation of the collation graph, one
1372 row per witness (or witness uncorrected.)
1378 my $table = $self->alignment_table;
1379 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
1381 # Make the header row
1382 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1383 push( @result, decode_utf8( $csv->string ) );
1384 # Make the rest of the rows
1385 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1386 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1387 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1388 $csv->combine( @row );
1389 push( @result, decode_utf8( $csv->string ) );
1391 return join( "\n", @result );
1394 =head2 alignment_table
1396 Return a reference to an alignment table, in a slightly enhanced CollateX
1397 format which looks like this:
1399 $table = { alignment => [ { witness => "SIGIL",
1400 tokens => [ { t => "TEXT" }, ... ] },
1401 { witness => "SIG2",
1402 tokens => [ { t => "TEXT" }, ... ] },
1404 length => TEXTLEN };
1408 sub alignment_table {
1410 return $self->cached_table if $self->has_cached_table;
1412 # Make sure we can do this
1413 throw( "Need a linear graph in order to make an alignment table" )
1414 unless $self->linear;
1415 $self->calculate_ranks()
1416 unless $self->_graphcalc_done && $self->end->has_rank;
1418 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1419 my @all_pos = ( 1 .. $self->end->rank - 1 );
1420 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1421 # say STDERR "Making witness row(s) for " . $wit->sigil;
1422 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1423 my @row = _make_witness_row( \@wit_path, \@all_pos );
1424 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1425 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1426 push( @{$table->{'alignment'}}, $witobj );
1427 if( $wit->is_layered ) {
1428 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1429 $wit->sigil.$self->ac_label );
1430 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1431 my $witacobj = { 'witness' => $wit->sigil.$self->ac_label,
1432 'tokens' => \@ac_row };
1433 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1434 push( @{$table->{'alignment'}}, $witacobj );
1437 $self->cached_table( $table );
1441 sub _make_witness_row {
1442 my( $path, $positions ) = @_;
1444 map { $char_hash{$_} = undef } @$positions;
1446 foreach my $rdg ( @$path ) {
1447 say STDERR "rank " . $rdg->rank if $debug;
1448 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1449 $char_hash{$rdg->rank} = { 't' => $rdg };
1451 my @row = map { $char_hash{$_} } @$positions;
1452 # Fill in lacuna markers for undef spots in the row
1453 my $last_el = shift @row;
1454 my @filled_row = ( $last_el );
1455 foreach my $el ( @row ) {
1456 # If we are using node reference, make the lacuna node appear many times
1457 # in the table. If not, use the lacuna tag.
1458 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1461 push( @filled_row, $el );
1468 =head1 NAVIGATION METHODS
1470 =head2 reading_sequence( $first, $last, $sigil, $backup )
1472 Returns the ordered list of readings, starting with $first and ending
1473 with $last, for the witness given in $sigil. If a $backup sigil is
1474 specified (e.g. when walking a layered witness), it will be used wherever
1475 no $sigil path exists. If there is a base text reading, that will be
1476 used wherever no path exists for $sigil or $backup.
1480 # TODO Think about returning some lazy-eval iterator.
1481 # TODO Get rid of backup; we should know from what witness is whether we need it.
1483 sub reading_sequence {
1484 my( $self, $start, $end, $witness ) = @_;
1486 $witness = $self->baselabel unless $witness;
1487 my @readings = ( $start );
1490 while( $n && $n->id ne $end->id ) {
1491 if( exists( $seen{$n->id} ) ) {
1492 throw( "Detected loop for $witness at " . $n->id );
1496 my $next = $self->next_reading( $n, $witness );
1498 throw( "Did not find any path for $witness from reading " . $n->id );
1500 push( @readings, $next );
1503 # Check that the last reading is our end reading.
1504 my $last = $readings[$#readings];
1505 throw( "Last reading found from " . $start->text .
1506 " for witness $witness is not the end!" ) # TODO do we get this far?
1507 unless $last->id eq $end->id;
1512 =head2 next_reading( $reading, $sigil );
1514 Returns the reading that follows the given reading along the given witness
1520 # Return the successor via the corresponding path.
1522 my $answer = $self->_find_linked_reading( 'next', @_ );
1523 return undef unless $answer;
1524 return $self->reading( $answer );
1527 =head2 prior_reading( $reading, $sigil )
1529 Returns the reading that precedes the given reading along the given witness
1535 # Return the predecessor via the corresponding path.
1537 my $answer = $self->_find_linked_reading( 'prior', @_ );
1538 return $self->reading( $answer );
1541 sub _find_linked_reading {
1542 my( $self, $direction, $node, $path ) = @_;
1544 # Get a backup if we are dealing with a layered witness
1546 my $aclabel = $self->ac_label;
1547 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1551 my @linked_paths = $direction eq 'next'
1552 ? $self->sequence->edges_from( $node )
1553 : $self->sequence->edges_to( $node );
1554 return undef unless scalar( @linked_paths );
1556 # We have to find the linked path that contains all of the
1557 # witnesses supplied in $path.
1558 my( @path_wits, @alt_path_wits );
1559 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1560 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1563 foreach my $le ( @linked_paths ) {
1564 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1567 my @le_wits = sort $self->path_witnesses( $le );
1568 if( _is_within( \@path_wits, \@le_wits ) ) {
1569 # This is the right path.
1570 return $direction eq 'next' ? $le->[1] : $le->[0];
1571 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1575 # Got this far? Return the alternate path if it exists.
1576 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1579 # Got this far? Return the base path if it exists.
1580 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1583 # Got this far? We have no appropriate path.
1584 warn "Could not find $direction node from " . $node->id
1585 . " along path $path";
1591 my( $set1, $set2 ) = @_;
1592 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1593 foreach my $el ( @$set1 ) {
1594 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1599 # Return the string that joins together a list of witnesses for
1600 # display on a single path.
1601 sub _witnesses_of_label {
1602 my( $self, $label ) = @_;
1603 my $regex = $self->wit_list_separator;
1604 my @answer = split( /\Q$regex\E/, $label );
1608 =head2 common_readings
1610 Returns the list of common readings in the graph (i.e. those readings that are
1611 shared by all non-lacunose witnesses.)
1615 sub common_readings {
1617 my @common = grep { $_->is_common } $self->readings;
1621 =head2 path_text( $sigil, [, $start, $end ] )
1623 Returns the text of a witness (plus its backup, if we are using a layer)
1624 as stored in the collation. The text is returned as a string, where the
1625 individual readings are joined with spaces and the meta-readings (e.g.
1626 lacunae) are omitted. Optional specification of $start and $end allows
1627 the generation of a subset of the witness text.
1632 my( $self, $wit, $start, $end ) = @_;
1633 $start = $self->start unless $start;
1634 $end = $self->end unless $end;
1635 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1638 foreach my $r ( @path ) {
1639 unless ( $r->join_prior || !$last || $last->join_next ) {
1642 $pathtext .= $r->text;
1648 =head1 INITIALIZATION METHODS
1650 These are mostly for use by parsers.
1652 =head2 make_witness_path( $witness )
1654 Link the array of readings contained in $witness->path (and in
1655 $witness->uncorrected_path if it exists) into collation paths.
1656 Clear out the arrays when finished.
1658 =head2 make_witness_paths
1660 Call make_witness_path for all witnesses in the tradition.
1664 # For use when a collation is constructed from a base text and an apparatus.
1665 # We have the sequences of readings and just need to add path edges.
1666 # When we are done, clear out the witness path attributes, as they are no
1668 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1670 sub make_witness_paths {
1672 foreach my $wit ( $self->tradition->witnesses ) {
1673 # say STDERR "Making path for " . $wit->sigil;
1674 $self->make_witness_path( $wit );
1678 sub make_witness_path {
1679 my( $self, $wit ) = @_;
1680 my @chain = @{$wit->path};
1681 my $sig = $wit->sigil;
1682 # Add start and end if necessary
1683 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1684 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1685 foreach my $idx ( 0 .. $#chain-1 ) {
1686 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1688 if( $wit->is_layered ) {
1689 @chain = @{$wit->uncorrected_path};
1690 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1691 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1692 foreach my $idx( 0 .. $#chain-1 ) {
1693 my $source = $chain[$idx];
1694 my $target = $chain[$idx+1];
1695 $self->add_path( $source, $target, $sig.$self->ac_label )
1696 unless $self->has_path( $source, $target, $sig );
1700 $wit->clear_uncorrected_path;
1703 =head2 calculate_ranks
1705 Calculate the reading ranks (that is, their aligned positions relative
1706 to each other) for the graph. This can only be called on linear collations.
1710 use Text::Tradition;
1712 my $cxfile = 't/data/Collatex-16.xml';
1713 my $t = Text::Tradition->new(
1715 'input' => 'CollateX',
1718 my $c = $t->collation;
1721 my $table = $c->alignment_table;
1722 ok( $c->has_cached_table, "Alignment table was cached" );
1723 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1724 $c->calculate_ranks;
1725 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1726 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
1727 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
1728 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
1729 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
1735 sub calculate_ranks {
1737 # Save the existing ranks, in case we need to invalidate the cached SVG.
1739 map { $existing_ranks{$_} = $_->rank } $self->readings;
1741 # Do the rankings based on the relationship equivalence graph, starting
1742 # with the start node.
1743 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1745 # Transfer our rankings from the topological graph to the real one.
1746 foreach my $r ( $self->readings ) {
1747 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1748 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
1750 # Die. Find the last rank we calculated.
1751 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1752 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
1754 my $last = pop @all_defined;
1755 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1758 # Do we need to invalidate the cached data?
1759 if( $self->has_cached_table ) {
1760 foreach my $r ( $self->readings ) {
1761 next if defined( $existing_ranks{$r} )
1762 && $existing_ranks{$r} == $r->rank;
1763 # Something has changed, so clear the cache
1764 $self->_clear_cache;
1765 # ...and recalculate the common readings.
1766 $self->calculate_common_readings();
1770 # The graph calculation information is now up to date.
1771 $self->_graphcalc_done(1);
1776 $self->wipe_table if $self->has_cached_table;
1780 =head2 flatten_ranks
1782 A convenience method for parsing collation data. Searches the graph for readings
1783 with the same text at the same rank, and merges any that are found.
1789 my %unique_rank_rdg;
1791 foreach my $rdg ( $self->readings ) {
1792 next unless $rdg->has_rank;
1793 my $key = $rdg->rank . "||" . $rdg->text;
1794 if( exists $unique_rank_rdg{$key} ) {
1795 # Make sure they don't have different grammatical forms
1796 my $ur = $unique_rank_rdg{$key};
1797 if( $rdg->is_identical( $ur ) ) {
1799 #say STDERR "Combining readings at same rank: $key";
1801 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1802 # TODO see if this now makes a common point.
1805 $unique_rank_rdg{$key} = $rdg;
1808 # If we merged readings, the ranks are still fine but the alignment
1809 # table is wrong. Wipe it.
1810 $self->wipe_table() if $changed;
1814 =head2 calculate_common_readings
1816 Goes through the graph identifying the readings that appear in every witness
1817 (apart from those with lacunae at that spot.) Marks them as common and returns
1822 use Text::Tradition;
1824 my $cxfile = 't/data/Collatex-16.xml';
1825 my $t = Text::Tradition->new(
1827 'input' => 'CollateX',
1830 my $c = $t->collation;
1832 my @common = $c->calculate_common_readings();
1833 is( scalar @common, 8, "Found correct number of common readings" );
1834 my @marked = sort $c->common_readings();
1835 is( scalar @common, 8, "All common readings got marked as such" );
1836 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
1837 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1843 sub calculate_common_readings {
1846 map { $_->is_common( 0 ) } $self->readings;
1847 # Implicitly calls calculate_ranks
1848 my $table = $self->alignment_table;
1849 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1850 my @row = map { $_->{'tokens'}->[$idx]
1851 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1852 @{$table->{'alignment'}};
1854 foreach my $r ( @row ) {
1856 $hash{$r->id} = $r unless $r->is_meta;
1858 $hash{'UNDEF'} = $r;
1861 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1862 my( $r ) = values %hash;
1864 push( @common, $r );
1870 =head2 text_from_paths
1872 Calculate the text array for all witnesses from the path, for later consistency
1873 checking. Only to be used if there is no non-graph-based way to know the
1878 sub text_from_paths {
1880 foreach my $wit ( $self->tradition->witnesses ) {
1881 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1883 foreach my $r ( @readings ) {
1884 next if $r->is_meta;
1885 push( @text, $r->text );
1887 $wit->text( \@text );
1888 if( $wit->is_layered ) {
1889 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1890 $wit->sigil.$self->ac_label );
1892 foreach my $r ( @ucrdgs ) {
1893 next if $r->is_meta;
1894 push( @uctext, $r->text );
1896 $wit->layertext( \@uctext );
1901 =head1 UTILITY FUNCTIONS
1903 =head2 common_predecessor( $reading_a, $reading_b )
1905 Find the last reading that occurs in sequence before both the given readings.
1906 At the very least this should be $self->start.
1908 =head2 common_successor( $reading_a, $reading_b )
1910 Find the first reading that occurs in sequence after both the given readings.
1911 At the very least this should be $self->end.
1915 use Text::Tradition;
1917 my $cxfile = 't/data/Collatex-16.xml';
1918 my $t = Text::Tradition->new(
1920 'input' => 'CollateX',
1923 my $c = $t->collation;
1925 is( $c->common_predecessor( 'n24', 'n23' )->id,
1926 'n20', "Found correct common predecessor" );
1927 is( $c->common_successor( 'n24', 'n23' )->id,
1928 '__END__', "Found correct common successor" );
1930 is( $c->common_predecessor( 'n19', 'n17' )->id,
1931 'n16', "Found correct common predecessor for readings on same path" );
1932 is( $c->common_successor( 'n21', 'n10' )->id,
1933 '__END__', "Found correct common successor for readings on same path" );
1939 ## Return the closest reading that is a predecessor of both the given readings.
1940 sub common_predecessor {
1942 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1943 return $self->_common_in_path( $r1, $r2, 'predecessors' );
1946 sub common_successor {
1948 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1949 return $self->_common_in_path( $r1, $r2, 'successors' );
1953 # TODO think about how to do this without ranks...
1954 sub _common_in_path {
1955 my( $self, $r1, $r2, $dir ) = @_;
1956 my $iter = $self->end->rank;
1958 my @last_r1 = ( $r1 );
1959 my @last_r2 = ( $r2 );
1960 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
1962 # say STDERR "Finding common $dir for $r1, $r2";
1963 while( !@candidates ) {
1964 last unless $iter--; # Avoid looping infinitely
1965 # Iterate separately down the graph from r1 and r2
1966 my( @new_lc1, @new_lc2 );
1967 foreach my $lc ( @last_r1 ) {
1968 foreach my $p ( $lc->$dir ) {
1969 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1970 # say STDERR "Path candidate $p from $lc";
1971 push( @candidates, $p );
1972 } elsif( !$all_seen{$p->id} ) {
1973 $all_seen{$p->id} = 'r1';
1974 push( @new_lc1, $p );
1978 foreach my $lc ( @last_r2 ) {
1979 foreach my $p ( $lc->$dir ) {
1980 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1981 # say STDERR "Path candidate $p from $lc";
1982 push( @candidates, $p );
1983 } elsif( !$all_seen{$p->id} ) {
1984 $all_seen{$p->id} = 'r2';
1985 push( @new_lc2, $p );
1989 @last_r1 = @new_lc1;
1990 @last_r2 = @new_lc2;
1992 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1993 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1997 Text::Tradition::Error->throw(
1998 'ident' => 'Collation error',
2004 __PACKAGE__->meta->make_immutable;
2010 =item * Rework XML serialization in a more modular way
2016 This package is free software and is provided "as is" without express
2017 or implied warranty. You can redistribute it and/or modify it under
2018 the same terms as Perl itself.
2022 Tara L Andrews E<lt>aurum@cpan.orgE<gt>