1 package Text::Tradition::Collation;
3 use Encode qw( decode_utf8 );
7 use IPC::Run qw( run binary );
9 use Text::Tradition::Collation::Reading;
10 use Text::Tradition::Collation::RelationshipStore;
11 use Text::Tradition::Error;
13 use XML::LibXML::XPathContext;
19 default => sub { Graph->new() },
27 isa => 'Text::Tradition::Collation::RelationshipStore',
29 relationships => 'relationships',
30 related_readings => 'related_readings',
31 get_relationship => 'get_relationship',
32 del_relationship => 'del_relationship',
34 writer => '_set_relations',
39 isa => 'Text::Tradition',
40 writer => '_set_tradition',
45 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 _add_reading => 'set',
50 del_reading => 'delete',
51 has_reading => 'exists',
54 default => sub { {} },
57 has 'wit_list_separator' => (
66 default => 'base text',
89 isa => 'Text::Tradition::Collation::Reading',
90 writer => '_set_start',
96 isa => 'Text::Tradition::Collation::Reading',
101 has 'cached_svg' => (
104 predicate => 'has_cached_svg',
105 clearer => 'wipe_svg',
108 has 'cached_table' => (
111 predicate => 'has_cached_table',
112 clearer => 'wipe_table',
115 has '_graphcalc_done' => (
123 Text::Tradition::Collation - a software model for a text collation
128 my $t = Text::Tradition->new(
129 'name' => 'this is a text',
131 'file' => '/path/to/tei_parallel_seg_file.xml' );
133 my $c = $t->collation;
134 my @readings = $c->readings;
135 my @paths = $c->paths;
136 my @relationships = $c->relationships;
138 my $svg_variant_graph = $t->collation->as_svg();
142 Text::Tradition is a library for representation and analysis of collated
143 texts, particularly medieval ones. The Collation is the central feature of
144 a Tradition, where the text, its sequence of readings, and its relationships
145 between readings are actually kept.
151 The constructor. Takes a hash or hashref of the following arguments:
155 =item * tradition - The Text::Tradition object to which the collation
158 =item * linear - Whether the collation should be linear; that is, whether
159 transposed readings should be treated as two linked readings rather than one,
160 and therefore whether the collation graph is acyclic. Defaults to true.
162 =item * baselabel - The default label for the path taken by a base text
163 (if any). Defaults to 'base text'.
165 =item * wit_list_separator - The string to join a list of witnesses for
166 purposes of making labels in display graphs. Defaults to ', '.
168 =item * ac_label - The extra label to tack onto a witness sigil when
169 representing another layer of path for the given witness - that is, when
170 a text has more than one possible reading due to scribal corrections or
171 the like. Defaults to ' (a.c.)'.
173 =item * wordsep - The string used to separate words in the original text.
184 =head2 wit_list_separator
192 Simple accessors for collation attributes.
196 The meta-reading at the start of every witness path.
200 The meta-reading at the end of every witness path.
204 Returns all Reading objects in the graph.
206 =head2 reading( $id )
208 Returns the Reading object corresponding to the given ID.
210 =head2 add_reading( $reading_args )
212 Adds a new reading object to the collation.
213 See L<Text::Tradition::Collation::Reading> for the available arguments.
215 =head2 del_reading( $object_or_id )
217 Removes the given reading from the collation, implicitly removing its
218 paths and relationships.
220 =head2 merge_readings( $main, $second, $concatenate, $with_str )
222 Merges the $second reading into the $main one. If $concatenate is true, then
223 the merged node will carry the text of both readings, concatenated with either
224 $with_str (if specified) or a sensible default (the empty string if the
225 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
227 The first two arguments may be either readings or reading IDs.
229 =head2 has_reading( $id )
231 Predicate to see whether a given reading ID is in the graph.
233 =head2 reading_witnesses( $object_or_id )
235 Returns a list of sigils whose witnesses contain the reading.
239 Returns all reading paths within the document - that is, all edges in the
240 collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
242 =head2 add_path( $source, $target, $sigil )
244 Links the given readings in the collation in sequence, under the given witness
245 sigil. The readings may be specified by object or ID.
247 =head2 del_path( $source, $target, $sigil )
249 Links the given readings in the collation in sequence, under the given witness
250 sigil. The readings may be specified by object or ID.
252 =head2 has_path( $source, $target );
254 Returns true if the two readings are linked in sequence in any witness.
255 The readings may be specified by object or ID.
259 Returns all Relationship objects in the collation.
261 =head2 add_relationship( $reading, $other_reading, $options )
263 Adds a new relationship of the type given in $options between the two readings,
264 which may be specified by object or ID. Returns a value of ( $status, @vectors)
265 where $status is true on success, and @vectors is a list of relationship edges
266 that were ultimately added.
267 See L<Text::Tradition::Collation::Relationship> for the available options.
273 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
274 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
275 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
278 ### Reading construct/destruct functions
281 my( $self, $reading ) = @_;
282 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
283 my %args = %$reading;
284 $reading = Text::Tradition::Collation::Reading->new(
285 'collation' => $self,
288 # First check to see if a reading with this ID exists.
289 if( $self->reading( $reading->id ) ) {
290 throw( "Collation already has a reading with id " . $reading->id );
292 $self->_graphcalc_done(0);
293 $self->_add_reading( $reading->id => $reading );
294 # Once the reading has been added, put it in both graphs.
295 $self->sequence->add_vertex( $reading->id );
296 $self->relations->add_reading( $reading->id );
300 around del_reading => sub {
305 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
308 # Remove the reading from the graphs.
309 $self->_graphcalc_done(0);
310 $self->_clear_cache; # Explicitly clear caches to GC the reading
311 $self->sequence->delete_vertex( $arg );
312 $self->relations->delete_reading( $arg );
315 $self->$orig( $arg );
322 my $cxfile = 't/data/Collatex-16.xml';
323 my $t = Text::Tradition->new(
325 'input' => 'CollateX',
328 my $c = $t->collation;
330 my $rno = scalar $c->readings;
331 # Split n21 for testing purposes
332 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
333 my $old_r = $c->reading( 'n21' );
334 $old_r->alter_text( 'to' );
335 $c->del_path( 'n20', 'n21', 'A' );
336 $c->add_path( 'n20', 'n21p0', 'A' );
337 $c->add_path( 'n21p0', 'n21', 'A' );
339 ok( $c->reading( 'n21p0' ), "New reading exists" );
340 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
342 # Combine n3 and n4 ( with his )
343 $c->merge_readings( 'n3', 'n4', 1 );
344 ok( !$c->reading('n4'), "Reading n4 is gone" );
345 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
347 # Collapse n9 and n10 ( rood / root )
348 $c->merge_readings( 'n9', 'n10' );
349 ok( !$c->reading('n10'), "Reading n10 is gone" );
350 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
352 # Combine n21 and n21p0
353 my $remaining = $c->reading('n21');
354 $remaining ||= $c->reading('n22'); # one of these should still exist
355 $c->merge_readings( 'n21p0', $remaining, 1 );
356 ok( !$c->reading('n21'), "Reading $remaining is gone" );
357 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
366 # We only need the IDs for adding paths to the graph, not the reading
367 # objects themselves.
368 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
369 $self->_graphcalc_done(0);
371 # The kept reading should inherit the paths and the relationships
372 # of the deleted reading.
373 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
374 my @vector = ( $kept );
375 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
376 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
377 next if $vector[0] eq $vector[1]; # Don't add a self loop
378 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
379 $self->sequence->add_edge( @vector );
380 my $fwits = $self->sequence->get_edge_attributes( @vector );
381 @wits{keys %$fwits} = values %$fwits;
382 $self->sequence->set_edge_attributes( @vector, \%wits );
384 $self->relations->merge_readings( $kept, $deleted, $combine_char );
386 # Do the deletion deed.
388 my $kept_obj = $self->reading( $kept );
389 my $del_obj = $self->reading( $deleted );
390 my $joinstr = $combine_char;
391 unless( defined $joinstr ) {
392 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
393 $joinstr = $self->wordsep unless defined $joinstr;
395 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
397 $self->del_reading( $deleted );
401 # Helper function for manipulating the graph.
402 sub _stringify_args {
403 my( $self, $first, $second, @args ) = @_;
405 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
406 $second = $second->id
407 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
408 return( $first, $second, @args );
411 # Helper function for manipulating the graph.
412 sub _objectify_args {
413 my( $self, $first, $second, $arg ) = @_;
414 $first = $self->reading( $first )
415 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
416 $second = $self->reading( $second )
417 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
418 return( $first, $second, $arg );
425 # We only need the IDs for adding paths to the graph, not the reading
426 # objects themselves.
427 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
429 $self->_graphcalc_done(0);
430 # Connect the readings
431 $self->sequence->add_edge( $source, $target );
432 # Note the witness in question
433 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
439 if( ref( $_[0] ) eq 'ARRAY' ) {
446 # We only need the IDs for adding paths to the graph, not the reading
447 # objects themselves.
448 my( $source, $target, $wit ) = $self->_stringify_args( @args );
450 $self->_graphcalc_done(0);
451 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
452 $self->sequence->delete_edge_attribute( $source, $target, $wit );
454 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
455 $self->sequence->delete_edge( $source, $target );
460 # Extra graph-alike utility
463 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
464 return undef unless $self->sequence->has_edge( $source, $target );
465 return $self->sequence->has_edge_attribute( $source, $target, $wit );
468 =head2 clear_witness( @sigil_list )
470 Clear the given witnesses out of the collation entirely, removing references
471 to them in paths, and removing readings that belong only to them. Should only
472 be called via $tradition->del_witness.
477 my( $self, @sigils ) = @_;
479 $self->_graphcalc_done(0);
480 # Clear the witness(es) out of the paths
481 foreach my $e ( $self->paths ) {
482 foreach my $sig ( @sigils ) {
483 $self->del_path( $e, $sig );
487 # Clear out the newly unused readings
488 foreach my $r ( $self->readings ) {
489 unless( $self->reading_witnesses( $r ) ) {
490 $self->del_reading( $r );
495 sub add_relationship {
497 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
498 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
499 $self->_graphcalc_done(0);
503 around qw/ get_relationship del_relationship / => sub {
507 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
510 my( $source, $target ) = $self->_stringify_args( @args );
511 $self->$orig( $source, $target );
514 =head2 reading_witnesses( $reading )
516 Return a list of sigils corresponding to the witnesses in which the reading appears.
520 sub reading_witnesses {
521 my( $self, $reading ) = @_;
522 # We need only check either the incoming or the outgoing edges; I have
523 # arbitrarily chosen "incoming". Thus, special-case the start node.
524 if( $reading eq $self->start ) {
525 return map { $_->sigil } $self->tradition->witnesses;
528 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
529 my $wits = $self->sequence->get_edge_attributes( @$e );
530 @all_witnesses{ keys %$wits } = 1;
532 my $acstr = $self->ac_label;
533 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
534 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
536 return keys %all_witnesses;
539 =head1 OUTPUT METHODS
541 =head2 as_svg( \%options )
543 Returns an SVG string that represents the graph, via as_dot and graphviz.
544 See as_dot for a list of options. Must have GraphViz (dot) installed to run.
549 my( $self, $opts ) = @_;
550 throw( "Need GraphViz installed to output SVG" )
551 unless File::Which::which( 'dot' );
552 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
553 $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
554 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
555 my @cmd = qw/dot -Tsvg/;
557 my $dotfile = File::Temp->new();
559 # $dotfile->unlink_on_destroy(0);
560 binmode $dotfile, ':utf8';
561 print $dotfile $self->as_dot( $opts );
562 push( @cmd, $dotfile->filename );
563 run( \@cmd, ">", binary(), \$svg );
564 $svg = decode_utf8( $svg );
565 $self->cached_svg( $svg ) unless $want_subgraph;
568 return $self->cached_svg;
573 =head2 as_dot( \%options )
575 Returns a string that is the collation graph expressed in dot
576 (i.e. GraphViz) format. Options include:
591 my( $self, $opts ) = @_;
592 my $startrank = $opts->{'from'} if $opts;
593 my $endrank = $opts->{'to'} if $opts;
594 my $color_common = $opts->{'color_common'} if $opts;
595 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
596 && $self->end->rank > 100;
597 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
599 # Check the arguments
601 return if $endrank && $startrank > $endrank;
602 return if $startrank > $self->end->rank;
604 if( defined $endrank ) {
605 return if $endrank < 0;
606 $endrank = undef if $endrank == $self->end->rank;
609 my $graph_name = $self->tradition->name;
610 $graph_name =~ s/[^\w\s]//g;
611 $graph_name = join( '_', split( /\s+/, $graph_name ) );
619 'fillcolor' => 'white',
624 'arrowhead' => 'open',
625 'color' => '#000000',
626 'fontcolor' => '#000000',
629 my $dot = sprintf( "digraph %s {\n", $graph_name );
630 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
631 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
633 # Output substitute start/end readings if necessary
635 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
638 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
640 if( $STRAIGHTENHACK ) {
642 my $startlabel = $startrank ? 'SUBSTART' : 'START';
643 $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
644 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
646 my %used; # Keep track of the readings that actually appear in the graph
647 # Sort the readings by rank if we have ranks; this speeds layout.
648 my @all_readings = $self->end->has_rank
649 ? sort { $a->rank <=> $b->rank } $self->readings
651 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
652 foreach my $reading ( @all_readings ) {
653 # Only output readings within our rank range.
654 next if $startrank && $reading->rank < $startrank;
655 next if $endrank && $reading->rank > $endrank;
656 $used{$reading->id} = 1;
657 # Need not output nodes without separate labels
658 next if $reading->id eq $reading->text;
660 my $label = $reading->text;
661 $label .= '-' if $reading->join_next;
662 $label = "-$label" if $reading->join_prior;
663 $label =~ s/\"/\\\"/g;
664 $rattrs->{'label'} = $label;
665 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
666 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
669 # Add the real edges. Need to weight one edge per rank jump, in a
671 # my $weighted = $self->_add_edge_weights;
672 my @edges = $self->paths;
673 my( %substart, %subend );
674 foreach my $edge ( @edges ) {
675 # Do we need to output this edge?
676 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
677 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
678 my $variables = { %edge_attrs, 'label' => $label };
680 # Account for the rank gap if necessary
681 my $rank0 = $self->reading( $edge->[0] )->rank
682 if $self->reading( $edge->[0] )->has_rank;
683 my $rank1 = $self->reading( $edge->[1] )->rank
684 if $self->reading( $edge->[1] )->has_rank;
685 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
686 $variables->{'minlen'} = $rank1 - $rank0;
689 # Add the calculated edge weights
690 # if( exists $weighted->{$edge->[0]}
691 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
692 # # $variables->{'color'} = 'red';
693 # $variables->{'weight'} = 3.0;
696 # EXPERIMENTAL: make edge width reflect no. of witnesses
697 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
698 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
700 my $varopts = _dot_attr_string( $variables );
701 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
702 $edge->[0], $edge->[1], $varopts );
703 } elsif( $used{$edge->[0]} ) {
704 $subend{$edge->[0]} = 1;
705 } elsif( $used{$edge->[1]} ) {
706 $substart{$edge->[1]} = 1;
709 # Add substitute start and end edges if necessary
710 foreach my $node ( keys %substart ) {
711 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
712 my $variables = { %edge_attrs, 'label' => $witstr };
713 my $varopts = _dot_attr_string( $variables );
714 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
716 foreach my $node ( keys %subend ) {
717 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
718 my $variables = { %edge_attrs, 'label' => $witstr };
719 my $varopts = _dot_attr_string( $variables );
720 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
723 if( $STRAIGHTENHACK ) {
724 my $endlabel = $endrank ? 'SUBEND' : 'END';
725 $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
732 sub _dot_attr_string {
735 foreach my $k ( sort keys %$hash ) {
737 push( @attrs, $k.'="'.$v.'"' );
739 return( '[ ' . join( ', ', @attrs ) . ' ]' );
742 sub _add_edge_weights {
744 # Walk the graph from START to END, choosing the successor node with
745 # the largest number of witness paths each time.
747 my $curr = $self->start->id;
748 my $ranked = $self->end->has_rank;
749 while( $curr ne $self->end->id ) {
750 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
751 my @succ = sort { $self->path_witnesses( $curr, $a )
752 <=> $self->path_witnesses( $curr, $b ) }
753 $self->sequence->successors( $curr );
754 my $next = pop @succ;
755 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
756 # Try to avoid lacunae in the weighted path.
758 ( $self->reading( $next )->is_lacuna ||
759 $nextrank - $rank > 1 ) ){
762 $weighted->{$curr} = $next;
768 =head2 path_witnesses( $edge )
770 Returns the list of sigils whose witnesses are associated with the given edge.
771 The edge can be passed as either an array or an arrayref of ( $source, $target ).
776 my( $self, @edge ) = @_;
777 # If edge is an arrayref, cope.
778 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
782 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
786 # Helper function. Make a display label for the given witnesses, showing a.c.
787 # witnesses only where the main witness is not also in the list.
788 sub _path_display_label {
791 map { $wits{$_} = 1 } @_;
793 # If an a.c. wit is listed, remove it if the main wit is also listed.
794 # Otherwise keep it for explicit listing.
795 my $aclabel = $self->ac_label;
797 foreach my $w ( sort keys %wits ) {
798 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
799 if( exists $wits{$1} ) {
802 push( @disp_ac, $w );
807 # See if we are in a majority situation.
808 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
809 if( scalar keys %wits > $maj ) {
810 unshift( @disp_ac, 'majority' );
811 return join( ', ', @disp_ac );
813 return join( ', ', sort keys %wits );
817 =head2 readings_at_rank( $rank )
819 Returns a list of readings at a given rank, taken from the alignment table.
823 sub readings_at_rank {
824 my( $self, $rank ) = @_;
825 my $table = $self->alignment_table;
826 # Table rank is real rank - 1.
827 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
829 foreach my $e ( @elements ) {
830 next unless ref( $e ) eq 'HASH';
831 next unless exists $e->{'t'};
832 $readings{$e->{'t'}->id} = $e->{'t'};
834 return values %readings;
839 Returns a GraphML representation of the collation. The GraphML will contain
840 two graphs. The first expresses the attributes of the readings and the witness
841 paths that link them; the second expresses the relationships that link the
842 readings. This is the native transfer format for a tradition.
851 my $datafile = 't/data/florilegium_tei_ps.xml';
852 my $tradition = Text::Tradition->new( 'input' => 'TEI',
857 ok( $tradition, "Got a tradition object" );
858 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
859 ok( $tradition->collation, "Tradition has a collation" );
861 my $c = $tradition->collation;
862 is( scalar $c->readings, $READINGS, "Collation has all readings" );
863 is( scalar $c->paths, $PATHS, "Collation has all paths" );
864 is( scalar $c->relationships, 0, "Collation has all relationships" );
866 # Add a few relationships
867 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
868 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
869 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
871 # Now write it to GraphML and parse it again.
873 my $graphml = $c->as_graphml;
874 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
875 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
876 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
877 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
884 my( $self, $options ) = @_;
885 $self->calculate_ranks unless $self->_graphcalc_done;
887 my $start = $options->{'from'}
888 ? $self->reading( $options->{'from'} ) : $self->start;
889 my $end = $options->{'to'}
890 ? $self->reading( $options->{'to'} ) : $self->end;
891 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
892 throw( 'Start node must be before end node' );
894 # The readings need to be ranked for this to work.
895 $start = $self->start unless $start->has_rank;
896 $end = $self->end unless $end->has_rank;
898 unless( $start eq $self->start ) {
899 $rankoffset = $start->rank - 1;
904 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
905 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
906 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
907 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
909 # Create the document and root node
910 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
911 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
912 $graphml->setDocumentElement( $root );
913 $root->setNamespace( $xsi_ns, 'xsi', 0 );
914 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
916 # List of attribute types to save on our objects and their corresponding
922 'RelationshipType' => 'string',
923 'RelationshipScope' => 'string',
926 # List of attribute names *not* to save on our objects.
927 # We will also not save any attribute beginning with _.
929 map { $skipsave{$_} = 1 } qw/ cached_svg /;
931 # Add the data keys for the graph. Include an extra key 'version' for the
932 # GraphML output version.
935 my %graph_attributes = ( 'version' => 'string' );
936 # Graph attributes include those of Tradition and those of Collation.
938 my $tmeta = $self->tradition->meta;
939 my $cmeta = $self->meta;
940 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
941 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
942 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
943 next if $attr->name =~ /^_/;
944 next if $skipsave{$attr->name};
945 next unless $save_types{$attr->type_constraint->name};
946 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
949 foreach my $datum ( sort keys %graph_attributes ) {
950 $graph_data_keys{$datum} = 'dg'.$gdi++;
951 my $key = $root->addNewChild( $graphml_ns, 'key' );
952 $key->setAttribute( 'attr.name', $datum );
953 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
954 $key->setAttribute( 'for', 'graph' );
955 $key->setAttribute( 'id', $graph_data_keys{$datum} );
958 # Add the data keys for reading nodes
959 my %reading_attributes;
960 my $rmeta = Text::Tradition::Collation::Reading->meta;
961 foreach my $attr( $rmeta->get_all_attributes ) {
962 next if $attr->name =~ /^_/;
963 next if $skipsave{$attr->name};
964 next unless $save_types{$attr->type_constraint->name};
965 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
969 foreach my $datum ( sort keys %reading_attributes ) {
970 $node_data_keys{$datum} = 'dn'.$ndi++;
971 my $key = $root->addNewChild( $graphml_ns, 'key' );
972 $key->setAttribute( 'attr.name', $datum );
973 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
974 $key->setAttribute( 'for', 'node' );
975 $key->setAttribute( 'id', $node_data_keys{$datum} );
978 # Add the data keys for edges, that is, paths and relationships. Path
979 # data does not come from a Moose class so is here manually.
982 my %edge_attributes = (
983 witness => 'string', # ID/label for a path
984 extra => 'boolean', # Path key
986 my @path_attributes = keys %edge_attributes; # track our manual additions
987 my $pmeta = Text::Tradition::Collation::Relationship->meta;
988 foreach my $attr( $pmeta->get_all_attributes ) {
989 next if $attr->name =~ /^_/;
990 next if $skipsave{$attr->name};
991 next unless $save_types{$attr->type_constraint->name};
992 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
994 foreach my $datum ( sort keys %edge_attributes ) {
995 $edge_data_keys{$datum} = 'de'.$edi++;
996 my $key = $root->addNewChild( $graphml_ns, 'key' );
997 $key->setAttribute( 'attr.name', $datum );
998 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
999 $key->setAttribute( 'for', 'edge' );
1000 $key->setAttribute( 'id', $edge_data_keys{$datum} );
1003 # Add the collation graph itself
1004 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1005 $sgraph->setAttribute( 'edgedefault', 'directed' );
1006 $sgraph->setAttribute( 'id', $self->tradition->name );
1007 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1008 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
1009 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1010 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
1011 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1013 # Collation attribute data
1014 foreach my $datum ( keys %graph_attributes ) {
1016 if( $datum eq 'version' ) {
1018 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1019 $value = $self->tradition->$datum;
1021 $value = $self->$datum;
1023 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1028 # Add our readings to the graph
1029 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1030 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1031 ( $n->rank < $start->rank || $n->rank > $end->rank );
1032 $use_readings{$n->id} = 1;
1033 # Add to the main graph
1034 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1035 my $node_xmlid = 'n' . $node_ctr++;
1036 $node_hash{ $n->id } = $node_xmlid;
1037 $node_el->setAttribute( 'id', $node_xmlid );
1038 foreach my $d ( keys %reading_attributes ) {
1040 if( $rankoffset && $d eq 'rank' ) {
1041 # Adjust the ranks within the subgraph.
1042 $nval = $n eq $self->end ? $end->rank + 1 : $nval - $rankoffset;
1044 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1049 # Add the path edges to the sequence graph
1051 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1052 # We add an edge in the graphml for every witness in $e.
1053 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1054 my @edge_wits = sort $self->path_witnesses( $e );
1055 $e->[0] = $self->start unless $use_readings{$e->[0]};
1056 $e->[1] = $self->end unless $use_readings{$e->[1]};
1057 foreach my $wit ( @edge_wits ) {
1058 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1059 $node_hash{ $e->[0] },
1060 $node_hash{ $e->[1] } );
1061 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1062 $edge_el->setAttribute( 'source', $from );
1063 $edge_el->setAttribute( 'target', $to );
1064 $edge_el->setAttribute( 'id', $id );
1066 # It's a witness path, so add the witness
1068 my $key = $edge_data_keys{'witness'};
1069 # Is this an ante-corr witness?
1070 my $aclabel = $self->ac_label;
1071 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1072 # Keep the base witness
1074 # ...and record that this is an 'extra' reading path
1075 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1077 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1081 # Add the relationship graph to the XML
1082 map { delete $edge_data_keys{$_} } @path_attributes;
1083 # $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1084 # $node_data_keys{'id'}, \%edge_data_keys );
1086 # Save and return the thing
1087 my $result = decode_utf8( $graphml->toString(1) );
1091 sub _add_graphml_data {
1092 my( $el, $key, $value ) = @_;
1093 return unless defined $value;
1094 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1095 $data_el->setAttribute( 'key', $key );
1096 $data_el->appendText( $value );
1101 Returns a CSV alignment table representation of the collation graph, one
1102 row per witness (or witness uncorrected.)
1108 my $table = $self->alignment_table;
1109 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
1111 # Make the header row
1112 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1113 push( @result, decode_utf8( $csv->string ) );
1114 # Make the rest of the rows
1115 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1116 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1117 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1118 $csv->combine( @row );
1119 push( @result, decode_utf8( $csv->string ) );
1121 return join( "\n", @result );
1124 =head2 alignment_table( $use_refs, $include_witnesses )
1126 Return a reference to an alignment table, in a slightly enhanced CollateX
1127 format which looks like this:
1129 $table = { alignment => [ { witness => "SIGIL",
1130 tokens => [ { t => "TEXT" }, ... ] },
1131 { witness => "SIG2",
1132 tokens => [ { t => "TEXT" }, ... ] },
1134 length => TEXTLEN };
1136 If $use_refs is set to 1, the reading object is returned in the table
1137 instead of READINGTEXT; if not, the text of the reading is returned.
1139 If $include_witnesses is set to a hashref, only the witnesses whose sigil
1140 keys have a true hash value will be included.
1144 sub alignment_table {
1146 $self->calculate_ranks() unless $self->_graphcalc_done;
1147 return $self->cached_table if $self->has_cached_table;
1149 # Make sure we can do this
1150 throw( "Need a linear graph in order to make an alignment table" )
1151 unless $self->linear;
1152 $self->calculate_ranks unless $self->end->has_rank;
1154 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1155 my @all_pos = ( 1 .. $self->end->rank - 1 );
1156 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1157 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1158 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1159 my @row = _make_witness_row( \@wit_path, \@all_pos );
1160 push( @{$table->{'alignment'}},
1161 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1162 if( $wit->is_layered ) {
1163 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1164 $wit->sigil.$self->ac_label );
1165 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1166 push( @{$table->{'alignment'}},
1167 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
1170 $self->cached_table( $table );
1174 sub _make_witness_row {
1175 my( $path, $positions ) = @_;
1177 map { $char_hash{$_} = undef } @$positions;
1179 foreach my $rdg ( @$path ) {
1180 my $rtext = $rdg->text;
1181 $rtext = '#LACUNA#' if $rdg->is_lacuna;
1182 print STDERR "rank " . $rdg->rank . "\n" if $debug;
1183 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1184 $char_hash{$rdg->rank} = { 't' => $rdg };
1186 my @row = map { $char_hash{$_} } @$positions;
1187 # Fill in lacuna markers for undef spots in the row
1188 my $last_el = shift @row;
1189 my @filled_row = ( $last_el );
1190 foreach my $el ( @row ) {
1191 # If we are using node reference, make the lacuna node appear many times
1192 # in the table. If not, use the lacuna tag.
1193 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1196 push( @filled_row, $el );
1202 =head1 NAVIGATION METHODS
1204 =head2 reading_sequence( $first, $last, $sigil, $backup )
1206 Returns the ordered list of readings, starting with $first and ending
1207 with $last, for the witness given in $sigil. If a $backup sigil is
1208 specified (e.g. when walking a layered witness), it will be used wherever
1209 no $sigil path exists. If there is a base text reading, that will be
1210 used wherever no path exists for $sigil or $backup.
1214 # TODO Think about returning some lazy-eval iterator.
1215 # TODO Get rid of backup; we should know from what witness is whether we need it.
1217 sub reading_sequence {
1218 my( $self, $start, $end, $witness ) = @_;
1220 $witness = $self->baselabel unless $witness;
1221 my @readings = ( $start );
1224 while( $n && $n->id ne $end->id ) {
1225 if( exists( $seen{$n->id} ) ) {
1226 throw( "Detected loop for $witness at " . $n->id );
1230 my $next = $self->next_reading( $n, $witness );
1232 throw( "Did not find any path for $witness from reading " . $n->id );
1234 push( @readings, $next );
1237 # Check that the last reading is our end reading.
1238 my $last = $readings[$#readings];
1239 throw( "Last reading found from " . $start->text .
1240 " for witness $witness is not the end!" ) # TODO do we get this far?
1241 unless $last->id eq $end->id;
1246 =head2 next_reading( $reading, $sigil );
1248 Returns the reading that follows the given reading along the given witness
1254 # Return the successor via the corresponding path.
1256 my $answer = $self->_find_linked_reading( 'next', @_ );
1257 return undef unless $answer;
1258 return $self->reading( $answer );
1261 =head2 prior_reading( $reading, $sigil )
1263 Returns the reading that precedes the given reading along the given witness
1269 # Return the predecessor via the corresponding path.
1271 my $answer = $self->_find_linked_reading( 'prior', @_ );
1272 return $self->reading( $answer );
1275 sub _find_linked_reading {
1276 my( $self, $direction, $node, $path ) = @_;
1278 # Get a backup if we are dealing with a layered witness
1280 my $aclabel = $self->ac_label;
1281 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1285 my @linked_paths = $direction eq 'next'
1286 ? $self->sequence->edges_from( $node )
1287 : $self->sequence->edges_to( $node );
1288 return undef unless scalar( @linked_paths );
1290 # We have to find the linked path that contains all of the
1291 # witnesses supplied in $path.
1292 my( @path_wits, @alt_path_wits );
1293 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1294 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1297 foreach my $le ( @linked_paths ) {
1298 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1301 my @le_wits = sort $self->path_witnesses( $le );
1302 if( _is_within( \@path_wits, \@le_wits ) ) {
1303 # This is the right path.
1304 return $direction eq 'next' ? $le->[1] : $le->[0];
1305 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1309 # Got this far? Return the alternate path if it exists.
1310 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1313 # Got this far? Return the base path if it exists.
1314 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1317 # Got this far? We have no appropriate path.
1318 warn "Could not find $direction node from " . $node->id
1319 . " along path $path";
1325 my( $set1, $set2 ) = @_;
1326 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1327 foreach my $el ( @$set1 ) {
1328 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1333 # Return the string that joins together a list of witnesses for
1334 # display on a single path.
1335 sub _witnesses_of_label {
1336 my( $self, $label ) = @_;
1337 my $regex = $self->wit_list_separator;
1338 my @answer = split( /\Q$regex\E/, $label );
1342 =head2 common_readings
1344 Returns the list of common readings in the graph (i.e. those readings that are
1345 shared by all non-lacunose witnesses.)
1349 sub common_readings {
1351 my @common = grep { $_->is_common } $self->readings;
1355 =head2 path_text( $sigil, [, $start, $end ] )
1357 Returns the text of a witness (plus its backup, if we are using a layer)
1358 as stored in the collation. The text is returned as a string, where the
1359 individual readings are joined with spaces and the meta-readings (e.g.
1360 lacunae) are omitted. Optional specification of $start and $end allows
1361 the generation of a subset of the witness text.
1366 my( $self, $wit, $start, $end ) = @_;
1367 $start = $self->start unless $start;
1368 $end = $self->end unless $end;
1369 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1372 foreach my $r ( @path ) {
1373 if( $r->join_prior || !$last || $last->join_next ) {
1374 $pathtext .= $r->text;
1376 $pathtext .= ' ' . $r->text;
1383 =head1 INITIALIZATION METHODS
1385 These are mostly for use by parsers.
1387 =head2 make_witness_path( $witness )
1389 Link the array of readings contained in $witness->path (and in
1390 $witness->uncorrected_path if it exists) into collation paths.
1391 Clear out the arrays when finished.
1393 =head2 make_witness_paths
1395 Call make_witness_path for all witnesses in the tradition.
1399 # For use when a collation is constructed from a base text and an apparatus.
1400 # We have the sequences of readings and just need to add path edges.
1401 # When we are done, clear out the witness path attributes, as they are no
1403 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1405 sub make_witness_paths {
1407 foreach my $wit ( $self->tradition->witnesses ) {
1408 # print STDERR "Making path for " . $wit->sigil . "\n";
1409 $self->make_witness_path( $wit );
1413 sub make_witness_path {
1414 my( $self, $wit ) = @_;
1415 my @chain = @{$wit->path};
1416 my $sig = $wit->sigil;
1417 # Add start and end if necessary
1418 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1419 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1420 foreach my $idx ( 0 .. $#chain-1 ) {
1421 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1423 if( $wit->is_layered ) {
1424 @chain = @{$wit->uncorrected_path};
1425 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1426 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1427 foreach my $idx( 0 .. $#chain-1 ) {
1428 my $source = $chain[$idx];
1429 my $target = $chain[$idx+1];
1430 $self->add_path( $source, $target, $sig.$self->ac_label )
1431 unless $self->has_path( $source, $target, $sig );
1435 $wit->clear_uncorrected_path;
1438 =head2 equivalence_graph( \%readingmap, $startrank, $endrank, @testvector )
1440 Returns an equivalence graph of the collation, in which all readings
1441 related via a 'colocated' relationship are transformed into a single
1442 vertex. Can be used to determine the validity of a new relationship. The
1443 mapping between equivalence vertices and reading IDs will be stored in the
1444 hash whose reference is passed as readingmap. For a subset of the graph,
1445 pass in a start and/or an ending rank (this only works if L<calculate_ranks>
1446 has been called at least once.)
1448 It is also possible to pass in a test relationship in @testvector, and get
1449 the resulting equivalence graph before the relationship has been made.
1453 sub equivalence_graph {
1454 my( $self, $map, $start, $end, @newvector ) = @_;
1455 $start = undef unless $self->end->has_rank;
1456 $end = undef unless $self->end->has_rank;
1457 my $eqgraph = Graph->new();
1460 foreach my $r ( $self->readings ) {
1461 unless( $r eq $self->start || $r eq $self->end ) {
1462 next if $start && $r->rank < $start;
1463 next if $end && $r->rank > $end;
1465 next if exists $map->{$r->id};
1466 my @rels = $self->related_readings( $r->id, 'colocated' );
1467 push( @rels, $r->id );
1468 # Make an equivalence vertex
1469 my $rn = 'equivalence_' . $rel_ctr++;
1470 $eqgraph->add_vertex( $rn );
1471 # Note which readings belong to this vertex.
1472 push( @rels, $r->id );
1479 foreach my $p ( $self->paths ) {
1480 my $efrom = exists $map->{$p->[0]} ? $map->{$p->[0]}
1481 : $map->{$self->start->id};
1482 my $eto = exists $map->{$p->[1]} ? $map->{$p->[1]}
1483 : $map->{$self->end->id};
1484 $eqgraph->add_edge( $efrom, $eto );
1487 # Collapse the vertices in @newvector if applicable.
1489 my( $eqs, $eqt ) = map { $map->{$_} } @newvector;
1490 $DB::single = 1 unless $eqs && $eqt;
1491 unless( $eqs eq $eqt ) {
1492 # Combine the vertices.
1493 map { $eqgraph->add_edge( $eqs, $_ ) } $eqgraph->successors( $eqt );
1494 map { $eqgraph->add_edge( $_, $eqs ) } $eqgraph->predecessors( $eqt );
1495 $eqgraph->delete_vertex( $eqt );
1501 =head2 calculate_ranks
1503 Calculate the reading ranks (that is, their aligned positions relative
1504 to each other) for the graph. This can only be called on linear collations.
1508 use Text::Tradition;
1510 my $cxfile = 't/data/Collatex-16.xml';
1511 my $t = Text::Tradition->new(
1513 'input' => 'CollateX',
1516 my $c = $t->collation;
1519 my $table = $c->alignment_table;
1520 ok( $c->has_cached_table, "Alignment table was cached" );
1521 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1522 $c->calculate_ranks;
1523 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1524 $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
1525 isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
1531 sub calculate_ranks {
1533 # Save the existing ranks, in case we need to invalidate the cached SVG.
1535 map { $existing_ranks{$_} = $_->rank } $self->readings;
1536 # Walk a version of the graph where every node linked by a relationship
1537 # edge is fundamentally the same node, and do a topological ranking on
1538 # the nodes in this graph.
1540 my $topo_graph = $self->equivalence_graph( \%rel_containers );
1542 # Now do the rankings, starting with the start node.
1543 my $topo_start = $rel_containers{$self->start->id};
1544 my $node_ranks = { $topo_start => 0 };
1545 my @curr_origin = ( $topo_start );
1546 # A little iterative function.
1547 while( @curr_origin ) {
1548 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
1550 # Transfer our rankings from the topological graph to the real one.
1551 foreach my $r ( $self->readings ) {
1552 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1553 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
1555 # Die. Find the last rank we calculated.
1556 my @all_defined = sort { ( $node_ranks->{$rel_containers{$a->id}}||-1 )
1557 <=> ( $node_ranks->{$rel_containers{$b->id}}||-1 ) }
1559 my $last = pop @all_defined;
1560 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1563 # Do we need to invalidate the cached data?
1564 if( $self->has_cached_svg || $self->has_cached_table ) {
1565 foreach my $r ( $self->readings ) {
1566 next if defined( $existing_ranks{$r} )
1567 && $existing_ranks{$r} == $r->rank;
1568 # Something has changed, so clear the cache
1569 $self->_clear_cache;
1570 # ...and recalculate the common readings.
1571 $self->calculate_common_readings();
1575 # The graph calculation information is now up to date.
1576 $self->_graphcalc_done(1);
1580 my( $graph, $node_ranks, @current_nodes ) = @_;
1581 # Look at each of the children of @current_nodes. If all the child's
1582 # parents have a rank, assign it the highest rank + 1 and add it to
1583 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1584 # parent gets a rank.
1586 foreach my $c ( @current_nodes ) {
1587 warn "Current reading $c has no rank!"
1588 unless exists $node_ranks->{$c};
1589 # print STDERR "Looking at child of node $c, rank "
1590 # . $node_ranks->{$c} . "\n";
1591 foreach my $child ( $graph->successors( $c ) ) {
1592 next if exists $node_ranks->{$child};
1593 my $highest_rank = -1;
1595 foreach my $parent ( $graph->predecessors( $child ) ) {
1596 if( exists $node_ranks->{$parent} ) {
1597 $highest_rank = $node_ranks->{$parent}
1598 if $highest_rank <= $node_ranks->{$parent};
1605 my $c_rank = $highest_rank + 1;
1606 # print STDERR "Assigning rank $c_rank to node $child \n";
1607 $node_ranks->{$child} = $c_rank;
1608 push( @next_nodes, $child );
1616 $self->wipe_svg if $self->has_cached_svg;
1617 $self->wipe_table if $self->has_cached_table;
1621 =head2 flatten_ranks
1623 A convenience method for parsing collation data. Searches the graph for readings
1624 with the same text at the same rank, and merges any that are found.
1630 my %unique_rank_rdg;
1632 foreach my $rdg ( $self->readings ) {
1633 next unless $rdg->has_rank;
1634 my $key = $rdg->rank . "||" . $rdg->text;
1635 if( exists $unique_rank_rdg{$key} ) {
1637 # print STDERR "Combining readings at same rank: $key\n";
1639 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1640 # TODO see if this now makes a common point.
1642 $unique_rank_rdg{$key} = $rdg;
1645 # If we merged readings, the ranks are still fine but the alignment
1646 # table is wrong. Wipe it.
1647 $self->wipe_table() if $changed;
1651 =head2 calculate_common_readings
1653 Goes through the graph identifying the readings that appear in every witness
1654 (apart from those with lacunae at that spot.) Marks them as common and returns
1659 use Text::Tradition;
1661 my $cxfile = 't/data/Collatex-16.xml';
1662 my $t = Text::Tradition->new(
1664 'input' => 'CollateX',
1667 my $c = $t->collation;
1669 my @common = $c->calculate_common_readings();
1670 is( scalar @common, 8, "Found correct number of common readings" );
1671 my @marked = sort $c->common_readings();
1672 is( scalar @common, 8, "All common readings got marked as such" );
1673 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
1674 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1680 sub calculate_common_readings {
1683 map { $_->is_common( 0 ) } $self->readings;
1684 # Implicitly calls calculate_ranks
1685 my $table = $self->alignment_table;
1686 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1687 my @row = map { $_->{'tokens'}->[$idx]
1688 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1689 @{$table->{'alignment'}};
1691 foreach my $r ( @row ) {
1693 $hash{$r->id} = $r unless $r->is_meta;
1695 $hash{'UNDEF'} = $r;
1698 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1699 my( $r ) = values %hash;
1701 push( @common, $r );
1707 =head2 text_from_paths
1709 Calculate the text array for all witnesses from the path, for later consistency
1710 checking. Only to be used if there is no non-graph-based way to know the
1715 sub text_from_paths {
1717 foreach my $wit ( $self->tradition->witnesses ) {
1718 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1720 foreach my $r ( @readings ) {
1721 next if $r->is_meta;
1722 push( @text, $r->text );
1724 $wit->text( \@text );
1725 if( $wit->is_layered ) {
1726 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1727 $wit->sigil.$self->ac_label );
1729 foreach my $r ( @ucrdgs ) {
1730 next if $r->is_meta;
1731 push( @uctext, $r->text );
1733 $wit->layertext( \@uctext );
1738 =head1 UTILITY FUNCTIONS
1740 =head2 common_predecessor( $reading_a, $reading_b )
1742 Find the last reading that occurs in sequence before both the given readings.
1743 At the very least this should be $self->start.
1745 =head2 common_successor( $reading_a, $reading_b )
1747 Find the first reading that occurs in sequence after both the given readings.
1748 At the very least this should be $self->end.
1752 use Text::Tradition;
1754 my $cxfile = 't/data/Collatex-16.xml';
1755 my $t = Text::Tradition->new(
1757 'input' => 'CollateX',
1760 my $c = $t->collation;
1762 is( $c->common_predecessor( 'n24', 'n23' )->id,
1763 'n20', "Found correct common predecessor" );
1764 is( $c->common_successor( 'n24', 'n23' )->id,
1765 '#END#', "Found correct common successor" );
1767 is( $c->common_predecessor( 'n19', 'n17' )->id,
1768 'n16', "Found correct common predecessor for readings on same path" );
1769 is( $c->common_successor( 'n21', 'n10' )->id,
1770 '#END#', "Found correct common successor for readings on same path" );
1776 ## Return the closest reading that is a predecessor of both the given readings.
1777 sub common_predecessor {
1779 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1780 return $self->_common_in_path( $r1, $r2, 'predecessors' );
1783 sub common_successor {
1785 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1786 return $self->_common_in_path( $r1, $r2, 'successors' );
1790 # TODO think about how to do this without ranks...
1791 sub _common_in_path {
1792 my( $self, $r1, $r2, $dir ) = @_;
1793 my $iter = $self->end->rank;
1795 my @last_r1 = ( $r1 );
1796 my @last_r2 = ( $r2 );
1797 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
1799 # print STDERR "Finding common $dir for $r1, $r2\n";
1800 while( !@candidates ) {
1801 last unless $iter--; # Avoid looping infinitely
1802 # Iterate separately down the graph from r1 and r2
1803 my( @new_lc1, @new_lc2 );
1804 foreach my $lc ( @last_r1 ) {
1805 foreach my $p ( $lc->$dir ) {
1806 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1807 # print STDERR "Path candidate $p from $lc\n";
1808 push( @candidates, $p );
1810 $all_seen{$p->id} = 'r1';
1811 push( @new_lc1, $p );
1815 foreach my $lc ( @last_r2 ) {
1816 foreach my $p ( $lc->$dir ) {
1817 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1818 # print STDERR "Path candidate $p from $lc\n";
1819 push( @candidates, $p );
1821 $all_seen{$p->id} = 'r2';
1822 push( @new_lc2, $p );
1826 @last_r1 = @new_lc1;
1827 @last_r2 = @new_lc2;
1829 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1830 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1834 Text::Tradition::Error->throw(
1835 'ident' => 'Collation error',
1841 __PACKAGE__->meta->make_immutable;
1845 This package is free software and is provided "as is" without express
1846 or implied warranty. You can redistribute it and/or modify it under
1847 the same terms as Perl itself.
1851 Tara L Andrews E<lt>aurum@cpan.orgE<gt>