1 package Text::Tradition::Collation;
4 use Encode qw( decode_utf8 );
8 use IPC::Run qw( run binary );
10 use Text::Tradition::Collation::Data;
11 use Text::Tradition::Collation::Reading;
12 use Text::Tradition::Collation::RelationshipStore;
13 use Text::Tradition::Error;
14 use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
16 use XML::LibXML::XPathContext;
20 isa => 'Text::Tradition::Collation::Data',
58 isa => 'Text::Tradition',
59 writer => '_set_tradition',
65 Text::Tradition::Collation - a software model for a text collation
70 my $t = Text::Tradition->new(
71 'name' => 'this is a text',
73 'file' => '/path/to/tei_parallel_seg_file.xml' );
75 my $c = $t->collation;
76 my @readings = $c->readings;
77 my @paths = $c->paths;
78 my @relationships = $c->relationships;
80 my $svg_variant_graph = $t->collation->as_svg();
84 Text::Tradition is a library for representation and analysis of collated
85 texts, particularly medieval ones. The Collation is the central feature of
86 a Tradition, where the text, its sequence of readings, and its relationships
87 between readings are actually kept.
93 The constructor. Takes a hash or hashref of the following arguments:
97 =item * tradition - The Text::Tradition object to which the collation
100 =item * linear - Whether the collation should be linear; that is, whether
101 transposed readings should be treated as two linked readings rather than one,
102 and therefore whether the collation graph is acyclic. Defaults to true.
104 =item * baselabel - The default label for the path taken by a base text
105 (if any). Defaults to 'base text'.
107 =item * wit_list_separator - The string to join a list of witnesses for
108 purposes of making labels in display graphs. Defaults to ', '.
110 =item * ac_label - The extra label to tack onto a witness sigil when
111 representing another layer of path for the given witness - that is, when
112 a text has more than one possible reading due to scribal corrections or
113 the like. Defaults to ' (a.c.)'.
115 =item * wordsep - The string used to separate words in the original text.
126 =head2 wit_list_separator
134 Simple accessors for collation attributes.
138 The meta-reading at the start of every witness path.
142 The meta-reading at the end of every witness path.
146 Returns all Reading objects in the graph.
148 =head2 reading( $id )
150 Returns the Reading object corresponding to the given ID.
152 =head2 add_reading( $reading_args )
154 Adds a new reading object to the collation.
155 See L<Text::Tradition::Collation::Reading> for the available arguments.
157 =head2 del_reading( $object_or_id )
159 Removes the given reading from the collation, implicitly removing its
160 paths and relationships.
162 =head2 merge_readings( $main, $second, $concatenate, $with_str )
164 Merges the $second reading into the $main one. If $concatenate is true, then
165 the merged node will carry the text of both readings, concatenated with either
166 $with_str (if specified) or a sensible default (the empty string if the
167 appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
169 The first two arguments may be either readings or reading IDs.
171 =head2 has_reading( $id )
173 Predicate to see whether a given reading ID is in the graph.
175 =head2 reading_witnesses( $object_or_id )
177 Returns a list of sigils whose witnesses contain the reading.
181 Returns all reading paths within the document - that is, all edges in the
182 collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
184 =head2 add_path( $source, $target, $sigil )
186 Links the given readings in the collation in sequence, under the given witness
187 sigil. The readings may be specified by object or ID.
189 =head2 del_path( $source, $target, $sigil )
191 Links the given readings in the collation in sequence, under the given witness
192 sigil. The readings may be specified by object or ID.
194 =head2 has_path( $source, $target );
196 Returns true if the two readings are linked in sequence in any witness.
197 The readings may be specified by object or ID.
201 Returns all Relationship objects in the collation.
203 =head2 add_relationship( $reading, $other_reading, $options )
205 Adds a new relationship of the type given in $options between the two readings,
206 which may be specified by object or ID. Returns a value of ( $status, @vectors)
207 where $status is true on success, and @vectors is a list of relationship edges
208 that were ultimately added.
209 See L<Text::Tradition::Collation::Relationship> for the available options.
214 my ( $class, @args ) = @_;
215 my %args = @args == 1 ? %{ $args[0] } : @args;
216 # TODO determine these from the Moose::Meta object
217 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
218 linear wordsep start end cached_table _graphcalc_done);
220 for my $attr (@delegate_attrs) {
221 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
223 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
229 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
230 $self->_set_start( $self->add_reading(
231 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
232 $self->_set_end( $self->add_reading(
233 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
236 sub register_relationship_type {
238 my %args = @_ == 1 ? %{$_[0]} : @_;
239 if( $self->relations->has_type( $args{name} ) ) {
240 throw( 'Relationship type ' . $args{name} . ' already registered' );
242 $self->relations->add_type( %args );
245 ### Reading construct/destruct functions
248 my( $self, $reading ) = @_;
249 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
250 my %args = %$reading;
251 if( $args{'init'} ) {
252 # If we are initializing an empty collation, don't assume that we
253 # have set a tradition.
254 delete $args{'init'};
255 } elsif( $self->tradition->can('language') && $self->tradition->has_language
256 && !exists $args{'language'} ) {
257 $args{'language'} = $self->tradition->language;
259 $reading = Text::Tradition::Collation::Reading->new(
260 'collation' => $self,
263 # First check to see if a reading with this ID exists.
264 if( $self->reading( $reading->id ) ) {
265 throw( "Collation already has a reading with id " . $reading->id );
267 $self->_graphcalc_done(0);
268 $self->_add_reading( $reading->id => $reading );
269 # Once the reading has been added, put it in both graphs.
270 $self->sequence->add_vertex( $reading->id );
271 $self->relations->add_reading( $reading->id );
275 around del_reading => sub {
280 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
283 # Remove the reading from the graphs.
284 $self->_graphcalc_done(0);
285 $self->_clear_cache; # Explicitly clear caches to GC the reading
286 $self->sequence->delete_vertex( $arg );
287 $self->relations->delete_reading( $arg );
290 $self->$orig( $arg );
297 my $cxfile = 't/data/Collatex-16.xml';
298 my $t = Text::Tradition->new(
300 'input' => 'CollateX',
303 my $c = $t->collation;
305 my $rno = scalar $c->readings;
306 # Split n21 for testing purposes
307 my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
308 my $old_r = $c->reading( 'n21' );
309 $old_r->alter_text( 'to' );
310 $c->del_path( 'n20', 'n21', 'A' );
311 $c->add_path( 'n20', 'n21p0', 'A' );
312 $c->add_path( 'n21p0', 'n21', 'A' );
314 ok( $c->reading( 'n21p0' ), "New reading exists" );
315 is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
317 # Combine n3 and n4 ( with his )
318 $c->merge_readings( 'n3', 'n4', 1 );
319 ok( !$c->reading('n4'), "Reading n4 is gone" );
320 is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
322 # Collapse n9 and n10 ( rood / root )
323 $c->merge_readings( 'n9', 'n10' );
324 ok( !$c->reading('n10'), "Reading n10 is gone" );
325 is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
327 # Combine n21 and n21p0
328 my $remaining = $c->reading('n21');
329 $remaining ||= $c->reading('n22'); # one of these should still exist
330 $c->merge_readings( 'n21p0', $remaining, 1 );
331 ok( !$c->reading('n21'), "Reading $remaining is gone" );
332 is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
342 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
343 my $mergemeta = $kept_obj->is_meta;
344 throw( "Cannot merge meta and non-meta reading" )
345 unless ( $mergemeta && $del_obj->is_meta )
346 || ( !$mergemeta && !$del_obj->is_meta );
348 throw( "Cannot merge with start or end node" )
349 if( $kept_obj eq $self->start || $kept_obj eq $self->end
350 || $del_obj eq $self->start || $del_obj eq $self->end );
351 throw( "Cannot combine text of meta readings" ) if $combine;
353 # We only need the IDs for adding paths to the graph, not the reading
354 # objects themselves.
355 my $kept = $kept_obj->id;
356 my $deleted = $del_obj->id;
357 $self->_graphcalc_done(0);
359 # The kept reading should inherit the paths and the relationships
360 # of the deleted reading.
361 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
362 my @vector = ( $kept );
363 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
364 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
365 next if $vector[0] eq $vector[1]; # Don't add a self loop
366 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
367 $self->sequence->add_edge( @vector );
368 my $fwits = $self->sequence->get_edge_attributes( @vector );
369 @wits{keys %$fwits} = values %$fwits;
370 $self->sequence->set_edge_attributes( @vector, \%wits );
372 $self->relations->merge_readings( $kept, $deleted, $combine );
374 # Do the deletion deed.
376 # Combine the text of the readings
377 my $joinstr = $combine_char;
378 unless( defined $joinstr ) {
379 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
380 $joinstr = $self->wordsep unless defined $joinstr;
382 $kept_obj->_combine( $del_obj, $joinstr );
384 $self->del_reading( $deleted );
387 =head2 compress_readings
389 Where possible in the graph, compresses plain sequences of readings into a
390 single reading. The sequences must consist of readings with no
391 relationships to other readings, with only a single witness path between
392 them and no other witness paths from either that would skip the other. The
393 readings must also not be marked as nonsense or bad grammar.
395 WARNING: This operation cannot be undone.
399 sub compress_readings {
401 # Anywhere in the graph that there is a reading that joins only to a single
402 # successor, and neither of these have any relationships, just join the two
404 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
405 # Now look for readings that can be joined to their successors.
406 next unless $rdg->is_combinable;
408 while( $self->sequence->successors( $rdg ) == 1 ) {
409 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
410 throw( "Infinite loop" ) if $seen{$next->id};
411 $seen{$next->id} = 1;
412 last if $self->sequence->predecessors( $next ) > 1;
413 last unless $next->is_combinable;
414 say "Joining readings $rdg and $next";
415 $self->merge_readings( $rdg, $next, 1 );
418 # Make sure we haven't screwed anything up
419 foreach my $wit ( $self->tradition->witnesses ) {
420 my $pathtext = $self->path_text( $wit->sigil );
421 my $origtext = join( ' ', @{$wit->text} );
422 throw( "Text differs for witness " . $wit->sigil )
423 unless $pathtext eq $origtext;
424 if( $wit->is_layered ) {
425 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
426 $origtext = join( ' ', @{$wit->layertext} );
427 throw( "Ante-corr text differs for witness " . $wit->sigil )
428 unless $pathtext eq $origtext;
432 $self->relations->rebuild_equivalence();
433 $self->calculate_ranks();
436 # Helper function for manipulating the graph.
437 sub _stringify_args {
438 my( $self, $first, $second, @args ) = @_;
440 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
441 $second = $second->id
442 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
443 return( $first, $second, @args );
446 # Helper function for manipulating the graph.
447 sub _objectify_args {
448 my( $self, $first, $second, $arg ) = @_;
449 $first = $self->reading( $first )
450 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
451 $second = $self->reading( $second )
452 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
453 return( $first, $second, $arg );
460 # We only need the IDs for adding paths to the graph, not the reading
461 # objects themselves.
462 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
464 $self->_graphcalc_done(0);
465 # Connect the readings
466 unless( $self->sequence->has_edge( $source, $target ) ) {
467 $self->sequence->add_edge( $source, $target );
468 $self->relations->add_equivalence_edge( $source, $target );
470 # Note the witness in question
471 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
477 if( ref( $_[0] ) eq 'ARRAY' ) {
484 # We only need the IDs for adding paths to the graph, not the reading
485 # objects themselves.
486 my( $source, $target, $wit ) = $self->_stringify_args( @args );
488 $self->_graphcalc_done(0);
489 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
490 $self->sequence->delete_edge_attribute( $source, $target, $wit );
492 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
493 $self->sequence->delete_edge( $source, $target );
494 $self->relations->delete_equivalence_edge( $source, $target );
499 # Extra graph-alike utility
502 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
503 return undef unless $self->sequence->has_edge( $source, $target );
504 return $self->sequence->has_edge_attribute( $source, $target, $wit );
507 =head2 clear_witness( @sigil_list )
509 Clear the given witnesses out of the collation entirely, removing references
510 to them in paths, and removing readings that belong only to them. Should only
511 be called via $tradition->del_witness.
516 my( $self, @sigils ) = @_;
518 $self->_graphcalc_done(0);
519 # Clear the witness(es) out of the paths
520 foreach my $e ( $self->paths ) {
521 foreach my $sig ( @sigils ) {
522 $self->del_path( $e, $sig );
526 # Clear out the newly unused readings
527 foreach my $r ( $self->readings ) {
528 unless( $self->reading_witnesses( $r ) ) {
529 $self->del_reading( $r );
534 sub add_relationship {
536 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
537 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
538 foreach my $v ( @vectors ) {
539 next unless $self->get_relationship( $v )->colocated;
540 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
541 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
542 $self->_graphcalc_done(0);
550 around qw/ get_relationship del_relationship / => sub {
554 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
557 my( $source, $target ) = $self->_stringify_args( @args );
558 $self->$orig( $source, $target );
561 =head2 reading_witnesses( $reading )
563 Return a list of sigils corresponding to the witnesses in which the reading appears.
567 sub reading_witnesses {
568 my( $self, $reading ) = @_;
569 # We need only check either the incoming or the outgoing edges; I have
570 # arbitrarily chosen "incoming". Thus, special-case the start node.
571 if( $reading eq $self->start ) {
572 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
575 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
576 my $wits = $self->sequence->get_edge_attributes( @$e );
577 @all_witnesses{ keys %$wits } = 1;
579 my $acstr = $self->ac_label;
580 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
581 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
583 return keys %all_witnesses;
586 =head1 OUTPUT METHODS
588 =head2 as_svg( \%options )
590 Returns an SVG string that represents the graph, via as_dot and graphviz.
591 See as_dot for a list of options. Must have GraphViz (dot) installed to run.
596 my( $self, $opts ) = @_;
597 throw( "Need GraphViz installed to output SVG" )
598 unless File::Which::which( 'dot' );
599 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
600 $self->calculate_ranks()
601 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
602 my @cmd = qw/dot -Tsvg/;
604 my $dotfile = File::Temp->new();
606 # $dotfile->unlink_on_destroy(0);
607 binmode $dotfile, ':utf8';
608 print $dotfile $self->as_dot( $opts );
609 push( @cmd, $dotfile->filename );
610 run( \@cmd, ">", binary(), \$svg );
611 $svg = decode_utf8( $svg );
616 =head2 as_dot( \%options )
618 Returns a string that is the collation graph expressed in dot
619 (i.e. GraphViz) format. Options include:
634 my( $self, $opts ) = @_;
635 my $startrank = $opts->{'from'} if $opts;
636 my $endrank = $opts->{'to'} if $opts;
637 my $color_common = $opts->{'color_common'} if $opts;
638 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
639 && $self->end->rank > 100;
640 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
642 # Check the arguments
644 return if $endrank && $startrank > $endrank;
645 return if $startrank > $self->end->rank;
647 if( defined $endrank ) {
648 return if $endrank < 0;
649 $endrank = undef if $endrank == $self->end->rank;
652 my $graph_name = $self->tradition->name;
653 $graph_name =~ s/[^\w\s]//g;
654 $graph_name = join( '_', split( /\s+/, $graph_name ) );
662 'fillcolor' => 'white',
667 'arrowhead' => 'open',
668 'color' => '#000000',
669 'fontcolor' => '#000000',
672 my $dot = sprintf( "digraph %s {\n", $graph_name );
673 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
674 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
676 # Output substitute start/end readings if necessary
678 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
681 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
683 if( $STRAIGHTENHACK ) {
685 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
686 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
687 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
689 my %used; # Keep track of the readings that actually appear in the graph
690 # Sort the readings by rank if we have ranks; this speeds layout.
691 my @all_readings = $self->end->has_rank
692 ? sort { $a->rank <=> $b->rank } $self->readings
694 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
695 foreach my $reading ( @all_readings ) {
696 # Only output readings within our rank range.
697 next if $startrank && $reading->rank < $startrank;
698 next if $endrank && $reading->rank > $endrank;
699 $used{$reading->id} = 1;
700 # Need not output nodes without separate labels
701 next if $reading->id eq $reading->text;
703 my $label = $reading->text;
704 $label .= '-' if $reading->join_next;
705 $label = "-$label" if $reading->join_prior;
706 $label =~ s/\"/\\\"/g;
707 $rattrs->{'label'} = $label;
708 $rattrs->{'id'} = $reading->id;
709 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
710 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
713 # Add the real edges. Need to weight one edge per rank jump, in a
715 # my $weighted = $self->_add_edge_weights;
716 my @edges = $self->paths;
717 my( %substart, %subend );
718 foreach my $edge ( @edges ) {
719 # Do we need to output this edge?
720 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
721 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
722 my $variables = { %edge_attrs, 'label' => $label };
724 # Account for the rank gap if necessary
725 my $rank0 = $self->reading( $edge->[0] )->rank
726 if $self->reading( $edge->[0] )->has_rank;
727 my $rank1 = $self->reading( $edge->[1] )->rank
728 if $self->reading( $edge->[1] )->has_rank;
729 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
730 $variables->{'minlen'} = $rank1 - $rank0;
733 # Add the calculated edge weights
734 # if( exists $weighted->{$edge->[0]}
735 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
736 # # $variables->{'color'} = 'red';
737 # $variables->{'weight'} = 3.0;
740 # EXPERIMENTAL: make edge width reflect no. of witnesses
741 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
742 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
744 my $varopts = _dot_attr_string( $variables );
745 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
746 $edge->[0], $edge->[1], $varopts );
747 } elsif( $used{$edge->[0]} ) {
748 $subend{$edge->[0]} = $edge->[1];
749 } elsif( $used{$edge->[1]} ) {
750 $substart{$edge->[1]} = $edge->[0];
754 # If we are asked to, add relationship links
755 if( exists $opts->{show_relations} ) {
756 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
757 if( $filter eq 'transposition' ) {
758 $filter =~ qr/^transposition$/;
760 foreach my $redge ( $self->relationships ) {
761 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
762 if( $filter ne 'all' ) {
763 my $rel = $self->get_relationship( $redge );
764 next unless $rel->type =~ /$filter/;
768 constraint => 'false',
769 label => uc( substr( $rel->type, 0, 4 ) ),
772 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
773 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
779 # Add substitute start and end edges if necessary
780 foreach my $node ( keys %substart ) {
781 my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
782 my $variables = { %edge_attrs, 'label' => $witstr };
783 my $nrdg = $self->reading( $node );
784 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
785 # Substart is actually one lower than $startrank
786 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
788 my $varopts = _dot_attr_string( $variables );
789 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
791 foreach my $node ( keys %subend ) {
792 my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
793 my $variables = { %edge_attrs, 'label' => $witstr };
794 my $varopts = _dot_attr_string( $variables );
795 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
798 if( $STRAIGHTENHACK ) {
799 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
800 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
807 sub _dot_attr_string {
810 foreach my $k ( sort keys %$hash ) {
812 push( @attrs, $k.'="'.$v.'"' );
814 return( '[ ' . join( ', ', @attrs ) . ' ]' );
817 sub _add_edge_weights {
819 # Walk the graph from START to END, choosing the successor node with
820 # the largest number of witness paths each time.
822 my $curr = $self->start->id;
823 my $ranked = $self->end->has_rank;
824 while( $curr ne $self->end->id ) {
825 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
826 my @succ = sort { $self->path_witnesses( $curr, $a )
827 <=> $self->path_witnesses( $curr, $b ) }
828 $self->sequence->successors( $curr );
829 my $next = pop @succ;
830 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
831 # Try to avoid lacunae in the weighted path.
833 ( $self->reading( $next )->is_lacuna ||
834 $nextrank - $rank > 1 ) ){
837 $weighted->{$curr} = $next;
843 =head2 path_witnesses( $edge )
845 Returns the list of sigils whose witnesses are associated with the given edge.
846 The edge can be passed as either an array or an arrayref of ( $source, $target ).
851 my( $self, @edge ) = @_;
852 # If edge is an arrayref, cope.
853 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
857 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
861 # Helper function. Make a display label for the given witnesses, showing a.c.
862 # witnesses only where the main witness is not also in the list.
863 sub _path_display_label {
866 map { $wits{$_} = 1 } @_;
868 # If an a.c. wit is listed, remove it if the main wit is also listed.
869 # Otherwise keep it for explicit listing.
870 my $aclabel = $self->ac_label;
872 foreach my $w ( sort keys %wits ) {
873 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
874 if( exists $wits{$1} ) {
877 push( @disp_ac, $w );
882 # See if we are in a majority situation.
883 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
884 $maj = $maj > 5 ? $maj : 5;
885 if( scalar keys %wits > $maj ) {
886 unshift( @disp_ac, 'majority' );
887 return join( ', ', @disp_ac );
889 return join( ', ', sort keys %wits );
893 =head2 readings_at_rank( $rank )
895 Returns a list of readings at a given rank, taken from the alignment table.
899 sub readings_at_rank {
900 my( $self, $rank ) = @_;
901 my $table = $self->alignment_table;
902 # Table rank is real rank - 1.
903 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
905 foreach my $e ( @elements ) {
906 next unless ref( $e ) eq 'HASH';
907 next unless exists $e->{'t'};
908 $readings{$e->{'t'}->id} = $e->{'t'};
910 return values %readings;
915 Returns a GraphML representation of the collation. The GraphML will contain
916 two graphs. The first expresses the attributes of the readings and the witness
917 paths that link them; the second expresses the relationships that link the
918 readings. This is the native transfer format for a tradition.
928 my $datafile = 't/data/florilegium_tei_ps.xml';
929 my $tradition = Text::Tradition->new( 'input' => 'TEI',
934 ok( $tradition, "Got a tradition object" );
935 is( scalar $tradition->witnesses, 13, "Found all witnesses" );
936 ok( $tradition->collation, "Tradition has a collation" );
938 my $c = $tradition->collation;
939 is( scalar $c->readings, $READINGS, "Collation has all readings" );
940 is( scalar $c->paths, $PATHS, "Collation has all paths" );
941 is( scalar $c->relationships, 0, "Collation has all relationships" );
943 # Add a few relationships
944 $c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
945 $c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
946 $c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
948 # Now write it to GraphML and parse it again.
950 my $graphml = $c->as_graphml;
951 my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
952 is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
953 is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
954 is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
956 # Now add a stemma, write to GraphML, and look at the output.
958 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
959 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
960 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
961 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
962 $graphml = $c->as_graphml;
963 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
970 ## TODO MOVE this to Tradition.pm and modularize it better
972 my( $self, $options ) = @_;
973 $self->calculate_ranks unless $self->_graphcalc_done;
975 my $start = $options->{'from'}
976 ? $self->reading( $options->{'from'} ) : $self->start;
977 my $end = $options->{'to'}
978 ? $self->reading( $options->{'to'} ) : $self->end;
979 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
980 throw( 'Start node must be before end node' );
982 # The readings need to be ranked for this to work.
983 $start = $self->start unless $start->has_rank;
984 $end = $self->end unless $end->has_rank;
986 unless( $start eq $self->start ) {
987 $rankoffset = $start->rank - 1;
992 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
993 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
994 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
995 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
997 # Create the document and root node
999 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1000 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1001 $graphml->setDocumentElement( $root );
1002 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1003 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
1005 # List of attribute types to save on our objects and their corresponding
1010 'Bool' => 'boolean',
1011 'ReadingID' => 'string',
1012 'RelationshipType' => 'string',
1013 'RelationshipScope' => 'string',
1016 # Add the data keys for the graph. Include an extra key 'version' for the
1017 # GraphML output version.
1018 my %graph_data_keys;
1020 my %graph_attributes = ( 'version' => 'string' );
1021 # Graph attributes include those of Tradition and those of Collation.
1023 my $tmeta = $self->tradition->meta;
1024 my $cmeta = $self->meta;
1025 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1026 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1027 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1028 next if $attr->name =~ /^_/;
1029 next unless $save_types{$attr->type_constraint->name};
1030 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1032 # Extra custom keys for complex objects that should be saved in some form.
1033 # The subroutine should return a string, or undef/empty.
1034 if( $tmeta->has_method('stemmata') ) {
1035 $graph_attributes{'stemmata'} = sub {
1037 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1038 $self->tradition->stemmata;
1039 join( "\n", @stemstrs );
1043 if( $tmeta->has_method('user') ) {
1044 $graph_attributes{'user'} = sub {
1045 $self->tradition->user ? $self->tradition->user->id : undef
1049 foreach my $datum ( sort keys %graph_attributes ) {
1050 $graph_data_keys{$datum} = 'dg'.$gdi++;
1051 my $key = $root->addNewChild( $graphml_ns, 'key' );
1052 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1053 : $graph_attributes{$datum};
1054 $key->setAttribute( 'attr.name', $datum );
1055 $key->setAttribute( 'attr.type', $dtype );
1056 $key->setAttribute( 'for', 'graph' );
1057 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1060 # Add the data keys for reading nodes
1061 my %reading_attributes;
1062 my $rmeta = Text::Tradition::Collation::Reading->meta;
1063 foreach my $attr( $rmeta->get_all_attributes ) {
1064 next if $attr->name =~ /^_/;
1065 next unless $save_types{$attr->type_constraint->name};
1066 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1068 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1069 # Extra custom key for the reading morphology
1070 $reading_attributes{'lexemes'} = 'string';
1075 foreach my $datum ( sort keys %reading_attributes ) {
1076 $node_data_keys{$datum} = 'dn'.$ndi++;
1077 my $key = $root->addNewChild( $graphml_ns, 'key' );
1078 $key->setAttribute( 'attr.name', $datum );
1079 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
1080 $key->setAttribute( 'for', 'node' );
1081 $key->setAttribute( 'id', $node_data_keys{$datum} );
1084 # Add the data keys for edges, that is, paths and relationships. Path
1085 # data does not come from a Moose class so is here manually.
1088 my %edge_attributes = (
1089 witness => 'string', # ID/label for a path
1090 extra => 'boolean', # Path key
1092 my @path_attributes = keys %edge_attributes; # track our manual additions
1093 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1094 foreach my $attr( $pmeta->get_all_attributes ) {
1095 next if $attr->name =~ /^_/;
1096 next unless $save_types{$attr->type_constraint->name};
1097 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1099 foreach my $datum ( sort keys %edge_attributes ) {
1100 $edge_data_keys{$datum} = 'de'.$edi++;
1101 my $key = $root->addNewChild( $graphml_ns, 'key' );
1102 $key->setAttribute( 'attr.name', $datum );
1103 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
1104 $key->setAttribute( 'for', 'edge' );
1105 $key->setAttribute( 'id', $edge_data_keys{$datum} );
1108 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1109 my $xmlidname = $self->tradition->name;
1110 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1111 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1112 $xmlidname = '_'.$xmlidname;
1114 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1115 $sgraph->setAttribute( 'edgedefault', 'directed' );
1116 $sgraph->setAttribute( 'id', $xmlidname );
1117 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1118 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
1119 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1120 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
1121 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
1123 # Tradition/collation attribute data
1124 foreach my $datum ( keys %graph_attributes ) {
1126 if( $datum eq 'version' ) {
1128 } elsif( ref( $graph_attributes{$datum} ) ) {
1129 my $sub = $graph_attributes{$datum};
1131 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1132 $value = $self->tradition->$datum;
1134 $value = $self->$datum;
1136 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
1141 # Add our readings to the graph
1142 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
1143 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1144 ( $n->rank < $start->rank || $n->rank > $end->rank );
1145 $use_readings{$n->id} = 1;
1146 # Add to the main graph
1147 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
1148 my $node_xmlid = 'n' . $node_ctr++;
1149 $node_hash{ $n->id } = $node_xmlid;
1150 $node_el->setAttribute( 'id', $node_xmlid );
1151 foreach my $d ( keys %reading_attributes ) {
1153 # Custom serialization
1154 if( $d eq 'lexemes' ) {
1155 # If nval is a true value, we have lexemes so we need to
1156 # serialize them. Otherwise set nval to undef so that the
1157 # key is excluded from this reading.
1158 $nval = $nval ? $n->_serialize_lexemes : undef;
1159 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1162 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
1163 # Adjust the ranks within the subgraph.
1164 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1165 : $nval - $rankoffset;
1167 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1172 # Add the path edges to the sequence graph
1174 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1175 # We add an edge in the graphml for every witness in $e.
1176 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1177 my @edge_wits = sort $self->path_witnesses( $e );
1178 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1179 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1180 # Skip any path from start to end; that witness is not in the subgraph.
1181 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
1182 foreach my $wit ( @edge_wits ) {
1183 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1184 $node_hash{ $e->[0] },
1185 $node_hash{ $e->[1] } );
1186 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
1187 $edge_el->setAttribute( 'source', $from );
1188 $edge_el->setAttribute( 'target', $to );
1189 $edge_el->setAttribute( 'id', $id );
1191 # It's a witness path, so add the witness
1193 my $key = $edge_data_keys{'witness'};
1194 # Is this an ante-corr witness?
1195 my $aclabel = $self->ac_label;
1196 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1197 # Keep the base witness
1199 # ...and record that this is an 'extra' reading path
1200 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1202 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1206 # Report the actual number of nodes and edges that went in
1207 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1208 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1210 # Add the relationship graph to the XML
1211 map { delete $edge_data_keys{$_} } @path_attributes;
1212 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1213 $node_data_keys{'id'}, \%edge_data_keys );
1215 # Save and return the thing
1216 my $result = decode_utf8( $graphml->toString(1) );
1220 sub _add_graphml_data {
1221 my( $el, $key, $value ) = @_;
1222 return unless defined $value;
1223 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1224 $data_el->setAttribute( 'key', $key );
1225 $data_el->appendText( $value );
1230 Returns a CSV alignment table representation of the collation graph, one
1231 row per witness (or witness uncorrected.)
1237 my $table = $self->alignment_table;
1238 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
1240 # Make the header row
1241 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1242 push( @result, decode_utf8( $csv->string ) );
1243 # Make the rest of the rows
1244 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1245 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1246 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
1247 $csv->combine( @row );
1248 push( @result, decode_utf8( $csv->string ) );
1250 return join( "\n", @result );
1253 =head2 alignment_table
1255 Return a reference to an alignment table, in a slightly enhanced CollateX
1256 format which looks like this:
1258 $table = { alignment => [ { witness => "SIGIL",
1259 tokens => [ { t => "TEXT" }, ... ] },
1260 { witness => "SIG2",
1261 tokens => [ { t => "TEXT" }, ... ] },
1263 length => TEXTLEN };
1267 sub alignment_table {
1269 return $self->cached_table if $self->has_cached_table;
1271 # Make sure we can do this
1272 throw( "Need a linear graph in order to make an alignment table" )
1273 unless $self->linear;
1274 $self->calculate_ranks()
1275 unless $self->_graphcalc_done && $self->end->has_rank;
1277 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
1278 my @all_pos = ( 1 .. $self->end->rank - 1 );
1279 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
1280 # say STDERR "Making witness row(s) for " . $wit->sigil;
1281 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1282 my @row = _make_witness_row( \@wit_path, \@all_pos );
1283 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1284 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1285 push( @{$table->{'alignment'}}, $witobj );
1286 if( $wit->is_layered ) {
1287 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
1288 $wit->sigil.$self->ac_label );
1289 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
1290 my $witacobj = { 'witness' => $wit->sigil.$self->ac_label,
1291 'tokens' => \@ac_row };
1292 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1293 push( @{$table->{'alignment'}}, $witacobj );
1296 $self->cached_table( $table );
1300 sub _make_witness_row {
1301 my( $path, $positions ) = @_;
1303 map { $char_hash{$_} = undef } @$positions;
1305 foreach my $rdg ( @$path ) {
1306 say STDERR "rank " . $rdg->rank if $debug;
1307 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1308 $char_hash{$rdg->rank} = { 't' => $rdg };
1310 my @row = map { $char_hash{$_} } @$positions;
1311 # Fill in lacuna markers for undef spots in the row
1312 my $last_el = shift @row;
1313 my @filled_row = ( $last_el );
1314 foreach my $el ( @row ) {
1315 # If we are using node reference, make the lacuna node appear many times
1316 # in the table. If not, use the lacuna tag.
1317 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1320 push( @filled_row, $el );
1327 =head1 NAVIGATION METHODS
1329 =head2 reading_sequence( $first, $last, $sigil, $backup )
1331 Returns the ordered list of readings, starting with $first and ending
1332 with $last, for the witness given in $sigil. If a $backup sigil is
1333 specified (e.g. when walking a layered witness), it will be used wherever
1334 no $sigil path exists. If there is a base text reading, that will be
1335 used wherever no path exists for $sigil or $backup.
1339 # TODO Think about returning some lazy-eval iterator.
1340 # TODO Get rid of backup; we should know from what witness is whether we need it.
1342 sub reading_sequence {
1343 my( $self, $start, $end, $witness ) = @_;
1345 $witness = $self->baselabel unless $witness;
1346 my @readings = ( $start );
1349 while( $n && $n->id ne $end->id ) {
1350 if( exists( $seen{$n->id} ) ) {
1351 throw( "Detected loop for $witness at " . $n->id );
1355 my $next = $self->next_reading( $n, $witness );
1357 throw( "Did not find any path for $witness from reading " . $n->id );
1359 push( @readings, $next );
1362 # Check that the last reading is our end reading.
1363 my $last = $readings[$#readings];
1364 throw( "Last reading found from " . $start->text .
1365 " for witness $witness is not the end!" ) # TODO do we get this far?
1366 unless $last->id eq $end->id;
1371 =head2 next_reading( $reading, $sigil );
1373 Returns the reading that follows the given reading along the given witness
1379 # Return the successor via the corresponding path.
1381 my $answer = $self->_find_linked_reading( 'next', @_ );
1382 return undef unless $answer;
1383 return $self->reading( $answer );
1386 =head2 prior_reading( $reading, $sigil )
1388 Returns the reading that precedes the given reading along the given witness
1394 # Return the predecessor via the corresponding path.
1396 my $answer = $self->_find_linked_reading( 'prior', @_ );
1397 return $self->reading( $answer );
1400 sub _find_linked_reading {
1401 my( $self, $direction, $node, $path ) = @_;
1403 # Get a backup if we are dealing with a layered witness
1405 my $aclabel = $self->ac_label;
1406 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1410 my @linked_paths = $direction eq 'next'
1411 ? $self->sequence->edges_from( $node )
1412 : $self->sequence->edges_to( $node );
1413 return undef unless scalar( @linked_paths );
1415 # We have to find the linked path that contains all of the
1416 # witnesses supplied in $path.
1417 my( @path_wits, @alt_path_wits );
1418 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1419 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
1422 foreach my $le ( @linked_paths ) {
1423 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
1426 my @le_wits = sort $self->path_witnesses( $le );
1427 if( _is_within( \@path_wits, \@le_wits ) ) {
1428 # This is the right path.
1429 return $direction eq 'next' ? $le->[1] : $le->[0];
1430 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1434 # Got this far? Return the alternate path if it exists.
1435 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
1438 # Got this far? Return the base path if it exists.
1439 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
1442 # Got this far? We have no appropriate path.
1443 warn "Could not find $direction node from " . $node->id
1444 . " along path $path";
1450 my( $set1, $set2 ) = @_;
1451 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
1452 foreach my $el ( @$set1 ) {
1453 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
1458 # Return the string that joins together a list of witnesses for
1459 # display on a single path.
1460 sub _witnesses_of_label {
1461 my( $self, $label ) = @_;
1462 my $regex = $self->wit_list_separator;
1463 my @answer = split( /\Q$regex\E/, $label );
1467 =head2 common_readings
1469 Returns the list of common readings in the graph (i.e. those readings that are
1470 shared by all non-lacunose witnesses.)
1474 sub common_readings {
1476 my @common = grep { $_->is_common } $self->readings;
1480 =head2 path_text( $sigil, [, $start, $end ] )
1482 Returns the text of a witness (plus its backup, if we are using a layer)
1483 as stored in the collation. The text is returned as a string, where the
1484 individual readings are joined with spaces and the meta-readings (e.g.
1485 lacunae) are omitted. Optional specification of $start and $end allows
1486 the generation of a subset of the witness text.
1491 my( $self, $wit, $start, $end ) = @_;
1492 $start = $self->start unless $start;
1493 $end = $self->end unless $end;
1494 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
1497 foreach my $r ( @path ) {
1498 unless ( $r->join_prior || !$last || $last->join_next ) {
1501 $pathtext .= $r->text;
1507 =head1 INITIALIZATION METHODS
1509 These are mostly for use by parsers.
1511 =head2 make_witness_path( $witness )
1513 Link the array of readings contained in $witness->path (and in
1514 $witness->uncorrected_path if it exists) into collation paths.
1515 Clear out the arrays when finished.
1517 =head2 make_witness_paths
1519 Call make_witness_path for all witnesses in the tradition.
1523 # For use when a collation is constructed from a base text and an apparatus.
1524 # We have the sequences of readings and just need to add path edges.
1525 # When we are done, clear out the witness path attributes, as they are no
1527 # TODO Find a way to replace the witness path attributes with encapsulated functions?
1529 sub make_witness_paths {
1531 foreach my $wit ( $self->tradition->witnesses ) {
1532 # say STDERR "Making path for " . $wit->sigil;
1533 $self->make_witness_path( $wit );
1537 sub make_witness_path {
1538 my( $self, $wit ) = @_;
1539 my @chain = @{$wit->path};
1540 my $sig = $wit->sigil;
1541 # Add start and end if necessary
1542 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1543 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1544 foreach my $idx ( 0 .. $#chain-1 ) {
1545 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
1547 if( $wit->is_layered ) {
1548 @chain = @{$wit->uncorrected_path};
1549 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1550 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
1551 foreach my $idx( 0 .. $#chain-1 ) {
1552 my $source = $chain[$idx];
1553 my $target = $chain[$idx+1];
1554 $self->add_path( $source, $target, $sig.$self->ac_label )
1555 unless $self->has_path( $source, $target, $sig );
1559 $wit->clear_uncorrected_path;
1562 =head2 calculate_ranks
1564 Calculate the reading ranks (that is, their aligned positions relative
1565 to each other) for the graph. This can only be called on linear collations.
1569 use Text::Tradition;
1571 my $cxfile = 't/data/Collatex-16.xml';
1572 my $t = Text::Tradition->new(
1574 'input' => 'CollateX',
1577 my $c = $t->collation;
1580 my $table = $c->alignment_table;
1581 ok( $c->has_cached_table, "Alignment table was cached" );
1582 is( $c->alignment_table, $table, "Cached table returned upon second call" );
1583 $c->calculate_ranks;
1584 is( $c->alignment_table, $table, "Cached table retained with no rank change" );
1585 $c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
1586 is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
1587 $c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
1588 isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
1594 sub calculate_ranks {
1596 # Save the existing ranks, in case we need to invalidate the cached SVG.
1598 map { $existing_ranks{$_} = $_->rank } $self->readings;
1600 # Do the rankings based on the relationship equivalence graph, starting
1601 # with the start node.
1602 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1604 # Transfer our rankings from the topological graph to the real one.
1605 foreach my $r ( $self->readings ) {
1606 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1607 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
1609 # Die. Find the last rank we calculated.
1610 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1611 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
1613 my $last = pop @all_defined;
1614 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
1617 # Do we need to invalidate the cached data?
1618 if( $self->has_cached_table ) {
1619 foreach my $r ( $self->readings ) {
1620 next if defined( $existing_ranks{$r} )
1621 && $existing_ranks{$r} == $r->rank;
1622 # Something has changed, so clear the cache
1623 $self->_clear_cache;
1624 # ...and recalculate the common readings.
1625 $self->calculate_common_readings();
1629 # The graph calculation information is now up to date.
1630 $self->_graphcalc_done(1);
1635 $self->wipe_table if $self->has_cached_table;
1639 =head2 flatten_ranks
1641 A convenience method for parsing collation data. Searches the graph for readings
1642 with the same text at the same rank, and merges any that are found.
1648 my %unique_rank_rdg;
1650 foreach my $rdg ( $self->readings ) {
1651 next unless $rdg->has_rank;
1652 my $key = $rdg->rank . "||" . $rdg->text;
1653 if( exists $unique_rank_rdg{$key} ) {
1654 # Make sure they don't have different grammatical forms
1655 my $ur = $unique_rank_rdg{$key};
1656 if( $rdg->is_identical( $ur ) ) {
1658 #say STDERR "Combining readings at same rank: $key";
1660 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1661 # TODO see if this now makes a common point.
1664 $unique_rank_rdg{$key} = $rdg;
1667 # If we merged readings, the ranks are still fine but the alignment
1668 # table is wrong. Wipe it.
1669 $self->wipe_table() if $changed;
1673 =head2 calculate_common_readings
1675 Goes through the graph identifying the readings that appear in every witness
1676 (apart from those with lacunae at that spot.) Marks them as common and returns
1681 use Text::Tradition;
1683 my $cxfile = 't/data/Collatex-16.xml';
1684 my $t = Text::Tradition->new(
1686 'input' => 'CollateX',
1689 my $c = $t->collation;
1691 my @common = $c->calculate_common_readings();
1692 is( scalar @common, 8, "Found correct number of common readings" );
1693 my @marked = sort $c->common_readings();
1694 is( scalar @common, 8, "All common readings got marked as such" );
1695 my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
1696 is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1702 sub calculate_common_readings {
1705 map { $_->is_common( 0 ) } $self->readings;
1706 # Implicitly calls calculate_ranks
1707 my $table = $self->alignment_table;
1708 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1709 my @row = map { $_->{'tokens'}->[$idx]
1710 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1711 @{$table->{'alignment'}};
1713 foreach my $r ( @row ) {
1715 $hash{$r->id} = $r unless $r->is_meta;
1717 $hash{'UNDEF'} = $r;
1720 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1721 my( $r ) = values %hash;
1723 push( @common, $r );
1729 =head2 text_from_paths
1731 Calculate the text array for all witnesses from the path, for later consistency
1732 checking. Only to be used if there is no non-graph-based way to know the
1737 sub text_from_paths {
1739 foreach my $wit ( $self->tradition->witnesses ) {
1740 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1742 foreach my $r ( @readings ) {
1743 next if $r->is_meta;
1744 push( @text, $r->text );
1746 $wit->text( \@text );
1747 if( $wit->is_layered ) {
1748 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1749 $wit->sigil.$self->ac_label );
1751 foreach my $r ( @ucrdgs ) {
1752 next if $r->is_meta;
1753 push( @uctext, $r->text );
1755 $wit->layertext( \@uctext );
1760 =head1 UTILITY FUNCTIONS
1762 =head2 common_predecessor( $reading_a, $reading_b )
1764 Find the last reading that occurs in sequence before both the given readings.
1765 At the very least this should be $self->start.
1767 =head2 common_successor( $reading_a, $reading_b )
1769 Find the first reading that occurs in sequence after both the given readings.
1770 At the very least this should be $self->end.
1774 use Text::Tradition;
1776 my $cxfile = 't/data/Collatex-16.xml';
1777 my $t = Text::Tradition->new(
1779 'input' => 'CollateX',
1782 my $c = $t->collation;
1784 is( $c->common_predecessor( 'n24', 'n23' )->id,
1785 'n20', "Found correct common predecessor" );
1786 is( $c->common_successor( 'n24', 'n23' )->id,
1787 '__END__', "Found correct common successor" );
1789 is( $c->common_predecessor( 'n19', 'n17' )->id,
1790 'n16', "Found correct common predecessor for readings on same path" );
1791 is( $c->common_successor( 'n21', 'n10' )->id,
1792 '__END__', "Found correct common successor for readings on same path" );
1798 ## Return the closest reading that is a predecessor of both the given readings.
1799 sub common_predecessor {
1801 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1802 return $self->_common_in_path( $r1, $r2, 'predecessors' );
1805 sub common_successor {
1807 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1808 return $self->_common_in_path( $r1, $r2, 'successors' );
1812 # TODO think about how to do this without ranks...
1813 sub _common_in_path {
1814 my( $self, $r1, $r2, $dir ) = @_;
1815 my $iter = $self->end->rank;
1817 my @last_r1 = ( $r1 );
1818 my @last_r2 = ( $r2 );
1819 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
1821 # say STDERR "Finding common $dir for $r1, $r2";
1822 while( !@candidates ) {
1823 last unless $iter--; # Avoid looping infinitely
1824 # Iterate separately down the graph from r1 and r2
1825 my( @new_lc1, @new_lc2 );
1826 foreach my $lc ( @last_r1 ) {
1827 foreach my $p ( $lc->$dir ) {
1828 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1829 # say STDERR "Path candidate $p from $lc";
1830 push( @candidates, $p );
1831 } elsif( !$all_seen{$p->id} ) {
1832 $all_seen{$p->id} = 'r1';
1833 push( @new_lc1, $p );
1837 foreach my $lc ( @last_r2 ) {
1838 foreach my $p ( $lc->$dir ) {
1839 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1840 # say STDERR "Path candidate $p from $lc";
1841 push( @candidates, $p );
1842 } elsif( !$all_seen{$p->id} ) {
1843 $all_seen{$p->id} = 'r2';
1844 push( @new_lc2, $p );
1848 @last_r1 = @new_lc1;
1849 @last_r2 = @new_lc2;
1851 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1852 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1856 Text::Tradition::Error->throw(
1857 'ident' => 'Collation error',
1863 __PACKAGE__->meta->make_immutable;
1869 =item * Rework XML serialization in a more modular way
1875 This package is free software and is provided "as is" without express
1876 or implied warranty. You can redistribute it and/or modify it under
1877 the same terms as Perl itself.
1881 Tara L Andrews E<lt>aurum@cpan.orgE<gt>