1 package Text::Tradition::Collation;
3 use Encode qw( decode_utf8 );
6 use IPC::Run qw( run binary );
8 use Text::Tradition::Collation::Reading;
15 default => sub { Graph->new() },
24 default => sub { Graph->new( undirected => 1 ) },
26 relationships => 'edges',
32 isa => 'Text::Tradition',
37 isa => 'HashRef[Text::Tradition::Collation::Reading]',
41 _add_reading => 'set',
42 del_reading => 'delete',
43 has_reading => 'exists',
46 default => sub { {} },
49 has 'wit_list_separator' => (
58 default => 'base text',
75 isa => 'Text::Tradition::Collation::Reading',
76 writer => '_set_start',
82 isa => 'Text::Tradition::Collation::Reading',
87 # The collation can be created two ways:
88 # 1. Collate a set of witnesses (with CollateX I guess) and process
89 # the results as in 2.
90 # 2. Read a pre-prepared collation in one of a variety of formats,
91 # and make the graph from that.
93 # The graph itself will (for now) be immutable, and the positions
94 # within the graph will also be immutable. We need to calculate those
95 # positions upon graph construction. The equivalences between graph
96 # nodes will be mutable, entirely determined by the user (or possibly
97 # by some semantic pre-processing provided by the user.) So the
98 # constructor should just make an empty equivalences object. The
99 # constructor will also need to make the witness objects, if we didn't
100 # come through option 1.
104 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
105 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
108 ### Reading construct/destruct functions
111 my( $self, $reading ) = @_;
112 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
113 my %args = %$reading;
114 $reading = Text::Tradition::Collation::Reading->new(
115 'collation' => $self,
118 # First check to see if a reading with this ID exists.
119 if( $self->reading( $reading->id ) ) {
120 warn "Collation already has a reading with id " . $reading->id;
123 $self->_add_reading( $reading->id => $reading );
124 # Once the reading has been added, put it in both graphs.
125 $self->sequence->add_vertex( $reading->id );
126 $self->relations->add_vertex( $reading->id );
130 around del_reading => sub {
135 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
138 # Remove the reading from the graphs.
139 $self->sequence->delete_vertex( $arg );
140 $self->relations->delete_vertex( $arg );
143 $self->$orig( $arg );
146 # merge_readings( $main, $to_be_deleted );
151 # We only need the IDs for adding paths to the graph, not the reading
152 # objects themselves.
153 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
155 # The kept reading should inherit the paths and the relationships
156 # of the deleted reading.
157 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
158 my @vector = ( $kept );
159 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
160 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
161 next if $vector[0] eq $vector[1]; # Don't add a self loop
162 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
163 $self->sequence->add_edge( @vector );
164 my $fwits = $self->sequence->get_edge_attributes( @vector );
165 @wits{keys %$fwits} = values %$fwits;
166 $self->sequence->set_edge_attributes( @vector, \%wits );
168 foreach my $rel ( $self->relations->edges_at( $deleted ) ) {
169 my @vector = ( $kept );
170 push( @vector, $rel->[0] eq $deleted ? $rel->[1] : $rel->[0] );
171 next if $vector[0] eq $vector[1]; # Don't add a self loop
172 # Is there a relationship here already? If so, keep it.
173 # TODO Warn about conflicting relationships
174 next if $self->relations->has_edge( @vector );
175 # If not, adopt the relationship that would be deleted.
176 $self->relations->add_edge( @vector );
177 my $attr = $self->relations->get_edge_attributes( @$rel );
178 $self->relations->set_edge_attributes( @vector, $attr );
181 # Do the deletion deed.
182 if( $combine_char ) {
183 my $kept_obj = $self->reading( $kept );
184 my $new_text = join( $combine_char, $kept_obj->text,
185 $self->reading( $deleted )->text );
186 $kept_obj->alter_text( $new_text );
188 $self->del_reading( $deleted );
192 # Helper function for manipulating the graph.
193 sub _stringify_args {
194 my( $self, $first, $second, $arg ) = @_;
196 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
197 $second = $second->id
198 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
199 return( $first, $second, $arg );
207 # We only need the IDs for adding paths to the graph, not the reading
208 # objects themselves.
209 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
211 # Connect the readings
212 $self->sequence->add_edge( $source, $target );
213 # Note the witness in question
214 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
220 if( ref( $_[0] ) eq 'ARRAY' ) {
227 # We only need the IDs for adding paths to the graph, not the reading
228 # objects themselves.
229 my( $source, $target, $wit ) = $self->_stringify_args( @args );
231 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
232 $self->sequence->delete_edge_attribute( $source, $target, $wit );
234 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
235 $self->sequence->delete_edge( $source, $target );
240 # Extra graph-alike utility
243 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
244 return undef unless $self->sequence->has_edge( $source, $target );
245 return $self->sequence->has_edge_attribute( $source, $target, $wit );
248 ### Relationship logic
250 =head2 add_relationship( $reading1, $reading2, $definition )
252 Adds the specified relationship between the two readings. A relationship
253 is transitive (i.e. undirected), and must have the following attributes
254 specified in the hashref $definition:
258 =item * type - Can be one of spelling, orthographic, grammatical, meaning, lexical, collated, repetition, transposition. All but the last two are only valid relationships between readings that occur at the same point in the text.
260 =item * non_correctable - (Optional) True if the reading would not have been corrected independently.
262 =item * non_independent - (Optional) True if the variant is unlikely to have occurred independently in unrelated witnesses.
264 =item * global - (Optional) A meta-attribute, to set the same relationship between readings with the same text whenever they occur in the same place.
270 # Wouldn't it be lovely if edges could be objects, and all this type checking
271 # and attribute management could be done via Moose?
273 sub add_relationship {
275 my( $source, $target, $options ) = $self->_stringify_args( @_ );
278 if( !defined $options->{'type'} ||
279 $options->{'type'} !~ /^(spelling|orthographic|grammatical|meaning|lexical|collated|repetition|transposition)$/i ) {
280 my $t = $options->{'type'} ? $options->{'type'} : '';
281 return( undef, "Invalid or missing type " . $options->{'type'} );
283 unless ( $options->{'type'} =~ /^(repetition|transposition)$/ ) {
284 $options->{'colocated'} = 1;
287 # Make sure there is not another relationship between these two
289 if( $self->relations->has_edge( $source, $target ) ) {
290 return ( undef, "Relationship already exists between these readings" );
292 if( !$self->relationship_valid( $source, $target, $options->{'type'} ) ) {
293 return ( undef, 'Relationship creates witness loop' );
296 my @vector = ( $source, $target );
297 $self->relations->add_edge( @vector );
298 $self->relations->set_edge_attributes( @vector, $options );
300 # TODO Handle global relationship setting
302 return( 1, @vector );
305 sub relationship_valid {
306 my( $self, $source, $target, $rel ) = @_;
307 if( $rel eq 'repetition' ) {
309 } elsif ( $rel eq 'transposition' ) {
310 # Check that the two readings do not appear in the same witness.
312 map { $seen_wits{$_} = 1 } $self->reading_witnesses( $source );
313 foreach my $w ( $self->reading_witnesses( $target ) ) {
314 return 0 if $seen_wits{$w};
318 # Check that linking the source and target in a relationship won't lead
319 # to a path loop for any witness. First make a lookup table of all the
320 # readings related to either the source or the target.
321 my @proposed_related = ( $source, $target );
322 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
323 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
325 map { $pr_ids{ $_ } = 1 } @proposed_related;
327 # None of these proposed related readings should have a neighbor that
328 # is also in proposed_related.
329 foreach my $pr ( keys %pr_ids ) {
330 foreach my $neighbor( $self->sequence->neighbors( $pr ) ) {
331 return 0 if exists $pr_ids{$neighbor};
338 # Return a list of the witnesses in which the reading appears.
339 sub reading_witnesses {
340 my( $self, $reading ) = @_;
341 # We need only check either the incoming or the outgoing edges; I have
342 # arbitrarily chosen "incoming".
344 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
345 my $wits = $self->sequence->get_edge_attributes( @$e );
346 @all_witnesses{ keys %$wits } = 1;
348 return keys %all_witnesses;
351 sub related_readings {
352 my( $self, $reading, $colocated ) = @_;
354 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
355 $reading = $reading->id;
357 # print STDERR "Returning related objects\n";
359 # print STDERR "Returning related object names\n";
361 my @related = $self->relations->all_reachable( $reading );
363 my @colo = grep { $self->relations->has_edge_attribute( $reading, $_, 'colocated' ) } @related;
366 return $return_object ? map { $self->reading( $_ ) } @related : @related;
369 =head2 Output method(s)
375 print $graph->as_svg( $recalculate );
377 Returns an SVG string that represents the graph, via as_dot and graphviz.
384 my @cmd = qw/dot -Tsvg/;
386 my $dotfile = File::Temp->new();
388 # $dotfile->unlink_on_destroy(0);
389 binmode $dotfile, ':utf8';
390 print $dotfile $self->as_dot();
391 push( @cmd, $dotfile->filename );
392 run( \@cmd, ">", binary(), \$svg );
393 $svg = decode_utf8( $svg );
399 print $graph->as_dot( $view, $recalculate );
401 Returns a string that is the collation graph expressed in dot
402 (i.e. GraphViz) format. The 'view' argument determines what kind of
404 * 'path': a graph of witness paths through the collation (DEFAULT)
405 * 'relationship': a graph of how collation readings relate to
411 my( $self, $view ) = @_;
412 $view = 'sequence' unless $view;
413 # TODO consider making some of these things configurable
414 my $graph_name = $self->tradition->name;
415 $graph_name =~ s/[^\w\s]//g;
416 $graph_name = join( '_', split( /\s+/, $graph_name ) );
417 my $dot = sprintf( "digraph %s {\n", $graph_name );
418 $dot .= "\tedge [ arrowhead=open ];\n";
419 $dot .= "\tgraph [ rankdir=LR ];\n";
420 $dot .= sprintf( "\tnode [ fontsize=%d, fillcolor=%s, style=%s, shape=%s ];\n",
421 11, "white", "filled", "ellipse" );
423 foreach my $reading ( $self->readings ) {
424 # Need not output nodes without separate labels
425 next if $reading->id eq $reading->text;
426 my $label = $reading->text;
427 $label =~ s/\"/\\\"/g;
428 $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
431 # TODO do something sensible for relationships
433 my @edges = $self->paths;
434 foreach my $edge ( @edges ) {
435 my %variables = ( 'color' => '#000000',
436 'fontcolor' => '#000000',
437 'label' => join( ', ', $self->path_display_label( $edge ) ),
439 my $varopts = join( ', ', map { $_.'="'.$variables{$_}.'"' } sort keys %variables );
440 # Account for the rank gap if necessary
441 my $rankgap = $self->reading( $edge->[1] )->rank
442 - $self->reading( $edge->[0] )->rank;
443 $varopts .= ", minlen=$rankgap" if $rankgap > 1;
444 $dot .= sprintf( "\t\"%s\" -> \"%s\" [ %s ];\n",
445 $edge->[0], $edge->[1], $varopts );
452 my( $self, @edge ) = @_;
453 # If edge is an arrayref, cope.
454 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
458 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
462 sub path_display_label {
463 my( $self, $edge ) = @_;
464 my @wits = $self->path_witnesses( $edge );
465 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
466 if( scalar @wits > $maj ) {
469 return join( ', ', @wits );
476 print $graph->as_graphml( $recalculate )
478 Returns a GraphML representation of the collation graph, with
479 transposition information and position information. Unless
480 $recalculate is passed (and is a true value), the method will return a
481 cached copy of the SVG after the first call to the method.
489 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
490 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
491 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
492 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
494 # Create the document and root node
495 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
496 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
497 $graphml->setDocumentElement( $root );
498 $root->setNamespace( $xsi_ns, 'xsi', 0 );
499 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
501 # Add the data keys for the graph
504 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
505 foreach my $datum ( @graph_attributes ) {
506 $graph_data_keys{$datum} = 'dg'.$gdi++;
507 my $key = $root->addNewChild( $graphml_ns, 'key' );
508 $key->setAttribute( 'attr.name', $datum );
509 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
510 $key->setAttribute( 'for', 'graph' );
511 $key->setAttribute( 'id', $graph_data_keys{$datum} );
514 # Add the data keys for nodes
521 is_start => 'boolean',
523 is_lacuna => 'boolean',
525 foreach my $datum ( keys %node_data ) {
526 $node_data_keys{$datum} = 'dn'.$ndi++;
527 my $key = $root->addNewChild( $graphml_ns, 'key' );
528 $key->setAttribute( 'attr.name', $datum );
529 $key->setAttribute( 'attr.type', $node_data{$datum} );
530 $key->setAttribute( 'for', 'node' );
531 $key->setAttribute( 'id', $node_data_keys{$datum} );
534 # Add the data keys for edges, i.e. witnesses
538 witness => 'string', # ID/label for a path
539 relationship => 'string', # ID/label for a relationship
540 extra => 'boolean', # Path key
541 colocated => 'boolean', # Relationship key
542 non_correctable => 'boolean', # Relationship key
543 non_independent => 'boolean', # Relationship key
545 foreach my $datum ( keys %edge_data ) {
546 $edge_data_keys{$datum} = 'de'.$edi++;
547 my $key = $root->addNewChild( $graphml_ns, 'key' );
548 $key->setAttribute( 'attr.name', $datum );
549 $key->setAttribute( 'attr.type', $edge_data{$datum} );
550 $key->setAttribute( 'for', 'edge' );
551 $key->setAttribute( 'id', $edge_data_keys{$datum} );
554 # Add the collation graphs themselves
555 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
556 $sgraph->setAttribute( 'edgedefault', 'directed' );
557 $sgraph->setAttribute( 'id', $self->tradition->name );
558 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
559 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
560 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
561 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
562 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
564 my $rgraph = $root->addNewChild( $graphml_ns, 'graph' );
565 $rgraph->setAttribute( 'edgedefault', 'undirected' );
566 $rgraph->setAttribute( 'id', 'relationships' );
567 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
568 $rgraph->setAttribute( 'parse.edges', scalar($self->relationships) );
569 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
570 $rgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
571 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
573 # Collation attribute data
574 foreach my $datum ( @graph_attributes ) {
575 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
576 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
581 # Add our readings to the graphs
582 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
583 # Add to the main graph
584 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
585 my $node_xmlid = 'n' . $node_ctr++;
586 $node_hash{ $n->id } = $node_xmlid;
587 $node_el->setAttribute( 'id', $node_xmlid );
588 foreach my $d ( keys %node_data ) {
590 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
593 # Add to the relationships graph
594 my $rnode_el = $rgraph->addNewChild( $graphml_ns, 'node' );
595 $rnode_el->setAttribute( 'id', $node_xmlid );
598 # Add the path edges to the sequence graph
600 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
601 # We add an edge in the graphml for every witness in $e.
602 foreach my $wit ( $self->path_witnesses( $e ) ) {
603 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
604 $node_hash{ $e->[0] },
605 $node_hash{ $e->[1] } );
606 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
607 $edge_el->setAttribute( 'source', $from );
608 $edge_el->setAttribute( 'target', $to );
609 $edge_el->setAttribute( 'id', $id );
611 # It's a witness path, so add the witness
613 my $key = $edge_data_keys{'witness'};
614 # Is this an ante-corr witness?
615 my $aclabel = $self->ac_label;
616 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
617 # Keep the base witness
619 # ...and record that this is an 'extra' reading path
620 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
622 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
626 # Add the relationship edges to the relationships graph
627 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->relationships ) {
628 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
629 $node_hash{ $e->[0] },
630 $node_hash{ $e->[1] } );
631 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
632 $edge_el->setAttribute( 'source', $from );
633 $edge_el->setAttribute( 'target', $to );
634 $edge_el->setAttribute( 'id', $id );
636 my $data = $self->relations->get_edge_attributes( @$e );
637 # It's a relationship, so save the relationship data
638 _add_graphml_data( $edge_el, $edge_data_keys{'relationship'}, $data->{type} );
639 _add_graphml_data( $edge_el, $edge_data_keys{'colocated'}, $data->{colocated} );
640 if( exists $data->{non_correctable} ) {
641 _add_graphml_data( $edge_el, $edge_data_keys{'non_correctable'},
642 $data->{non_correctable} );
644 if( exists $data->{non_independent} ) {
645 _add_graphml_data( $edge_el, $edge_data_keys{'non_independent'},
646 $data->{non_independent} );
650 # Save and return the thing
651 my $result = decode_utf8( $graphml->toString(1) );
655 sub _add_graphml_data {
656 my( $el, $key, $value ) = @_;
657 return unless defined $value;
658 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
659 $data_el->setAttribute( 'key', $key );
660 $data_el->appendText( $value );
665 print $graph->as_csv( $recalculate )
667 Returns a CSV alignment table representation of the collation graph, one
668 row per witness (or witness uncorrected.)
674 my $table = $self->make_alignment_table;
675 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
677 # Make the header row
678 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
679 push( @result, decode_utf8( $csv->string ) );
680 # Make the rest of the rows
681 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
682 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
683 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
684 $csv->combine( @row );
685 push( @result, decode_utf8( $csv->string ) );
687 return join( "\n", @result );
690 =item B<make_alignment_table>
692 my $table = $graph->make_alignment_table( $use_refs, \@wits_to_include )
694 Return a reference to an alignment table, in a slightly enhanced CollateX
695 format which looks like this:
697 $table = { alignment => [ { witness => "SIGIL",
698 tokens => [ { t => "READINGTEXT" }, ... ] },
700 tokens => [ { t => "READINGTEXT" }, ... ] },
704 If $use_refs is set to 1, the reading object is returned in the table
705 instead of READINGTEXT; if not, the text of the reading is returned.
706 If $wits_to_include is set to a hashref, only the witnesses whose sigil
707 keys have a true hash value will be included.
711 sub make_alignment_table {
712 my( $self, $noderefs, $include ) = @_;
713 unless( $self->linear ) {
714 warn "Need a linear graph in order to make an alignment table";
717 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
718 my @all_pos = ( 1 .. $self->end->rank - 1 );
719 foreach my $wit ( $self->tradition->witnesses ) {
721 next unless $include->{$wit->sigil};
723 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
724 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
725 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
726 push( @{$table->{'alignment'}},
727 { 'witness' => $wit->sigil, 'tokens' => \@row } );
728 if( $wit->is_layered ) {
729 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
730 $wit->sigil.$self->ac_label, $wit->sigil );
731 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
732 push( @{$table->{'alignment'}},
733 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
739 sub _make_witness_row {
740 my( $path, $positions, $noderefs ) = @_;
742 map { $char_hash{$_} = undef } @$positions;
744 foreach my $rdg ( @$path ) {
745 my $rtext = $rdg->text;
746 $rtext = '#LACUNA#' if $rdg->is_lacuna;
747 print STDERR "rank " . $rdg->rank . "\n" if $debug;
748 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
749 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
752 my @row = map { $char_hash{$_} } @$positions;
753 # Fill in lacuna markers for undef spots in the row
754 my $last_el = shift @row;
755 my @filled_row = ( $last_el );
756 foreach my $el ( @row ) {
757 # If we are using node reference, make the lacuna node appear many times
758 # in the table. If not, use the lacuna tag.
759 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
760 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
762 push( @filled_row, $el );
768 # Tiny utility function to say if a table element is a lacuna
771 return 1 if $el->{'t'} eq '#LACUNA#';
772 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
773 && $el->{'t'}->is_lacuna;
777 # Helper to turn the witnesses along columns rather than rows. Assumes
782 return $result unless scalar @$table;
783 my $nrows = scalar @{$table->[0]};
784 foreach my $idx ( 0 .. $nrows - 1 ) {
785 foreach my $wit ( 0 .. $#{$table} ) {
786 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
794 =head2 Navigation methods
800 my $beginning = $collation->start();
802 Returns the beginning of the collation, a meta-reading with label '#START#'.
806 my $end = $collation->end();
808 Returns the end of the collation, a meta-reading with label '#END#'.
811 =item B<reading_sequence>
813 my @readings = $graph->reading_sequence( $first, $last, $path[, $alt_path] );
815 Returns the ordered list of readings, starting with $first and ending
816 with $last, along the given witness path. If no path is specified,
817 assume that the path is that of the base text (if any.)
821 # TODO Think about returning some lazy-eval iterator.
823 sub reading_sequence {
824 my( $self, $start, $end, $witness, $backup ) = @_;
826 $witness = $self->baselabel unless $witness;
827 my @readings = ( $start );
830 while( $n && $n->id ne $end->id ) {
831 if( exists( $seen{$n->id} ) ) {
832 warn "Detected loop at " . $n->id;
837 my $next = $self->next_reading( $n, $witness, $backup );
839 warn "Did not find any path for $witness from reading " . $n->id;
842 push( @readings, $next );
845 # Check that the last reading is our end reading.
846 my $last = $readings[$#readings];
847 warn "Last reading found from " . $start->text .
848 " for witness $witness is not the end!"
849 unless $last->id eq $end->id;
854 =item B<next_reading>
856 my $next_reading = $graph->next_reading( $reading, $witpath );
858 Returns the reading that follows the given reading along the given witness
864 # Return the successor via the corresponding path.
866 my $answer = $self->_find_linked_reading( 'next', @_ );
867 return undef unless $answer;
868 return $self->reading( $answer );
871 =item B<prior_reading>
873 my $prior_reading = $graph->prior_reading( $reading, $witpath );
875 Returns the reading that precedes the given reading along the given witness
881 # Return the predecessor via the corresponding path.
883 my $answer = $self->_find_linked_reading( 'prior', @_ );
884 return $self->reading( $answer );
887 sub _find_linked_reading {
888 my( $self, $direction, $node, $path, $alt_path ) = @_;
889 my @linked_paths = $direction eq 'next'
890 ? $self->sequence->edges_from( $node )
891 : $self->sequence->edges_to( $node );
892 return undef unless scalar( @linked_paths );
894 # We have to find the linked path that contains all of the
895 # witnesses supplied in $path.
896 my( @path_wits, @alt_path_wits );
897 @path_wits = sort( $self->witnesses_of_label( $path ) ) if $path;
898 @alt_path_wits = sort( $self->witnesses_of_label( $alt_path ) ) if $alt_path;
901 foreach my $le ( @linked_paths ) {
902 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
905 my @le_wits = $self->path_witnesses( $le );
906 if( _is_within( \@path_wits, \@le_wits ) ) {
907 # This is the right path.
908 return $direction eq 'next' ? $le->[1] : $le->[0];
909 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
913 # Got this far? Return the alternate path if it exists.
914 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
917 # Got this far? Return the base path if it exists.
918 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
921 # Got this far? We have no appropriate path.
922 warn "Could not find $direction node from " . $node->id
923 . " along path $path";
929 my( $set1, $set2 ) = @_;
930 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
931 foreach my $el ( @$set1 ) {
932 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
938 ## INITIALIZATION METHODS - for use by parsers
940 # For use when a collation is constructed from a base text and an apparatus.
941 # We have the sequences of readings and just need to add path edges.
942 # When we are done, clear out the witness path attributes, as they are no
944 # TODO Find a way to replace the witness path attributes with encapsulated functions?
946 sub make_witness_paths {
948 foreach my $wit ( $self->tradition->witnesses ) {
949 # print STDERR "Making path for " . $wit->sigil . "\n";
950 $self->make_witness_path( $wit );
954 sub make_witness_path {
955 my( $self, $wit ) = @_;
956 my @chain = @{$wit->path};
957 my $sig = $wit->sigil;
958 foreach my $idx ( 0 .. $#chain-1 ) {
959 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
961 if( $wit->is_layered ) {
962 @chain = @{$wit->uncorrected_path};
963 foreach my $idx( 0 .. $#chain-1 ) {
964 my $source = $chain[$idx];
965 my $target = $chain[$idx+1];
966 $self->add_path( $source, $target, $sig.$self->ac_label )
967 unless $self->has_path( $source, $target, $sig );
971 $wit->clear_uncorrected_path;
974 sub calculate_ranks {
976 # Walk a version of the graph where every node linked by a relationship
977 # edge is fundamentally the same node, and do a topological ranking on
978 # the nodes in this graph.
979 my $topo_graph = Graph->new();
983 foreach my $r ( $self->readings ) {
984 next if exists $rel_containers{$r->id};
985 my @rels = $r->related_readings( 'colocated' );
987 # Make a relationship container.
989 my $rn = 'rel_container_' . $rel_ctr++;
990 $topo_graph->add_vertex( $rn );
992 $rel_containers{$_->id} = $rn;
995 # Add a new node to mirror the old node.
996 $rel_containers{$r->id} = $r->id;
997 $topo_graph->add_vertex( $r->id );
1002 foreach my $r ( $self->readings ) {
1003 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1004 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1005 $rel_containers{$n} );
1006 $DB::single = 1 unless $tfrom && $tto;
1007 $topo_graph->add_edge( $tfrom, $tto );
1011 # Now do the rankings, starting with the start node.
1012 my $topo_start = $rel_containers{$self->start->id};
1013 my $node_ranks = { $topo_start => 0 };
1014 my @curr_origin = ( $topo_start );
1015 # A little iterative function.
1016 while( @curr_origin ) {
1017 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
1019 # Transfer our rankings from the topological graph to the real one.
1020 foreach my $r ( $self->readings ) {
1021 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1022 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
1025 die "No rank calculated for node " . $r->id
1026 . " - do you have a cycle in the graph?";
1032 my( $graph, $node_ranks, @current_nodes ) = @_;
1033 # Look at each of the children of @current_nodes. If all the child's
1034 # parents have a rank, assign it the highest rank + 1 and add it to
1035 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1036 # parent gets a rank.
1038 foreach my $c ( @current_nodes ) {
1039 warn "Current reading $c has no rank!"
1040 unless exists $node_ranks->{$c};
1041 # print STDERR "Looking at child of node $c, rank "
1042 # . $node_ranks->{$c} . "\n";
1043 foreach my $child ( $graph->successors( $c ) ) {
1044 next if exists $node_ranks->{$child};
1045 my $highest_rank = -1;
1047 foreach my $parent ( $graph->predecessors( $child ) ) {
1048 if( exists $node_ranks->{$parent} ) {
1049 $highest_rank = $node_ranks->{$parent}
1050 if $highest_rank <= $node_ranks->{$parent};
1057 my $c_rank = $highest_rank + 1;
1058 # print STDERR "Assigning rank $c_rank to node $child \n";
1059 $node_ranks->{$child} = $c_rank;
1060 push( @next_nodes, $child );
1066 # Another method to make up for rough collation methods. If the same reading
1067 # appears multiple times at the same rank, collapse the nodes.
1070 my %unique_rank_rdg;
1071 foreach my $rdg ( $self->readings ) {
1072 next unless $rdg->has_rank;
1073 my $key = $rdg->rank . "||" . $rdg->text;
1074 if( exists $unique_rank_rdg{$key} ) {
1076 # print STDERR "Combining readings at same rank: $key\n";
1077 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1079 $unique_rank_rdg{$key} = $rdg;
1085 ## Utility functions
1087 # Return the string that joins together a list of witnesses for
1088 # display on a single path.
1089 sub witnesses_of_label {
1090 my( $self, $label ) = @_;
1091 my $regex = $self->wit_list_separator;
1092 my @answer = split( /\Q$regex\E/, $label );
1097 __PACKAGE__->meta->make_immutable;
1103 =item * Think about making Relationship objects again