1 package Text::Tradition::Collation;
3 use Encode qw( decode_utf8 );
6 use IPC::Run qw( run binary );
8 use Text::Tradition::Collation::Reading;
15 default => sub { Graph->new() },
24 default => sub { Graph->new( undirected => 1 ) },
26 relationships => 'edges',
32 isa => 'Text::Tradition',
37 isa => 'HashRef[Text::Tradition::Collation::Reading]',
41 _add_reading => 'set',
42 del_reading => 'delete',
43 has_reading => 'exists',
46 default => sub { {} },
49 has 'wit_list_separator' => (
58 default => 'base text',
75 isa => 'Text::Tradition::Collation::Reading',
76 writer => '_set_start',
82 isa => 'Text::Tradition::Collation::Reading',
87 # The collation can be created two ways:
88 # 1. Collate a set of witnesses (with CollateX I guess) and process
89 # the results as in 2.
90 # 2. Read a pre-prepared collation in one of a variety of formats,
91 # and make the graph from that.
93 # The graph itself will (for now) be immutable, and the positions
94 # within the graph will also be immutable. We need to calculate those
95 # positions upon graph construction. The equivalences between graph
96 # nodes will be mutable, entirely determined by the user (or possibly
97 # by some semantic pre-processing provided by the user.) So the
98 # constructor should just make an empty equivalences object. The
99 # constructor will also need to make the witness objects, if we didn't
100 # come through option 1.
104 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
105 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
108 ### Reading construct/destruct functions
111 my( $self, $reading ) = @_;
112 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
113 my %args = %$reading;
114 $reading = Text::Tradition::Collation::Reading->new(
115 'collation' => $self,
118 # First check to see if a reading with this ID exists.
119 if( $self->reading( $reading->id ) ) {
120 warn "Collation already has a reading with id " . $reading->id;
123 $self->_add_reading( $reading->id => $reading );
124 # Once the reading has been added, put it in both graphs.
125 $self->sequence->add_vertex( $reading->id );
126 $self->relations->add_vertex( $reading->id );
130 around del_reading => sub {
135 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
138 # Remove the reading from the graphs.
139 $self->sequence->delete_vertex( $arg );
140 $self->relations->delete_vertex( $arg );
143 $self->$orig( $arg );
146 # merge_readings( $main, $to_be_deleted );
151 # We only need the IDs for adding paths to the graph, not the reading
152 # objects themselves.
153 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
155 # The kept reading should inherit the paths and the relationships
156 # of the deleted reading.
157 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
158 my @vector = ( $kept );
159 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
160 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
161 next if $vector[0] eq $vector[1]; # Don't add a self loop
162 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
163 $self->sequence->add_edge( @vector );
164 my $fwits = $self->sequence->get_edge_attributes( @vector );
165 @wits{keys %$fwits} = values %$fwits;
166 $self->sequence->set_edge_attributes( @vector, \%wits );
168 foreach my $rel ( $self->relations->edges_at( $deleted ) ) {
169 my @vector = ( $kept );
170 push( @vector, $rel->[0] eq $deleted ? $rel->[1] : $rel->[0] );
171 next if $vector[0] eq $vector[1]; # Don't add a self loop
172 # Is there a relationship here already? If so, keep it.
173 # TODO Warn about conflicting relationships
174 next if $self->relations->has_edge( @vector );
175 # If not, adopt the relationship that would be deleted.
176 $self->relations->add_edge( @vector );
177 my $attr = $self->relations->get_edge_attributes( @$rel );
178 $self->relations->set_edge_attributes( @vector, $attr );
181 # Do the deletion deed.
182 if( $combine_char ) {
183 my $kept_obj = $self->reading( $kept );
184 my $new_text = join( $combine_char, $kept_obj->text,
185 $self->reading( $deleted )->text );
186 $kept_obj->alter_text( $new_text );
188 $self->del_reading( $deleted );
192 # Helper function for manipulating the graph.
193 sub _stringify_args {
194 my( $self, $first, $second, $arg ) = @_;
196 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
197 $second = $second->id
198 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
199 return( $first, $second, $arg );
207 # We only need the IDs for adding paths to the graph, not the reading
208 # objects themselves.
209 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
211 # Connect the readings
212 $self->sequence->add_edge( $source, $target );
213 # Note the witness in question
214 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
220 if( ref( $_[0] ) eq 'ARRAY' ) {
227 # We only need the IDs for adding paths to the graph, not the reading
228 # objects themselves.
229 my( $source, $target, $wit ) = $self->_stringify_args( @args );
231 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
232 $self->sequence->delete_edge_attribute( $source, $target, $wit );
234 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
235 $self->sequence->delete_edge( $source, $target );
240 # Extra graph-alike utility
243 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
244 return undef unless $self->sequence->has_edge( $source, $target );
245 return $self->sequence->has_edge_attribute( $source, $target, $wit );
248 ### Relationship logic
250 =head2 add_relationship( $reading1, $reading2, $definition )
252 Adds the specified relationship between the two readings. A relationship
253 is transitive (i.e. undirected), and must have the following attributes
254 specified in the hashref $definition:
258 =item * type - Can be one of spelling, orthographic, grammatical, meaning, lexical, collated, repetition, transposition. All but the last two are only valid relationships between readings that occur at the same point in the text.
260 =item * non_correctable - (Optional) True if the reading would not have been corrected independently.
262 =item * non_independent - (Optional) True if the variant is unlikely to have occurred independently in unrelated witnesses.
264 =item * global - (Optional) A meta-attribute, to set the same relationship between readings with the same text whenever they occur in the same place.
270 # Wouldn't it be lovely if edges could be objects, and all this type checking
271 # and attribute management could be done via Moose?
273 sub add_relationship {
275 my( $source, $target, $options ) = $self->_stringify_args( @_ );
278 if( !defined $options->{'type'} ||
279 $options->{'type'} !~ /^(spelling|orthographic|grammatical|meaning|lexical|collated|repetition|transposition)$/i ) {
280 my $t = $options->{'type'} ? $options->{'type'} : '';
281 return( undef, "Invalid or missing type " . $options->{'type'} );
283 unless ( $options->{'type'} =~ /^(repetition|transposition)$/ ) {
284 $options->{'colocated'} = 1;
287 # Make sure there is not another relationship between these two
289 if( $self->relations->has_edge( $source, $target ) ) {
290 return ( undef, "Relationship already exists between these readings" );
292 if( !$self->relationship_valid( $source, $target, $options->{'type'} ) ) {
293 return ( undef, 'Relationship creates witness loop' );
296 my @vector = ( $source, $target );
297 $self->relations->add_edge( @vector );
298 $self->relations->set_edge_attributes( @vector, $options );
300 # TODO Handle global relationship setting
302 return( 1, @vector );
305 sub relationship_valid {
306 my( $self, $source, $target, $rel ) = @_;
307 if( $rel eq 'repetition' ) {
309 } elsif ( $rel eq 'transposition' ) {
310 # Check that the two readings do not appear in the same witness.
312 map { $seen_wits{$_} = 1 } $self->reading_witnesses( $source );
313 foreach my $w ( $self->reading_witnesses( $target ) ) {
314 return 0 if $seen_wits{$w};
318 # Check that linking the source and target in a relationship won't lead
319 # to a path loop for any witness. First make a lookup table of all the
320 # readings related to either the source or the target.
321 my @proposed_related = ( $source, $target );
322 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
323 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
325 map { $pr_ids{ $_ } = 1 } @proposed_related;
327 # None of these proposed related readings should have a neighbor that
328 # is also in proposed_related.
329 foreach my $pr ( keys %pr_ids ) {
330 foreach my $neighbor( $self->sequence->neighbors( $pr ) ) {
331 return 0 if exists $pr_ids{$neighbor};
338 # Return a list of the witnesses in which the reading appears.
339 sub reading_witnesses {
340 my( $self, $reading ) = @_;
341 # We need only check either the incoming or the outgoing edges; I have
342 # arbitrarily chosen "incoming". Thus, special-case the start node.
343 if( $reading eq $self->start ) {
344 return map { $_->sigil } $self->tradition->witnesses;
347 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
348 my $wits = $self->sequence->get_edge_attributes( @$e );
349 @all_witnesses{ keys %$wits } = 1;
351 return keys %all_witnesses;
354 sub related_readings {
355 my( $self, $reading, $colocated ) = @_;
357 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
358 $reading = $reading->id;
360 # print STDERR "Returning related objects\n";
362 # print STDERR "Returning related object names\n";
364 my @related = $self->relations->all_reachable( $reading );
366 my @colo = grep { $self->relations->has_edge_attribute( $reading, $_, 'colocated' ) } @related;
369 return $return_object ? map { $self->reading( $_ ) } @related : @related;
372 =head2 Output method(s)
378 print $graph->as_svg( $recalculate );
380 Returns an SVG string that represents the graph, via as_dot and graphviz.
387 my @cmd = qw/dot -Tsvg/;
389 my $dotfile = File::Temp->new();
391 # $dotfile->unlink_on_destroy(0);
392 binmode $dotfile, ':utf8';
393 print $dotfile $self->as_dot();
394 push( @cmd, $dotfile->filename );
395 run( \@cmd, ">", binary(), \$svg );
396 $svg = decode_utf8( $svg );
402 print $graph->as_dot( $view, $recalculate );
404 Returns a string that is the collation graph expressed in dot
405 (i.e. GraphViz) format. The 'view' argument determines what kind of
407 * 'path': a graph of witness paths through the collation (DEFAULT)
408 * 'relationship': a graph of how collation readings relate to
414 my( $self, $view ) = @_;
415 $view = 'sequence' unless $view;
416 # TODO consider making some of these things configurable
417 my $graph_name = $self->tradition->name;
418 $graph_name =~ s/[^\w\s]//g;
419 $graph_name = join( '_', split( /\s+/, $graph_name ) );
420 my $dot = sprintf( "digraph %s {\n", $graph_name );
421 $dot .= "\tedge [ arrowhead=open ];\n";
422 $dot .= "\tgraph [ rankdir=LR,bgcolor=none ];\n";
423 $dot .= sprintf( "\tnode [ fontsize=%d, fillcolor=%s, style=%s, shape=%s ];\n",
424 11, "white", "filled", "ellipse" );
426 foreach my $reading ( $self->readings ) {
427 # Need not output nodes without separate labels
428 next if $reading->id eq $reading->text;
429 my $label = $reading->text;
430 $label =~ s/\"/\\\"/g;
431 $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
434 # TODO do something sensible for relationships
436 my @edges = $self->paths;
437 foreach my $edge ( @edges ) {
438 my %variables = ( 'color' => '#000000',
439 'fontcolor' => '#000000',
440 'label' => join( ', ', $self->path_display_label( $edge ) ),
442 my $varopts = join( ', ', map { $_.'="'.$variables{$_}.'"' } sort keys %variables );
443 # Account for the rank gap if necessary
444 my $rankgap = $self->reading( $edge->[1] )->rank
445 - $self->reading( $edge->[0] )->rank;
446 $varopts .= ", minlen=$rankgap" if $rankgap > 1;
447 $dot .= sprintf( "\t\"%s\" -> \"%s\" [ %s ];\n",
448 $edge->[0], $edge->[1], $varopts );
455 my( $self, @edge ) = @_;
456 # If edge is an arrayref, cope.
457 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
461 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
465 sub path_display_label {
466 my( $self, $edge ) = @_;
467 my @wits = $self->path_witnesses( $edge );
468 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
469 if( scalar @wits > $maj ) {
472 return join( ', ', @wits );
479 print $graph->as_graphml( $recalculate )
481 Returns a GraphML representation of the collation graph, with
482 transposition information and position information. Unless
483 $recalculate is passed (and is a true value), the method will return a
484 cached copy of the SVG after the first call to the method.
492 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
493 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
494 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
495 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
497 # Create the document and root node
498 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
499 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
500 $graphml->setDocumentElement( $root );
501 $root->setNamespace( $xsi_ns, 'xsi', 0 );
502 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
504 # Add the data keys for the graph
507 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
508 foreach my $datum ( @graph_attributes ) {
509 $graph_data_keys{$datum} = 'dg'.$gdi++;
510 my $key = $root->addNewChild( $graphml_ns, 'key' );
511 $key->setAttribute( 'attr.name', $datum );
512 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
513 $key->setAttribute( 'for', 'graph' );
514 $key->setAttribute( 'id', $graph_data_keys{$datum} );
517 # Add the data keys for nodes
524 is_start => 'boolean',
526 is_lacuna => 'boolean',
528 foreach my $datum ( keys %node_data ) {
529 $node_data_keys{$datum} = 'dn'.$ndi++;
530 my $key = $root->addNewChild( $graphml_ns, 'key' );
531 $key->setAttribute( 'attr.name', $datum );
532 $key->setAttribute( 'attr.type', $node_data{$datum} );
533 $key->setAttribute( 'for', 'node' );
534 $key->setAttribute( 'id', $node_data_keys{$datum} );
537 # Add the data keys for edges, i.e. witnesses
541 class => 'string', # Class, deprecated soon
542 witness => 'string', # ID/label for a path
543 relationship => 'string', # ID/label for a relationship
544 extra => 'boolean', # Path key
545 colocated => 'boolean', # Relationship key
546 non_correctable => 'boolean', # Relationship key
547 non_independent => 'boolean', # Relationship key
549 foreach my $datum ( keys %edge_data ) {
550 $edge_data_keys{$datum} = 'de'.$edi++;
551 my $key = $root->addNewChild( $graphml_ns, 'key' );
552 $key->setAttribute( 'attr.name', $datum );
553 $key->setAttribute( 'attr.type', $edge_data{$datum} );
554 $key->setAttribute( 'for', 'edge' );
555 $key->setAttribute( 'id', $edge_data_keys{$datum} );
558 # Add the collation graphs themselves
559 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
560 $sgraph->setAttribute( 'edgedefault', 'directed' );
561 $sgraph->setAttribute( 'id', $self->tradition->name );
562 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
563 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
564 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
565 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
566 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
569 if( scalar $self->relationships ) {
570 my $rgraph = $root->addNewChild( $graphml_ns, 'graph' );
571 $rgraph->setAttribute( 'edgedefault', 'undirected' );
572 $rgraph->setAttribute( 'id', 'relationships' );
573 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
574 $rgraph->setAttribute( 'parse.edges', scalar($self->relationships) );
575 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
576 $rgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
577 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
580 # Collation attribute data
581 foreach my $datum ( @graph_attributes ) {
582 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
583 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
588 # Add our readings to the graphs
589 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
590 # Add to the main graph
591 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
592 my $node_xmlid = 'n' . $node_ctr++;
593 $node_hash{ $n->id } = $node_xmlid;
594 $node_el->setAttribute( 'id', $node_xmlid );
595 foreach my $d ( keys %node_data ) {
597 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
600 # Add to the relationships graph
602 my $rnode_el = $rgraph->addNewChild( $graphml_ns, 'node' );
603 $rnode_el->setAttribute( 'id', $node_xmlid );
607 # Add the path edges to the sequence graph
609 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
610 # We add an edge in the graphml for every witness in $e.
611 foreach my $wit ( $self->path_witnesses( $e ) ) {
612 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
613 $node_hash{ $e->[0] },
614 $node_hash{ $e->[1] } );
615 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
616 $edge_el->setAttribute( 'source', $from );
617 $edge_el->setAttribute( 'target', $to );
618 $edge_el->setAttribute( 'id', $id );
620 # It's a witness path, so add the witness
622 my $key = $edge_data_keys{'witness'};
623 # Is this an ante-corr witness?
624 my $aclabel = $self->ac_label;
625 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
626 # Keep the base witness
628 # ...and record that this is an 'extra' reading path
629 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
631 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
632 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
636 # Add the relationship edges to the relationships graph
638 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->relationships ) {
639 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
640 $node_hash{ $e->[0] },
641 $node_hash{ $e->[1] } );
642 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
643 $edge_el->setAttribute( 'source', $from );
644 $edge_el->setAttribute( 'target', $to );
645 $edge_el->setAttribute( 'id', $id );
647 my $data = $self->relations->get_edge_attributes( @$e );
648 # It's a relationship, so save the relationship data
649 _add_graphml_data( $edge_el, $edge_data_keys{'relationship'}, $data->{type} );
650 _add_graphml_data( $edge_el, $edge_data_keys{'colocated'}, $data->{colocated} );
651 if( exists $data->{non_correctable} ) {
652 _add_graphml_data( $edge_el, $edge_data_keys{'non_correctable'},
653 $data->{non_correctable} );
655 if( exists $data->{non_independent} ) {
656 _add_graphml_data( $edge_el, $edge_data_keys{'non_independent'},
657 $data->{non_independent} );
659 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'relationship' );
663 # Save and return the thing
664 my $result = decode_utf8( $graphml->toString(1) );
668 sub _add_graphml_data {
669 my( $el, $key, $value ) = @_;
670 return unless defined $value;
671 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
672 $data_el->setAttribute( 'key', $key );
673 $data_el->appendText( $value );
678 print $graph->as_csv( $recalculate )
680 Returns a CSV alignment table representation of the collation graph, one
681 row per witness (or witness uncorrected.)
687 my $table = $self->make_alignment_table;
688 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
690 # Make the header row
691 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
692 push( @result, decode_utf8( $csv->string ) );
693 # Make the rest of the rows
694 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
695 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
696 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
697 $csv->combine( @row );
698 push( @result, decode_utf8( $csv->string ) );
700 return join( "\n", @result );
703 =item B<make_alignment_table>
705 my $table = $graph->make_alignment_table( $use_refs, \@wits_to_include )
707 Return a reference to an alignment table, in a slightly enhanced CollateX
708 format which looks like this:
710 $table = { alignment => [ { witness => "SIGIL",
711 tokens => [ { t => "READINGTEXT" }, ... ] },
713 tokens => [ { t => "READINGTEXT" }, ... ] },
717 If $use_refs is set to 1, the reading object is returned in the table
718 instead of READINGTEXT; if not, the text of the reading is returned.
719 If $wits_to_include is set to a hashref, only the witnesses whose sigil
720 keys have a true hash value will be included.
724 sub make_alignment_table {
725 my( $self, $noderefs, $include ) = @_;
726 unless( $self->linear ) {
727 warn "Need a linear graph in order to make an alignment table";
730 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
731 my @all_pos = ( 1 .. $self->end->rank - 1 );
732 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
734 next unless $include->{$wit->sigil};
736 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
737 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
738 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
739 push( @{$table->{'alignment'}},
740 { 'witness' => $wit->sigil, 'tokens' => \@row } );
741 if( $wit->is_layered ) {
742 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
743 $wit->sigil.$self->ac_label, $wit->sigil );
744 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
745 push( @{$table->{'alignment'}},
746 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
752 sub _make_witness_row {
753 my( $path, $positions, $noderefs ) = @_;
755 map { $char_hash{$_} = undef } @$positions;
757 foreach my $rdg ( @$path ) {
758 my $rtext = $rdg->text;
759 $rtext = '#LACUNA#' if $rdg->is_lacuna;
760 print STDERR "rank " . $rdg->rank . "\n" if $debug;
761 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
762 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
765 my @row = map { $char_hash{$_} } @$positions;
766 # Fill in lacuna markers for undef spots in the row
767 my $last_el = shift @row;
768 my @filled_row = ( $last_el );
769 foreach my $el ( @row ) {
770 # If we are using node reference, make the lacuna node appear many times
771 # in the table. If not, use the lacuna tag.
772 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
773 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
775 push( @filled_row, $el );
781 # Tiny utility function to say if a table element is a lacuna
784 return 1 if $el->{'t'} eq '#LACUNA#';
785 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
786 && $el->{'t'}->is_lacuna;
790 # Helper to turn the witnesses along columns rather than rows. Assumes
795 return $result unless scalar @$table;
796 my $nrows = scalar @{$table->[0]};
797 foreach my $idx ( 0 .. $nrows - 1 ) {
798 foreach my $wit ( 0 .. $#{$table} ) {
799 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
807 =head2 Navigation methods
813 my $beginning = $collation->start();
815 Returns the beginning of the collation, a meta-reading with label '#START#'.
819 my $end = $collation->end();
821 Returns the end of the collation, a meta-reading with label '#END#'.
824 =item B<reading_sequence>
826 my @readings = $graph->reading_sequence( $first, $last, $path[, $alt_path] );
828 Returns the ordered list of readings, starting with $first and ending
829 with $last, along the given witness path. If no path is specified,
830 assume that the path is that of the base text (if any.)
834 # TODO Think about returning some lazy-eval iterator.
836 sub reading_sequence {
837 my( $self, $start, $end, $witness, $backup ) = @_;
839 $witness = $self->baselabel unless $witness;
840 my @readings = ( $start );
843 while( $n && $n->id ne $end->id ) {
844 if( exists( $seen{$n->id} ) ) {
845 warn "Detected loop at " . $n->id;
850 my $next = $self->next_reading( $n, $witness, $backup );
852 warn "Did not find any path for $witness from reading " . $n->id;
855 push( @readings, $next );
858 # Check that the last reading is our end reading.
859 my $last = $readings[$#readings];
860 warn "Last reading found from " . $start->text .
861 " for witness $witness is not the end!"
862 unless $last->id eq $end->id;
867 =item B<next_reading>
869 my $next_reading = $graph->next_reading( $reading, $witpath );
871 Returns the reading that follows the given reading along the given witness
877 # Return the successor via the corresponding path.
879 my $answer = $self->_find_linked_reading( 'next', @_ );
880 return undef unless $answer;
881 return $self->reading( $answer );
884 =item B<prior_reading>
886 my $prior_reading = $graph->prior_reading( $reading, $witpath );
888 Returns the reading that precedes the given reading along the given witness
894 # Return the predecessor via the corresponding path.
896 my $answer = $self->_find_linked_reading( 'prior', @_ );
897 return $self->reading( $answer );
900 sub _find_linked_reading {
901 my( $self, $direction, $node, $path, $alt_path ) = @_;
902 my @linked_paths = $direction eq 'next'
903 ? $self->sequence->edges_from( $node )
904 : $self->sequence->edges_to( $node );
905 return undef unless scalar( @linked_paths );
907 # We have to find the linked path that contains all of the
908 # witnesses supplied in $path.
909 my( @path_wits, @alt_path_wits );
910 @path_wits = sort( $self->witnesses_of_label( $path ) ) if $path;
911 @alt_path_wits = sort( $self->witnesses_of_label( $alt_path ) ) if $alt_path;
914 foreach my $le ( @linked_paths ) {
915 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
918 my @le_wits = $self->path_witnesses( $le );
919 if( _is_within( \@path_wits, \@le_wits ) ) {
920 # This is the right path.
921 return $direction eq 'next' ? $le->[1] : $le->[0];
922 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
926 # Got this far? Return the alternate path if it exists.
927 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
930 # Got this far? Return the base path if it exists.
931 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
934 # Got this far? We have no appropriate path.
935 warn "Could not find $direction node from " . $node->id
936 . " along path $path";
942 my( $set1, $set2 ) = @_;
943 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
944 foreach my $el ( @$set1 ) {
945 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
951 ## INITIALIZATION METHODS - for use by parsers
953 # For use when a collation is constructed from a base text and an apparatus.
954 # We have the sequences of readings and just need to add path edges.
955 # When we are done, clear out the witness path attributes, as they are no
957 # TODO Find a way to replace the witness path attributes with encapsulated functions?
959 sub make_witness_paths {
961 foreach my $wit ( $self->tradition->witnesses ) {
962 # print STDERR "Making path for " . $wit->sigil . "\n";
963 $self->make_witness_path( $wit );
967 sub make_witness_path {
968 my( $self, $wit ) = @_;
969 my @chain = @{$wit->path};
970 my $sig = $wit->sigil;
971 foreach my $idx ( 0 .. $#chain-1 ) {
972 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
974 if( $wit->is_layered ) {
975 @chain = @{$wit->uncorrected_path};
976 foreach my $idx( 0 .. $#chain-1 ) {
977 my $source = $chain[$idx];
978 my $target = $chain[$idx+1];
979 $self->add_path( $source, $target, $sig.$self->ac_label )
980 unless $self->has_path( $source, $target, $sig );
984 $wit->clear_uncorrected_path;
987 sub calculate_ranks {
989 # Walk a version of the graph where every node linked by a relationship
990 # edge is fundamentally the same node, and do a topological ranking on
991 # the nodes in this graph.
992 my $topo_graph = Graph->new();
996 foreach my $r ( $self->readings ) {
997 next if exists $rel_containers{$r->id};
998 my @rels = $r->related_readings( 'colocated' );
1000 # Make a relationship container.
1002 my $rn = 'rel_container_' . $rel_ctr++;
1003 $topo_graph->add_vertex( $rn );
1005 $rel_containers{$_->id} = $rn;
1008 # Add a new node to mirror the old node.
1009 $rel_containers{$r->id} = $r->id;
1010 $topo_graph->add_vertex( $r->id );
1015 foreach my $r ( $self->readings ) {
1016 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1017 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1018 $rel_containers{$n} );
1019 $DB::single = 1 unless $tfrom && $tto;
1020 $topo_graph->add_edge( $tfrom, $tto );
1024 # Now do the rankings, starting with the start node.
1025 my $topo_start = $rel_containers{$self->start->id};
1026 my $node_ranks = { $topo_start => 0 };
1027 my @curr_origin = ( $topo_start );
1028 # A little iterative function.
1029 while( @curr_origin ) {
1030 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
1032 # Transfer our rankings from the topological graph to the real one.
1033 foreach my $r ( $self->readings ) {
1034 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1035 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
1038 die "No rank calculated for node " . $r->id
1039 . " - do you have a cycle in the graph?";
1045 my( $graph, $node_ranks, @current_nodes ) = @_;
1046 # Look at each of the children of @current_nodes. If all the child's
1047 # parents have a rank, assign it the highest rank + 1 and add it to
1048 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1049 # parent gets a rank.
1051 foreach my $c ( @current_nodes ) {
1052 warn "Current reading $c has no rank!"
1053 unless exists $node_ranks->{$c};
1054 # print STDERR "Looking at child of node $c, rank "
1055 # . $node_ranks->{$c} . "\n";
1056 foreach my $child ( $graph->successors( $c ) ) {
1057 next if exists $node_ranks->{$child};
1058 my $highest_rank = -1;
1060 foreach my $parent ( $graph->predecessors( $child ) ) {
1061 if( exists $node_ranks->{$parent} ) {
1062 $highest_rank = $node_ranks->{$parent}
1063 if $highest_rank <= $node_ranks->{$parent};
1070 my $c_rank = $highest_rank + 1;
1071 # print STDERR "Assigning rank $c_rank to node $child \n";
1072 $node_ranks->{$child} = $c_rank;
1073 push( @next_nodes, $child );
1079 # Another method to make up for rough collation methods. If the same reading
1080 # appears multiple times at the same rank, collapse the nodes.
1083 my %unique_rank_rdg;
1084 foreach my $rdg ( $self->readings ) {
1085 next unless $rdg->has_rank;
1086 my $key = $rdg->rank . "||" . $rdg->text;
1087 if( exists $unique_rank_rdg{$key} ) {
1089 # print STDERR "Combining readings at same rank: $key\n";
1090 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1092 $unique_rank_rdg{$key} = $rdg;
1098 ## Utility functions
1100 # Return the string that joins together a list of witnesses for
1101 # display on a single path.
1102 sub witnesses_of_label {
1103 my( $self, $label ) = @_;
1104 my $regex = $self->wit_list_separator;
1105 my @answer = split( /\Q$regex\E/, $label );
1110 __PACKAGE__->meta->make_immutable;
1116 =item * Think about making Relationship objects again