restore total count; get rid of benchmarking
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
c9bf3dbf 5use Graph;
8e1394aa 6use IPC::Run qw( run binary );
910a0a6d 7use Text::CSV_XS;
b15511bf 8use Text::Tradition::Collation::Reading;
22222af9 9use Text::Tradition::Collation::RelationshipStore;
df6d9812 10use XML::LibXML;
dd3b58b0 11use Moose;
12
3a2ebbf4 13has 'sequence' => (
d047cd52 14 is => 'ro',
3a2ebbf4 15 isa => 'Graph',
16 default => sub { Graph->new() },
d047cd52 17 handles => {
3a2ebbf4 18 paths => 'edges',
d047cd52 19 },
d047cd52 20 );
3a2ebbf4 21
22has 'relations' => (
23 is => 'ro',
22222af9 24 isa => 'Text::Tradition::Collation::RelationshipStore',
25 handles => {
26 relationships => 'relationships',
27 related_readings => 'related_readings',
28 },
29 writer => '_set_relations',
3a2ebbf4 30 );
dd3b58b0 31
3a2ebbf4 32has 'tradition' => (
33 is => 'ro',
d047cd52 34 isa => 'Text::Tradition',
8d9a1cd8 35 weak_ref => 1,
d047cd52 36 );
dd3b58b0 37
3a2ebbf4 38has 'readings' => (
39 isa => 'HashRef[Text::Tradition::Collation::Reading]',
40 traits => ['Hash'],
41 handles => {
42 reading => 'get',
43 _add_reading => 'set',
44 del_reading => 'delete',
45 has_reading => 'exists',
46 readings => 'values',
47 },
48 default => sub { {} },
49 );
910a0a6d 50
4a8828f0 51has 'wit_list_separator' => (
7854e12e 52 is => 'rw',
53 isa => 'Str',
54 default => ', ',
55 );
56
57has 'baselabel' => (
58 is => 'rw',
59 isa => 'Str',
60 default => 'base text',
61 );
4a8828f0 62
15d2d3df 63has 'linear' => (
64 is => 'rw',
65 isa => 'Bool',
66 default => 1,
67 );
c84275ff 68
ef9d481f 69has 'ac_label' => (
70 is => 'rw',
71 isa => 'Str',
72 default => ' (a.c.)',
73 );
3a2ebbf4 74
75has 'start' => (
76 is => 'ro',
77 isa => 'Text::Tradition::Collation::Reading',
78 writer => '_set_start',
79 weak_ref => 1,
80 );
81
82has 'end' => (
83 is => 'ro',
84 isa => 'Text::Tradition::Collation::Reading',
85 writer => '_set_end',
86 weak_ref => 1,
87 );
1f563ac3 88
4e5a7b2c 89=head1 NAME
90
91Text::Tradition::Collation - a software model for a text collation
92
93=head1 SYNOPSIS
94
95 use Text::Tradition;
96 my $t = Text::Tradition->new(
97 'name' => 'this is a text',
98 'input' => 'TEI',
99 'file' => '/path/to/tei_parallel_seg_file.xml' );
100
101 my $c = $t->collation;
102 my @readings = $c->readings;
103 my @paths = $c->paths;
104 my @relationships = $c->relationships;
105
106 my $svg_variant_graph = $t->collation->as_svg();
107
108=head1 DESCRIPTION
109
110Text::Tradition is a library for representation and analysis of collated
111texts, particularly medieval ones. The Collation is the central feature of
112a Tradition, where the text, its sequence of readings, and its relationships
113between readings are actually kept.
114
115=head1 CONSTRUCTOR
116
117=head2 new
118
119The constructor. Takes a hash or hashref of the following arguments:
120
121=over
122
123=item * tradition - The Text::Tradition object to which the collation
124belongs. Required.
125
126=item * linear - Whether the collation should be linear; that is, whether
127transposed readings should be treated as two linked readings rather than one,
128and therefore whether the collation graph is acyclic. Defaults to true.
129
4e5a7b2c 130=item * baselabel - The default label for the path taken by a base text
131(if any). Defaults to 'base text'.
132
133=item * wit_list_separator - The string to join a list of witnesses for
134purposes of making labels in display graphs. Defaults to ', '.
135
136=item * ac_label - The extra label to tack onto a witness sigil when
137representing another layer of path for the given witness - that is, when
138a text has more than one possible reading due to scribal corrections or
139the like. Defaults to ' (a.c.)'.
140
141=back
142
143=head1 ACCESSORS
144
145=head2 tradition
146
147=head2 linear
148
4e5a7b2c 149=head2 wit_list_separator
150
151=head2 baselabel
152
153=head2 ac_label
154
155Simple accessors for collation attributes.
156
157=head2 start
158
159The meta-reading at the start of every witness path.
160
161=head2 end
162
163The meta-reading at the end of every witness path.
164
165=head2 readings
166
167Returns all Reading objects in the graph.
168
169=head2 reading( $id )
170
171Returns the Reading object corresponding to the given ID.
172
173=head2 add_reading( $reading_args )
174
175Adds a new reading object to the collation.
176See L<Text::Tradition::Collation::Reading> for the available arguments.
177
178=head2 del_reading( $object_or_id )
179
180Removes the given reading from the collation, implicitly removing its
181paths and relationships.
182
183=head2 merge_readings( $main, $second )
184
185Merges the $second reading into the $main one.
186The arguments may be either readings or reading IDs.
187
188=head2 has_reading( $id )
189
190Predicate to see whether a given reading ID is in the graph.
191
192=head2 reading_witnesses( $object_or_id )
193
194Returns a list of sigils whose witnesses contain the reading.
195
196=head2 paths
197
198Returns all reading paths within the document - that is, all edges in the
199collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
200
201=head2 add_path( $source, $target, $sigil )
202
203Links the given readings in the collation in sequence, under the given witness
204sigil. The readings may be specified by object or ID.
205
206=head2 del_path( $source, $target, $sigil )
207
208Links the given readings in the collation in sequence, under the given witness
209sigil. The readings may be specified by object or ID.
210
211=head2 has_path( $source, $target );
212
213Returns true if the two readings are linked in sequence in any witness.
214The readings may be specified by object or ID.
215
216=head2 relationships
217
218Returns all Relationship objects in the collation.
219
220=head2 add_relationship( $reading, $other_reading, $options )
221
222Adds a new relationship of the type given in $options between the two readings,
223which may be specified by object or ID. Returns a value of ( $status, @vectors)
224where $status is true on success, and @vectors is a list of relationship edges
225that were ultimately added.
226See L<Text::Tradition::Collation::Relationship> for the available options.
227
228=cut
dd3b58b0 229
d047cd52 230sub BUILD {
3a2ebbf4 231 my $self = shift;
22222af9 232 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 233 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
234 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 235}
784877d9 236
3a2ebbf4 237### Reading construct/destruct functions
238
239sub add_reading {
240 my( $self, $reading ) = @_;
241 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
242 my %args = %$reading;
243 $reading = Text::Tradition::Collation::Reading->new(
244 'collation' => $self,
245 %args );
246 }
247 # First check to see if a reading with this ID exists.
248 if( $self->reading( $reading->id ) ) {
249 warn "Collation already has a reading with id " . $reading->id;
250 return undef;
251 }
252 $self->_add_reading( $reading->id => $reading );
253 # Once the reading has been added, put it in both graphs.
254 $self->sequence->add_vertex( $reading->id );
22222af9 255 $self->relations->add_reading( $reading->id );
3a2ebbf4 256 return $reading;
eca16057 257};
258
3a2ebbf4 259around del_reading => sub {
260 my $orig = shift;
261 my $self = shift;
262 my $arg = shift;
263
264 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
265 $arg = $arg->id;
266 }
3a2ebbf4 267 # Remove the reading from the graphs.
268 $self->sequence->delete_vertex( $arg );
22222af9 269 $self->relations->delete_reading( $arg );
3a2ebbf4 270
271 # Carry on.
272 $self->$orig( $arg );
273};
7854e12e 274
3a2ebbf4 275# merge_readings( $main, $to_be_deleted );
7854e12e 276
3a2ebbf4 277sub merge_readings {
278 my $self = shift;
279
280 # We only need the IDs for adding paths to the graph, not the reading
281 # objects themselves.
49d4f2ac 282 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
3a2ebbf4 283
284 # The kept reading should inherit the paths and the relationships
285 # of the deleted reading.
286 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
287 my @vector = ( $kept );
288 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
289 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 290 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 291 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
292 $self->sequence->add_edge( @vector );
293 my $fwits = $self->sequence->get_edge_attributes( @vector );
294 @wits{keys %$fwits} = values %$fwits;
295 $self->sequence->set_edge_attributes( @vector, \%wits );
296 }
22222af9 297 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 298
299 # Do the deletion deed.
49d4f2ac 300 if( $combine_char ) {
301 my $kept_obj = $self->reading( $kept );
302 my $new_text = join( $combine_char, $kept_obj->text,
303 $self->reading( $deleted )->text );
304 $kept_obj->alter_text( $new_text );
305 }
3a2ebbf4 306 $self->del_reading( $deleted );
307}
7854e12e 308
3265b0ce 309
3a2ebbf4 310# Helper function for manipulating the graph.
311sub _stringify_args {
312 my( $self, $first, $second, $arg ) = @_;
313 $first = $first->id
314 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
315 $second = $second->id
316 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
317 return( $first, $second, $arg );
318}
df6d9812 319
4e5a7b2c 320# Helper function for manipulating the graph.
321sub _objectify_args {
322 my( $self, $first, $second, $arg ) = @_;
323 $first = $self->reading( $first )
324 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
325 $second = $self->reading( $second )
326 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
327 return( $first, $second, $arg );
328}
3a2ebbf4 329### Path logic
330
331sub add_path {
332 my $self = shift;
333
334 # We only need the IDs for adding paths to the graph, not the reading
335 # objects themselves.
336 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
337
338 # Connect the readings
339 $self->sequence->add_edge( $source, $target );
340 # Note the witness in question
341 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 342};
343
3a2ebbf4 344sub del_path {
345 my $self = shift;
49d4f2ac 346 my @args;
347 if( ref( $_[0] ) eq 'ARRAY' ) {
348 my $e = shift @_;
349 @args = ( @$e, @_ );
350 } else {
351 @args = @_;
352 }
3a2ebbf4 353
354 # We only need the IDs for adding paths to the graph, not the reading
355 # objects themselves.
49d4f2ac 356 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 357
358 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 359 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 360 }
361 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
362 $self->sequence->delete_edge( $source, $target );
363 }
784877d9 364}
365
3a2ebbf4 366
15d2d3df 367# Extra graph-alike utility
368sub has_path {
3a2ebbf4 369 my $self = shift;
370 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
371 return undef unless $self->sequence->has_edge( $source, $target );
372 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 373}
374
4e5a7b2c 375=head2 clear_witness( @sigil_list )
3a2ebbf4 376
4e5a7b2c 377Clear the given witnesses out of the collation entirely, removing references
378to them in paths, and removing readings that belong only to them. Should only
379be called via $tradition->del_witness.
3a2ebbf4 380
381=cut
382
4e5a7b2c 383sub clear_witness {
384 my( $self, @sigils ) = @_;
385
386 # Clear the witness(es) out of the paths
387 foreach my $e ( $self->paths ) {
388 foreach my $sig ( @sigils ) {
389 $self->del_path( $e, $sig );
390 }
391 }
392
393 # Clear out the newly unused readings
394 foreach my $r ( $self->readings ) {
395 unless( $self->reading_witnesses( $r ) ) {
396 $self->del_reading( $r );
397 }
398 }
399}
3a2ebbf4 400
401sub add_relationship {
402 my $self = shift;
22222af9 403 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
64ae6270 404 my( $ret, @vectors ) = $self->relations->add_relationship( $source,
405 $self->reading( $source ), $target, $self->reading( $target ), $opts );
406 # Force a full rank recalculation every time. Yuck.
407 $self->calculate_ranks() if $ret && $self->end->has_rank;
408 return( $ret, @vectors );
22222af9 409}
ef9d481f 410
22222af9 411=head2 reading_witnesses( $reading )
910a0a6d 412
22222af9 413Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 414
22222af9 415=cut
1d310495 416
1d310495 417sub reading_witnesses {
418 my( $self, $reading ) = @_;
419 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 420 # arbitrarily chosen "incoming". Thus, special-case the start node.
421 if( $reading eq $self->start ) {
422 return map { $_->sigil } $self->tradition->witnesses;
423 }
1d310495 424 my %all_witnesses;
425 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
426 my $wits = $self->sequence->get_edge_attributes( @$e );
427 @all_witnesses{ keys %$wits } = 1;
428 }
429 return keys %all_witnesses;
910a0a6d 430}
431
4e5a7b2c 432=head1 OUTPUT METHODS
8e1394aa 433
4e5a7b2c 434=head2 as_svg
8e1394aa 435
0068967c 436Returns an SVG string that represents the graph, via as_dot and graphviz.
8e1394aa 437
438=cut
439
440sub as_svg {
3a2ebbf4 441 my( $self ) = @_;
442
8e1394aa 443 my @cmd = qw/dot -Tsvg/;
444 my( $svg, $err );
910a0a6d 445 my $dotfile = File::Temp->new();
d9e873d0 446 ## TODO REMOVE
eca16057 447 # $dotfile->unlink_on_destroy(0);
910a0a6d 448 binmode $dotfile, ':utf8';
449 print $dotfile $self->as_dot();
450 push( @cmd, $dotfile->filename );
451 run( \@cmd, ">", binary(), \$svg );
452 $svg = decode_utf8( $svg );
8e1394aa 453 return $svg;
454}
455
4e5a7b2c 456=head2 svg_subgraph( $from, $to )
b22576c6 457
458Returns an SVG string that represents the portion of the graph given by the
459specified range. The $from and $to variables refer to ranks within the graph.
460
461=cut
462
463sub svg_subgraph {
464 my( $self, $from, $to ) = @_;
465
466 my $dot = $self->as_dot( $from, $to );
467 unless( $dot ) {
468 warn "Could not output a graph with range $from - $to";
469 return;
470 }
471
472 my @cmd = qw/dot -Tsvg/;
473 my( $svg, $err );
474 my $dotfile = File::Temp->new();
475 ## TODO REMOVE
476 # $dotfile->unlink_on_destroy(0);
477 binmode $dotfile, ':utf8';
478 print $dotfile $dot;
479 push( @cmd, $dotfile->filename );
480 run( \@cmd, ">", binary(), \$svg );
481 $svg = decode_utf8( $svg );
482 return $svg;
483}
484
485
4e5a7b2c 486=head2 as_dot( $from, $to )
df6d9812 487
488Returns a string that is the collation graph expressed in dot
4e5a7b2c 489(i.e. GraphViz) format. If $from or $to is passed, as_dot creates
490a subgraph rather than the entire graph.
df6d9812 491
492=cut
493
494sub as_dot {
b22576c6 495 my( $self, $startrank, $endrank ) = @_;
496
497 # Check the arguments
498 if( $startrank ) {
499 return if $endrank && $startrank > $endrank;
500 return if $startrank > $self->end->rank;
501 }
502 if( defined $endrank ) {
503 return if $endrank < 0;
f1b3b33a 504 $endrank = undef if $endrank == $self->end->rank;
b22576c6 505 }
506
df6d9812 507 # TODO consider making some of these things configurable
67da8d6c 508 my $graph_name = $self->tradition->name;
509 $graph_name =~ s/[^\w\s]//g;
510 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 511
512 my %graph_attrs = (
513 'rankdir' => 'LR',
514 'bgcolor' => 'none',
515 );
516 my %node_attrs = (
517 'fontsize' => 11,
518 'fillcolor' => 'white',
519 'style' => 'filled',
520 'shape' => 'ellipse'
521 );
522 my %edge_attrs = (
523 'arrowhead' => 'open',
524 'color' => '#000000',
525 'fontcolor' => '#000000',
526 );
527
67da8d6c 528 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 529 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
530 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 531
b22576c6 532 # Output substitute start/end readings if necessary
533 if( $startrank ) {
534 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
535 }
536 if( $endrank ) {
537 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
538 }
539 my %used; # Keep track of the readings that actually appear in the graph
df6d9812 540 foreach my $reading ( $self->readings ) {
b22576c6 541 # Only output readings within our rank range.
542 next if $startrank && $reading->rank < $startrank;
543 next if $endrank && $reading->rank > $endrank;
544 $used{$reading->id} = 1;
910a0a6d 545 # Need not output nodes without separate labels
3a2ebbf4 546 next if $reading->id eq $reading->text;
30f0df34 547 my $label = $reading->text;
8f9cab7b 548 $label =~ s/\"/\\\"/g;
549 $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
df6d9812 550 }
3a2ebbf4 551
b22576c6 552 # Add the real edges
553 my @edges = $self->paths;
3bdec618 554 my( %substart, %subend );
b22576c6 555 foreach my $edge ( @edges ) {
556 # Do we need to output this edge?
557 if( $used{$edge->[0]} && $used{$edge->[1]} ) {;
f13b5582 558 my $label = $self->path_display_label( $self->path_witnesses( $edge ) );
559 my $variables = { %edge_attrs, 'label' => $label };
b22576c6 560 # Account for the rank gap if necessary
b0b4421a 561 if( $self->reading( $edge->[1] )->has_rank
562 && $self->reading( $edge->[0] )->has_rank
563 && $self->reading( $edge->[1] )->rank
564 - $self->reading( $edge->[0] )->rank > 1 ) {
565 $variables->{'minlen'} = $self->reading( $edge->[1] )->rank
b22576c6 566 - $self->reading( $edge->[0] )->rank;
b0b4421a 567 }
f13b5582 568 my $varopts = _dot_attr_string( $variables );
569 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
570 $edge->[0], $edge->[1], $varopts );
3bdec618 571 } elsif( $used{$edge->[0]} ) {
572 $subend{$edge->[0]} = 1;
573 } elsif( $used{$edge->[1]} ) {
574 $substart{$edge->[1]} = 1;
b22576c6 575 }
df6d9812 576 }
3bdec618 577 # Add substitute start and end edges if necessary
578 foreach my $node ( keys %substart ) {
579 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 580 my $variables = { %edge_attrs, 'label' => $witstr };
581 my $varopts = _dot_attr_string( $variables );
582 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 583 }
584 foreach my $node ( keys %subend ) {
585 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 586 my $variables = { %edge_attrs, 'label' => $witstr };
587 my $varopts = _dot_attr_string( $variables );
588 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 589 }
590
df6d9812 591 $dot .= "}\n";
592 return $dot;
593}
594
f13b5582 595sub _dot_attr_string {
596 my( $hash ) = @_;
597 my @attrs;
598 foreach my $k ( sort keys %$hash ) {
599 my $v = $hash->{$k};
600 push( @attrs, $k.'="'.$v.'"' );
601 }
602 return( '[ ' . join( ', ', @attrs ) . ' ]' );
603}
604
3a2ebbf4 605sub path_witnesses {
606 my( $self, @edge ) = @_;
607 # If edge is an arrayref, cope.
608 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
609 my $e = shift @edge;
610 @edge = @$e;
611 }
612 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
613 return sort @wits;
614}
615
8f9cab7b 616sub path_display_label {
3bdec618 617 my( $self, @wits ) = @_;
8f9cab7b 618 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
619 if( scalar @wits > $maj ) {
f13b5582 620 # TODO break out a.c. wits
8f9cab7b 621 return 'majority';
622 } else {
623 return join( ', ', @wits );
624 }
625}
626
627
4e5a7b2c 628=head2 as_graphml
8e1394aa 629
4e5a7b2c 630Returns a GraphML representation of the collation. The GraphML will contain
631two graphs. The first expresses the attributes of the readings and the witness
632paths that link them; the second expresses the relationships that link the
633readings. This is the native transfer format for a tradition.
8e1394aa 634
56eefa04 635=begin testing
636
637use Text::Tradition;
638
639my $READINGS = 311;
640my $PATHS = 361;
641
642my $datafile = 't/data/florilegium_tei_ps.xml';
643my $tradition = Text::Tradition->new( 'input' => 'TEI',
644 'name' => 'test0',
645 'file' => $datafile,
646 'linear' => 1 );
647
648ok( $tradition, "Got a tradition object" );
649is( scalar $tradition->witnesses, 13, "Found all witnesses" );
650ok( $tradition->collation, "Tradition has a collation" );
651
652my $c = $tradition->collation;
653is( scalar $c->readings, $READINGS, "Collation has all readings" );
654is( scalar $c->paths, $PATHS, "Collation has all paths" );
655is( scalar $c->relationships, 0, "Collation has all relationships" );
656
657# Add a few relationships
658$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
659$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
660$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
661
662# Now write it to GraphML and parse it again.
663
664my $graphml = $c->as_graphml;
665my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
666is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
667is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
668is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
669
670=end testing
671
8e1394aa 672=cut
673
674sub as_graphml {
3a2ebbf4 675 my( $self ) = @_;
8e1394aa 676
677 # Some namespaces
678 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
679 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
680 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 681 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 682
683 # Create the document and root node
684 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
685 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
686 $graphml->setDocumentElement( $root );
687 $root->setNamespace( $xsi_ns, 'xsi', 0 );
688 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
689
e309421a 690 # Add the data keys for the graph
691 my %graph_data_keys;
692 my $gdi = 0;
1d310495 693 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 694 foreach my $datum ( @graph_attributes ) {
695 $graph_data_keys{$datum} = 'dg'.$gdi++;
696 my $key = $root->addNewChild( $graphml_ns, 'key' );
697 $key->setAttribute( 'attr.name', $datum );
698 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
699 $key->setAttribute( 'for', 'graph' );
700 $key->setAttribute( 'id', $graph_data_keys{$datum} );
701 }
f6066bac 702
8e1394aa 703 # Add the data keys for nodes
ef9d481f 704 my %node_data_keys;
705 my $ndi = 0;
3a2ebbf4 706 my %node_data = (
707 id => 'string',
255875b8 708 text => 'string',
3a2ebbf4 709 rank => 'string',
710 is_start => 'boolean',
711 is_end => 'boolean',
712 is_lacuna => 'boolean',
713 );
714 foreach my $datum ( keys %node_data ) {
910a0a6d 715 $node_data_keys{$datum} = 'dn'.$ndi++;
716 my $key = $root->addNewChild( $graphml_ns, 'key' );
717 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 718 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 719 $key->setAttribute( 'for', 'node' );
720 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 721 }
722
df6d9812 723 # Add the data keys for edges, i.e. witnesses
ef9d481f 724 my $edi = 0;
725 my %edge_data_keys;
3a2ebbf4 726 my %edge_data = (
f523c7a8 727 class => 'string', # Class, deprecated soon
3a2ebbf4 728 witness => 'string', # ID/label for a path
729 relationship => 'string', # ID/label for a relationship
730 extra => 'boolean', # Path key
c84275ff 731 scope => 'string', # Relationship key
3a2ebbf4 732 non_correctable => 'boolean', # Relationship key
733 non_independent => 'boolean', # Relationship key
734 );
735 foreach my $datum ( keys %edge_data ) {
736 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 737 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 738 $key->setAttribute( 'attr.name', $datum );
739 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 740 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 741 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 742 }
3a2ebbf4 743
22222af9 744 # Add the collation graph itself
2c669bca 745 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
746 $sgraph->setAttribute( 'edgedefault', 'directed' );
747 $sgraph->setAttribute( 'id', $self->tradition->name );
748 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
749 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
750 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
751 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
752 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 753
e309421a 754 # Collation attribute data
755 foreach my $datum ( @graph_attributes ) {
2c669bca 756 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
757 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 758 }
8e1394aa 759
760 my $node_ctr = 0;
761 my %node_hash;
22222af9 762 # Add our readings to the graph
3a2ebbf4 763 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 764 # Add to the main graph
765 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 766 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 767 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 768 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 769 foreach my $d ( keys %node_data ) {
770 my $nval = $n->$d;
771 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
772 if defined $nval;
773 }
b15511bf 774 }
775
2c669bca 776 # Add the path edges to the sequence graph
df6d9812 777 my $edge_ctr = 0;
3a2ebbf4 778 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
779 # We add an edge in the graphml for every witness in $e.
780 foreach my $wit ( $self->path_witnesses( $e ) ) {
781 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
782 $node_hash{ $e->[0] },
783 $node_hash{ $e->[1] } );
2c669bca 784 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 785 $edge_el->setAttribute( 'source', $from );
786 $edge_el->setAttribute( 'target', $to );
787 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 788
789 # It's a witness path, so add the witness
790 my $base = $wit;
791 my $key = $edge_data_keys{'witness'};
792 # Is this an ante-corr witness?
793 my $aclabel = $self->ac_label;
794 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
795 # Keep the base witness
796 $base = $1;
797 # ...and record that this is an 'extra' reading path
798 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
799 }
800 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 801 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 802 }
803 }
804
22222af9 805 # Add the relationship graph to the XML
2626f709 806 $self->relations->as_graphml( $graphml_ns, $root, \%node_hash,
807 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 808
94c00c71 809 # Save and return the thing
810 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 811 return $result;
df6d9812 812}
813
b15511bf 814sub _add_graphml_data {
815 my( $el, $key, $value ) = @_;
b15511bf 816 return unless defined $value;
c9bf3dbf 817 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 818 $data_el->setAttribute( 'key', $key );
819 $data_el->appendText( $value );
8e1394aa 820}
821
4e5a7b2c 822=head2 as_csv
910a0a6d 823
824Returns a CSV alignment table representation of the collation graph, one
2c669bca 825row per witness (or witness uncorrected.)
910a0a6d 826
827=cut
828
829sub as_csv {
3a2ebbf4 830 my( $self ) = @_;
910a0a6d 831 my $table = $self->make_alignment_table;
832 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
833 my @result;
2c669bca 834 # Make the header row
835 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
836 push( @result, decode_utf8( $csv->string ) );
837 # Make the rest of the rows
838 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 839 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
2c669bca 840 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
841 $csv->combine( @row );
910a0a6d 842 push( @result, decode_utf8( $csv->string ) );
843 }
3a2ebbf4 844 return join( "\n", @result );
910a0a6d 845}
846
4e5a7b2c 847=head2 make_alignment_table( $use_refs, $include_witnesses )
2c669bca 848
566f4595 849Return a reference to an alignment table, in a slightly enhanced CollateX
850format which looks like this:
851
852 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 853 tokens => [ { t => "TEXT" }, ... ] },
566f4595 854 { witness => "SIG2",
4e5a7b2c 855 tokens => [ { t => "TEXT" }, ... ] },
566f4595 856 ... ],
857 length => TEXTLEN };
858
859If $use_refs is set to 1, the reading object is returned in the table
860instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 861
862If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 863keys have a true hash value will be included.
2c669bca 864
865=cut
9f3ba6f7 866
910a0a6d 867sub make_alignment_table {
08e0fb85 868 my( $self, $noderefs, $include ) = @_;
910a0a6d 869 unless( $self->linear ) {
870 warn "Need a linear graph in order to make an alignment table";
871 return;
872 }
2c669bca 873 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 874 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 875 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2c669bca 876 if( $include ) {
566f4595 877 next unless $include->{$wit->sigil};
2c669bca 878 }
eca16057 879 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 880 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
881 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
2c669bca 882 push( @{$table->{'alignment'}},
883 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 884 if( $wit->is_layered ) {
885 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 886 $wit->sigil.$self->ac_label );
1f7aa795 887 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
2c669bca 888 push( @{$table->{'alignment'}},
889 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 890 }
891 }
2c669bca 892 return $table;
910a0a6d 893}
894
895sub _make_witness_row {
0e476982 896 my( $path, $positions, $noderefs ) = @_;
910a0a6d 897 my %char_hash;
898 map { $char_hash{$_} = undef } @$positions;
2c669bca 899 my $debug = 0;
910a0a6d 900 foreach my $rdg ( @$path ) {
eca16057 901 my $rtext = $rdg->text;
902 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 903 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 904 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
2c669bca 905 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
906 : { 't' => $rtext };
910a0a6d 907 }
908 my @row = map { $char_hash{$_} } @$positions;
eca16057 909 # Fill in lacuna markers for undef spots in the row
910 my $last_el = shift @row;
911 my @filled_row = ( $last_el );
912 foreach my $el ( @row ) {
0e476982 913 # If we are using node reference, make the lacuna node appear many times
914 # in the table. If not, use the lacuna tag.
915 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
566f4595 916 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
eca16057 917 }
918 push( @filled_row, $el );
919 $last_el = $el;
920 }
921 return @filled_row;
910a0a6d 922}
923
0e476982 924# Tiny utility function to say if a table element is a lacuna
925sub _el_is_lacuna {
926 my $el = shift;
2c669bca 927 return 1 if $el->{'t'} eq '#LACUNA#';
928 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
929 && $el->{'t'}->is_lacuna;
0e476982 930 return 0;
931}
932
910a0a6d 933# Helper to turn the witnesses along columns rather than rows. Assumes
934# equal-sized rows.
935sub _turn_table {
936 my( $table ) = @_;
937 my $result = [];
938 return $result unless scalar @$table;
939 my $nrows = scalar @{$table->[0]};
940 foreach my $idx ( 0 .. $nrows - 1 ) {
941 foreach my $wit ( 0 .. $#{$table} ) {
942 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
943 }
944 }
945 return $result;
946}
947
4e5a7b2c 948=head1 NAVIGATION METHODS
910a0a6d 949
4e5a7b2c 950=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 951
952Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 953with $last, for the witness given in $sigil. If a $backup sigil is
954specified (e.g. when walking a layered witness), it will be used wherever
955no $sigil path exists. If there is a base text reading, that will be
956used wherever no path exists for $sigil or $backup.
e2902068 957
958=cut
959
910a0a6d 960# TODO Think about returning some lazy-eval iterator.
b0b4421a 961# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 962
e2902068 963sub reading_sequence {
861c3e27 964 my( $self, $start, $end, $witness ) = @_;
e2902068 965
930ff666 966 $witness = $self->baselabel unless $witness;
e2902068 967 my @readings = ( $start );
968 my %seen;
969 my $n = $start;
3a2ebbf4 970 while( $n && $n->id ne $end->id ) {
971 if( exists( $seen{$n->id} ) ) {
972 warn "Detected loop at " . $n->id;
910a0a6d 973 last;
974 }
3a2ebbf4 975 $seen{$n->id} = 1;
910a0a6d 976
861c3e27 977 my $next = $self->next_reading( $n, $witness );
44771cf2 978 unless( $next ) {
3a2ebbf4 979 warn "Did not find any path for $witness from reading " . $n->id;
44771cf2 980 last;
981 }
910a0a6d 982 push( @readings, $next );
983 $n = $next;
e2902068 984 }
985 # Check that the last reading is our end reading.
986 my $last = $readings[$#readings];
3a2ebbf4 987 warn "Last reading found from " . $start->text .
910a0a6d 988 " for witness $witness is not the end!"
3a2ebbf4 989 unless $last->id eq $end->id;
e2902068 990
991 return @readings;
992}
993
4e5a7b2c 994=head2 next_reading( $reading, $sigil );
8e1394aa 995
4a8828f0 996Returns the reading that follows the given reading along the given witness
930ff666 997path.
8e1394aa 998
999=cut
1000
4a8828f0 1001sub next_reading {
e2902068 1002 # Return the successor via the corresponding path.
8e1394aa 1003 my $self = shift;
3a2ebbf4 1004 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1005 return undef unless $answer;
3a2ebbf4 1006 return $self->reading( $answer );
8e1394aa 1007}
1008
4e5a7b2c 1009=head2 prior_reading( $reading, $sigil )
8e1394aa 1010
4a8828f0 1011Returns the reading that precedes the given reading along the given witness
930ff666 1012path.
8e1394aa 1013
1014=cut
1015
4a8828f0 1016sub prior_reading {
e2902068 1017 # Return the predecessor via the corresponding path.
8e1394aa 1018 my $self = shift;
3a2ebbf4 1019 my $answer = $self->_find_linked_reading( 'prior', @_ );
1020 return $self->reading( $answer );
8e1394aa 1021}
1022
4a8828f0 1023sub _find_linked_reading {
861c3e27 1024 my( $self, $direction, $node, $path ) = @_;
1025
1026 # Get a backup if we are dealing with a layered witness
1027 my $alt_path;
1028 my $aclabel = $self->ac_label;
1029 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1030 $alt_path = $1;
1031 }
1032
e2902068 1033 my @linked_paths = $direction eq 'next'
3a2ebbf4 1034 ? $self->sequence->edges_from( $node )
1035 : $self->sequence->edges_to( $node );
e2902068 1036 return undef unless scalar( @linked_paths );
8e1394aa 1037
e2902068 1038 # We have to find the linked path that contains all of the
1039 # witnesses supplied in $path.
1040 my( @path_wits, @alt_path_wits );
4e5a7b2c 1041 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1042 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1043 my $base_le;
1044 my $alt_le;
1045 foreach my $le ( @linked_paths ) {
3a2ebbf4 1046 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1047 $base_le = $le;
910a0a6d 1048 }
3a2ebbf4 1049 my @le_wits = $self->path_witnesses( $le );
1050 if( _is_within( \@path_wits, \@le_wits ) ) {
1051 # This is the right path.
1052 return $direction eq 'next' ? $le->[1] : $le->[0];
1053 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1054 $alt_le = $le;
1055 }
8e1394aa 1056 }
e2902068 1057 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1058 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1059 if $alt_le;
e2902068 1060
1061 # Got this far? Return the base path if it exists.
3a2ebbf4 1062 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1063 if $base_le;
e2902068 1064
1065 # Got this far? We have no appropriate path.
2c669bca 1066 warn "Could not find $direction node from " . $node->id
910a0a6d 1067 . " along path $path";
8e1394aa 1068 return undef;
1069}
1070
4a8828f0 1071# Some set logic.
1072sub _is_within {
1073 my( $set1, $set2 ) = @_;
7854e12e 1074 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1075 foreach my $el ( @$set1 ) {
910a0a6d 1076 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1077 }
1078 return $ret;
1079}
1080
4e5a7b2c 1081# Return the string that joins together a list of witnesses for
1082# display on a single path.
1083sub _witnesses_of_label {
1084 my( $self, $label ) = @_;
1085 my $regex = $self->wit_list_separator;
1086 my @answer = split( /\Q$regex\E/, $label );
1087 return @answer;
b0b4421a 1088}
1089
1090=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1091
1092Returns the text of a witness (plus its backup, if we are using a layer)
1093as stored in the collation. The text is returned as a string, where the
1094individual readings are joined with spaces and the meta-readings (e.g.
1095lacunae) are omitted. Optional specification of $start and $end allows
1096the generation of a subset of the witness text.
4e5a7b2c 1097
b0b4421a 1098=cut
1099
1100sub path_text {
861c3e27 1101 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1102 $start = $self->start unless $start;
1103 $end = $self->end unless $end;
861c3e27 1104 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
b0b4421a 1105 return join( ' ', map { $_->text } @path );
1106}
4e5a7b2c 1107
1108=head1 INITIALIZATION METHODS
1109
1110These are mostly for use by parsers.
1111
1112=head2 make_witness_path( $witness )
1113
1114Link the array of readings contained in $witness->path (and in
1115$witness->uncorrected_path if it exists) into collation paths.
1116Clear out the arrays when finished.
de51424a 1117
4e5a7b2c 1118=head2 make_witness_paths
1119
1120Call make_witness_path for all witnesses in the tradition.
1121
1122=cut
930ff666 1123
7e450e44 1124# For use when a collation is constructed from a base text and an apparatus.
1125# We have the sequences of readings and just need to add path edges.
1f7aa795 1126# When we are done, clear out the witness path attributes, as they are no
1127# longer needed.
1128# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1129
6a222840 1130sub make_witness_paths {
1131 my( $self ) = @_;
910a0a6d 1132 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1133 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1134 $self->make_witness_path( $wit );
7854e12e 1135 }
7854e12e 1136}
1137
6a222840 1138sub make_witness_path {
7854e12e 1139 my( $self, $wit ) = @_;
1140 my @chain = @{$wit->path};
15d2d3df 1141 my $sig = $wit->sigil;
7854e12e 1142 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1143 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1144 }
1f7aa795 1145 if( $wit->is_layered ) {
d9e873d0 1146 @chain = @{$wit->uncorrected_path};
1147 foreach my $idx( 0 .. $#chain-1 ) {
1148 my $source = $chain[$idx];
1149 my $target = $chain[$idx+1];
1150 $self->add_path( $source, $target, $sig.$self->ac_label )
1151 unless $self->has_path( $source, $target, $sig );
1152 }
15d2d3df 1153 }
1f7aa795 1154 $wit->clear_path;
1155 $wit->clear_uncorrected_path;
e2902068 1156}
1157
4e5a7b2c 1158=head2 calculate_ranks
1159
1160Calculate the reading ranks (that is, their aligned positions relative
1161to each other) for the graph. This can only be called on linear collations.
1162
1163=cut
1164
910a0a6d 1165sub calculate_ranks {
1166 my $self = shift;
1167 # Walk a version of the graph where every node linked by a relationship
1168 # edge is fundamentally the same node, and do a topological ranking on
1169 # the nodes in this graph.
c9bf3dbf 1170 my $topo_graph = Graph->new();
910a0a6d 1171 my %rel_containers;
1172 my $rel_ctr = 0;
1173 # Add the nodes
1174 foreach my $r ( $self->readings ) {
3a2ebbf4 1175 next if exists $rel_containers{$r->id};
910a0a6d 1176 my @rels = $r->related_readings( 'colocated' );
1177 if( @rels ) {
1178 # Make a relationship container.
1179 push( @rels, $r );
c9bf3dbf 1180 my $rn = 'rel_container_' . $rel_ctr++;
1181 $topo_graph->add_vertex( $rn );
910a0a6d 1182 foreach( @rels ) {
3a2ebbf4 1183 $rel_containers{$_->id} = $rn;
910a0a6d 1184 }
1185 } else {
1186 # Add a new node to mirror the old node.
3a2ebbf4 1187 $rel_containers{$r->id} = $r->id;
1188 $topo_graph->add_vertex( $r->id );
910a0a6d 1189 }
4a8828f0 1190 }
3a1f2523 1191
3a2ebbf4 1192 # Add the edges.
910a0a6d 1193 foreach my $r ( $self->readings ) {
3a2ebbf4 1194 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1195 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1196 $rel_containers{$n} );
4e5a7b2c 1197 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1198 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1199 }
1200 }
1201
1202 # Now do the rankings, starting with the start node.
3a2ebbf4 1203 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1204 my $node_ranks = { $topo_start => 0 };
910a0a6d 1205 my @curr_origin = ( $topo_start );
1206 # A little iterative function.
1207 while( @curr_origin ) {
c9bf3dbf 1208 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1209 }
1210 # Transfer our rankings from the topological graph to the real one.
1211 foreach my $r ( $self->readings ) {
3a2ebbf4 1212 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1213 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1214 } else {
3a2ebbf4 1215 die "No rank calculated for node " . $r->id
67da8d6c 1216 . " - do you have a cycle in the graph?";
1217 }
de51424a 1218 }
8e1394aa 1219}
3a1f2523 1220
910a0a6d 1221sub _assign_rank {
c9bf3dbf 1222 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1223 # Look at each of the children of @current_nodes. If all the child's
1224 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1225 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1226 # parent gets a rank.
910a0a6d 1227 my @next_nodes;
1228 foreach my $c ( @current_nodes ) {
c9bf3dbf 1229 warn "Current reading $c has no rank!"
1230 unless exists $node_ranks->{$c};
1231 # print STDERR "Looking at child of node $c, rank "
1232 # . $node_ranks->{$c} . "\n";
1233 foreach my $child ( $graph->successors( $c ) ) {
1234 next if exists $node_ranks->{$child};
910a0a6d 1235 my $highest_rank = -1;
1236 my $skip = 0;
c9bf3dbf 1237 foreach my $parent ( $graph->predecessors( $child ) ) {
1238 if( exists $node_ranks->{$parent} ) {
1239 $highest_rank = $node_ranks->{$parent}
1240 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1241 } else {
1242 $skip = 1;
1243 last;
1244 }
1245 }
1246 next if $skip;
c9bf3dbf 1247 my $c_rank = $highest_rank + 1;
1248 # print STDERR "Assigning rank $c_rank to node $child \n";
1249 $node_ranks->{$child} = $c_rank;
910a0a6d 1250 push( @next_nodes, $child );
1251 }
1252 }
1253 return @next_nodes;
4cdd82f1 1254}
910a0a6d 1255
4e5a7b2c 1256=head2 flatten_ranks
1257
1258A convenience method for parsing collation data. Searches the graph for readings
1259with the same text at the same rank, and merges any that are found.
1260
1261=cut
1262
0e476982 1263sub flatten_ranks {
1264 my $self = shift;
1265 my %unique_rank_rdg;
1266 foreach my $rdg ( $self->readings ) {
1267 next unless $rdg->has_rank;
1268 my $key = $rdg->rank . "||" . $rdg->text;
1269 if( exists $unique_rank_rdg{$key} ) {
1270 # Combine!
56eefa04 1271 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1272 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1273 } else {
1274 $unique_rank_rdg{$key} = $rdg;
1275 }
1276 }
1277}
1278
861c3e27 1279=head2 text_from_paths
1280
1281Calculate the text array for all witnesses from the path, for later consistency
1282checking. Only to be used if there is no non-graph-based way to know the
1283original texts.
1284
1285=cut
1286
1287sub text_from_paths {
1288 my $self = shift;
1289 foreach my $wit ( $self->tradition->witnesses ) {
1290 my @text = split( /\s+/,
1291 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1292 $wit->text( \@text );
1293 if( $wit->is_layered ) {
1294 my @uctext = split( /\s+/,
1295 $self->reading_sequence( $self->start, $self->end,
1296 $wit->sigil.$self->ac_label ) );
1297 $wit->text( \@uctext );
1298 }
1299 }
1300}
0e476982 1301
4e5a7b2c 1302=head1 UTILITY FUNCTIONS
1303
1304=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1305
4e5a7b2c 1306Find the last reading that occurs in sequence before both the given readings.
1307
1308=head2 common_successor( $reading_a, $reading_b )
1309
1310Find the first reading that occurs in sequence after both the given readings.
1311
22222af9 1312=begin testing
1313
1314use Text::Tradition;
1315
1316my $cxfile = 't/data/Collatex-16.xml';
1317my $t = Text::Tradition->new(
1318 'name' => 'inline',
1319 'input' => 'CollateX',
1320 'file' => $cxfile,
1321 );
1322my $c = $t->collation;
1323
4e5a7b2c 1324is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1325 'n20', "Found correct common predecessor" );
4e5a7b2c 1326is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1327 '#END#', "Found correct common successor" );
1328
4e5a7b2c 1329is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1330 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1331is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1332 '#END#', "Found correct common successor for readings on same path" );
1333
1334=end testing
1335
1336=cut
1337
1338## Return the closest reading that is a predecessor of both the given readings.
1339sub common_predecessor {
1340 my $self = shift;
4e5a7b2c 1341 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1342 return $self->common_in_path( $r1, $r2, 'predecessors' );
22222af9 1343}
1344
1345sub common_successor {
1346 my $self = shift;
4e5a7b2c 1347 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1348 return $self->common_in_path( $r1, $r2, 'successors' );
22222af9 1349}
1350
1351sub common_in_path {
1352 my( $self, $r1, $r2, $dir ) = @_;
1353 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1354 $iter = $self->end->rank - $iter if $dir eq 'successors';
1355 my @candidates;
1356 my @last_checked = ( $r1, $r2 );
1357 my %all_seen;
1358 while( !@candidates ) {
1359 my @new_lc;
1360 foreach my $lc ( @last_checked ) {
1361 foreach my $p ( $lc->$dir ) {
1362 if( $all_seen{$p->id} ) {
1363 push( @candidates, $p );
1364 } else {
1365 $all_seen{$p->id} = 1;
1366 push( @new_lc, $p );
1367 }
1368 }
1369 }
1370 @last_checked = @new_lc;
1371 }
1372 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1373 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1374}
1375
dd3b58b0 1376no Moose;
1377__PACKAGE__->meta->make_immutable;
e867486f 1378
1379=head1 BUGS / TODO
1380
1381=over
1382
4e5a7b2c 1383=item * Get rid of $backup in reading_sequence
e867486f 1384
1385=back