add exceptions to the rest of the Tradition library
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
c9bf3dbf 5use Graph;
8e1394aa 6use IPC::Run qw( run binary );
910a0a6d 7use Text::CSV_XS;
b15511bf 8use Text::Tradition::Collation::Reading;
22222af9 9use Text::Tradition::Collation::RelationshipStore;
63778331 10use Text::Tradition::Error;
df6d9812 11use XML::LibXML;
dd3b58b0 12use Moose;
13
3a2ebbf4 14has 'sequence' => (
d047cd52 15 is => 'ro',
3a2ebbf4 16 isa => 'Graph',
17 default => sub { Graph->new() },
d047cd52 18 handles => {
3a2ebbf4 19 paths => 'edges',
d047cd52 20 },
d047cd52 21 );
3a2ebbf4 22
23has 'relations' => (
24 is => 'ro',
22222af9 25 isa => 'Text::Tradition::Collation::RelationshipStore',
26 handles => {
27 relationships => 'relationships',
28 related_readings => 'related_readings',
29 },
30 writer => '_set_relations',
3a2ebbf4 31 );
dd3b58b0 32
3a2ebbf4 33has 'tradition' => (
34 is => 'ro',
d047cd52 35 isa => 'Text::Tradition',
8d9a1cd8 36 weak_ref => 1,
d047cd52 37 );
dd3b58b0 38
3a2ebbf4 39has 'readings' => (
40 isa => 'HashRef[Text::Tradition::Collation::Reading]',
41 traits => ['Hash'],
42 handles => {
43 reading => 'get',
44 _add_reading => 'set',
45 del_reading => 'delete',
46 has_reading => 'exists',
47 readings => 'values',
48 },
49 default => sub { {} },
50 );
910a0a6d 51
4a8828f0 52has 'wit_list_separator' => (
7854e12e 53 is => 'rw',
54 isa => 'Str',
55 default => ', ',
56 );
57
58has 'baselabel' => (
59 is => 'rw',
60 isa => 'Str',
61 default => 'base text',
62 );
4a8828f0 63
15d2d3df 64has 'linear' => (
65 is => 'rw',
66 isa => 'Bool',
67 default => 1,
68 );
c84275ff 69
ef9d481f 70has 'ac_label' => (
71 is => 'rw',
72 isa => 'Str',
73 default => ' (a.c.)',
74 );
3a2ebbf4 75
76has 'start' => (
77 is => 'ro',
78 isa => 'Text::Tradition::Collation::Reading',
79 writer => '_set_start',
80 weak_ref => 1,
81 );
82
83has 'end' => (
84 is => 'ro',
85 isa => 'Text::Tradition::Collation::Reading',
86 writer => '_set_end',
87 weak_ref => 1,
88 );
1f563ac3 89
4e5a7b2c 90=head1 NAME
91
92Text::Tradition::Collation - a software model for a text collation
93
94=head1 SYNOPSIS
95
96 use Text::Tradition;
97 my $t = Text::Tradition->new(
98 'name' => 'this is a text',
99 'input' => 'TEI',
100 'file' => '/path/to/tei_parallel_seg_file.xml' );
101
102 my $c = $t->collation;
103 my @readings = $c->readings;
104 my @paths = $c->paths;
105 my @relationships = $c->relationships;
106
107 my $svg_variant_graph = $t->collation->as_svg();
108
109=head1 DESCRIPTION
110
111Text::Tradition is a library for representation and analysis of collated
112texts, particularly medieval ones. The Collation is the central feature of
113a Tradition, where the text, its sequence of readings, and its relationships
114between readings are actually kept.
115
116=head1 CONSTRUCTOR
117
118=head2 new
119
120The constructor. Takes a hash or hashref of the following arguments:
121
122=over
123
124=item * tradition - The Text::Tradition object to which the collation
125belongs. Required.
126
127=item * linear - Whether the collation should be linear; that is, whether
128transposed readings should be treated as two linked readings rather than one,
129and therefore whether the collation graph is acyclic. Defaults to true.
130
4e5a7b2c 131=item * baselabel - The default label for the path taken by a base text
132(if any). Defaults to 'base text'.
133
134=item * wit_list_separator - The string to join a list of witnesses for
135purposes of making labels in display graphs. Defaults to ', '.
136
137=item * ac_label - The extra label to tack onto a witness sigil when
138representing another layer of path for the given witness - that is, when
139a text has more than one possible reading due to scribal corrections or
140the like. Defaults to ' (a.c.)'.
141
142=back
143
144=head1 ACCESSORS
145
146=head2 tradition
147
148=head2 linear
149
4e5a7b2c 150=head2 wit_list_separator
151
152=head2 baselabel
153
154=head2 ac_label
155
156Simple accessors for collation attributes.
157
158=head2 start
159
160The meta-reading at the start of every witness path.
161
162=head2 end
163
164The meta-reading at the end of every witness path.
165
166=head2 readings
167
168Returns all Reading objects in the graph.
169
170=head2 reading( $id )
171
172Returns the Reading object corresponding to the given ID.
173
174=head2 add_reading( $reading_args )
175
176Adds a new reading object to the collation.
177See L<Text::Tradition::Collation::Reading> for the available arguments.
178
179=head2 del_reading( $object_or_id )
180
181Removes the given reading from the collation, implicitly removing its
182paths and relationships.
183
184=head2 merge_readings( $main, $second )
185
186Merges the $second reading into the $main one.
187The arguments may be either readings or reading IDs.
188
189=head2 has_reading( $id )
190
191Predicate to see whether a given reading ID is in the graph.
192
193=head2 reading_witnesses( $object_or_id )
194
195Returns a list of sigils whose witnesses contain the reading.
196
197=head2 paths
198
199Returns all reading paths within the document - that is, all edges in the
200collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
201
202=head2 add_path( $source, $target, $sigil )
203
204Links the given readings in the collation in sequence, under the given witness
205sigil. The readings may be specified by object or ID.
206
207=head2 del_path( $source, $target, $sigil )
208
209Links the given readings in the collation in sequence, under the given witness
210sigil. The readings may be specified by object or ID.
211
212=head2 has_path( $source, $target );
213
214Returns true if the two readings are linked in sequence in any witness.
215The readings may be specified by object or ID.
216
217=head2 relationships
218
219Returns all Relationship objects in the collation.
220
221=head2 add_relationship( $reading, $other_reading, $options )
222
223Adds a new relationship of the type given in $options between the two readings,
224which may be specified by object or ID. Returns a value of ( $status, @vectors)
225where $status is true on success, and @vectors is a list of relationship edges
226that were ultimately added.
227See L<Text::Tradition::Collation::Relationship> for the available options.
228
229=cut
dd3b58b0 230
d047cd52 231sub BUILD {
3a2ebbf4 232 my $self = shift;
22222af9 233 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 234 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
235 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 236}
784877d9 237
3a2ebbf4 238### Reading construct/destruct functions
239
240sub add_reading {
241 my( $self, $reading ) = @_;
242 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
243 my %args = %$reading;
244 $reading = Text::Tradition::Collation::Reading->new(
245 'collation' => $self,
246 %args );
247 }
248 # First check to see if a reading with this ID exists.
249 if( $self->reading( $reading->id ) ) {
63778331 250 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 251 }
252 $self->_add_reading( $reading->id => $reading );
253 # Once the reading has been added, put it in both graphs.
254 $self->sequence->add_vertex( $reading->id );
22222af9 255 $self->relations->add_reading( $reading->id );
3a2ebbf4 256 return $reading;
eca16057 257};
258
3a2ebbf4 259around del_reading => sub {
260 my $orig = shift;
261 my $self = shift;
262 my $arg = shift;
263
264 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
265 $arg = $arg->id;
266 }
3a2ebbf4 267 # Remove the reading from the graphs.
268 $self->sequence->delete_vertex( $arg );
22222af9 269 $self->relations->delete_reading( $arg );
3a2ebbf4 270
271 # Carry on.
272 $self->$orig( $arg );
273};
7854e12e 274
3a2ebbf4 275# merge_readings( $main, $to_be_deleted );
7854e12e 276
3a2ebbf4 277sub merge_readings {
278 my $self = shift;
279
280 # We only need the IDs for adding paths to the graph, not the reading
281 # objects themselves.
49d4f2ac 282 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
3a2ebbf4 283
284 # The kept reading should inherit the paths and the relationships
285 # of the deleted reading.
286 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
287 my @vector = ( $kept );
288 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
289 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 290 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 291 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
292 $self->sequence->add_edge( @vector );
293 my $fwits = $self->sequence->get_edge_attributes( @vector );
294 @wits{keys %$fwits} = values %$fwits;
295 $self->sequence->set_edge_attributes( @vector, \%wits );
296 }
22222af9 297 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 298
299 # Do the deletion deed.
49d4f2ac 300 if( $combine_char ) {
301 my $kept_obj = $self->reading( $kept );
302 my $new_text = join( $combine_char, $kept_obj->text,
303 $self->reading( $deleted )->text );
304 $kept_obj->alter_text( $new_text );
305 }
3a2ebbf4 306 $self->del_reading( $deleted );
307}
7854e12e 308
3265b0ce 309
3a2ebbf4 310# Helper function for manipulating the graph.
311sub _stringify_args {
312 my( $self, $first, $second, $arg ) = @_;
313 $first = $first->id
314 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
315 $second = $second->id
316 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
317 return( $first, $second, $arg );
318}
df6d9812 319
4e5a7b2c 320# Helper function for manipulating the graph.
321sub _objectify_args {
322 my( $self, $first, $second, $arg ) = @_;
323 $first = $self->reading( $first )
324 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
325 $second = $self->reading( $second )
326 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
327 return( $first, $second, $arg );
328}
3a2ebbf4 329### Path logic
330
331sub add_path {
332 my $self = shift;
333
334 # We only need the IDs for adding paths to the graph, not the reading
335 # objects themselves.
336 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
337
338 # Connect the readings
339 $self->sequence->add_edge( $source, $target );
340 # Note the witness in question
341 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 342};
343
3a2ebbf4 344sub del_path {
345 my $self = shift;
49d4f2ac 346 my @args;
347 if( ref( $_[0] ) eq 'ARRAY' ) {
348 my $e = shift @_;
349 @args = ( @$e, @_ );
350 } else {
351 @args = @_;
352 }
3a2ebbf4 353
354 # We only need the IDs for adding paths to the graph, not the reading
355 # objects themselves.
49d4f2ac 356 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 357
358 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 359 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 360 }
361 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
362 $self->sequence->delete_edge( $source, $target );
363 }
784877d9 364}
365
3a2ebbf4 366
15d2d3df 367# Extra graph-alike utility
368sub has_path {
3a2ebbf4 369 my $self = shift;
370 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
371 return undef unless $self->sequence->has_edge( $source, $target );
372 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 373}
374
4e5a7b2c 375=head2 clear_witness( @sigil_list )
3a2ebbf4 376
4e5a7b2c 377Clear the given witnesses out of the collation entirely, removing references
378to them in paths, and removing readings that belong only to them. Should only
379be called via $tradition->del_witness.
3a2ebbf4 380
381=cut
382
4e5a7b2c 383sub clear_witness {
384 my( $self, @sigils ) = @_;
385
386 # Clear the witness(es) out of the paths
387 foreach my $e ( $self->paths ) {
388 foreach my $sig ( @sigils ) {
389 $self->del_path( $e, $sig );
390 }
391 }
392
393 # Clear out the newly unused readings
394 foreach my $r ( $self->readings ) {
395 unless( $self->reading_witnesses( $r ) ) {
396 $self->del_reading( $r );
397 }
398 }
399}
3a2ebbf4 400
401sub add_relationship {
402 my $self = shift;
22222af9 403 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 404 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 405 $self->reading( $source ), $target, $self->reading( $target ), $opts );
406 # Force a full rank recalculation every time. Yuck.
63778331 407 $self->calculate_ranks() if $self->end->has_rank;
408 return @vectors;
22222af9 409}
ef9d481f 410
22222af9 411=head2 reading_witnesses( $reading )
910a0a6d 412
22222af9 413Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 414
22222af9 415=cut
1d310495 416
1d310495 417sub reading_witnesses {
418 my( $self, $reading ) = @_;
419 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 420 # arbitrarily chosen "incoming". Thus, special-case the start node.
421 if( $reading eq $self->start ) {
422 return map { $_->sigil } $self->tradition->witnesses;
423 }
1d310495 424 my %all_witnesses;
425 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
426 my $wits = $self->sequence->get_edge_attributes( @$e );
427 @all_witnesses{ keys %$wits } = 1;
428 }
429 return keys %all_witnesses;
910a0a6d 430}
431
4e5a7b2c 432=head1 OUTPUT METHODS
8e1394aa 433
4e5a7b2c 434=head2 as_svg
8e1394aa 435
0068967c 436Returns an SVG string that represents the graph, via as_dot and graphviz.
8e1394aa 437
438=cut
439
440sub as_svg {
3a2ebbf4 441 my( $self ) = @_;
442
8e1394aa 443 my @cmd = qw/dot -Tsvg/;
444 my( $svg, $err );
910a0a6d 445 my $dotfile = File::Temp->new();
d9e873d0 446 ## TODO REMOVE
eca16057 447 # $dotfile->unlink_on_destroy(0);
910a0a6d 448 binmode $dotfile, ':utf8';
449 print $dotfile $self->as_dot();
450 push( @cmd, $dotfile->filename );
451 run( \@cmd, ">", binary(), \$svg );
452 $svg = decode_utf8( $svg );
8e1394aa 453 return $svg;
454}
455
4e5a7b2c 456=head2 svg_subgraph( $from, $to )
b22576c6 457
458Returns an SVG string that represents the portion of the graph given by the
459specified range. The $from and $to variables refer to ranks within the graph.
460
461=cut
462
463sub svg_subgraph {
464 my( $self, $from, $to ) = @_;
465
466 my $dot = $self->as_dot( $from, $to );
467 unless( $dot ) {
63778331 468 throw( "Could not output a graph with range $from - $to" );
b22576c6 469 }
470
471 my @cmd = qw/dot -Tsvg/;
472 my( $svg, $err );
473 my $dotfile = File::Temp->new();
474 ## TODO REMOVE
475 # $dotfile->unlink_on_destroy(0);
476 binmode $dotfile, ':utf8';
477 print $dotfile $dot;
478 push( @cmd, $dotfile->filename );
479 run( \@cmd, ">", binary(), \$svg );
480 $svg = decode_utf8( $svg );
481 return $svg;
482}
483
484
4e5a7b2c 485=head2 as_dot( $from, $to )
df6d9812 486
487Returns a string that is the collation graph expressed in dot
4e5a7b2c 488(i.e. GraphViz) format. If $from or $to is passed, as_dot creates
489a subgraph rather than the entire graph.
df6d9812 490
491=cut
492
493sub as_dot {
b22576c6 494 my( $self, $startrank, $endrank ) = @_;
495
496 # Check the arguments
497 if( $startrank ) {
498 return if $endrank && $startrank > $endrank;
499 return if $startrank > $self->end->rank;
500 }
501 if( defined $endrank ) {
502 return if $endrank < 0;
f1b3b33a 503 $endrank = undef if $endrank == $self->end->rank;
b22576c6 504 }
505
df6d9812 506 # TODO consider making some of these things configurable
67da8d6c 507 my $graph_name = $self->tradition->name;
508 $graph_name =~ s/[^\w\s]//g;
509 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 510
511 my %graph_attrs = (
512 'rankdir' => 'LR',
513 'bgcolor' => 'none',
514 );
515 my %node_attrs = (
516 'fontsize' => 11,
517 'fillcolor' => 'white',
518 'style' => 'filled',
519 'shape' => 'ellipse'
520 );
521 my %edge_attrs = (
522 'arrowhead' => 'open',
523 'color' => '#000000',
524 'fontcolor' => '#000000',
525 );
526
67da8d6c 527 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 528 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
529 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 530
b22576c6 531 # Output substitute start/end readings if necessary
532 if( $startrank ) {
533 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
534 }
535 if( $endrank ) {
536 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
537 }
538 my %used; # Keep track of the readings that actually appear in the graph
df6d9812 539 foreach my $reading ( $self->readings ) {
b22576c6 540 # Only output readings within our rank range.
541 next if $startrank && $reading->rank < $startrank;
542 next if $endrank && $reading->rank > $endrank;
543 $used{$reading->id} = 1;
910a0a6d 544 # Need not output nodes without separate labels
3a2ebbf4 545 next if $reading->id eq $reading->text;
30f0df34 546 my $label = $reading->text;
8f9cab7b 547 $label =~ s/\"/\\\"/g;
548 $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
df6d9812 549 }
3a2ebbf4 550
b22576c6 551 # Add the real edges
552 my @edges = $self->paths;
3bdec618 553 my( %substart, %subend );
b22576c6 554 foreach my $edge ( @edges ) {
555 # Do we need to output this edge?
556 if( $used{$edge->[0]} && $used{$edge->[1]} ) {;
f13b5582 557 my $label = $self->path_display_label( $self->path_witnesses( $edge ) );
558 my $variables = { %edge_attrs, 'label' => $label };
b22576c6 559 # Account for the rank gap if necessary
b0b4421a 560 if( $self->reading( $edge->[1] )->has_rank
561 && $self->reading( $edge->[0] )->has_rank
562 && $self->reading( $edge->[1] )->rank
563 - $self->reading( $edge->[0] )->rank > 1 ) {
564 $variables->{'minlen'} = $self->reading( $edge->[1] )->rank
b22576c6 565 - $self->reading( $edge->[0] )->rank;
b0b4421a 566 }
f13b5582 567 my $varopts = _dot_attr_string( $variables );
568 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
569 $edge->[0], $edge->[1], $varopts );
3bdec618 570 } elsif( $used{$edge->[0]} ) {
571 $subend{$edge->[0]} = 1;
572 } elsif( $used{$edge->[1]} ) {
573 $substart{$edge->[1]} = 1;
b22576c6 574 }
df6d9812 575 }
3bdec618 576 # Add substitute start and end edges if necessary
577 foreach my $node ( keys %substart ) {
578 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 579 my $variables = { %edge_attrs, 'label' => $witstr };
580 my $varopts = _dot_attr_string( $variables );
581 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 582 }
583 foreach my $node ( keys %subend ) {
584 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 585 my $variables = { %edge_attrs, 'label' => $witstr };
586 my $varopts = _dot_attr_string( $variables );
587 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 588 }
589
df6d9812 590 $dot .= "}\n";
591 return $dot;
592}
593
f13b5582 594sub _dot_attr_string {
595 my( $hash ) = @_;
596 my @attrs;
597 foreach my $k ( sort keys %$hash ) {
598 my $v = $hash->{$k};
599 push( @attrs, $k.'="'.$v.'"' );
600 }
601 return( '[ ' . join( ', ', @attrs ) . ' ]' );
602}
603
3a2ebbf4 604sub path_witnesses {
605 my( $self, @edge ) = @_;
606 # If edge is an arrayref, cope.
607 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
608 my $e = shift @edge;
609 @edge = @$e;
610 }
611 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
612 return sort @wits;
613}
614
8f9cab7b 615sub path_display_label {
3bdec618 616 my( $self, @wits ) = @_;
8f9cab7b 617 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
618 if( scalar @wits > $maj ) {
f13b5582 619 # TODO break out a.c. wits
8f9cab7b 620 return 'majority';
621 } else {
622 return join( ', ', @wits );
623 }
624}
625
626
4e5a7b2c 627=head2 as_graphml
8e1394aa 628
4e5a7b2c 629Returns a GraphML representation of the collation. The GraphML will contain
630two graphs. The first expresses the attributes of the readings and the witness
631paths that link them; the second expresses the relationships that link the
632readings. This is the native transfer format for a tradition.
8e1394aa 633
56eefa04 634=begin testing
635
636use Text::Tradition;
637
638my $READINGS = 311;
639my $PATHS = 361;
640
641my $datafile = 't/data/florilegium_tei_ps.xml';
642my $tradition = Text::Tradition->new( 'input' => 'TEI',
643 'name' => 'test0',
644 'file' => $datafile,
645 'linear' => 1 );
646
647ok( $tradition, "Got a tradition object" );
648is( scalar $tradition->witnesses, 13, "Found all witnesses" );
649ok( $tradition->collation, "Tradition has a collation" );
650
651my $c = $tradition->collation;
652is( scalar $c->readings, $READINGS, "Collation has all readings" );
653is( scalar $c->paths, $PATHS, "Collation has all paths" );
654is( scalar $c->relationships, 0, "Collation has all relationships" );
655
656# Add a few relationships
657$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
658$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
659$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
660
661# Now write it to GraphML and parse it again.
662
663my $graphml = $c->as_graphml;
664my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
665is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
666is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
667is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
668
669=end testing
670
8e1394aa 671=cut
672
673sub as_graphml {
3a2ebbf4 674 my( $self ) = @_;
8e1394aa 675
676 # Some namespaces
677 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
678 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
679 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 680 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 681
682 # Create the document and root node
683 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
684 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
685 $graphml->setDocumentElement( $root );
686 $root->setNamespace( $xsi_ns, 'xsi', 0 );
687 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
688
e309421a 689 # Add the data keys for the graph
690 my %graph_data_keys;
691 my $gdi = 0;
1d310495 692 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 693 foreach my $datum ( @graph_attributes ) {
694 $graph_data_keys{$datum} = 'dg'.$gdi++;
695 my $key = $root->addNewChild( $graphml_ns, 'key' );
696 $key->setAttribute( 'attr.name', $datum );
697 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
698 $key->setAttribute( 'for', 'graph' );
699 $key->setAttribute( 'id', $graph_data_keys{$datum} );
700 }
f6066bac 701
8e1394aa 702 # Add the data keys for nodes
ef9d481f 703 my %node_data_keys;
704 my $ndi = 0;
3a2ebbf4 705 my %node_data = (
706 id => 'string',
255875b8 707 text => 'string',
3a2ebbf4 708 rank => 'string',
709 is_start => 'boolean',
710 is_end => 'boolean',
711 is_lacuna => 'boolean',
712 );
713 foreach my $datum ( keys %node_data ) {
910a0a6d 714 $node_data_keys{$datum} = 'dn'.$ndi++;
715 my $key = $root->addNewChild( $graphml_ns, 'key' );
716 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 717 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 718 $key->setAttribute( 'for', 'node' );
719 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 720 }
721
df6d9812 722 # Add the data keys for edges, i.e. witnesses
ef9d481f 723 my $edi = 0;
724 my %edge_data_keys;
3a2ebbf4 725 my %edge_data = (
f523c7a8 726 class => 'string', # Class, deprecated soon
3a2ebbf4 727 witness => 'string', # ID/label for a path
728 relationship => 'string', # ID/label for a relationship
729 extra => 'boolean', # Path key
c84275ff 730 scope => 'string', # Relationship key
3a2ebbf4 731 non_correctable => 'boolean', # Relationship key
732 non_independent => 'boolean', # Relationship key
733 );
734 foreach my $datum ( keys %edge_data ) {
735 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 736 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 737 $key->setAttribute( 'attr.name', $datum );
738 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 739 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 740 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 741 }
3a2ebbf4 742
22222af9 743 # Add the collation graph itself
2c669bca 744 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
745 $sgraph->setAttribute( 'edgedefault', 'directed' );
746 $sgraph->setAttribute( 'id', $self->tradition->name );
747 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
748 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
749 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
750 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
751 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 752
e309421a 753 # Collation attribute data
754 foreach my $datum ( @graph_attributes ) {
2c669bca 755 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
756 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 757 }
8e1394aa 758
759 my $node_ctr = 0;
760 my %node_hash;
22222af9 761 # Add our readings to the graph
3a2ebbf4 762 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 763 # Add to the main graph
764 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 765 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 766 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 767 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 768 foreach my $d ( keys %node_data ) {
769 my $nval = $n->$d;
770 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
771 if defined $nval;
772 }
b15511bf 773 }
774
2c669bca 775 # Add the path edges to the sequence graph
df6d9812 776 my $edge_ctr = 0;
3a2ebbf4 777 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
778 # We add an edge in the graphml for every witness in $e.
779 foreach my $wit ( $self->path_witnesses( $e ) ) {
780 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
781 $node_hash{ $e->[0] },
782 $node_hash{ $e->[1] } );
2c669bca 783 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 784 $edge_el->setAttribute( 'source', $from );
785 $edge_el->setAttribute( 'target', $to );
786 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 787
788 # It's a witness path, so add the witness
789 my $base = $wit;
790 my $key = $edge_data_keys{'witness'};
791 # Is this an ante-corr witness?
792 my $aclabel = $self->ac_label;
793 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
794 # Keep the base witness
795 $base = $1;
796 # ...and record that this is an 'extra' reading path
797 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
798 }
799 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 800 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 801 }
802 }
803
22222af9 804 # Add the relationship graph to the XML
2626f709 805 $self->relations->as_graphml( $graphml_ns, $root, \%node_hash,
806 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 807
94c00c71 808 # Save and return the thing
809 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 810 return $result;
df6d9812 811}
812
b15511bf 813sub _add_graphml_data {
814 my( $el, $key, $value ) = @_;
b15511bf 815 return unless defined $value;
c9bf3dbf 816 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 817 $data_el->setAttribute( 'key', $key );
818 $data_el->appendText( $value );
8e1394aa 819}
820
4e5a7b2c 821=head2 as_csv
910a0a6d 822
823Returns a CSV alignment table representation of the collation graph, one
2c669bca 824row per witness (or witness uncorrected.)
910a0a6d 825
826=cut
827
828sub as_csv {
3a2ebbf4 829 my( $self ) = @_;
910a0a6d 830 my $table = $self->make_alignment_table;
831 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
832 my @result;
2c669bca 833 # Make the header row
834 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
835 push( @result, decode_utf8( $csv->string ) );
836 # Make the rest of the rows
837 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 838 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
2c669bca 839 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
840 $csv->combine( @row );
910a0a6d 841 push( @result, decode_utf8( $csv->string ) );
842 }
3a2ebbf4 843 return join( "\n", @result );
910a0a6d 844}
845
4e5a7b2c 846=head2 make_alignment_table( $use_refs, $include_witnesses )
2c669bca 847
566f4595 848Return a reference to an alignment table, in a slightly enhanced CollateX
849format which looks like this:
850
851 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 852 tokens => [ { t => "TEXT" }, ... ] },
566f4595 853 { witness => "SIG2",
4e5a7b2c 854 tokens => [ { t => "TEXT" }, ... ] },
566f4595 855 ... ],
856 length => TEXTLEN };
857
858If $use_refs is set to 1, the reading object is returned in the table
859instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 860
861If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 862keys have a true hash value will be included.
2c669bca 863
864=cut
9f3ba6f7 865
910a0a6d 866sub make_alignment_table {
08e0fb85 867 my( $self, $noderefs, $include ) = @_;
910a0a6d 868 unless( $self->linear ) {
63778331 869 throw( "Need a linear graph in order to make an alignment table" );
910a0a6d 870 }
2c669bca 871 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 872 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 873 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2c669bca 874 if( $include ) {
566f4595 875 next unless $include->{$wit->sigil};
2c669bca 876 }
eca16057 877 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 878 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
879 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
2c669bca 880 push( @{$table->{'alignment'}},
881 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 882 if( $wit->is_layered ) {
883 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 884 $wit->sigil.$self->ac_label );
1f7aa795 885 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
2c669bca 886 push( @{$table->{'alignment'}},
887 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 888 }
889 }
2c669bca 890 return $table;
910a0a6d 891}
892
893sub _make_witness_row {
0e476982 894 my( $path, $positions, $noderefs ) = @_;
910a0a6d 895 my %char_hash;
896 map { $char_hash{$_} = undef } @$positions;
2c669bca 897 my $debug = 0;
910a0a6d 898 foreach my $rdg ( @$path ) {
eca16057 899 my $rtext = $rdg->text;
900 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 901 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 902 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
2c669bca 903 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
904 : { 't' => $rtext };
910a0a6d 905 }
906 my @row = map { $char_hash{$_} } @$positions;
eca16057 907 # Fill in lacuna markers for undef spots in the row
908 my $last_el = shift @row;
909 my @filled_row = ( $last_el );
910 foreach my $el ( @row ) {
0e476982 911 # If we are using node reference, make the lacuna node appear many times
912 # in the table. If not, use the lacuna tag.
913 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
566f4595 914 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
eca16057 915 }
916 push( @filled_row, $el );
917 $last_el = $el;
918 }
919 return @filled_row;
910a0a6d 920}
921
0e476982 922# Tiny utility function to say if a table element is a lacuna
923sub _el_is_lacuna {
924 my $el = shift;
2c669bca 925 return 1 if $el->{'t'} eq '#LACUNA#';
926 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
927 && $el->{'t'}->is_lacuna;
0e476982 928 return 0;
929}
930
910a0a6d 931# Helper to turn the witnesses along columns rather than rows. Assumes
932# equal-sized rows.
933sub _turn_table {
934 my( $table ) = @_;
935 my $result = [];
936 return $result unless scalar @$table;
937 my $nrows = scalar @{$table->[0]};
938 foreach my $idx ( 0 .. $nrows - 1 ) {
939 foreach my $wit ( 0 .. $#{$table} ) {
940 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
941 }
942 }
943 return $result;
944}
945
4e5a7b2c 946=head1 NAVIGATION METHODS
910a0a6d 947
4e5a7b2c 948=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 949
950Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 951with $last, for the witness given in $sigil. If a $backup sigil is
952specified (e.g. when walking a layered witness), it will be used wherever
953no $sigil path exists. If there is a base text reading, that will be
954used wherever no path exists for $sigil or $backup.
e2902068 955
956=cut
957
910a0a6d 958# TODO Think about returning some lazy-eval iterator.
b0b4421a 959# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 960
e2902068 961sub reading_sequence {
861c3e27 962 my( $self, $start, $end, $witness ) = @_;
e2902068 963
930ff666 964 $witness = $self->baselabel unless $witness;
e2902068 965 my @readings = ( $start );
966 my %seen;
967 my $n = $start;
3a2ebbf4 968 while( $n && $n->id ne $end->id ) {
969 if( exists( $seen{$n->id} ) ) {
63778331 970 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 971 }
3a2ebbf4 972 $seen{$n->id} = 1;
910a0a6d 973
861c3e27 974 my $next = $self->next_reading( $n, $witness );
44771cf2 975 unless( $next ) {
63778331 976 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 977 }
910a0a6d 978 push( @readings, $next );
979 $n = $next;
e2902068 980 }
981 # Check that the last reading is our end reading.
982 my $last = $readings[$#readings];
63778331 983 throw( "Last reading found from " . $start->text .
984 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 985 unless $last->id eq $end->id;
e2902068 986
987 return @readings;
988}
989
4e5a7b2c 990=head2 next_reading( $reading, $sigil );
8e1394aa 991
4a8828f0 992Returns the reading that follows the given reading along the given witness
930ff666 993path.
8e1394aa 994
995=cut
996
4a8828f0 997sub next_reading {
e2902068 998 # Return the successor via the corresponding path.
8e1394aa 999 my $self = shift;
3a2ebbf4 1000 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1001 return undef unless $answer;
3a2ebbf4 1002 return $self->reading( $answer );
8e1394aa 1003}
1004
4e5a7b2c 1005=head2 prior_reading( $reading, $sigil )
8e1394aa 1006
4a8828f0 1007Returns the reading that precedes the given reading along the given witness
930ff666 1008path.
8e1394aa 1009
1010=cut
1011
4a8828f0 1012sub prior_reading {
e2902068 1013 # Return the predecessor via the corresponding path.
8e1394aa 1014 my $self = shift;
3a2ebbf4 1015 my $answer = $self->_find_linked_reading( 'prior', @_ );
1016 return $self->reading( $answer );
8e1394aa 1017}
1018
4a8828f0 1019sub _find_linked_reading {
861c3e27 1020 my( $self, $direction, $node, $path ) = @_;
1021
1022 # Get a backup if we are dealing with a layered witness
1023 my $alt_path;
1024 my $aclabel = $self->ac_label;
1025 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1026 $alt_path = $1;
1027 }
1028
e2902068 1029 my @linked_paths = $direction eq 'next'
3a2ebbf4 1030 ? $self->sequence->edges_from( $node )
1031 : $self->sequence->edges_to( $node );
e2902068 1032 return undef unless scalar( @linked_paths );
8e1394aa 1033
e2902068 1034 # We have to find the linked path that contains all of the
1035 # witnesses supplied in $path.
1036 my( @path_wits, @alt_path_wits );
4e5a7b2c 1037 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1038 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1039 my $base_le;
1040 my $alt_le;
1041 foreach my $le ( @linked_paths ) {
3a2ebbf4 1042 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1043 $base_le = $le;
910a0a6d 1044 }
3a2ebbf4 1045 my @le_wits = $self->path_witnesses( $le );
1046 if( _is_within( \@path_wits, \@le_wits ) ) {
1047 # This is the right path.
1048 return $direction eq 'next' ? $le->[1] : $le->[0];
1049 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1050 $alt_le = $le;
1051 }
8e1394aa 1052 }
e2902068 1053 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1054 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1055 if $alt_le;
e2902068 1056
1057 # Got this far? Return the base path if it exists.
3a2ebbf4 1058 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1059 if $base_le;
e2902068 1060
1061 # Got this far? We have no appropriate path.
2c669bca 1062 warn "Could not find $direction node from " . $node->id
910a0a6d 1063 . " along path $path";
8e1394aa 1064 return undef;
1065}
1066
4a8828f0 1067# Some set logic.
1068sub _is_within {
1069 my( $set1, $set2 ) = @_;
7854e12e 1070 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1071 foreach my $el ( @$set1 ) {
910a0a6d 1072 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1073 }
1074 return $ret;
1075}
1076
4e5a7b2c 1077# Return the string that joins together a list of witnesses for
1078# display on a single path.
1079sub _witnesses_of_label {
1080 my( $self, $label ) = @_;
1081 my $regex = $self->wit_list_separator;
1082 my @answer = split( /\Q$regex\E/, $label );
1083 return @answer;
b0b4421a 1084}
1085
1086=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1087
1088Returns the text of a witness (plus its backup, if we are using a layer)
1089as stored in the collation. The text is returned as a string, where the
1090individual readings are joined with spaces and the meta-readings (e.g.
1091lacunae) are omitted. Optional specification of $start and $end allows
1092the generation of a subset of the witness text.
4e5a7b2c 1093
b0b4421a 1094=cut
1095
1096sub path_text {
861c3e27 1097 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1098 $start = $self->start unless $start;
1099 $end = $self->end unless $end;
861c3e27 1100 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
b0b4421a 1101 return join( ' ', map { $_->text } @path );
1102}
4e5a7b2c 1103
1104=head1 INITIALIZATION METHODS
1105
1106These are mostly for use by parsers.
1107
1108=head2 make_witness_path( $witness )
1109
1110Link the array of readings contained in $witness->path (and in
1111$witness->uncorrected_path if it exists) into collation paths.
1112Clear out the arrays when finished.
de51424a 1113
4e5a7b2c 1114=head2 make_witness_paths
1115
1116Call make_witness_path for all witnesses in the tradition.
1117
1118=cut
930ff666 1119
7e450e44 1120# For use when a collation is constructed from a base text and an apparatus.
1121# We have the sequences of readings and just need to add path edges.
1f7aa795 1122# When we are done, clear out the witness path attributes, as they are no
1123# longer needed.
1124# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1125
6a222840 1126sub make_witness_paths {
1127 my( $self ) = @_;
910a0a6d 1128 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1129 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1130 $self->make_witness_path( $wit );
7854e12e 1131 }
7854e12e 1132}
1133
6a222840 1134sub make_witness_path {
7854e12e 1135 my( $self, $wit ) = @_;
1136 my @chain = @{$wit->path};
15d2d3df 1137 my $sig = $wit->sigil;
7854e12e 1138 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1139 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1140 }
1f7aa795 1141 if( $wit->is_layered ) {
d9e873d0 1142 @chain = @{$wit->uncorrected_path};
1143 foreach my $idx( 0 .. $#chain-1 ) {
1144 my $source = $chain[$idx];
1145 my $target = $chain[$idx+1];
1146 $self->add_path( $source, $target, $sig.$self->ac_label )
1147 unless $self->has_path( $source, $target, $sig );
1148 }
15d2d3df 1149 }
1f7aa795 1150 $wit->clear_path;
1151 $wit->clear_uncorrected_path;
e2902068 1152}
1153
4e5a7b2c 1154=head2 calculate_ranks
1155
1156Calculate the reading ranks (that is, their aligned positions relative
1157to each other) for the graph. This can only be called on linear collations.
1158
1159=cut
1160
910a0a6d 1161sub calculate_ranks {
1162 my $self = shift;
1163 # Walk a version of the graph where every node linked by a relationship
1164 # edge is fundamentally the same node, and do a topological ranking on
1165 # the nodes in this graph.
c9bf3dbf 1166 my $topo_graph = Graph->new();
910a0a6d 1167 my %rel_containers;
1168 my $rel_ctr = 0;
1169 # Add the nodes
1170 foreach my $r ( $self->readings ) {
3a2ebbf4 1171 next if exists $rel_containers{$r->id};
910a0a6d 1172 my @rels = $r->related_readings( 'colocated' );
1173 if( @rels ) {
1174 # Make a relationship container.
1175 push( @rels, $r );
c9bf3dbf 1176 my $rn = 'rel_container_' . $rel_ctr++;
1177 $topo_graph->add_vertex( $rn );
910a0a6d 1178 foreach( @rels ) {
3a2ebbf4 1179 $rel_containers{$_->id} = $rn;
910a0a6d 1180 }
1181 } else {
1182 # Add a new node to mirror the old node.
3a2ebbf4 1183 $rel_containers{$r->id} = $r->id;
1184 $topo_graph->add_vertex( $r->id );
910a0a6d 1185 }
4a8828f0 1186 }
3a1f2523 1187
3a2ebbf4 1188 # Add the edges.
910a0a6d 1189 foreach my $r ( $self->readings ) {
3a2ebbf4 1190 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1191 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1192 $rel_containers{$n} );
4e5a7b2c 1193 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1194 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1195 }
1196 }
1197
1198 # Now do the rankings, starting with the start node.
3a2ebbf4 1199 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1200 my $node_ranks = { $topo_start => 0 };
910a0a6d 1201 my @curr_origin = ( $topo_start );
1202 # A little iterative function.
1203 while( @curr_origin ) {
c9bf3dbf 1204 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1205 }
1206 # Transfer our rankings from the topological graph to the real one.
1207 foreach my $r ( $self->readings ) {
3a2ebbf4 1208 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1209 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1210 } else {
63778331 1211 # Die. Find the last rank we calculated.
1212 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1213 <=> $node_ranks->{$rel_containers{$b->id}} }
1214 $self->readings;
1215 my $last = pop @all_defined;
1216 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1217 }
de51424a 1218 }
8e1394aa 1219}
3a1f2523 1220
910a0a6d 1221sub _assign_rank {
c9bf3dbf 1222 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1223 # Look at each of the children of @current_nodes. If all the child's
1224 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1225 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1226 # parent gets a rank.
910a0a6d 1227 my @next_nodes;
1228 foreach my $c ( @current_nodes ) {
c9bf3dbf 1229 warn "Current reading $c has no rank!"
1230 unless exists $node_ranks->{$c};
1231 # print STDERR "Looking at child of node $c, rank "
1232 # . $node_ranks->{$c} . "\n";
1233 foreach my $child ( $graph->successors( $c ) ) {
1234 next if exists $node_ranks->{$child};
910a0a6d 1235 my $highest_rank = -1;
1236 my $skip = 0;
c9bf3dbf 1237 foreach my $parent ( $graph->predecessors( $child ) ) {
1238 if( exists $node_ranks->{$parent} ) {
1239 $highest_rank = $node_ranks->{$parent}
1240 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1241 } else {
1242 $skip = 1;
1243 last;
1244 }
1245 }
1246 next if $skip;
c9bf3dbf 1247 my $c_rank = $highest_rank + 1;
1248 # print STDERR "Assigning rank $c_rank to node $child \n";
1249 $node_ranks->{$child} = $c_rank;
910a0a6d 1250 push( @next_nodes, $child );
1251 }
1252 }
1253 return @next_nodes;
4cdd82f1 1254}
910a0a6d 1255
4e5a7b2c 1256=head2 flatten_ranks
1257
1258A convenience method for parsing collation data. Searches the graph for readings
1259with the same text at the same rank, and merges any that are found.
1260
1261=cut
1262
0e476982 1263sub flatten_ranks {
1264 my $self = shift;
1265 my %unique_rank_rdg;
1266 foreach my $rdg ( $self->readings ) {
1267 next unless $rdg->has_rank;
1268 my $key = $rdg->rank . "||" . $rdg->text;
1269 if( exists $unique_rank_rdg{$key} ) {
1270 # Combine!
56eefa04 1271 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1272 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1273 } else {
1274 $unique_rank_rdg{$key} = $rdg;
1275 }
1276 }
1277}
1278
861c3e27 1279=head2 text_from_paths
1280
1281Calculate the text array for all witnesses from the path, for later consistency
1282checking. Only to be used if there is no non-graph-based way to know the
1283original texts.
1284
1285=cut
1286
1287sub text_from_paths {
1288 my $self = shift;
1289 foreach my $wit ( $self->tradition->witnesses ) {
1290 my @text = split( /\s+/,
1291 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1292 $wit->text( \@text );
1293 if( $wit->is_layered ) {
1294 my @uctext = split( /\s+/,
1295 $self->reading_sequence( $self->start, $self->end,
1296 $wit->sigil.$self->ac_label ) );
1297 $wit->text( \@uctext );
1298 }
1299 }
1300}
0e476982 1301
4e5a7b2c 1302=head1 UTILITY FUNCTIONS
1303
1304=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1305
4e5a7b2c 1306Find the last reading that occurs in sequence before both the given readings.
1307
1308=head2 common_successor( $reading_a, $reading_b )
1309
1310Find the first reading that occurs in sequence after both the given readings.
1311
22222af9 1312=begin testing
1313
1314use Text::Tradition;
1315
1316my $cxfile = 't/data/Collatex-16.xml';
1317my $t = Text::Tradition->new(
1318 'name' => 'inline',
1319 'input' => 'CollateX',
1320 'file' => $cxfile,
1321 );
1322my $c = $t->collation;
1323
4e5a7b2c 1324is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1325 'n20', "Found correct common predecessor" );
4e5a7b2c 1326is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1327 '#END#', "Found correct common successor" );
1328
4e5a7b2c 1329is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1330 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1331is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1332 '#END#', "Found correct common successor for readings on same path" );
1333
1334=end testing
1335
1336=cut
1337
1338## Return the closest reading that is a predecessor of both the given readings.
1339sub common_predecessor {
1340 my $self = shift;
4e5a7b2c 1341 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1342 return $self->common_in_path( $r1, $r2, 'predecessors' );
22222af9 1343}
1344
1345sub common_successor {
1346 my $self = shift;
4e5a7b2c 1347 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1348 return $self->common_in_path( $r1, $r2, 'successors' );
22222af9 1349}
1350
1351sub common_in_path {
1352 my( $self, $r1, $r2, $dir ) = @_;
1353 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1354 $iter = $self->end->rank - $iter if $dir eq 'successors';
1355 my @candidates;
1356 my @last_checked = ( $r1, $r2 );
1357 my %all_seen;
1358 while( !@candidates ) {
1359 my @new_lc;
1360 foreach my $lc ( @last_checked ) {
1361 foreach my $p ( $lc->$dir ) {
1362 if( $all_seen{$p->id} ) {
1363 push( @candidates, $p );
1364 } else {
1365 $all_seen{$p->id} = 1;
1366 push( @new_lc, $p );
1367 }
1368 }
1369 }
1370 @last_checked = @new_lc;
1371 }
1372 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1373 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1374}
1375
63778331 1376sub throw {
1377 Text::Tradition::Error->throw(
1378 'ident' => 'Collation error',
1379 'message' => $_[0],
1380 );
1381}
1382
dd3b58b0 1383no Moose;
1384__PACKAGE__->meta->make_immutable;
e867486f 1385
1386=head1 BUGS / TODO
1387
1388=over
1389
4e5a7b2c 1390=item * Get rid of $backup in reading_sequence
e867486f 1391
1392=back