refactor the hardcoded attribute stuff in as_dot
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
c9bf3dbf 5use Graph;
8e1394aa 6use IPC::Run qw( run binary );
910a0a6d 7use Text::CSV_XS;
b15511bf 8use Text::Tradition::Collation::Reading;
22222af9 9use Text::Tradition::Collation::RelationshipStore;
df6d9812 10use XML::LibXML;
dd3b58b0 11use Moose;
12
3a2ebbf4 13has 'sequence' => (
d047cd52 14 is => 'ro',
3a2ebbf4 15 isa => 'Graph',
16 default => sub { Graph->new() },
d047cd52 17 handles => {
3a2ebbf4 18 paths => 'edges',
d047cd52 19 },
d047cd52 20 );
3a2ebbf4 21
22has 'relations' => (
23 is => 'ro',
22222af9 24 isa => 'Text::Tradition::Collation::RelationshipStore',
25 handles => {
26 relationships => 'relationships',
27 related_readings => 'related_readings',
28 },
29 writer => '_set_relations',
3a2ebbf4 30 );
dd3b58b0 31
3a2ebbf4 32has 'tradition' => (
33 is => 'ro',
d047cd52 34 isa => 'Text::Tradition',
8d9a1cd8 35 weak_ref => 1,
d047cd52 36 );
dd3b58b0 37
3a2ebbf4 38has 'readings' => (
39 isa => 'HashRef[Text::Tradition::Collation::Reading]',
40 traits => ['Hash'],
41 handles => {
42 reading => 'get',
43 _add_reading => 'set',
44 del_reading => 'delete',
45 has_reading => 'exists',
46 readings => 'values',
47 },
48 default => sub { {} },
49 );
910a0a6d 50
4a8828f0 51has 'wit_list_separator' => (
7854e12e 52 is => 'rw',
53 isa => 'Str',
54 default => ', ',
55 );
56
57has 'baselabel' => (
58 is => 'rw',
59 isa => 'Str',
60 default => 'base text',
61 );
4a8828f0 62
15d2d3df 63has 'linear' => (
64 is => 'rw',
65 isa => 'Bool',
66 default => 1,
67 );
c84275ff 68
69has 'collapse_punctuation' => (
70 is => 'rw',
71 isa => 'Bool',
72 default => 1,
73 );
1f563ac3 74
ef9d481f 75has 'ac_label' => (
76 is => 'rw',
77 isa => 'Str',
78 default => ' (a.c.)',
79 );
3a2ebbf4 80
81has 'start' => (
82 is => 'ro',
83 isa => 'Text::Tradition::Collation::Reading',
84 writer => '_set_start',
85 weak_ref => 1,
86 );
87
88has 'end' => (
89 is => 'ro',
90 isa => 'Text::Tradition::Collation::Reading',
91 writer => '_set_end',
92 weak_ref => 1,
93 );
1f563ac3 94
4e5a7b2c 95=head1 NAME
96
97Text::Tradition::Collation - a software model for a text collation
98
99=head1 SYNOPSIS
100
101 use Text::Tradition;
102 my $t = Text::Tradition->new(
103 'name' => 'this is a text',
104 'input' => 'TEI',
105 'file' => '/path/to/tei_parallel_seg_file.xml' );
106
107 my $c = $t->collation;
108 my @readings = $c->readings;
109 my @paths = $c->paths;
110 my @relationships = $c->relationships;
111
112 my $svg_variant_graph = $t->collation->as_svg();
113
114=head1 DESCRIPTION
115
116Text::Tradition is a library for representation and analysis of collated
117texts, particularly medieval ones. The Collation is the central feature of
118a Tradition, where the text, its sequence of readings, and its relationships
119between readings are actually kept.
120
121=head1 CONSTRUCTOR
122
123=head2 new
124
125The constructor. Takes a hash or hashref of the following arguments:
126
127=over
128
129=item * tradition - The Text::Tradition object to which the collation
130belongs. Required.
131
132=item * linear - Whether the collation should be linear; that is, whether
133transposed readings should be treated as two linked readings rather than one,
134and therefore whether the collation graph is acyclic. Defaults to true.
135
136=item * collapse_punctuation - TODO
137
138=item * baselabel - The default label for the path taken by a base text
139(if any). Defaults to 'base text'.
140
141=item * wit_list_separator - The string to join a list of witnesses for
142purposes of making labels in display graphs. Defaults to ', '.
143
144=item * ac_label - The extra label to tack onto a witness sigil when
145representing another layer of path for the given witness - that is, when
146a text has more than one possible reading due to scribal corrections or
147the like. Defaults to ' (a.c.)'.
148
149=back
150
151=head1 ACCESSORS
152
153=head2 tradition
154
155=head2 linear
156
157=head2 collapse_punctuation
158
159=head2 wit_list_separator
160
161=head2 baselabel
162
163=head2 ac_label
164
165Simple accessors for collation attributes.
166
167=head2 start
168
169The meta-reading at the start of every witness path.
170
171=head2 end
172
173The meta-reading at the end of every witness path.
174
175=head2 readings
176
177Returns all Reading objects in the graph.
178
179=head2 reading( $id )
180
181Returns the Reading object corresponding to the given ID.
182
183=head2 add_reading( $reading_args )
184
185Adds a new reading object to the collation.
186See L<Text::Tradition::Collation::Reading> for the available arguments.
187
188=head2 del_reading( $object_or_id )
189
190Removes the given reading from the collation, implicitly removing its
191paths and relationships.
192
193=head2 merge_readings( $main, $second )
194
195Merges the $second reading into the $main one.
196The arguments may be either readings or reading IDs.
197
198=head2 has_reading( $id )
199
200Predicate to see whether a given reading ID is in the graph.
201
202=head2 reading_witnesses( $object_or_id )
203
204Returns a list of sigils whose witnesses contain the reading.
205
206=head2 paths
207
208Returns all reading paths within the document - that is, all edges in the
209collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
210
211=head2 add_path( $source, $target, $sigil )
212
213Links the given readings in the collation in sequence, under the given witness
214sigil. The readings may be specified by object or ID.
215
216=head2 del_path( $source, $target, $sigil )
217
218Links the given readings in the collation in sequence, under the given witness
219sigil. The readings may be specified by object or ID.
220
221=head2 has_path( $source, $target );
222
223Returns true if the two readings are linked in sequence in any witness.
224The readings may be specified by object or ID.
225
226=head2 relationships
227
228Returns all Relationship objects in the collation.
229
230=head2 add_relationship( $reading, $other_reading, $options )
231
232Adds a new relationship of the type given in $options between the two readings,
233which may be specified by object or ID. Returns a value of ( $status, @vectors)
234where $status is true on success, and @vectors is a list of relationship edges
235that were ultimately added.
236See L<Text::Tradition::Collation::Relationship> for the available options.
237
238=cut
dd3b58b0 239
d047cd52 240sub BUILD {
3a2ebbf4 241 my $self = shift;
22222af9 242 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 243 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
244 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 245}
784877d9 246
3a2ebbf4 247### Reading construct/destruct functions
248
249sub add_reading {
250 my( $self, $reading ) = @_;
251 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
252 my %args = %$reading;
253 $reading = Text::Tradition::Collation::Reading->new(
254 'collation' => $self,
255 %args );
256 }
257 # First check to see if a reading with this ID exists.
258 if( $self->reading( $reading->id ) ) {
259 warn "Collation already has a reading with id " . $reading->id;
260 return undef;
261 }
262 $self->_add_reading( $reading->id => $reading );
263 # Once the reading has been added, put it in both graphs.
264 $self->sequence->add_vertex( $reading->id );
22222af9 265 $self->relations->add_reading( $reading->id );
3a2ebbf4 266 return $reading;
eca16057 267};
268
3a2ebbf4 269around del_reading => sub {
270 my $orig = shift;
271 my $self = shift;
272 my $arg = shift;
273
274 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
275 $arg = $arg->id;
276 }
3a2ebbf4 277 # Remove the reading from the graphs.
278 $self->sequence->delete_vertex( $arg );
22222af9 279 $self->relations->delete_reading( $arg );
3a2ebbf4 280
281 # Carry on.
282 $self->$orig( $arg );
283};
7854e12e 284
3a2ebbf4 285# merge_readings( $main, $to_be_deleted );
7854e12e 286
3a2ebbf4 287sub merge_readings {
288 my $self = shift;
289
290 # We only need the IDs for adding paths to the graph, not the reading
291 # objects themselves.
49d4f2ac 292 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
3a2ebbf4 293
294 # The kept reading should inherit the paths and the relationships
295 # of the deleted reading.
296 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
297 my @vector = ( $kept );
298 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
299 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 300 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 301 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
302 $self->sequence->add_edge( @vector );
303 my $fwits = $self->sequence->get_edge_attributes( @vector );
304 @wits{keys %$fwits} = values %$fwits;
305 $self->sequence->set_edge_attributes( @vector, \%wits );
306 }
22222af9 307 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 308
309 # Do the deletion deed.
49d4f2ac 310 if( $combine_char ) {
311 my $kept_obj = $self->reading( $kept );
312 my $new_text = join( $combine_char, $kept_obj->text,
313 $self->reading( $deleted )->text );
314 $kept_obj->alter_text( $new_text );
315 }
3a2ebbf4 316 $self->del_reading( $deleted );
317}
7854e12e 318
3265b0ce 319
3a2ebbf4 320# Helper function for manipulating the graph.
321sub _stringify_args {
322 my( $self, $first, $second, $arg ) = @_;
323 $first = $first->id
324 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
325 $second = $second->id
326 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
327 return( $first, $second, $arg );
328}
df6d9812 329
4e5a7b2c 330# Helper function for manipulating the graph.
331sub _objectify_args {
332 my( $self, $first, $second, $arg ) = @_;
333 $first = $self->reading( $first )
334 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
335 $second = $self->reading( $second )
336 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
337 return( $first, $second, $arg );
338}
3a2ebbf4 339### Path logic
340
341sub add_path {
342 my $self = shift;
343
344 # We only need the IDs for adding paths to the graph, not the reading
345 # objects themselves.
346 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
347
348 # Connect the readings
349 $self->sequence->add_edge( $source, $target );
350 # Note the witness in question
351 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 352};
353
3a2ebbf4 354sub del_path {
355 my $self = shift;
49d4f2ac 356 my @args;
357 if( ref( $_[0] ) eq 'ARRAY' ) {
358 my $e = shift @_;
359 @args = ( @$e, @_ );
360 } else {
361 @args = @_;
362 }
3a2ebbf4 363
364 # We only need the IDs for adding paths to the graph, not the reading
365 # objects themselves.
49d4f2ac 366 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 367
368 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 369 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 370 }
371 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
372 $self->sequence->delete_edge( $source, $target );
373 }
784877d9 374}
375
3a2ebbf4 376
15d2d3df 377# Extra graph-alike utility
378sub has_path {
3a2ebbf4 379 my $self = shift;
380 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
381 return undef unless $self->sequence->has_edge( $source, $target );
382 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 383}
384
4e5a7b2c 385=head2 clear_witness( @sigil_list )
3a2ebbf4 386
4e5a7b2c 387Clear the given witnesses out of the collation entirely, removing references
388to them in paths, and removing readings that belong only to them. Should only
389be called via $tradition->del_witness.
3a2ebbf4 390
391=cut
392
4e5a7b2c 393sub clear_witness {
394 my( $self, @sigils ) = @_;
395
396 # Clear the witness(es) out of the paths
397 foreach my $e ( $self->paths ) {
398 foreach my $sig ( @sigils ) {
399 $self->del_path( $e, $sig );
400 }
401 }
402
403 # Clear out the newly unused readings
404 foreach my $r ( $self->readings ) {
405 unless( $self->reading_witnesses( $r ) ) {
406 $self->del_reading( $r );
407 }
408 }
409}
3a2ebbf4 410
411sub add_relationship {
412 my $self = shift;
22222af9 413 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
64ae6270 414 my( $ret, @vectors ) = $self->relations->add_relationship( $source,
415 $self->reading( $source ), $target, $self->reading( $target ), $opts );
416 # Force a full rank recalculation every time. Yuck.
417 $self->calculate_ranks() if $ret && $self->end->has_rank;
418 return( $ret, @vectors );
22222af9 419}
ef9d481f 420
22222af9 421=head2 reading_witnesses( $reading )
910a0a6d 422
22222af9 423Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 424
22222af9 425=cut
1d310495 426
1d310495 427sub reading_witnesses {
428 my( $self, $reading ) = @_;
429 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 430 # arbitrarily chosen "incoming". Thus, special-case the start node.
431 if( $reading eq $self->start ) {
432 return map { $_->sigil } $self->tradition->witnesses;
433 }
1d310495 434 my %all_witnesses;
435 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
436 my $wits = $self->sequence->get_edge_attributes( @$e );
437 @all_witnesses{ keys %$wits } = 1;
438 }
439 return keys %all_witnesses;
910a0a6d 440}
441
4e5a7b2c 442=head1 OUTPUT METHODS
8e1394aa 443
4e5a7b2c 444=head2 as_svg
8e1394aa 445
0068967c 446Returns an SVG string that represents the graph, via as_dot and graphviz.
8e1394aa 447
448=cut
449
450sub as_svg {
3a2ebbf4 451 my( $self ) = @_;
452
8e1394aa 453 my @cmd = qw/dot -Tsvg/;
454 my( $svg, $err );
910a0a6d 455 my $dotfile = File::Temp->new();
d9e873d0 456 ## TODO REMOVE
eca16057 457 # $dotfile->unlink_on_destroy(0);
910a0a6d 458 binmode $dotfile, ':utf8';
459 print $dotfile $self->as_dot();
460 push( @cmd, $dotfile->filename );
461 run( \@cmd, ">", binary(), \$svg );
462 $svg = decode_utf8( $svg );
8e1394aa 463 return $svg;
464}
465
4e5a7b2c 466=head2 svg_subgraph( $from, $to )
b22576c6 467
468Returns an SVG string that represents the portion of the graph given by the
469specified range. The $from and $to variables refer to ranks within the graph.
470
471=cut
472
473sub svg_subgraph {
474 my( $self, $from, $to ) = @_;
475
476 my $dot = $self->as_dot( $from, $to );
477 unless( $dot ) {
478 warn "Could not output a graph with range $from - $to";
479 return;
480 }
481
482 my @cmd = qw/dot -Tsvg/;
483 my( $svg, $err );
484 my $dotfile = File::Temp->new();
485 ## TODO REMOVE
486 # $dotfile->unlink_on_destroy(0);
487 binmode $dotfile, ':utf8';
488 print $dotfile $dot;
489 push( @cmd, $dotfile->filename );
490 run( \@cmd, ">", binary(), \$svg );
491 $svg = decode_utf8( $svg );
492 return $svg;
493}
494
495
4e5a7b2c 496=head2 as_dot( $from, $to )
df6d9812 497
498Returns a string that is the collation graph expressed in dot
4e5a7b2c 499(i.e. GraphViz) format. If $from or $to is passed, as_dot creates
500a subgraph rather than the entire graph.
df6d9812 501
502=cut
503
504sub as_dot {
b22576c6 505 my( $self, $startrank, $endrank ) = @_;
506
507 # Check the arguments
508 if( $startrank ) {
509 return if $endrank && $startrank > $endrank;
510 return if $startrank > $self->end->rank;
511 }
512 if( defined $endrank ) {
513 return if $endrank < 0;
f1b3b33a 514 $endrank = undef if $endrank == $self->end->rank;
b22576c6 515 }
516
df6d9812 517 # TODO consider making some of these things configurable
67da8d6c 518 my $graph_name = $self->tradition->name;
519 $graph_name =~ s/[^\w\s]//g;
520 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 521
522 my %graph_attrs = (
523 'rankdir' => 'LR',
524 'bgcolor' => 'none',
525 );
526 my %node_attrs = (
527 'fontsize' => 11,
528 'fillcolor' => 'white',
529 'style' => 'filled',
530 'shape' => 'ellipse'
531 );
532 my %edge_attrs = (
533 'arrowhead' => 'open',
534 'color' => '#000000',
535 'fontcolor' => '#000000',
536 );
537
67da8d6c 538 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 539 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
540 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 541
b22576c6 542 # Output substitute start/end readings if necessary
543 if( $startrank ) {
544 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
545 }
546 if( $endrank ) {
547 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
548 }
549 my %used; # Keep track of the readings that actually appear in the graph
df6d9812 550 foreach my $reading ( $self->readings ) {
b22576c6 551 # Only output readings within our rank range.
552 next if $startrank && $reading->rank < $startrank;
553 next if $endrank && $reading->rank > $endrank;
554 $used{$reading->id} = 1;
910a0a6d 555 # Need not output nodes without separate labels
3a2ebbf4 556 next if $reading->id eq $reading->text;
c84275ff 557 my $label = $reading->punctuated_form;
8f9cab7b 558 $label =~ s/\"/\\\"/g;
559 $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
df6d9812 560 }
3a2ebbf4 561
b22576c6 562 # Add the real edges
563 my @edges = $self->paths;
3bdec618 564 my( %substart, %subend );
b22576c6 565 foreach my $edge ( @edges ) {
566 # Do we need to output this edge?
567 if( $used{$edge->[0]} && $used{$edge->[1]} ) {;
f13b5582 568 my $label = $self->path_display_label( $self->path_witnesses( $edge ) );
569 my $variables = { %edge_attrs, 'label' => $label };
b22576c6 570 # Account for the rank gap if necessary
571 my $rankgap = $self->reading( $edge->[1] )->rank
572 - $self->reading( $edge->[0] )->rank;
f13b5582 573 $variables->{'minlen'} = $rankgap if $rankgap > 1;
574 my $varopts = _dot_attr_string( $variables );
575 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
576 $edge->[0], $edge->[1], $varopts );
3bdec618 577 } elsif( $used{$edge->[0]} ) {
578 $subend{$edge->[0]} = 1;
579 } elsif( $used{$edge->[1]} ) {
580 $substart{$edge->[1]} = 1;
b22576c6 581 }
df6d9812 582 }
3bdec618 583 # Add substitute start and end edges if necessary
584 foreach my $node ( keys %substart ) {
585 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 586 my $variables = { %edge_attrs, 'label' => $witstr };
587 my $varopts = _dot_attr_string( $variables );
588 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 589 }
590 foreach my $node ( keys %subend ) {
591 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 592 my $variables = { %edge_attrs, 'label' => $witstr };
593 my $varopts = _dot_attr_string( $variables );
594 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 595 }
596
df6d9812 597 $dot .= "}\n";
598 return $dot;
599}
600
f13b5582 601sub _dot_attr_string {
602 my( $hash ) = @_;
603 my @attrs;
604 foreach my $k ( sort keys %$hash ) {
605 my $v = $hash->{$k};
606 push( @attrs, $k.'="'.$v.'"' );
607 }
608 return( '[ ' . join( ', ', @attrs ) . ' ]' );
609}
610
3a2ebbf4 611sub path_witnesses {
612 my( $self, @edge ) = @_;
613 # If edge is an arrayref, cope.
614 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
615 my $e = shift @edge;
616 @edge = @$e;
617 }
618 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
619 return sort @wits;
620}
621
8f9cab7b 622sub path_display_label {
3bdec618 623 my( $self, @wits ) = @_;
8f9cab7b 624 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
625 if( scalar @wits > $maj ) {
f13b5582 626 # TODO break out a.c. wits
8f9cab7b 627 return 'majority';
628 } else {
629 return join( ', ', @wits );
630 }
631}
632
633
4e5a7b2c 634=head2 as_graphml
8e1394aa 635
4e5a7b2c 636Returns a GraphML representation of the collation. The GraphML will contain
637two graphs. The first expresses the attributes of the readings and the witness
638paths that link them; the second expresses the relationships that link the
639readings. This is the native transfer format for a tradition.
8e1394aa 640
56eefa04 641=begin testing
642
643use Text::Tradition;
644
645my $READINGS = 311;
646my $PATHS = 361;
647
648my $datafile = 't/data/florilegium_tei_ps.xml';
649my $tradition = Text::Tradition->new( 'input' => 'TEI',
650 'name' => 'test0',
651 'file' => $datafile,
652 'linear' => 1 );
653
654ok( $tradition, "Got a tradition object" );
655is( scalar $tradition->witnesses, 13, "Found all witnesses" );
656ok( $tradition->collation, "Tradition has a collation" );
657
658my $c = $tradition->collation;
659is( scalar $c->readings, $READINGS, "Collation has all readings" );
660is( scalar $c->paths, $PATHS, "Collation has all paths" );
661is( scalar $c->relationships, 0, "Collation has all relationships" );
662
663# Add a few relationships
664$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
665$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
666$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
667
668# Now write it to GraphML and parse it again.
669
670my $graphml = $c->as_graphml;
671my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
672is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
673is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
674is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
675
676=end testing
677
8e1394aa 678=cut
679
680sub as_graphml {
3a2ebbf4 681 my( $self ) = @_;
8e1394aa 682
683 # Some namespaces
684 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
685 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
686 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 687 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 688
689 # Create the document and root node
690 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
691 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
692 $graphml->setDocumentElement( $root );
693 $root->setNamespace( $xsi_ns, 'xsi', 0 );
694 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
695
e309421a 696 # Add the data keys for the graph
697 my %graph_data_keys;
698 my $gdi = 0;
1d310495 699 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 700 foreach my $datum ( @graph_attributes ) {
701 $graph_data_keys{$datum} = 'dg'.$gdi++;
702 my $key = $root->addNewChild( $graphml_ns, 'key' );
703 $key->setAttribute( 'attr.name', $datum );
704 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
705 $key->setAttribute( 'for', 'graph' );
706 $key->setAttribute( 'id', $graph_data_keys{$datum} );
707 }
f6066bac 708
8e1394aa 709 # Add the data keys for nodes
ef9d481f 710 my %node_data_keys;
711 my $ndi = 0;
3a2ebbf4 712 my %node_data = (
713 id => 'string',
255875b8 714 text => 'string',
3a2ebbf4 715 rank => 'string',
716 is_start => 'boolean',
717 is_end => 'boolean',
718 is_lacuna => 'boolean',
719 );
720 foreach my $datum ( keys %node_data ) {
910a0a6d 721 $node_data_keys{$datum} = 'dn'.$ndi++;
722 my $key = $root->addNewChild( $graphml_ns, 'key' );
723 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 724 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 725 $key->setAttribute( 'for', 'node' );
726 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 727 }
728
df6d9812 729 # Add the data keys for edges, i.e. witnesses
ef9d481f 730 my $edi = 0;
731 my %edge_data_keys;
3a2ebbf4 732 my %edge_data = (
f523c7a8 733 class => 'string', # Class, deprecated soon
3a2ebbf4 734 witness => 'string', # ID/label for a path
735 relationship => 'string', # ID/label for a relationship
736 extra => 'boolean', # Path key
c84275ff 737 scope => 'string', # Relationship key
3a2ebbf4 738 non_correctable => 'boolean', # Relationship key
739 non_independent => 'boolean', # Relationship key
740 );
741 foreach my $datum ( keys %edge_data ) {
742 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 743 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 744 $key->setAttribute( 'attr.name', $datum );
745 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 746 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 747 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 748 }
3a2ebbf4 749
22222af9 750 # Add the collation graph itself
2c669bca 751 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
752 $sgraph->setAttribute( 'edgedefault', 'directed' );
753 $sgraph->setAttribute( 'id', $self->tradition->name );
754 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
755 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
756 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
757 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
758 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 759
e309421a 760 # Collation attribute data
761 foreach my $datum ( @graph_attributes ) {
2c669bca 762 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
763 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 764 }
8e1394aa 765
766 my $node_ctr = 0;
767 my %node_hash;
22222af9 768 # Add our readings to the graph
3a2ebbf4 769 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 770 # Add to the main graph
771 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 772 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 773 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 774 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 775 foreach my $d ( keys %node_data ) {
776 my $nval = $n->$d;
c84275ff 777 $nval = $n->punctuated_form if $d eq 'text';
255875b8 778 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
779 if defined $nval;
780 }
b15511bf 781 }
782
2c669bca 783 # Add the path edges to the sequence graph
df6d9812 784 my $edge_ctr = 0;
3a2ebbf4 785 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
786 # We add an edge in the graphml for every witness in $e.
787 foreach my $wit ( $self->path_witnesses( $e ) ) {
788 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
789 $node_hash{ $e->[0] },
790 $node_hash{ $e->[1] } );
2c669bca 791 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 792 $edge_el->setAttribute( 'source', $from );
793 $edge_el->setAttribute( 'target', $to );
794 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 795
796 # It's a witness path, so add the witness
797 my $base = $wit;
798 my $key = $edge_data_keys{'witness'};
799 # Is this an ante-corr witness?
800 my $aclabel = $self->ac_label;
801 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
802 # Keep the base witness
803 $base = $1;
804 # ...and record that this is an 'extra' reading path
805 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
806 }
807 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 808 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 809 }
810 }
811
22222af9 812 # Add the relationship graph to the XML
2626f709 813 $self->relations->as_graphml( $graphml_ns, $root, \%node_hash,
814 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 815
94c00c71 816 # Save and return the thing
817 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 818 return $result;
df6d9812 819}
820
b15511bf 821sub _add_graphml_data {
822 my( $el, $key, $value ) = @_;
b15511bf 823 return unless defined $value;
c9bf3dbf 824 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 825 $data_el->setAttribute( 'key', $key );
826 $data_el->appendText( $value );
8e1394aa 827}
828
4e5a7b2c 829=head2 as_csv
910a0a6d 830
831Returns a CSV alignment table representation of the collation graph, one
2c669bca 832row per witness (or witness uncorrected.)
910a0a6d 833
834=cut
835
836sub as_csv {
3a2ebbf4 837 my( $self ) = @_;
910a0a6d 838 my $table = $self->make_alignment_table;
839 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
840 my @result;
2c669bca 841 # Make the header row
842 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
843 push( @result, decode_utf8( $csv->string ) );
844 # Make the rest of the rows
845 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 846 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
2c669bca 847 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
848 $csv->combine( @row );
910a0a6d 849 push( @result, decode_utf8( $csv->string ) );
850 }
3a2ebbf4 851 return join( "\n", @result );
910a0a6d 852}
853
4e5a7b2c 854=head2 make_alignment_table( $use_refs, $include_witnesses )
2c669bca 855
566f4595 856Return a reference to an alignment table, in a slightly enhanced CollateX
857format which looks like this:
858
859 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 860 tokens => [ { t => "TEXT" }, ... ] },
566f4595 861 { witness => "SIG2",
4e5a7b2c 862 tokens => [ { t => "TEXT" }, ... ] },
566f4595 863 ... ],
864 length => TEXTLEN };
865
866If $use_refs is set to 1, the reading object is returned in the table
867instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 868
869If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 870keys have a true hash value will be included.
2c669bca 871
872=cut
9f3ba6f7 873
910a0a6d 874sub make_alignment_table {
08e0fb85 875 my( $self, $noderefs, $include ) = @_;
910a0a6d 876 unless( $self->linear ) {
877 warn "Need a linear graph in order to make an alignment table";
878 return;
879 }
2c669bca 880 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 881 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 882 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2c669bca 883 if( $include ) {
566f4595 884 next unless $include->{$wit->sigil};
2c669bca 885 }
eca16057 886 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 887 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
888 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
2c669bca 889 push( @{$table->{'alignment'}},
890 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 891 if( $wit->is_layered ) {
892 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
893 $wit->sigil.$self->ac_label, $wit->sigil );
894 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
2c669bca 895 push( @{$table->{'alignment'}},
896 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 897 }
898 }
2c669bca 899 return $table;
910a0a6d 900}
901
902sub _make_witness_row {
0e476982 903 my( $path, $positions, $noderefs ) = @_;
910a0a6d 904 my %char_hash;
905 map { $char_hash{$_} = undef } @$positions;
2c669bca 906 my $debug = 0;
910a0a6d 907 foreach my $rdg ( @$path ) {
eca16057 908 my $rtext = $rdg->text;
909 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 910 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 911 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
2c669bca 912 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
913 : { 't' => $rtext };
910a0a6d 914 }
915 my @row = map { $char_hash{$_} } @$positions;
eca16057 916 # Fill in lacuna markers for undef spots in the row
917 my $last_el = shift @row;
918 my @filled_row = ( $last_el );
919 foreach my $el ( @row ) {
0e476982 920 # If we are using node reference, make the lacuna node appear many times
921 # in the table. If not, use the lacuna tag.
922 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
566f4595 923 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
eca16057 924 }
925 push( @filled_row, $el );
926 $last_el = $el;
927 }
928 return @filled_row;
910a0a6d 929}
930
0e476982 931# Tiny utility function to say if a table element is a lacuna
932sub _el_is_lacuna {
933 my $el = shift;
2c669bca 934 return 1 if $el->{'t'} eq '#LACUNA#';
935 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
936 && $el->{'t'}->is_lacuna;
0e476982 937 return 0;
938}
939
910a0a6d 940# Helper to turn the witnesses along columns rather than rows. Assumes
941# equal-sized rows.
942sub _turn_table {
943 my( $table ) = @_;
944 my $result = [];
945 return $result unless scalar @$table;
946 my $nrows = scalar @{$table->[0]};
947 foreach my $idx ( 0 .. $nrows - 1 ) {
948 foreach my $wit ( 0 .. $#{$table} ) {
949 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
950 }
951 }
952 return $result;
953}
954
4e5a7b2c 955=head1 NAVIGATION METHODS
910a0a6d 956
4e5a7b2c 957=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 958
959Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 960with $last, for the witness given in $sigil. If a $backup sigil is
961specified (e.g. when walking a layered witness), it will be used wherever
962no $sigil path exists. If there is a base text reading, that will be
963used wherever no path exists for $sigil or $backup.
e2902068 964
965=cut
966
910a0a6d 967# TODO Think about returning some lazy-eval iterator.
968
e2902068 969sub reading_sequence {
970 my( $self, $start, $end, $witness, $backup ) = @_;
971
930ff666 972 $witness = $self->baselabel unless $witness;
e2902068 973 my @readings = ( $start );
974 my %seen;
975 my $n = $start;
3a2ebbf4 976 while( $n && $n->id ne $end->id ) {
977 if( exists( $seen{$n->id} ) ) {
978 warn "Detected loop at " . $n->id;
910a0a6d 979 last;
980 }
3a2ebbf4 981 $seen{$n->id} = 1;
910a0a6d 982
983 my $next = $self->next_reading( $n, $witness, $backup );
44771cf2 984 unless( $next ) {
3a2ebbf4 985 warn "Did not find any path for $witness from reading " . $n->id;
44771cf2 986 last;
987 }
910a0a6d 988 push( @readings, $next );
989 $n = $next;
e2902068 990 }
991 # Check that the last reading is our end reading.
992 my $last = $readings[$#readings];
3a2ebbf4 993 warn "Last reading found from " . $start->text .
910a0a6d 994 " for witness $witness is not the end!"
3a2ebbf4 995 unless $last->id eq $end->id;
e2902068 996
997 return @readings;
998}
999
4e5a7b2c 1000=head2 next_reading( $reading, $sigil );
8e1394aa 1001
4a8828f0 1002Returns the reading that follows the given reading along the given witness
930ff666 1003path.
8e1394aa 1004
1005=cut
1006
4a8828f0 1007sub next_reading {
e2902068 1008 # Return the successor via the corresponding path.
8e1394aa 1009 my $self = shift;
3a2ebbf4 1010 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1011 return undef unless $answer;
3a2ebbf4 1012 return $self->reading( $answer );
8e1394aa 1013}
1014
4e5a7b2c 1015=head2 prior_reading( $reading, $sigil )
8e1394aa 1016
4a8828f0 1017Returns the reading that precedes the given reading along the given witness
930ff666 1018path.
8e1394aa 1019
1020=cut
1021
4a8828f0 1022sub prior_reading {
e2902068 1023 # Return the predecessor via the corresponding path.
8e1394aa 1024 my $self = shift;
3a2ebbf4 1025 my $answer = $self->_find_linked_reading( 'prior', @_ );
1026 return $self->reading( $answer );
8e1394aa 1027}
1028
4a8828f0 1029sub _find_linked_reading {
e2902068 1030 my( $self, $direction, $node, $path, $alt_path ) = @_;
1031 my @linked_paths = $direction eq 'next'
3a2ebbf4 1032 ? $self->sequence->edges_from( $node )
1033 : $self->sequence->edges_to( $node );
e2902068 1034 return undef unless scalar( @linked_paths );
8e1394aa 1035
e2902068 1036 # We have to find the linked path that contains all of the
1037 # witnesses supplied in $path.
1038 my( @path_wits, @alt_path_wits );
4e5a7b2c 1039 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1040 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1041 my $base_le;
1042 my $alt_le;
1043 foreach my $le ( @linked_paths ) {
3a2ebbf4 1044 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1045 $base_le = $le;
910a0a6d 1046 }
3a2ebbf4 1047 my @le_wits = $self->path_witnesses( $le );
1048 if( _is_within( \@path_wits, \@le_wits ) ) {
1049 # This is the right path.
1050 return $direction eq 'next' ? $le->[1] : $le->[0];
1051 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1052 $alt_le = $le;
1053 }
8e1394aa 1054 }
e2902068 1055 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1056 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1057 if $alt_le;
e2902068 1058
1059 # Got this far? Return the base path if it exists.
3a2ebbf4 1060 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1061 if $base_le;
e2902068 1062
1063 # Got this far? We have no appropriate path.
2c669bca 1064 warn "Could not find $direction node from " . $node->id
910a0a6d 1065 . " along path $path";
8e1394aa 1066 return undef;
1067}
1068
4a8828f0 1069# Some set logic.
1070sub _is_within {
1071 my( $set1, $set2 ) = @_;
7854e12e 1072 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1073 foreach my $el ( @$set1 ) {
910a0a6d 1074 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1075 }
1076 return $ret;
1077}
1078
4e5a7b2c 1079# Return the string that joins together a list of witnesses for
1080# display on a single path.
1081sub _witnesses_of_label {
1082 my( $self, $label ) = @_;
1083 my $regex = $self->wit_list_separator;
1084 my @answer = split( /\Q$regex\E/, $label );
1085 return @answer;
1086}
1087
1088
1089=head1 INITIALIZATION METHODS
1090
1091These are mostly for use by parsers.
1092
1093=head2 make_witness_path( $witness )
1094
1095Link the array of readings contained in $witness->path (and in
1096$witness->uncorrected_path if it exists) into collation paths.
1097Clear out the arrays when finished.
de51424a 1098
4e5a7b2c 1099=head2 make_witness_paths
1100
1101Call make_witness_path for all witnesses in the tradition.
1102
1103=cut
930ff666 1104
7e450e44 1105# For use when a collation is constructed from a base text and an apparatus.
1106# We have the sequences of readings and just need to add path edges.
1f7aa795 1107# When we are done, clear out the witness path attributes, as they are no
1108# longer needed.
1109# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1110
6a222840 1111sub make_witness_paths {
1112 my( $self ) = @_;
910a0a6d 1113 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1114 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1115 $self->make_witness_path( $wit );
7854e12e 1116 }
7854e12e 1117}
1118
6a222840 1119sub make_witness_path {
7854e12e 1120 my( $self, $wit ) = @_;
1121 my @chain = @{$wit->path};
15d2d3df 1122 my $sig = $wit->sigil;
7854e12e 1123 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1124 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1125 }
1f7aa795 1126 if( $wit->is_layered ) {
d9e873d0 1127 @chain = @{$wit->uncorrected_path};
1128 foreach my $idx( 0 .. $#chain-1 ) {
1129 my $source = $chain[$idx];
1130 my $target = $chain[$idx+1];
1131 $self->add_path( $source, $target, $sig.$self->ac_label )
1132 unless $self->has_path( $source, $target, $sig );
1133 }
15d2d3df 1134 }
1f7aa795 1135 $wit->clear_path;
1136 $wit->clear_uncorrected_path;
e2902068 1137}
1138
4e5a7b2c 1139=head2 calculate_ranks
1140
1141Calculate the reading ranks (that is, their aligned positions relative
1142to each other) for the graph. This can only be called on linear collations.
1143
1144=cut
1145
910a0a6d 1146sub calculate_ranks {
1147 my $self = shift;
1148 # Walk a version of the graph where every node linked by a relationship
1149 # edge is fundamentally the same node, and do a topological ranking on
1150 # the nodes in this graph.
c9bf3dbf 1151 my $topo_graph = Graph->new();
910a0a6d 1152 my %rel_containers;
1153 my $rel_ctr = 0;
1154 # Add the nodes
1155 foreach my $r ( $self->readings ) {
3a2ebbf4 1156 next if exists $rel_containers{$r->id};
910a0a6d 1157 my @rels = $r->related_readings( 'colocated' );
1158 if( @rels ) {
1159 # Make a relationship container.
1160 push( @rels, $r );
c9bf3dbf 1161 my $rn = 'rel_container_' . $rel_ctr++;
1162 $topo_graph->add_vertex( $rn );
910a0a6d 1163 foreach( @rels ) {
3a2ebbf4 1164 $rel_containers{$_->id} = $rn;
910a0a6d 1165 }
1166 } else {
1167 # Add a new node to mirror the old node.
3a2ebbf4 1168 $rel_containers{$r->id} = $r->id;
1169 $topo_graph->add_vertex( $r->id );
910a0a6d 1170 }
4a8828f0 1171 }
3a1f2523 1172
3a2ebbf4 1173 # Add the edges.
910a0a6d 1174 foreach my $r ( $self->readings ) {
3a2ebbf4 1175 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1176 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1177 $rel_containers{$n} );
4e5a7b2c 1178 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1179 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1180 }
1181 }
1182
1183 # Now do the rankings, starting with the start node.
3a2ebbf4 1184 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1185 my $node_ranks = { $topo_start => 0 };
910a0a6d 1186 my @curr_origin = ( $topo_start );
1187 # A little iterative function.
1188 while( @curr_origin ) {
c9bf3dbf 1189 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1190 }
1191 # Transfer our rankings from the topological graph to the real one.
1192 foreach my $r ( $self->readings ) {
3a2ebbf4 1193 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1194 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1195 } else {
3a2ebbf4 1196 die "No rank calculated for node " . $r->id
67da8d6c 1197 . " - do you have a cycle in the graph?";
1198 }
de51424a 1199 }
8e1394aa 1200}
3a1f2523 1201
910a0a6d 1202sub _assign_rank {
c9bf3dbf 1203 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1204 # Look at each of the children of @current_nodes. If all the child's
1205 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1206 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1207 # parent gets a rank.
910a0a6d 1208 my @next_nodes;
1209 foreach my $c ( @current_nodes ) {
c9bf3dbf 1210 warn "Current reading $c has no rank!"
1211 unless exists $node_ranks->{$c};
1212 # print STDERR "Looking at child of node $c, rank "
1213 # . $node_ranks->{$c} . "\n";
1214 foreach my $child ( $graph->successors( $c ) ) {
1215 next if exists $node_ranks->{$child};
910a0a6d 1216 my $highest_rank = -1;
1217 my $skip = 0;
c9bf3dbf 1218 foreach my $parent ( $graph->predecessors( $child ) ) {
1219 if( exists $node_ranks->{$parent} ) {
1220 $highest_rank = $node_ranks->{$parent}
1221 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1222 } else {
1223 $skip = 1;
1224 last;
1225 }
1226 }
1227 next if $skip;
c9bf3dbf 1228 my $c_rank = $highest_rank + 1;
1229 # print STDERR "Assigning rank $c_rank to node $child \n";
1230 $node_ranks->{$child} = $c_rank;
910a0a6d 1231 push( @next_nodes, $child );
1232 }
1233 }
1234 return @next_nodes;
4cdd82f1 1235}
910a0a6d 1236
4e5a7b2c 1237=head2 flatten_ranks
1238
1239A convenience method for parsing collation data. Searches the graph for readings
1240with the same text at the same rank, and merges any that are found.
1241
1242=cut
1243
0e476982 1244sub flatten_ranks {
1245 my $self = shift;
1246 my %unique_rank_rdg;
1247 foreach my $rdg ( $self->readings ) {
1248 next unless $rdg->has_rank;
1249 my $key = $rdg->rank . "||" . $rdg->text;
1250 if( exists $unique_rank_rdg{$key} ) {
1251 # Combine!
56eefa04 1252 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1253 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1254 } else {
1255 $unique_rank_rdg{$key} = $rdg;
1256 }
1257 }
1258}
1259
1260
4e5a7b2c 1261=head1 UTILITY FUNCTIONS
1262
1263=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1264
4e5a7b2c 1265Find the last reading that occurs in sequence before both the given readings.
1266
1267=head2 common_successor( $reading_a, $reading_b )
1268
1269Find the first reading that occurs in sequence after both the given readings.
1270
22222af9 1271=begin testing
1272
1273use Text::Tradition;
1274
1275my $cxfile = 't/data/Collatex-16.xml';
1276my $t = Text::Tradition->new(
1277 'name' => 'inline',
1278 'input' => 'CollateX',
1279 'file' => $cxfile,
1280 );
1281my $c = $t->collation;
1282
4e5a7b2c 1283is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1284 'n20', "Found correct common predecessor" );
4e5a7b2c 1285is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1286 '#END#', "Found correct common successor" );
1287
4e5a7b2c 1288is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1289 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1290is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1291 '#END#', "Found correct common successor for readings on same path" );
1292
1293=end testing
1294
1295=cut
1296
1297## Return the closest reading that is a predecessor of both the given readings.
1298sub common_predecessor {
1299 my $self = shift;
4e5a7b2c 1300 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1301 return $self->common_in_path( $r1, $r2, 'predecessors' );
22222af9 1302}
1303
1304sub common_successor {
1305 my $self = shift;
4e5a7b2c 1306 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1307 return $self->common_in_path( $r1, $r2, 'successors' );
22222af9 1308}
1309
1310sub common_in_path {
1311 my( $self, $r1, $r2, $dir ) = @_;
1312 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1313 $iter = $self->end->rank - $iter if $dir eq 'successors';
1314 my @candidates;
1315 my @last_checked = ( $r1, $r2 );
1316 my %all_seen;
1317 while( !@candidates ) {
1318 my @new_lc;
1319 foreach my $lc ( @last_checked ) {
1320 foreach my $p ( $lc->$dir ) {
1321 if( $all_seen{$p->id} ) {
1322 push( @candidates, $p );
1323 } else {
1324 $all_seen{$p->id} = 1;
1325 push( @new_lc, $p );
1326 }
1327 }
1328 }
1329 @last_checked = @new_lc;
1330 }
1331 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1332 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1333}
1334
dd3b58b0 1335no Moose;
1336__PACKAGE__->meta->make_immutable;
e867486f 1337
1338=head1 BUGS / TODO
1339
1340=over
1341
4e5a7b2c 1342=item * Get rid of $backup in reading_sequence
e867486f 1343
1344=back