add clear_witness functionality, use it, and update Collation pod
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
c9bf3dbf 5use Graph;
8e1394aa 6use IPC::Run qw( run binary );
910a0a6d 7use Text::CSV_XS;
b15511bf 8use Text::Tradition::Collation::Reading;
22222af9 9use Text::Tradition::Collation::RelationshipStore;
df6d9812 10use XML::LibXML;
dd3b58b0 11use Moose;
12
3a2ebbf4 13has 'sequence' => (
d047cd52 14 is => 'ro',
3a2ebbf4 15 isa => 'Graph',
16 default => sub { Graph->new() },
d047cd52 17 handles => {
3a2ebbf4 18 paths => 'edges',
d047cd52 19 },
d047cd52 20 );
3a2ebbf4 21
22has 'relations' => (
23 is => 'ro',
22222af9 24 isa => 'Text::Tradition::Collation::RelationshipStore',
25 handles => {
26 relationships => 'relationships',
27 related_readings => 'related_readings',
28 },
29 writer => '_set_relations',
3a2ebbf4 30 );
dd3b58b0 31
3a2ebbf4 32has 'tradition' => (
33 is => 'ro',
d047cd52 34 isa => 'Text::Tradition',
8d9a1cd8 35 weak_ref => 1,
d047cd52 36 );
dd3b58b0 37
3a2ebbf4 38has 'readings' => (
39 isa => 'HashRef[Text::Tradition::Collation::Reading]',
40 traits => ['Hash'],
41 handles => {
42 reading => 'get',
43 _add_reading => 'set',
44 del_reading => 'delete',
45 has_reading => 'exists',
46 readings => 'values',
47 },
48 default => sub { {} },
49 );
910a0a6d 50
4a8828f0 51has 'wit_list_separator' => (
7854e12e 52 is => 'rw',
53 isa => 'Str',
54 default => ', ',
55 );
56
57has 'baselabel' => (
58 is => 'rw',
59 isa => 'Str',
60 default => 'base text',
61 );
4a8828f0 62
15d2d3df 63has 'linear' => (
64 is => 'rw',
65 isa => 'Bool',
66 default => 1,
67 );
c84275ff 68
69has 'collapse_punctuation' => (
70 is => 'rw',
71 isa => 'Bool',
72 default => 1,
73 );
1f563ac3 74
ef9d481f 75has 'ac_label' => (
76 is => 'rw',
77 isa => 'Str',
78 default => ' (a.c.)',
79 );
3a2ebbf4 80
81has 'start' => (
82 is => 'ro',
83 isa => 'Text::Tradition::Collation::Reading',
84 writer => '_set_start',
85 weak_ref => 1,
86 );
87
88has 'end' => (
89 is => 'ro',
90 isa => 'Text::Tradition::Collation::Reading',
91 writer => '_set_end',
92 weak_ref => 1,
93 );
1f563ac3 94
4e5a7b2c 95=head1 NAME
96
97Text::Tradition::Collation - a software model for a text collation
98
99=head1 SYNOPSIS
100
101 use Text::Tradition;
102 my $t = Text::Tradition->new(
103 'name' => 'this is a text',
104 'input' => 'TEI',
105 'file' => '/path/to/tei_parallel_seg_file.xml' );
106
107 my $c = $t->collation;
108 my @readings = $c->readings;
109 my @paths = $c->paths;
110 my @relationships = $c->relationships;
111
112 my $svg_variant_graph = $t->collation->as_svg();
113
114=head1 DESCRIPTION
115
116Text::Tradition is a library for representation and analysis of collated
117texts, particularly medieval ones. The Collation is the central feature of
118a Tradition, where the text, its sequence of readings, and its relationships
119between readings are actually kept.
120
121=head1 CONSTRUCTOR
122
123=head2 new
124
125The constructor. Takes a hash or hashref of the following arguments:
126
127=over
128
129=item * tradition - The Text::Tradition object to which the collation
130belongs. Required.
131
132=item * linear - Whether the collation should be linear; that is, whether
133transposed readings should be treated as two linked readings rather than one,
134and therefore whether the collation graph is acyclic. Defaults to true.
135
136=item * collapse_punctuation - TODO
137
138=item * baselabel - The default label for the path taken by a base text
139(if any). Defaults to 'base text'.
140
141=item * wit_list_separator - The string to join a list of witnesses for
142purposes of making labels in display graphs. Defaults to ', '.
143
144=item * ac_label - The extra label to tack onto a witness sigil when
145representing another layer of path for the given witness - that is, when
146a text has more than one possible reading due to scribal corrections or
147the like. Defaults to ' (a.c.)'.
148
149=back
150
151=head1 ACCESSORS
152
153=head2 tradition
154
155=head2 linear
156
157=head2 collapse_punctuation
158
159=head2 wit_list_separator
160
161=head2 baselabel
162
163=head2 ac_label
164
165Simple accessors for collation attributes.
166
167=head2 start
168
169The meta-reading at the start of every witness path.
170
171=head2 end
172
173The meta-reading at the end of every witness path.
174
175=head2 readings
176
177Returns all Reading objects in the graph.
178
179=head2 reading( $id )
180
181Returns the Reading object corresponding to the given ID.
182
183=head2 add_reading( $reading_args )
184
185Adds a new reading object to the collation.
186See L<Text::Tradition::Collation::Reading> for the available arguments.
187
188=head2 del_reading( $object_or_id )
189
190Removes the given reading from the collation, implicitly removing its
191paths and relationships.
192
193=head2 merge_readings( $main, $second )
194
195Merges the $second reading into the $main one.
196The arguments may be either readings or reading IDs.
197
198=head2 has_reading( $id )
199
200Predicate to see whether a given reading ID is in the graph.
201
202=head2 reading_witnesses( $object_or_id )
203
204Returns a list of sigils whose witnesses contain the reading.
205
206=head2 paths
207
208Returns all reading paths within the document - that is, all edges in the
209collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
210
211=head2 add_path( $source, $target, $sigil )
212
213Links the given readings in the collation in sequence, under the given witness
214sigil. The readings may be specified by object or ID.
215
216=head2 del_path( $source, $target, $sigil )
217
218Links the given readings in the collation in sequence, under the given witness
219sigil. The readings may be specified by object or ID.
220
221=head2 has_path( $source, $target );
222
223Returns true if the two readings are linked in sequence in any witness.
224The readings may be specified by object or ID.
225
226=head2 relationships
227
228Returns all Relationship objects in the collation.
229
230=head2 add_relationship( $reading, $other_reading, $options )
231
232Adds a new relationship of the type given in $options between the two readings,
233which may be specified by object or ID. Returns a value of ( $status, @vectors)
234where $status is true on success, and @vectors is a list of relationship edges
235that were ultimately added.
236See L<Text::Tradition::Collation::Relationship> for the available options.
237
238=cut
dd3b58b0 239
d047cd52 240sub BUILD {
3a2ebbf4 241 my $self = shift;
22222af9 242 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 243 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
244 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 245}
784877d9 246
3a2ebbf4 247### Reading construct/destruct functions
248
249sub add_reading {
250 my( $self, $reading ) = @_;
251 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
252 my %args = %$reading;
253 $reading = Text::Tradition::Collation::Reading->new(
254 'collation' => $self,
255 %args );
256 }
257 # First check to see if a reading with this ID exists.
258 if( $self->reading( $reading->id ) ) {
259 warn "Collation already has a reading with id " . $reading->id;
260 return undef;
261 }
262 $self->_add_reading( $reading->id => $reading );
263 # Once the reading has been added, put it in both graphs.
264 $self->sequence->add_vertex( $reading->id );
22222af9 265 $self->relations->add_reading( $reading->id );
3a2ebbf4 266 return $reading;
eca16057 267};
268
3a2ebbf4 269around del_reading => sub {
270 my $orig = shift;
271 my $self = shift;
272 my $arg = shift;
273
274 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
275 $arg = $arg->id;
276 }
3a2ebbf4 277 # Remove the reading from the graphs.
278 $self->sequence->delete_vertex( $arg );
22222af9 279 $self->relations->delete_reading( $arg );
3a2ebbf4 280
281 # Carry on.
282 $self->$orig( $arg );
283};
7854e12e 284
3a2ebbf4 285# merge_readings( $main, $to_be_deleted );
7854e12e 286
3a2ebbf4 287sub merge_readings {
288 my $self = shift;
289
290 # We only need the IDs for adding paths to the graph, not the reading
291 # objects themselves.
49d4f2ac 292 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
3a2ebbf4 293
294 # The kept reading should inherit the paths and the relationships
295 # of the deleted reading.
296 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
297 my @vector = ( $kept );
298 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
299 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 300 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 301 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
302 $self->sequence->add_edge( @vector );
303 my $fwits = $self->sequence->get_edge_attributes( @vector );
304 @wits{keys %$fwits} = values %$fwits;
305 $self->sequence->set_edge_attributes( @vector, \%wits );
306 }
22222af9 307 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 308
309 # Do the deletion deed.
49d4f2ac 310 if( $combine_char ) {
311 my $kept_obj = $self->reading( $kept );
312 my $new_text = join( $combine_char, $kept_obj->text,
313 $self->reading( $deleted )->text );
314 $kept_obj->alter_text( $new_text );
315 }
3a2ebbf4 316 $self->del_reading( $deleted );
317}
7854e12e 318
3265b0ce 319
3a2ebbf4 320# Helper function for manipulating the graph.
321sub _stringify_args {
322 my( $self, $first, $second, $arg ) = @_;
323 $first = $first->id
324 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
325 $second = $second->id
326 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
327 return( $first, $second, $arg );
328}
df6d9812 329
4e5a7b2c 330# Helper function for manipulating the graph.
331sub _objectify_args {
332 my( $self, $first, $second, $arg ) = @_;
333 $first = $self->reading( $first )
334 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
335 $second = $self->reading( $second )
336 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
337 return( $first, $second, $arg );
338}
3a2ebbf4 339### Path logic
340
341sub add_path {
342 my $self = shift;
343
344 # We only need the IDs for adding paths to the graph, not the reading
345 # objects themselves.
346 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
347
348 # Connect the readings
349 $self->sequence->add_edge( $source, $target );
350 # Note the witness in question
351 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 352};
353
3a2ebbf4 354sub del_path {
355 my $self = shift;
49d4f2ac 356 my @args;
357 if( ref( $_[0] ) eq 'ARRAY' ) {
358 my $e = shift @_;
359 @args = ( @$e, @_ );
360 } else {
361 @args = @_;
362 }
3a2ebbf4 363
364 # We only need the IDs for adding paths to the graph, not the reading
365 # objects themselves.
49d4f2ac 366 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 367
368 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 369 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 370 }
371 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
372 $self->sequence->delete_edge( $source, $target );
373 }
784877d9 374}
375
3a2ebbf4 376
15d2d3df 377# Extra graph-alike utility
378sub has_path {
3a2ebbf4 379 my $self = shift;
380 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
381 return undef unless $self->sequence->has_edge( $source, $target );
382 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 383}
384
4e5a7b2c 385=head2 clear_witness( @sigil_list )
3a2ebbf4 386
4e5a7b2c 387Clear the given witnesses out of the collation entirely, removing references
388to them in paths, and removing readings that belong only to them. Should only
389be called via $tradition->del_witness.
3a2ebbf4 390
391=cut
392
4e5a7b2c 393sub clear_witness {
394 my( $self, @sigils ) = @_;
395
396 # Clear the witness(es) out of the paths
397 foreach my $e ( $self->paths ) {
398 foreach my $sig ( @sigils ) {
399 $self->del_path( $e, $sig );
400 }
401 }
402
403 # Clear out the newly unused readings
404 foreach my $r ( $self->readings ) {
405 unless( $self->reading_witnesses( $r ) ) {
406 $self->del_reading( $r );
407 }
408 }
409}
3a2ebbf4 410
411sub add_relationship {
412 my $self = shift;
22222af9 413 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
64ae6270 414 my( $ret, @vectors ) = $self->relations->add_relationship( $source,
415 $self->reading( $source ), $target, $self->reading( $target ), $opts );
416 # Force a full rank recalculation every time. Yuck.
417 $self->calculate_ranks() if $ret && $self->end->has_rank;
418 return( $ret, @vectors );
22222af9 419}
ef9d481f 420
22222af9 421=head2 reading_witnesses( $reading )
910a0a6d 422
22222af9 423Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 424
22222af9 425=cut
1d310495 426
1d310495 427sub reading_witnesses {
428 my( $self, $reading ) = @_;
429 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 430 # arbitrarily chosen "incoming". Thus, special-case the start node.
431 if( $reading eq $self->start ) {
432 return map { $_->sigil } $self->tradition->witnesses;
433 }
1d310495 434 my %all_witnesses;
435 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
436 my $wits = $self->sequence->get_edge_attributes( @$e );
437 @all_witnesses{ keys %$wits } = 1;
438 }
439 return keys %all_witnesses;
910a0a6d 440}
441
4e5a7b2c 442=head1 OUTPUT METHODS
8e1394aa 443
4e5a7b2c 444=head2 as_svg
8e1394aa 445
0068967c 446Returns an SVG string that represents the graph, via as_dot and graphviz.
8e1394aa 447
448=cut
449
450sub as_svg {
3a2ebbf4 451 my( $self ) = @_;
452
8e1394aa 453 my @cmd = qw/dot -Tsvg/;
454 my( $svg, $err );
910a0a6d 455 my $dotfile = File::Temp->new();
d9e873d0 456 ## TODO REMOVE
eca16057 457 # $dotfile->unlink_on_destroy(0);
910a0a6d 458 binmode $dotfile, ':utf8';
459 print $dotfile $self->as_dot();
460 push( @cmd, $dotfile->filename );
461 run( \@cmd, ">", binary(), \$svg );
462 $svg = decode_utf8( $svg );
8e1394aa 463 return $svg;
464}
465
4e5a7b2c 466=head2 svg_subgraph( $from, $to )
b22576c6 467
468Returns an SVG string that represents the portion of the graph given by the
469specified range. The $from and $to variables refer to ranks within the graph.
470
471=cut
472
473sub svg_subgraph {
474 my( $self, $from, $to ) = @_;
475
476 my $dot = $self->as_dot( $from, $to );
477 unless( $dot ) {
478 warn "Could not output a graph with range $from - $to";
479 return;
480 }
481
482 my @cmd = qw/dot -Tsvg/;
483 my( $svg, $err );
484 my $dotfile = File::Temp->new();
485 ## TODO REMOVE
486 # $dotfile->unlink_on_destroy(0);
487 binmode $dotfile, ':utf8';
488 print $dotfile $dot;
489 push( @cmd, $dotfile->filename );
490 run( \@cmd, ">", binary(), \$svg );
491 $svg = decode_utf8( $svg );
492 return $svg;
493}
494
495
4e5a7b2c 496=head2 as_dot( $from, $to )
df6d9812 497
498Returns a string that is the collation graph expressed in dot
4e5a7b2c 499(i.e. GraphViz) format. If $from or $to is passed, as_dot creates
500a subgraph rather than the entire graph.
df6d9812 501
502=cut
503
504sub as_dot {
b22576c6 505 my( $self, $startrank, $endrank ) = @_;
506
507 # Check the arguments
508 if( $startrank ) {
509 return if $endrank && $startrank > $endrank;
510 return if $startrank > $self->end->rank;
511 }
512 if( defined $endrank ) {
513 return if $endrank < 0;
f1b3b33a 514 $endrank = undef if $endrank == $self->end->rank;
b22576c6 515 }
516
df6d9812 517 # TODO consider making some of these things configurable
67da8d6c 518 my $graph_name = $self->tradition->name;
519 $graph_name =~ s/[^\w\s]//g;
520 $graph_name = join( '_', split( /\s+/, $graph_name ) );
521 my $dot = sprintf( "digraph %s {\n", $graph_name );
df6d9812 522 $dot .= "\tedge [ arrowhead=open ];\n";
5f52ddca 523 $dot .= "\tgraph [ rankdir=LR,bgcolor=none ];\n";
df6d9812 524 $dot .= sprintf( "\tnode [ fontsize=%d, fillcolor=%s, style=%s, shape=%s ];\n",
3a2ebbf4 525 11, "white", "filled", "ellipse" );
df6d9812 526
b22576c6 527 # Output substitute start/end readings if necessary
528 if( $startrank ) {
529 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
530 }
531 if( $endrank ) {
532 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
533 }
534 my %used; # Keep track of the readings that actually appear in the graph
535 my %subedges;
536 my %subend;
df6d9812 537 foreach my $reading ( $self->readings ) {
b22576c6 538 # Only output readings within our rank range.
539 next if $startrank && $reading->rank < $startrank;
540 next if $endrank && $reading->rank > $endrank;
541 $used{$reading->id} = 1;
542 $subedges{$reading->id} = '#SUBSTART#'
543 if $startrank && $startrank == $reading->rank;
544 $subedges{$reading->id} = '#SUBEND#'
545 if $endrank && $endrank == $reading->rank;
910a0a6d 546 # Need not output nodes without separate labels
3a2ebbf4 547 next if $reading->id eq $reading->text;
c84275ff 548 my $label = $reading->punctuated_form;
8f9cab7b 549 $label =~ s/\"/\\\"/g;
550 $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
df6d9812 551 }
3a2ebbf4 552
b22576c6 553 # Add substitute start and end edges if necessary
554 foreach my $node ( keys %subedges ) {
555 my @vector = ( $subedges{$node}, $node );
556 @vector = reverse( @vector ) if $vector[0] =~ /END/;
557 my $witstr = join( ', ', sort $self->reading_witnesses( $self->reading( $node ) ) );
558 my %variables = ( 'color' => '#000000',
910a0a6d 559 'fontcolor' => '#000000',
b22576c6 560 'label' => $witstr,
910a0a6d 561 );
562 my $varopts = join( ', ', map { $_.'="'.$variables{$_}.'"' } sort keys %variables );
b22576c6 563 $dot .= sprintf( "\t\"%s\" -> \"%s\" [ %s ];\n", @vector, $varopts );
564 }
565
566 # Add the real edges
567 my @edges = $self->paths;
568 foreach my $edge ( @edges ) {
569 # Do we need to output this edge?
570 if( $used{$edge->[0]} && $used{$edge->[1]} ) {;
571 my %variables = ( 'color' => '#000000',
572 'fontcolor' => '#000000',
573 'label' => join( ', ', $self->path_display_label( $edge ) ),
574 );
575 my $varopts = join( ', ', map { $_.'="'.$variables{$_}.'"' } sort keys %variables );
576 # Account for the rank gap if necessary
577 my $rankgap = $self->reading( $edge->[1] )->rank
578 - $self->reading( $edge->[0] )->rank;
579 $varopts .= ", minlen=$rankgap" if $rankgap > 1;
580 $dot .= sprintf( "\t\"%s\" -> \"%s\" [ %s ];\n",
581 $edge->[0], $edge->[1], $varopts );
582 }
df6d9812 583 }
b22576c6 584
df6d9812 585 $dot .= "}\n";
586 return $dot;
587}
588
3a2ebbf4 589sub path_witnesses {
590 my( $self, @edge ) = @_;
591 # If edge is an arrayref, cope.
592 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
593 my $e = shift @edge;
594 @edge = @$e;
595 }
596 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
597 return sort @wits;
598}
599
8f9cab7b 600sub path_display_label {
601 my( $self, $edge ) = @_;
602 my @wits = $self->path_witnesses( $edge );
603 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
604 if( scalar @wits > $maj ) {
605 return 'majority';
606 } else {
607 return join( ', ', @wits );
608 }
609}
610
611
4e5a7b2c 612=head2 as_graphml
8e1394aa 613
4e5a7b2c 614Returns a GraphML representation of the collation. The GraphML will contain
615two graphs. The first expresses the attributes of the readings and the witness
616paths that link them; the second expresses the relationships that link the
617readings. This is the native transfer format for a tradition.
8e1394aa 618
56eefa04 619=begin testing
620
621use Text::Tradition;
622
623my $READINGS = 311;
624my $PATHS = 361;
625
626my $datafile = 't/data/florilegium_tei_ps.xml';
627my $tradition = Text::Tradition->new( 'input' => 'TEI',
628 'name' => 'test0',
629 'file' => $datafile,
630 'linear' => 1 );
631
632ok( $tradition, "Got a tradition object" );
633is( scalar $tradition->witnesses, 13, "Found all witnesses" );
634ok( $tradition->collation, "Tradition has a collation" );
635
636my $c = $tradition->collation;
637is( scalar $c->readings, $READINGS, "Collation has all readings" );
638is( scalar $c->paths, $PATHS, "Collation has all paths" );
639is( scalar $c->relationships, 0, "Collation has all relationships" );
640
641# Add a few relationships
642$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
643$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
644$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
645
646# Now write it to GraphML and parse it again.
647
648my $graphml = $c->as_graphml;
649my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
650is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
651is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
652is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
653
654=end testing
655
8e1394aa 656=cut
657
658sub as_graphml {
3a2ebbf4 659 my( $self ) = @_;
8e1394aa 660
661 # Some namespaces
662 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
663 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
664 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 665 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 666
667 # Create the document and root node
668 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
669 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
670 $graphml->setDocumentElement( $root );
671 $root->setNamespace( $xsi_ns, 'xsi', 0 );
672 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
673
e309421a 674 # Add the data keys for the graph
675 my %graph_data_keys;
676 my $gdi = 0;
1d310495 677 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 678 foreach my $datum ( @graph_attributes ) {
679 $graph_data_keys{$datum} = 'dg'.$gdi++;
680 my $key = $root->addNewChild( $graphml_ns, 'key' );
681 $key->setAttribute( 'attr.name', $datum );
682 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
683 $key->setAttribute( 'for', 'graph' );
684 $key->setAttribute( 'id', $graph_data_keys{$datum} );
685 }
f6066bac 686
8e1394aa 687 # Add the data keys for nodes
ef9d481f 688 my %node_data_keys;
689 my $ndi = 0;
3a2ebbf4 690 my %node_data = (
691 id => 'string',
255875b8 692 text => 'string',
3a2ebbf4 693 rank => 'string',
694 is_start => 'boolean',
695 is_end => 'boolean',
696 is_lacuna => 'boolean',
697 );
698 foreach my $datum ( keys %node_data ) {
910a0a6d 699 $node_data_keys{$datum} = 'dn'.$ndi++;
700 my $key = $root->addNewChild( $graphml_ns, 'key' );
701 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 702 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 703 $key->setAttribute( 'for', 'node' );
704 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 705 }
706
df6d9812 707 # Add the data keys for edges, i.e. witnesses
ef9d481f 708 my $edi = 0;
709 my %edge_data_keys;
3a2ebbf4 710 my %edge_data = (
f523c7a8 711 class => 'string', # Class, deprecated soon
3a2ebbf4 712 witness => 'string', # ID/label for a path
713 relationship => 'string', # ID/label for a relationship
714 extra => 'boolean', # Path key
c84275ff 715 scope => 'string', # Relationship key
3a2ebbf4 716 non_correctable => 'boolean', # Relationship key
717 non_independent => 'boolean', # Relationship key
718 );
719 foreach my $datum ( keys %edge_data ) {
720 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 721 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 722 $key->setAttribute( 'attr.name', $datum );
723 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 724 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 725 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 726 }
3a2ebbf4 727
22222af9 728 # Add the collation graph itself
2c669bca 729 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
730 $sgraph->setAttribute( 'edgedefault', 'directed' );
731 $sgraph->setAttribute( 'id', $self->tradition->name );
732 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
733 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
734 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
735 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
736 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 737
e309421a 738 # Collation attribute data
739 foreach my $datum ( @graph_attributes ) {
2c669bca 740 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
741 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 742 }
8e1394aa 743
744 my $node_ctr = 0;
745 my %node_hash;
22222af9 746 # Add our readings to the graph
3a2ebbf4 747 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 748 # Add to the main graph
749 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 750 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 751 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 752 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 753 foreach my $d ( keys %node_data ) {
754 my $nval = $n->$d;
c84275ff 755 $nval = $n->punctuated_form if $d eq 'text';
255875b8 756 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
757 if defined $nval;
758 }
b15511bf 759 }
760
2c669bca 761 # Add the path edges to the sequence graph
df6d9812 762 my $edge_ctr = 0;
3a2ebbf4 763 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
764 # We add an edge in the graphml for every witness in $e.
765 foreach my $wit ( $self->path_witnesses( $e ) ) {
766 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
767 $node_hash{ $e->[0] },
768 $node_hash{ $e->[1] } );
2c669bca 769 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 770 $edge_el->setAttribute( 'source', $from );
771 $edge_el->setAttribute( 'target', $to );
772 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 773
774 # It's a witness path, so add the witness
775 my $base = $wit;
776 my $key = $edge_data_keys{'witness'};
777 # Is this an ante-corr witness?
778 my $aclabel = $self->ac_label;
779 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
780 # Keep the base witness
781 $base = $1;
782 # ...and record that this is an 'extra' reading path
783 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
784 }
785 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 786 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 787 }
788 }
789
22222af9 790 # Add the relationship graph to the XML
2626f709 791 $self->relations->as_graphml( $graphml_ns, $root, \%node_hash,
792 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 793
94c00c71 794 # Save and return the thing
795 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 796 return $result;
df6d9812 797}
798
b15511bf 799sub _add_graphml_data {
800 my( $el, $key, $value ) = @_;
b15511bf 801 return unless defined $value;
c9bf3dbf 802 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 803 $data_el->setAttribute( 'key', $key );
804 $data_el->appendText( $value );
8e1394aa 805}
806
4e5a7b2c 807=head2 as_csv
910a0a6d 808
809Returns a CSV alignment table representation of the collation graph, one
2c669bca 810row per witness (or witness uncorrected.)
910a0a6d 811
812=cut
813
814sub as_csv {
3a2ebbf4 815 my( $self ) = @_;
910a0a6d 816 my $table = $self->make_alignment_table;
817 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
818 my @result;
2c669bca 819 # Make the header row
820 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
821 push( @result, decode_utf8( $csv->string ) );
822 # Make the rest of the rows
823 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 824 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
2c669bca 825 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
826 $csv->combine( @row );
910a0a6d 827 push( @result, decode_utf8( $csv->string ) );
828 }
3a2ebbf4 829 return join( "\n", @result );
910a0a6d 830}
831
4e5a7b2c 832=head2 make_alignment_table( $use_refs, $include_witnesses )
2c669bca 833
566f4595 834Return a reference to an alignment table, in a slightly enhanced CollateX
835format which looks like this:
836
837 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 838 tokens => [ { t => "TEXT" }, ... ] },
566f4595 839 { witness => "SIG2",
4e5a7b2c 840 tokens => [ { t => "TEXT" }, ... ] },
566f4595 841 ... ],
842 length => TEXTLEN };
843
844If $use_refs is set to 1, the reading object is returned in the table
845instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 846
847If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 848keys have a true hash value will be included.
2c669bca 849
850=cut
9f3ba6f7 851
910a0a6d 852sub make_alignment_table {
08e0fb85 853 my( $self, $noderefs, $include ) = @_;
910a0a6d 854 unless( $self->linear ) {
855 warn "Need a linear graph in order to make an alignment table";
856 return;
857 }
2c669bca 858 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 859 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 860 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2c669bca 861 if( $include ) {
566f4595 862 next unless $include->{$wit->sigil};
2c669bca 863 }
eca16057 864 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 865 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
866 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
2c669bca 867 push( @{$table->{'alignment'}},
868 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 869 if( $wit->is_layered ) {
870 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
871 $wit->sigil.$self->ac_label, $wit->sigil );
872 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
2c669bca 873 push( @{$table->{'alignment'}},
874 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 875 }
876 }
2c669bca 877 return $table;
910a0a6d 878}
879
880sub _make_witness_row {
0e476982 881 my( $path, $positions, $noderefs ) = @_;
910a0a6d 882 my %char_hash;
883 map { $char_hash{$_} = undef } @$positions;
2c669bca 884 my $debug = 0;
910a0a6d 885 foreach my $rdg ( @$path ) {
eca16057 886 my $rtext = $rdg->text;
887 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 888 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 889 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
2c669bca 890 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
891 : { 't' => $rtext };
910a0a6d 892 }
893 my @row = map { $char_hash{$_} } @$positions;
eca16057 894 # Fill in lacuna markers for undef spots in the row
895 my $last_el = shift @row;
896 my @filled_row = ( $last_el );
897 foreach my $el ( @row ) {
0e476982 898 # If we are using node reference, make the lacuna node appear many times
899 # in the table. If not, use the lacuna tag.
900 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
566f4595 901 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
eca16057 902 }
903 push( @filled_row, $el );
904 $last_el = $el;
905 }
906 return @filled_row;
910a0a6d 907}
908
0e476982 909# Tiny utility function to say if a table element is a lacuna
910sub _el_is_lacuna {
911 my $el = shift;
2c669bca 912 return 1 if $el->{'t'} eq '#LACUNA#';
913 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
914 && $el->{'t'}->is_lacuna;
0e476982 915 return 0;
916}
917
910a0a6d 918# Helper to turn the witnesses along columns rather than rows. Assumes
919# equal-sized rows.
920sub _turn_table {
921 my( $table ) = @_;
922 my $result = [];
923 return $result unless scalar @$table;
924 my $nrows = scalar @{$table->[0]};
925 foreach my $idx ( 0 .. $nrows - 1 ) {
926 foreach my $wit ( 0 .. $#{$table} ) {
927 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
928 }
929 }
930 return $result;
931}
932
4e5a7b2c 933=head1 NAVIGATION METHODS
910a0a6d 934
4e5a7b2c 935=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 936
937Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 938with $last, for the witness given in $sigil. If a $backup sigil is
939specified (e.g. when walking a layered witness), it will be used wherever
940no $sigil path exists. If there is a base text reading, that will be
941used wherever no path exists for $sigil or $backup.
e2902068 942
943=cut
944
910a0a6d 945# TODO Think about returning some lazy-eval iterator.
946
e2902068 947sub reading_sequence {
948 my( $self, $start, $end, $witness, $backup ) = @_;
949
930ff666 950 $witness = $self->baselabel unless $witness;
e2902068 951 my @readings = ( $start );
952 my %seen;
953 my $n = $start;
3a2ebbf4 954 while( $n && $n->id ne $end->id ) {
955 if( exists( $seen{$n->id} ) ) {
956 warn "Detected loop at " . $n->id;
910a0a6d 957 last;
958 }
3a2ebbf4 959 $seen{$n->id} = 1;
910a0a6d 960
961 my $next = $self->next_reading( $n, $witness, $backup );
44771cf2 962 unless( $next ) {
3a2ebbf4 963 warn "Did not find any path for $witness from reading " . $n->id;
44771cf2 964 last;
965 }
910a0a6d 966 push( @readings, $next );
967 $n = $next;
e2902068 968 }
969 # Check that the last reading is our end reading.
970 my $last = $readings[$#readings];
3a2ebbf4 971 warn "Last reading found from " . $start->text .
910a0a6d 972 " for witness $witness is not the end!"
3a2ebbf4 973 unless $last->id eq $end->id;
e2902068 974
975 return @readings;
976}
977
4e5a7b2c 978=head2 next_reading( $reading, $sigil );
8e1394aa 979
4a8828f0 980Returns the reading that follows the given reading along the given witness
930ff666 981path.
8e1394aa 982
983=cut
984
4a8828f0 985sub next_reading {
e2902068 986 # Return the successor via the corresponding path.
8e1394aa 987 my $self = shift;
3a2ebbf4 988 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 989 return undef unless $answer;
3a2ebbf4 990 return $self->reading( $answer );
8e1394aa 991}
992
4e5a7b2c 993=head2 prior_reading( $reading, $sigil )
8e1394aa 994
4a8828f0 995Returns the reading that precedes the given reading along the given witness
930ff666 996path.
8e1394aa 997
998=cut
999
4a8828f0 1000sub prior_reading {
e2902068 1001 # Return the predecessor via the corresponding path.
8e1394aa 1002 my $self = shift;
3a2ebbf4 1003 my $answer = $self->_find_linked_reading( 'prior', @_ );
1004 return $self->reading( $answer );
8e1394aa 1005}
1006
4a8828f0 1007sub _find_linked_reading {
e2902068 1008 my( $self, $direction, $node, $path, $alt_path ) = @_;
1009 my @linked_paths = $direction eq 'next'
3a2ebbf4 1010 ? $self->sequence->edges_from( $node )
1011 : $self->sequence->edges_to( $node );
e2902068 1012 return undef unless scalar( @linked_paths );
8e1394aa 1013
e2902068 1014 # We have to find the linked path that contains all of the
1015 # witnesses supplied in $path.
1016 my( @path_wits, @alt_path_wits );
4e5a7b2c 1017 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1018 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1019 my $base_le;
1020 my $alt_le;
1021 foreach my $le ( @linked_paths ) {
3a2ebbf4 1022 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1023 $base_le = $le;
910a0a6d 1024 }
3a2ebbf4 1025 my @le_wits = $self->path_witnesses( $le );
1026 if( _is_within( \@path_wits, \@le_wits ) ) {
1027 # This is the right path.
1028 return $direction eq 'next' ? $le->[1] : $le->[0];
1029 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1030 $alt_le = $le;
1031 }
8e1394aa 1032 }
e2902068 1033 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1034 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1035 if $alt_le;
e2902068 1036
1037 # Got this far? Return the base path if it exists.
3a2ebbf4 1038 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1039 if $base_le;
e2902068 1040
1041 # Got this far? We have no appropriate path.
2c669bca 1042 warn "Could not find $direction node from " . $node->id
910a0a6d 1043 . " along path $path";
8e1394aa 1044 return undef;
1045}
1046
4a8828f0 1047# Some set logic.
1048sub _is_within {
1049 my( $set1, $set2 ) = @_;
7854e12e 1050 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1051 foreach my $el ( @$set1 ) {
910a0a6d 1052 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1053 }
1054 return $ret;
1055}
1056
4e5a7b2c 1057# Return the string that joins together a list of witnesses for
1058# display on a single path.
1059sub _witnesses_of_label {
1060 my( $self, $label ) = @_;
1061 my $regex = $self->wit_list_separator;
1062 my @answer = split( /\Q$regex\E/, $label );
1063 return @answer;
1064}
1065
1066
1067=head1 INITIALIZATION METHODS
1068
1069These are mostly for use by parsers.
1070
1071=head2 make_witness_path( $witness )
1072
1073Link the array of readings contained in $witness->path (and in
1074$witness->uncorrected_path if it exists) into collation paths.
1075Clear out the arrays when finished.
de51424a 1076
4e5a7b2c 1077=head2 make_witness_paths
1078
1079Call make_witness_path for all witnesses in the tradition.
1080
1081=cut
930ff666 1082
7e450e44 1083# For use when a collation is constructed from a base text and an apparatus.
1084# We have the sequences of readings and just need to add path edges.
1f7aa795 1085# When we are done, clear out the witness path attributes, as they are no
1086# longer needed.
1087# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1088
6a222840 1089sub make_witness_paths {
1090 my( $self ) = @_;
910a0a6d 1091 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1092 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1093 $self->make_witness_path( $wit );
7854e12e 1094 }
7854e12e 1095}
1096
6a222840 1097sub make_witness_path {
7854e12e 1098 my( $self, $wit ) = @_;
1099 my @chain = @{$wit->path};
15d2d3df 1100 my $sig = $wit->sigil;
7854e12e 1101 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1102 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1103 }
1f7aa795 1104 if( $wit->is_layered ) {
d9e873d0 1105 @chain = @{$wit->uncorrected_path};
1106 foreach my $idx( 0 .. $#chain-1 ) {
1107 my $source = $chain[$idx];
1108 my $target = $chain[$idx+1];
1109 $self->add_path( $source, $target, $sig.$self->ac_label )
1110 unless $self->has_path( $source, $target, $sig );
1111 }
15d2d3df 1112 }
1f7aa795 1113 $wit->clear_path;
1114 $wit->clear_uncorrected_path;
e2902068 1115}
1116
4e5a7b2c 1117=head2 calculate_ranks
1118
1119Calculate the reading ranks (that is, their aligned positions relative
1120to each other) for the graph. This can only be called on linear collations.
1121
1122=cut
1123
910a0a6d 1124sub calculate_ranks {
1125 my $self = shift;
1126 # Walk a version of the graph where every node linked by a relationship
1127 # edge is fundamentally the same node, and do a topological ranking on
1128 # the nodes in this graph.
c9bf3dbf 1129 my $topo_graph = Graph->new();
910a0a6d 1130 my %rel_containers;
1131 my $rel_ctr = 0;
1132 # Add the nodes
1133 foreach my $r ( $self->readings ) {
3a2ebbf4 1134 next if exists $rel_containers{$r->id};
910a0a6d 1135 my @rels = $r->related_readings( 'colocated' );
1136 if( @rels ) {
1137 # Make a relationship container.
1138 push( @rels, $r );
c9bf3dbf 1139 my $rn = 'rel_container_' . $rel_ctr++;
1140 $topo_graph->add_vertex( $rn );
910a0a6d 1141 foreach( @rels ) {
3a2ebbf4 1142 $rel_containers{$_->id} = $rn;
910a0a6d 1143 }
1144 } else {
1145 # Add a new node to mirror the old node.
3a2ebbf4 1146 $rel_containers{$r->id} = $r->id;
1147 $topo_graph->add_vertex( $r->id );
910a0a6d 1148 }
4a8828f0 1149 }
3a1f2523 1150
3a2ebbf4 1151 # Add the edges.
910a0a6d 1152 foreach my $r ( $self->readings ) {
3a2ebbf4 1153 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1154 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1155 $rel_containers{$n} );
4e5a7b2c 1156 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1157 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1158 }
1159 }
1160
1161 # Now do the rankings, starting with the start node.
3a2ebbf4 1162 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1163 my $node_ranks = { $topo_start => 0 };
910a0a6d 1164 my @curr_origin = ( $topo_start );
1165 # A little iterative function.
1166 while( @curr_origin ) {
c9bf3dbf 1167 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1168 }
1169 # Transfer our rankings from the topological graph to the real one.
1170 foreach my $r ( $self->readings ) {
3a2ebbf4 1171 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1172 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1173 } else {
3a2ebbf4 1174 die "No rank calculated for node " . $r->id
67da8d6c 1175 . " - do you have a cycle in the graph?";
1176 }
de51424a 1177 }
8e1394aa 1178}
3a1f2523 1179
910a0a6d 1180sub _assign_rank {
c9bf3dbf 1181 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1182 # Look at each of the children of @current_nodes. If all the child's
1183 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1184 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1185 # parent gets a rank.
910a0a6d 1186 my @next_nodes;
1187 foreach my $c ( @current_nodes ) {
c9bf3dbf 1188 warn "Current reading $c has no rank!"
1189 unless exists $node_ranks->{$c};
1190 # print STDERR "Looking at child of node $c, rank "
1191 # . $node_ranks->{$c} . "\n";
1192 foreach my $child ( $graph->successors( $c ) ) {
1193 next if exists $node_ranks->{$child};
910a0a6d 1194 my $highest_rank = -1;
1195 my $skip = 0;
c9bf3dbf 1196 foreach my $parent ( $graph->predecessors( $child ) ) {
1197 if( exists $node_ranks->{$parent} ) {
1198 $highest_rank = $node_ranks->{$parent}
1199 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1200 } else {
1201 $skip = 1;
1202 last;
1203 }
1204 }
1205 next if $skip;
c9bf3dbf 1206 my $c_rank = $highest_rank + 1;
1207 # print STDERR "Assigning rank $c_rank to node $child \n";
1208 $node_ranks->{$child} = $c_rank;
910a0a6d 1209 push( @next_nodes, $child );
1210 }
1211 }
1212 return @next_nodes;
4cdd82f1 1213}
910a0a6d 1214
4e5a7b2c 1215=head2 flatten_ranks
1216
1217A convenience method for parsing collation data. Searches the graph for readings
1218with the same text at the same rank, and merges any that are found.
1219
1220=cut
1221
0e476982 1222sub flatten_ranks {
1223 my $self = shift;
1224 my %unique_rank_rdg;
1225 foreach my $rdg ( $self->readings ) {
1226 next unless $rdg->has_rank;
1227 my $key = $rdg->rank . "||" . $rdg->text;
1228 if( exists $unique_rank_rdg{$key} ) {
1229 # Combine!
56eefa04 1230 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1231 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1232 } else {
1233 $unique_rank_rdg{$key} = $rdg;
1234 }
1235 }
1236}
1237
1238
4e5a7b2c 1239=head1 UTILITY FUNCTIONS
1240
1241=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1242
4e5a7b2c 1243Find the last reading that occurs in sequence before both the given readings.
1244
1245=head2 common_successor( $reading_a, $reading_b )
1246
1247Find the first reading that occurs in sequence after both the given readings.
1248
22222af9 1249=begin testing
1250
1251use Text::Tradition;
1252
1253my $cxfile = 't/data/Collatex-16.xml';
1254my $t = Text::Tradition->new(
1255 'name' => 'inline',
1256 'input' => 'CollateX',
1257 'file' => $cxfile,
1258 );
1259my $c = $t->collation;
1260
4e5a7b2c 1261is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1262 'n20', "Found correct common predecessor" );
4e5a7b2c 1263is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1264 '#END#', "Found correct common successor" );
1265
4e5a7b2c 1266is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1267 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1268is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1269 '#END#', "Found correct common successor for readings on same path" );
1270
1271=end testing
1272
1273=cut
1274
1275## Return the closest reading that is a predecessor of both the given readings.
1276sub common_predecessor {
1277 my $self = shift;
4e5a7b2c 1278 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1279 return $self->common_in_path( $r1, $r2, 'predecessors' );
22222af9 1280}
1281
1282sub common_successor {
1283 my $self = shift;
4e5a7b2c 1284 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1285 return $self->common_in_path( $r1, $r2, 'successors' );
22222af9 1286}
1287
1288sub common_in_path {
1289 my( $self, $r1, $r2, $dir ) = @_;
1290 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1291 $iter = $self->end->rank - $iter if $dir eq 'successors';
1292 my @candidates;
1293 my @last_checked = ( $r1, $r2 );
1294 my %all_seen;
1295 while( !@candidates ) {
1296 my @new_lc;
1297 foreach my $lc ( @last_checked ) {
1298 foreach my $p ( $lc->$dir ) {
1299 if( $all_seen{$p->id} ) {
1300 push( @candidates, $p );
1301 } else {
1302 $all_seen{$p->id} = 1;
1303 push( @new_lc, $p );
1304 }
1305 }
1306 }
1307 @last_checked = @new_lc;
1308 }
1309 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1310 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1311}
1312
dd3b58b0 1313no Moose;
1314__PACKAGE__->meta->make_immutable;
e867486f 1315
1316=head1 BUGS / TODO
1317
1318=over
1319
4e5a7b2c 1320=item * Get rid of $backup in reading_sequence
e867486f 1321
1322=back