Merge branch 'master' of github.com:tla/stemmatology
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
c9bf3dbf 5use Graph;
8e1394aa 6use IPC::Run qw( run binary );
910a0a6d 7use Text::CSV_XS;
b15511bf 8use Text::Tradition::Collation::Reading;
22222af9 9use Text::Tradition::Collation::RelationshipStore;
df6d9812 10use XML::LibXML;
dd3b58b0 11use Moose;
12
3a2ebbf4 13has 'sequence' => (
d047cd52 14 is => 'ro',
3a2ebbf4 15 isa => 'Graph',
16 default => sub { Graph->new() },
d047cd52 17 handles => {
3a2ebbf4 18 paths => 'edges',
d047cd52 19 },
d047cd52 20 );
3a2ebbf4 21
22has 'relations' => (
23 is => 'ro',
22222af9 24 isa => 'Text::Tradition::Collation::RelationshipStore',
25 handles => {
26 relationships => 'relationships',
27 related_readings => 'related_readings',
28 },
29 writer => '_set_relations',
3a2ebbf4 30 );
dd3b58b0 31
3a2ebbf4 32has 'tradition' => (
33 is => 'ro',
d047cd52 34 isa => 'Text::Tradition',
8d9a1cd8 35 weak_ref => 1,
d047cd52 36 );
dd3b58b0 37
3a2ebbf4 38has 'readings' => (
39 isa => 'HashRef[Text::Tradition::Collation::Reading]',
40 traits => ['Hash'],
41 handles => {
42 reading => 'get',
43 _add_reading => 'set',
44 del_reading => 'delete',
45 has_reading => 'exists',
46 readings => 'values',
47 },
48 default => sub { {} },
49 );
910a0a6d 50
4a8828f0 51has 'wit_list_separator' => (
7854e12e 52 is => 'rw',
53 isa => 'Str',
54 default => ', ',
55 );
56
57has 'baselabel' => (
58 is => 'rw',
59 isa => 'Str',
60 default => 'base text',
61 );
4a8828f0 62
15d2d3df 63has 'linear' => (
64 is => 'rw',
65 isa => 'Bool',
66 default => 1,
67 );
c84275ff 68
ef9d481f 69has 'ac_label' => (
70 is => 'rw',
71 isa => 'Str',
72 default => ' (a.c.)',
73 );
3a2ebbf4 74
75has 'start' => (
76 is => 'ro',
77 isa => 'Text::Tradition::Collation::Reading',
78 writer => '_set_start',
79 weak_ref => 1,
80 );
81
82has 'end' => (
83 is => 'ro',
84 isa => 'Text::Tradition::Collation::Reading',
85 writer => '_set_end',
86 weak_ref => 1,
87 );
1f563ac3 88
4e5a7b2c 89=head1 NAME
90
91Text::Tradition::Collation - a software model for a text collation
92
93=head1 SYNOPSIS
94
95 use Text::Tradition;
96 my $t = Text::Tradition->new(
97 'name' => 'this is a text',
98 'input' => 'TEI',
99 'file' => '/path/to/tei_parallel_seg_file.xml' );
100
101 my $c = $t->collation;
102 my @readings = $c->readings;
103 my @paths = $c->paths;
104 my @relationships = $c->relationships;
105
106 my $svg_variant_graph = $t->collation->as_svg();
107
108=head1 DESCRIPTION
109
110Text::Tradition is a library for representation and analysis of collated
111texts, particularly medieval ones. The Collation is the central feature of
112a Tradition, where the text, its sequence of readings, and its relationships
113between readings are actually kept.
114
115=head1 CONSTRUCTOR
116
117=head2 new
118
119The constructor. Takes a hash or hashref of the following arguments:
120
121=over
122
123=item * tradition - The Text::Tradition object to which the collation
124belongs. Required.
125
126=item * linear - Whether the collation should be linear; that is, whether
127transposed readings should be treated as two linked readings rather than one,
128and therefore whether the collation graph is acyclic. Defaults to true.
129
4e5a7b2c 130=item * baselabel - The default label for the path taken by a base text
131(if any). Defaults to 'base text'.
132
133=item * wit_list_separator - The string to join a list of witnesses for
134purposes of making labels in display graphs. Defaults to ', '.
135
136=item * ac_label - The extra label to tack onto a witness sigil when
137representing another layer of path for the given witness - that is, when
138a text has more than one possible reading due to scribal corrections or
139the like. Defaults to ' (a.c.)'.
140
141=back
142
143=head1 ACCESSORS
144
145=head2 tradition
146
147=head2 linear
148
4e5a7b2c 149=head2 wit_list_separator
150
151=head2 baselabel
152
153=head2 ac_label
154
155Simple accessors for collation attributes.
156
157=head2 start
158
159The meta-reading at the start of every witness path.
160
161=head2 end
162
163The meta-reading at the end of every witness path.
164
165=head2 readings
166
167Returns all Reading objects in the graph.
168
169=head2 reading( $id )
170
171Returns the Reading object corresponding to the given ID.
172
173=head2 add_reading( $reading_args )
174
175Adds a new reading object to the collation.
176See L<Text::Tradition::Collation::Reading> for the available arguments.
177
178=head2 del_reading( $object_or_id )
179
180Removes the given reading from the collation, implicitly removing its
181paths and relationships.
182
183=head2 merge_readings( $main, $second )
184
185Merges the $second reading into the $main one.
186The arguments may be either readings or reading IDs.
187
188=head2 has_reading( $id )
189
190Predicate to see whether a given reading ID is in the graph.
191
192=head2 reading_witnesses( $object_or_id )
193
194Returns a list of sigils whose witnesses contain the reading.
195
196=head2 paths
197
198Returns all reading paths within the document - that is, all edges in the
199collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
200
201=head2 add_path( $source, $target, $sigil )
202
203Links the given readings in the collation in sequence, under the given witness
204sigil. The readings may be specified by object or ID.
205
206=head2 del_path( $source, $target, $sigil )
207
208Links the given readings in the collation in sequence, under the given witness
209sigil. The readings may be specified by object or ID.
210
211=head2 has_path( $source, $target );
212
213Returns true if the two readings are linked in sequence in any witness.
214The readings may be specified by object or ID.
215
216=head2 relationships
217
218Returns all Relationship objects in the collation.
219
220=head2 add_relationship( $reading, $other_reading, $options )
221
222Adds a new relationship of the type given in $options between the two readings,
223which may be specified by object or ID. Returns a value of ( $status, @vectors)
224where $status is true on success, and @vectors is a list of relationship edges
225that were ultimately added.
226See L<Text::Tradition::Collation::Relationship> for the available options.
227
228=cut
dd3b58b0 229
d047cd52 230sub BUILD {
3a2ebbf4 231 my $self = shift;
22222af9 232 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 233 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
234 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 235}
784877d9 236
3a2ebbf4 237### Reading construct/destruct functions
238
239sub add_reading {
240 my( $self, $reading ) = @_;
241 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
242 my %args = %$reading;
243 $reading = Text::Tradition::Collation::Reading->new(
244 'collation' => $self,
245 %args );
246 }
247 # First check to see if a reading with this ID exists.
248 if( $self->reading( $reading->id ) ) {
249 warn "Collation already has a reading with id " . $reading->id;
250 return undef;
251 }
252 $self->_add_reading( $reading->id => $reading );
253 # Once the reading has been added, put it in both graphs.
254 $self->sequence->add_vertex( $reading->id );
22222af9 255 $self->relations->add_reading( $reading->id );
3a2ebbf4 256 return $reading;
eca16057 257};
258
3a2ebbf4 259around del_reading => sub {
260 my $orig = shift;
261 my $self = shift;
262 my $arg = shift;
263
264 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
265 $arg = $arg->id;
266 }
3a2ebbf4 267 # Remove the reading from the graphs.
268 $self->sequence->delete_vertex( $arg );
22222af9 269 $self->relations->delete_reading( $arg );
3a2ebbf4 270
271 # Carry on.
272 $self->$orig( $arg );
273};
7854e12e 274
3a2ebbf4 275# merge_readings( $main, $to_be_deleted );
7854e12e 276
3a2ebbf4 277sub merge_readings {
278 my $self = shift;
279
280 # We only need the IDs for adding paths to the graph, not the reading
281 # objects themselves.
49d4f2ac 282 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
3a2ebbf4 283
284 # The kept reading should inherit the paths and the relationships
285 # of the deleted reading.
286 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
287 my @vector = ( $kept );
288 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
289 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 290 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 291 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
292 $self->sequence->add_edge( @vector );
293 my $fwits = $self->sequence->get_edge_attributes( @vector );
294 @wits{keys %$fwits} = values %$fwits;
295 $self->sequence->set_edge_attributes( @vector, \%wits );
296 }
22222af9 297 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 298
299 # Do the deletion deed.
49d4f2ac 300 if( $combine_char ) {
301 my $kept_obj = $self->reading( $kept );
302 my $new_text = join( $combine_char, $kept_obj->text,
303 $self->reading( $deleted )->text );
304 $kept_obj->alter_text( $new_text );
305 }
3a2ebbf4 306 $self->del_reading( $deleted );
307}
7854e12e 308
3265b0ce 309
3a2ebbf4 310# Helper function for manipulating the graph.
311sub _stringify_args {
312 my( $self, $first, $second, $arg ) = @_;
313 $first = $first->id
314 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
315 $second = $second->id
316 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
317 return( $first, $second, $arg );
318}
df6d9812 319
4e5a7b2c 320# Helper function for manipulating the graph.
321sub _objectify_args {
322 my( $self, $first, $second, $arg ) = @_;
323 $first = $self->reading( $first )
324 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
325 $second = $self->reading( $second )
326 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
327 return( $first, $second, $arg );
328}
3a2ebbf4 329### Path logic
330
331sub add_path {
332 my $self = shift;
333
334 # We only need the IDs for adding paths to the graph, not the reading
335 # objects themselves.
336 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
337
338 # Connect the readings
339 $self->sequence->add_edge( $source, $target );
340 # Note the witness in question
341 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 342};
343
3a2ebbf4 344sub del_path {
345 my $self = shift;
49d4f2ac 346 my @args;
347 if( ref( $_[0] ) eq 'ARRAY' ) {
348 my $e = shift @_;
349 @args = ( @$e, @_ );
350 } else {
351 @args = @_;
352 }
3a2ebbf4 353
354 # We only need the IDs for adding paths to the graph, not the reading
355 # objects themselves.
49d4f2ac 356 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 357
358 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 359 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 360 }
361 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
362 $self->sequence->delete_edge( $source, $target );
363 }
784877d9 364}
365
3a2ebbf4 366
15d2d3df 367# Extra graph-alike utility
368sub has_path {
3a2ebbf4 369 my $self = shift;
370 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
371 return undef unless $self->sequence->has_edge( $source, $target );
372 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 373}
374
4e5a7b2c 375=head2 clear_witness( @sigil_list )
3a2ebbf4 376
4e5a7b2c 377Clear the given witnesses out of the collation entirely, removing references
378to them in paths, and removing readings that belong only to them. Should only
379be called via $tradition->del_witness.
3a2ebbf4 380
381=cut
382
4e5a7b2c 383sub clear_witness {
384 my( $self, @sigils ) = @_;
385
386 # Clear the witness(es) out of the paths
387 foreach my $e ( $self->paths ) {
388 foreach my $sig ( @sigils ) {
389 $self->del_path( $e, $sig );
390 }
391 }
392
393 # Clear out the newly unused readings
394 foreach my $r ( $self->readings ) {
395 unless( $self->reading_witnesses( $r ) ) {
396 $self->del_reading( $r );
397 }
398 }
399}
3a2ebbf4 400
401sub add_relationship {
402 my $self = shift;
22222af9 403 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
64ae6270 404 my( $ret, @vectors ) = $self->relations->add_relationship( $source,
405 $self->reading( $source ), $target, $self->reading( $target ), $opts );
406 # Force a full rank recalculation every time. Yuck.
407 $self->calculate_ranks() if $ret && $self->end->has_rank;
408 return( $ret, @vectors );
22222af9 409}
ef9d481f 410
22222af9 411=head2 reading_witnesses( $reading )
910a0a6d 412
22222af9 413Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 414
22222af9 415=cut
1d310495 416
1d310495 417sub reading_witnesses {
418 my( $self, $reading ) = @_;
419 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 420 # arbitrarily chosen "incoming". Thus, special-case the start node.
421 if( $reading eq $self->start ) {
422 return map { $_->sigil } $self->tradition->witnesses;
423 }
1d310495 424 my %all_witnesses;
425 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
426 my $wits = $self->sequence->get_edge_attributes( @$e );
427 @all_witnesses{ keys %$wits } = 1;
428 }
429 return keys %all_witnesses;
910a0a6d 430}
431
4e5a7b2c 432=head1 OUTPUT METHODS
8e1394aa 433
4e5a7b2c 434=head2 as_svg
8e1394aa 435
0068967c 436Returns an SVG string that represents the graph, via as_dot and graphviz.
8e1394aa 437
438=cut
439
440sub as_svg {
3a2ebbf4 441 my( $self ) = @_;
442
8e1394aa 443 my @cmd = qw/dot -Tsvg/;
444 my( $svg, $err );
910a0a6d 445 my $dotfile = File::Temp->new();
d9e873d0 446 ## TODO REMOVE
eca16057 447 # $dotfile->unlink_on_destroy(0);
910a0a6d 448 binmode $dotfile, ':utf8';
449 print $dotfile $self->as_dot();
450 push( @cmd, $dotfile->filename );
451 run( \@cmd, ">", binary(), \$svg );
452 $svg = decode_utf8( $svg );
8e1394aa 453 return $svg;
454}
455
4e5a7b2c 456=head2 svg_subgraph( $from, $to )
b22576c6 457
458Returns an SVG string that represents the portion of the graph given by the
459specified range. The $from and $to variables refer to ranks within the graph.
460
461=cut
462
463sub svg_subgraph {
464 my( $self, $from, $to ) = @_;
465
466 my $dot = $self->as_dot( $from, $to );
467 unless( $dot ) {
468 warn "Could not output a graph with range $from - $to";
469 return;
470 }
471
472 my @cmd = qw/dot -Tsvg/;
473 my( $svg, $err );
474 my $dotfile = File::Temp->new();
475 ## TODO REMOVE
476 # $dotfile->unlink_on_destroy(0);
477 binmode $dotfile, ':utf8';
478 print $dotfile $dot;
479 push( @cmd, $dotfile->filename );
480 run( \@cmd, ">", binary(), \$svg );
481 $svg = decode_utf8( $svg );
482 return $svg;
483}
484
485
4e5a7b2c 486=head2 as_dot( $from, $to )
df6d9812 487
488Returns a string that is the collation graph expressed in dot
4e5a7b2c 489(i.e. GraphViz) format. If $from or $to is passed, as_dot creates
490a subgraph rather than the entire graph.
df6d9812 491
492=cut
493
494sub as_dot {
b22576c6 495 my( $self, $startrank, $endrank ) = @_;
496
497 # Check the arguments
498 if( $startrank ) {
499 return if $endrank && $startrank > $endrank;
500 return if $startrank > $self->end->rank;
501 }
502 if( defined $endrank ) {
503 return if $endrank < 0;
f1b3b33a 504 $endrank = undef if $endrank == $self->end->rank;
b22576c6 505 }
506
df6d9812 507 # TODO consider making some of these things configurable
67da8d6c 508 my $graph_name = $self->tradition->name;
509 $graph_name =~ s/[^\w\s]//g;
510 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 511
512 my %graph_attrs = (
513 'rankdir' => 'LR',
514 'bgcolor' => 'none',
515 );
516 my %node_attrs = (
517 'fontsize' => 11,
518 'fillcolor' => 'white',
519 'style' => 'filled',
520 'shape' => 'ellipse'
521 );
522 my %edge_attrs = (
523 'arrowhead' => 'open',
524 'color' => '#000000',
525 'fontcolor' => '#000000',
526 );
527
67da8d6c 528 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 529 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
530 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 531
b22576c6 532 # Output substitute start/end readings if necessary
533 if( $startrank ) {
534 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
535 }
536 if( $endrank ) {
537 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
538 }
539 my %used; # Keep track of the readings that actually appear in the graph
df6d9812 540 foreach my $reading ( $self->readings ) {
b22576c6 541 # Only output readings within our rank range.
542 next if $startrank && $reading->rank < $startrank;
543 next if $endrank && $reading->rank > $endrank;
544 $used{$reading->id} = 1;
910a0a6d 545 # Need not output nodes without separate labels
3a2ebbf4 546 next if $reading->id eq $reading->text;
30f0df34 547 my $label = $reading->text;
8f9cab7b 548 $label =~ s/\"/\\\"/g;
549 $dot .= sprintf( "\t\"%s\" [ label=\"%s\" ];\n", $reading->id, $label );
df6d9812 550 }
3a2ebbf4 551
b22576c6 552 # Add the real edges
553 my @edges = $self->paths;
3bdec618 554 my( %substart, %subend );
b22576c6 555 foreach my $edge ( @edges ) {
556 # Do we need to output this edge?
557 if( $used{$edge->[0]} && $used{$edge->[1]} ) {;
f13b5582 558 my $label = $self->path_display_label( $self->path_witnesses( $edge ) );
559 my $variables = { %edge_attrs, 'label' => $label };
b22576c6 560 # Account for the rank gap if necessary
561 my $rankgap = $self->reading( $edge->[1] )->rank
562 - $self->reading( $edge->[0] )->rank;
f13b5582 563 $variables->{'minlen'} = $rankgap if $rankgap > 1;
564 my $varopts = _dot_attr_string( $variables );
565 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
566 $edge->[0], $edge->[1], $varopts );
3bdec618 567 } elsif( $used{$edge->[0]} ) {
568 $subend{$edge->[0]} = 1;
569 } elsif( $used{$edge->[1]} ) {
570 $substart{$edge->[1]} = 1;
b22576c6 571 }
df6d9812 572 }
3bdec618 573 # Add substitute start and end edges if necessary
574 foreach my $node ( keys %substart ) {
575 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 576 my $variables = { %edge_attrs, 'label' => $witstr };
577 my $varopts = _dot_attr_string( $variables );
578 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 579 }
580 foreach my $node ( keys %subend ) {
581 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 582 my $variables = { %edge_attrs, 'label' => $witstr };
583 my $varopts = _dot_attr_string( $variables );
584 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 585 }
586
df6d9812 587 $dot .= "}\n";
588 return $dot;
589}
590
f13b5582 591sub _dot_attr_string {
592 my( $hash ) = @_;
593 my @attrs;
594 foreach my $k ( sort keys %$hash ) {
595 my $v = $hash->{$k};
596 push( @attrs, $k.'="'.$v.'"' );
597 }
598 return( '[ ' . join( ', ', @attrs ) . ' ]' );
599}
600
3a2ebbf4 601sub path_witnesses {
602 my( $self, @edge ) = @_;
603 # If edge is an arrayref, cope.
604 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
605 my $e = shift @edge;
606 @edge = @$e;
607 }
608 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
609 return sort @wits;
610}
611
8f9cab7b 612sub path_display_label {
3bdec618 613 my( $self, @wits ) = @_;
8f9cab7b 614 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
615 if( scalar @wits > $maj ) {
f13b5582 616 # TODO break out a.c. wits
8f9cab7b 617 return 'majority';
618 } else {
619 return join( ', ', @wits );
620 }
621}
622
623
4e5a7b2c 624=head2 as_graphml
8e1394aa 625
4e5a7b2c 626Returns a GraphML representation of the collation. The GraphML will contain
627two graphs. The first expresses the attributes of the readings and the witness
628paths that link them; the second expresses the relationships that link the
629readings. This is the native transfer format for a tradition.
8e1394aa 630
56eefa04 631=begin testing
632
633use Text::Tradition;
634
635my $READINGS = 311;
636my $PATHS = 361;
637
638my $datafile = 't/data/florilegium_tei_ps.xml';
639my $tradition = Text::Tradition->new( 'input' => 'TEI',
640 'name' => 'test0',
641 'file' => $datafile,
642 'linear' => 1 );
643
644ok( $tradition, "Got a tradition object" );
645is( scalar $tradition->witnesses, 13, "Found all witnesses" );
646ok( $tradition->collation, "Tradition has a collation" );
647
648my $c = $tradition->collation;
649is( scalar $c->readings, $READINGS, "Collation has all readings" );
650is( scalar $c->paths, $PATHS, "Collation has all paths" );
651is( scalar $c->relationships, 0, "Collation has all relationships" );
652
653# Add a few relationships
654$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
655$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
656$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
657
658# Now write it to GraphML and parse it again.
659
660my $graphml = $c->as_graphml;
661my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
662is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
663is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
664is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
665
666=end testing
667
8e1394aa 668=cut
669
670sub as_graphml {
3a2ebbf4 671 my( $self ) = @_;
8e1394aa 672
673 # Some namespaces
674 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
675 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
676 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 677 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 678
679 # Create the document and root node
680 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
681 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
682 $graphml->setDocumentElement( $root );
683 $root->setNamespace( $xsi_ns, 'xsi', 0 );
684 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
685
e309421a 686 # Add the data keys for the graph
687 my %graph_data_keys;
688 my $gdi = 0;
1d310495 689 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 690 foreach my $datum ( @graph_attributes ) {
691 $graph_data_keys{$datum} = 'dg'.$gdi++;
692 my $key = $root->addNewChild( $graphml_ns, 'key' );
693 $key->setAttribute( 'attr.name', $datum );
694 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
695 $key->setAttribute( 'for', 'graph' );
696 $key->setAttribute( 'id', $graph_data_keys{$datum} );
697 }
f6066bac 698
8e1394aa 699 # Add the data keys for nodes
ef9d481f 700 my %node_data_keys;
701 my $ndi = 0;
3a2ebbf4 702 my %node_data = (
703 id => 'string',
255875b8 704 text => 'string',
3a2ebbf4 705 rank => 'string',
706 is_start => 'boolean',
707 is_end => 'boolean',
708 is_lacuna => 'boolean',
709 );
710 foreach my $datum ( keys %node_data ) {
910a0a6d 711 $node_data_keys{$datum} = 'dn'.$ndi++;
712 my $key = $root->addNewChild( $graphml_ns, 'key' );
713 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 714 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 715 $key->setAttribute( 'for', 'node' );
716 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 717 }
718
df6d9812 719 # Add the data keys for edges, i.e. witnesses
ef9d481f 720 my $edi = 0;
721 my %edge_data_keys;
3a2ebbf4 722 my %edge_data = (
f523c7a8 723 class => 'string', # Class, deprecated soon
3a2ebbf4 724 witness => 'string', # ID/label for a path
725 relationship => 'string', # ID/label for a relationship
726 extra => 'boolean', # Path key
c84275ff 727 scope => 'string', # Relationship key
3a2ebbf4 728 non_correctable => 'boolean', # Relationship key
729 non_independent => 'boolean', # Relationship key
730 );
731 foreach my $datum ( keys %edge_data ) {
732 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 733 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 734 $key->setAttribute( 'attr.name', $datum );
735 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 736 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 737 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 738 }
3a2ebbf4 739
22222af9 740 # Add the collation graph itself
2c669bca 741 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
742 $sgraph->setAttribute( 'edgedefault', 'directed' );
743 $sgraph->setAttribute( 'id', $self->tradition->name );
744 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
745 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
746 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
747 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
748 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 749
e309421a 750 # Collation attribute data
751 foreach my $datum ( @graph_attributes ) {
2c669bca 752 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
753 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 754 }
8e1394aa 755
756 my $node_ctr = 0;
757 my %node_hash;
22222af9 758 # Add our readings to the graph
3a2ebbf4 759 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 760 # Add to the main graph
761 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 762 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 763 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 764 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 765 foreach my $d ( keys %node_data ) {
766 my $nval = $n->$d;
767 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
768 if defined $nval;
769 }
b15511bf 770 }
771
2c669bca 772 # Add the path edges to the sequence graph
df6d9812 773 my $edge_ctr = 0;
3a2ebbf4 774 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
775 # We add an edge in the graphml for every witness in $e.
776 foreach my $wit ( $self->path_witnesses( $e ) ) {
777 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
778 $node_hash{ $e->[0] },
779 $node_hash{ $e->[1] } );
2c669bca 780 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 781 $edge_el->setAttribute( 'source', $from );
782 $edge_el->setAttribute( 'target', $to );
783 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 784
785 # It's a witness path, so add the witness
786 my $base = $wit;
787 my $key = $edge_data_keys{'witness'};
788 # Is this an ante-corr witness?
789 my $aclabel = $self->ac_label;
790 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
791 # Keep the base witness
792 $base = $1;
793 # ...and record that this is an 'extra' reading path
794 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
795 }
796 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 797 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 798 }
799 }
800
22222af9 801 # Add the relationship graph to the XML
2626f709 802 $self->relations->as_graphml( $graphml_ns, $root, \%node_hash,
803 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 804
94c00c71 805 # Save and return the thing
806 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 807 return $result;
df6d9812 808}
809
b15511bf 810sub _add_graphml_data {
811 my( $el, $key, $value ) = @_;
b15511bf 812 return unless defined $value;
c9bf3dbf 813 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 814 $data_el->setAttribute( 'key', $key );
815 $data_el->appendText( $value );
8e1394aa 816}
817
4e5a7b2c 818=head2 as_csv
910a0a6d 819
820Returns a CSV alignment table representation of the collation graph, one
2c669bca 821row per witness (or witness uncorrected.)
910a0a6d 822
823=cut
824
825sub as_csv {
3a2ebbf4 826 my( $self ) = @_;
910a0a6d 827 my $table = $self->make_alignment_table;
828 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
829 my @result;
2c669bca 830 # Make the header row
831 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
832 push( @result, decode_utf8( $csv->string ) );
833 # Make the rest of the rows
834 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 835 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
2c669bca 836 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
837 $csv->combine( @row );
910a0a6d 838 push( @result, decode_utf8( $csv->string ) );
839 }
3a2ebbf4 840 return join( "\n", @result );
910a0a6d 841}
842
4e5a7b2c 843=head2 make_alignment_table( $use_refs, $include_witnesses )
2c669bca 844
566f4595 845Return a reference to an alignment table, in a slightly enhanced CollateX
846format which looks like this:
847
848 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 849 tokens => [ { t => "TEXT" }, ... ] },
566f4595 850 { witness => "SIG2",
4e5a7b2c 851 tokens => [ { t => "TEXT" }, ... ] },
566f4595 852 ... ],
853 length => TEXTLEN };
854
855If $use_refs is set to 1, the reading object is returned in the table
856instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 857
858If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 859keys have a true hash value will be included.
2c669bca 860
861=cut
9f3ba6f7 862
910a0a6d 863sub make_alignment_table {
08e0fb85 864 my( $self, $noderefs, $include ) = @_;
910a0a6d 865 unless( $self->linear ) {
866 warn "Need a linear graph in order to make an alignment table";
867 return;
868 }
2c669bca 869 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 870 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 871 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2c669bca 872 if( $include ) {
566f4595 873 next unless $include->{$wit->sigil};
2c669bca 874 }
eca16057 875 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 876 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
877 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
2c669bca 878 push( @{$table->{'alignment'}},
879 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 880 if( $wit->is_layered ) {
881 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
882 $wit->sigil.$self->ac_label, $wit->sigil );
883 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
2c669bca 884 push( @{$table->{'alignment'}},
885 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 886 }
887 }
2c669bca 888 return $table;
910a0a6d 889}
890
891sub _make_witness_row {
0e476982 892 my( $path, $positions, $noderefs ) = @_;
910a0a6d 893 my %char_hash;
894 map { $char_hash{$_} = undef } @$positions;
2c669bca 895 my $debug = 0;
910a0a6d 896 foreach my $rdg ( @$path ) {
eca16057 897 my $rtext = $rdg->text;
898 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 899 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 900 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
2c669bca 901 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
902 : { 't' => $rtext };
910a0a6d 903 }
904 my @row = map { $char_hash{$_} } @$positions;
eca16057 905 # Fill in lacuna markers for undef spots in the row
906 my $last_el = shift @row;
907 my @filled_row = ( $last_el );
908 foreach my $el ( @row ) {
0e476982 909 # If we are using node reference, make the lacuna node appear many times
910 # in the table. If not, use the lacuna tag.
911 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
566f4595 912 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
eca16057 913 }
914 push( @filled_row, $el );
915 $last_el = $el;
916 }
917 return @filled_row;
910a0a6d 918}
919
0e476982 920# Tiny utility function to say if a table element is a lacuna
921sub _el_is_lacuna {
922 my $el = shift;
2c669bca 923 return 1 if $el->{'t'} eq '#LACUNA#';
924 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
925 && $el->{'t'}->is_lacuna;
0e476982 926 return 0;
927}
928
910a0a6d 929# Helper to turn the witnesses along columns rather than rows. Assumes
930# equal-sized rows.
931sub _turn_table {
932 my( $table ) = @_;
933 my $result = [];
934 return $result unless scalar @$table;
935 my $nrows = scalar @{$table->[0]};
936 foreach my $idx ( 0 .. $nrows - 1 ) {
937 foreach my $wit ( 0 .. $#{$table} ) {
938 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
939 }
940 }
941 return $result;
942}
943
4e5a7b2c 944=head1 NAVIGATION METHODS
910a0a6d 945
4e5a7b2c 946=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 947
948Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 949with $last, for the witness given in $sigil. If a $backup sigil is
950specified (e.g. when walking a layered witness), it will be used wherever
951no $sigil path exists. If there is a base text reading, that will be
952used wherever no path exists for $sigil or $backup.
e2902068 953
954=cut
955
910a0a6d 956# TODO Think about returning some lazy-eval iterator.
957
e2902068 958sub reading_sequence {
959 my( $self, $start, $end, $witness, $backup ) = @_;
960
930ff666 961 $witness = $self->baselabel unless $witness;
e2902068 962 my @readings = ( $start );
963 my %seen;
964 my $n = $start;
3a2ebbf4 965 while( $n && $n->id ne $end->id ) {
966 if( exists( $seen{$n->id} ) ) {
967 warn "Detected loop at " . $n->id;
910a0a6d 968 last;
969 }
3a2ebbf4 970 $seen{$n->id} = 1;
910a0a6d 971
972 my $next = $self->next_reading( $n, $witness, $backup );
44771cf2 973 unless( $next ) {
3a2ebbf4 974 warn "Did not find any path for $witness from reading " . $n->id;
44771cf2 975 last;
976 }
910a0a6d 977 push( @readings, $next );
978 $n = $next;
e2902068 979 }
980 # Check that the last reading is our end reading.
981 my $last = $readings[$#readings];
3a2ebbf4 982 warn "Last reading found from " . $start->text .
910a0a6d 983 " for witness $witness is not the end!"
3a2ebbf4 984 unless $last->id eq $end->id;
e2902068 985
986 return @readings;
987}
988
4e5a7b2c 989=head2 next_reading( $reading, $sigil );
8e1394aa 990
4a8828f0 991Returns the reading that follows the given reading along the given witness
930ff666 992path.
8e1394aa 993
994=cut
995
4a8828f0 996sub next_reading {
e2902068 997 # Return the successor via the corresponding path.
8e1394aa 998 my $self = shift;
3a2ebbf4 999 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1000 return undef unless $answer;
3a2ebbf4 1001 return $self->reading( $answer );
8e1394aa 1002}
1003
4e5a7b2c 1004=head2 prior_reading( $reading, $sigil )
8e1394aa 1005
4a8828f0 1006Returns the reading that precedes the given reading along the given witness
930ff666 1007path.
8e1394aa 1008
1009=cut
1010
4a8828f0 1011sub prior_reading {
e2902068 1012 # Return the predecessor via the corresponding path.
8e1394aa 1013 my $self = shift;
3a2ebbf4 1014 my $answer = $self->_find_linked_reading( 'prior', @_ );
1015 return $self->reading( $answer );
8e1394aa 1016}
1017
4a8828f0 1018sub _find_linked_reading {
e2902068 1019 my( $self, $direction, $node, $path, $alt_path ) = @_;
1020 my @linked_paths = $direction eq 'next'
3a2ebbf4 1021 ? $self->sequence->edges_from( $node )
1022 : $self->sequence->edges_to( $node );
e2902068 1023 return undef unless scalar( @linked_paths );
8e1394aa 1024
e2902068 1025 # We have to find the linked path that contains all of the
1026 # witnesses supplied in $path.
1027 my( @path_wits, @alt_path_wits );
4e5a7b2c 1028 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1029 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1030 my $base_le;
1031 my $alt_le;
1032 foreach my $le ( @linked_paths ) {
3a2ebbf4 1033 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1034 $base_le = $le;
910a0a6d 1035 }
3a2ebbf4 1036 my @le_wits = $self->path_witnesses( $le );
1037 if( _is_within( \@path_wits, \@le_wits ) ) {
1038 # This is the right path.
1039 return $direction eq 'next' ? $le->[1] : $le->[0];
1040 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1041 $alt_le = $le;
1042 }
8e1394aa 1043 }
e2902068 1044 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1045 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1046 if $alt_le;
e2902068 1047
1048 # Got this far? Return the base path if it exists.
3a2ebbf4 1049 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1050 if $base_le;
e2902068 1051
1052 # Got this far? We have no appropriate path.
2c669bca 1053 warn "Could not find $direction node from " . $node->id
910a0a6d 1054 . " along path $path";
8e1394aa 1055 return undef;
1056}
1057
4a8828f0 1058# Some set logic.
1059sub _is_within {
1060 my( $set1, $set2 ) = @_;
7854e12e 1061 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1062 foreach my $el ( @$set1 ) {
910a0a6d 1063 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1064 }
1065 return $ret;
1066}
1067
4e5a7b2c 1068# Return the string that joins together a list of witnesses for
1069# display on a single path.
1070sub _witnesses_of_label {
1071 my( $self, $label ) = @_;
1072 my $regex = $self->wit_list_separator;
1073 my @answer = split( /\Q$regex\E/, $label );
1074 return @answer;
1075}
1076
1077
1078=head1 INITIALIZATION METHODS
1079
1080These are mostly for use by parsers.
1081
1082=head2 make_witness_path( $witness )
1083
1084Link the array of readings contained in $witness->path (and in
1085$witness->uncorrected_path if it exists) into collation paths.
1086Clear out the arrays when finished.
de51424a 1087
4e5a7b2c 1088=head2 make_witness_paths
1089
1090Call make_witness_path for all witnesses in the tradition.
1091
1092=cut
930ff666 1093
7e450e44 1094# For use when a collation is constructed from a base text and an apparatus.
1095# We have the sequences of readings and just need to add path edges.
1f7aa795 1096# When we are done, clear out the witness path attributes, as they are no
1097# longer needed.
1098# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1099
6a222840 1100sub make_witness_paths {
1101 my( $self ) = @_;
910a0a6d 1102 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1103 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1104 $self->make_witness_path( $wit );
7854e12e 1105 }
7854e12e 1106}
1107
6a222840 1108sub make_witness_path {
7854e12e 1109 my( $self, $wit ) = @_;
1110 my @chain = @{$wit->path};
15d2d3df 1111 my $sig = $wit->sigil;
7854e12e 1112 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1113 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1114 }
1f7aa795 1115 if( $wit->is_layered ) {
d9e873d0 1116 @chain = @{$wit->uncorrected_path};
1117 foreach my $idx( 0 .. $#chain-1 ) {
1118 my $source = $chain[$idx];
1119 my $target = $chain[$idx+1];
1120 $self->add_path( $source, $target, $sig.$self->ac_label )
1121 unless $self->has_path( $source, $target, $sig );
1122 }
15d2d3df 1123 }
1f7aa795 1124 $wit->clear_path;
1125 $wit->clear_uncorrected_path;
e2902068 1126}
1127
4e5a7b2c 1128=head2 calculate_ranks
1129
1130Calculate the reading ranks (that is, their aligned positions relative
1131to each other) for the graph. This can only be called on linear collations.
1132
1133=cut
1134
910a0a6d 1135sub calculate_ranks {
1136 my $self = shift;
1137 # Walk a version of the graph where every node linked by a relationship
1138 # edge is fundamentally the same node, and do a topological ranking on
1139 # the nodes in this graph.
c9bf3dbf 1140 my $topo_graph = Graph->new();
910a0a6d 1141 my %rel_containers;
1142 my $rel_ctr = 0;
1143 # Add the nodes
1144 foreach my $r ( $self->readings ) {
3a2ebbf4 1145 next if exists $rel_containers{$r->id};
910a0a6d 1146 my @rels = $r->related_readings( 'colocated' );
1147 if( @rels ) {
1148 # Make a relationship container.
1149 push( @rels, $r );
c9bf3dbf 1150 my $rn = 'rel_container_' . $rel_ctr++;
1151 $topo_graph->add_vertex( $rn );
910a0a6d 1152 foreach( @rels ) {
3a2ebbf4 1153 $rel_containers{$_->id} = $rn;
910a0a6d 1154 }
1155 } else {
1156 # Add a new node to mirror the old node.
3a2ebbf4 1157 $rel_containers{$r->id} = $r->id;
1158 $topo_graph->add_vertex( $r->id );
910a0a6d 1159 }
4a8828f0 1160 }
3a1f2523 1161
3a2ebbf4 1162 # Add the edges.
910a0a6d 1163 foreach my $r ( $self->readings ) {
3a2ebbf4 1164 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1165 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1166 $rel_containers{$n} );
4e5a7b2c 1167 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1168 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1169 }
1170 }
1171
1172 # Now do the rankings, starting with the start node.
3a2ebbf4 1173 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1174 my $node_ranks = { $topo_start => 0 };
910a0a6d 1175 my @curr_origin = ( $topo_start );
1176 # A little iterative function.
1177 while( @curr_origin ) {
c9bf3dbf 1178 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1179 }
1180 # Transfer our rankings from the topological graph to the real one.
1181 foreach my $r ( $self->readings ) {
3a2ebbf4 1182 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1183 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1184 } else {
3a2ebbf4 1185 die "No rank calculated for node " . $r->id
67da8d6c 1186 . " - do you have a cycle in the graph?";
1187 }
de51424a 1188 }
8e1394aa 1189}
3a1f2523 1190
910a0a6d 1191sub _assign_rank {
c9bf3dbf 1192 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1193 # Look at each of the children of @current_nodes. If all the child's
1194 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1195 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1196 # parent gets a rank.
910a0a6d 1197 my @next_nodes;
1198 foreach my $c ( @current_nodes ) {
c9bf3dbf 1199 warn "Current reading $c has no rank!"
1200 unless exists $node_ranks->{$c};
1201 # print STDERR "Looking at child of node $c, rank "
1202 # . $node_ranks->{$c} . "\n";
1203 foreach my $child ( $graph->successors( $c ) ) {
1204 next if exists $node_ranks->{$child};
910a0a6d 1205 my $highest_rank = -1;
1206 my $skip = 0;
c9bf3dbf 1207 foreach my $parent ( $graph->predecessors( $child ) ) {
1208 if( exists $node_ranks->{$parent} ) {
1209 $highest_rank = $node_ranks->{$parent}
1210 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1211 } else {
1212 $skip = 1;
1213 last;
1214 }
1215 }
1216 next if $skip;
c9bf3dbf 1217 my $c_rank = $highest_rank + 1;
1218 # print STDERR "Assigning rank $c_rank to node $child \n";
1219 $node_ranks->{$child} = $c_rank;
910a0a6d 1220 push( @next_nodes, $child );
1221 }
1222 }
1223 return @next_nodes;
4cdd82f1 1224}
910a0a6d 1225
4e5a7b2c 1226=head2 flatten_ranks
1227
1228A convenience method for parsing collation data. Searches the graph for readings
1229with the same text at the same rank, and merges any that are found.
1230
1231=cut
1232
0e476982 1233sub flatten_ranks {
1234 my $self = shift;
1235 my %unique_rank_rdg;
1236 foreach my $rdg ( $self->readings ) {
1237 next unless $rdg->has_rank;
1238 my $key = $rdg->rank . "||" . $rdg->text;
1239 if( exists $unique_rank_rdg{$key} ) {
1240 # Combine!
56eefa04 1241 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1242 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1243 } else {
1244 $unique_rank_rdg{$key} = $rdg;
1245 }
1246 }
1247}
1248
1249
4e5a7b2c 1250=head1 UTILITY FUNCTIONS
1251
1252=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1253
4e5a7b2c 1254Find the last reading that occurs in sequence before both the given readings.
1255
1256=head2 common_successor( $reading_a, $reading_b )
1257
1258Find the first reading that occurs in sequence after both the given readings.
1259
22222af9 1260=begin testing
1261
1262use Text::Tradition;
1263
1264my $cxfile = 't/data/Collatex-16.xml';
1265my $t = Text::Tradition->new(
1266 'name' => 'inline',
1267 'input' => 'CollateX',
1268 'file' => $cxfile,
1269 );
1270my $c = $t->collation;
1271
4e5a7b2c 1272is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1273 'n20', "Found correct common predecessor" );
4e5a7b2c 1274is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1275 '#END#', "Found correct common successor" );
1276
4e5a7b2c 1277is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1278 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1279is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1280 '#END#', "Found correct common successor for readings on same path" );
1281
1282=end testing
1283
1284=cut
1285
1286## Return the closest reading that is a predecessor of both the given readings.
1287sub common_predecessor {
1288 my $self = shift;
4e5a7b2c 1289 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1290 return $self->common_in_path( $r1, $r2, 'predecessors' );
22222af9 1291}
1292
1293sub common_successor {
1294 my $self = shift;
4e5a7b2c 1295 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1296 return $self->common_in_path( $r1, $r2, 'successors' );
22222af9 1297}
1298
1299sub common_in_path {
1300 my( $self, $r1, $r2, $dir ) = @_;
1301 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1302 $iter = $self->end->rank - $iter if $dir eq 'successors';
1303 my @candidates;
1304 my @last_checked = ( $r1, $r2 );
1305 my %all_seen;
1306 while( !@candidates ) {
1307 my @new_lc;
1308 foreach my $lc ( @last_checked ) {
1309 foreach my $p ( $lc->$dir ) {
1310 if( $all_seen{$p->id} ) {
1311 push( @candidates, $p );
1312 } else {
1313 $all_seen{$p->id} = 1;
1314 push( @new_lc, $p );
1315 }
1316 }
1317 }
1318 @last_checked = @new_lc;
1319 }
1320 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1321 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1322}
1323
dd3b58b0 1324no Moose;
1325__PACKAGE__->meta->make_immutable;
e867486f 1326
1327=head1 BUGS / TODO
1328
1329=over
1330
4e5a7b2c 1331=item * Get rid of $backup in reading_sequence
e867486f 1332
1333=back