make the rest of the tests work with the new Witness
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
df6d9812 12use XML::LibXML;
a344ffcf 13use XML::LibXML::XPathContext;
dd3b58b0 14use Moose;
15
3a2ebbf4 16has 'sequence' => (
d047cd52 17 is => 'ro',
3a2ebbf4 18 isa => 'Graph',
19 default => sub { Graph->new() },
d047cd52 20 handles => {
3a2ebbf4 21 paths => 'edges',
d047cd52 22 },
d047cd52 23 );
3a2ebbf4 24
25has 'relations' => (
26 is => 'ro',
22222af9 27 isa => 'Text::Tradition::Collation::RelationshipStore',
28 handles => {
29 relationships => 'relationships',
30 related_readings => 'related_readings',
202ccb18 31 get_relationship => 'get_relationship',
ee801e17 32 del_relationship => 'del_relationship',
22222af9 33 },
34 writer => '_set_relations',
3a2ebbf4 35 );
dd3b58b0 36
3a2ebbf4 37has 'tradition' => (
38 is => 'ro',
d047cd52 39 isa => 'Text::Tradition',
8d9a1cd8 40 weak_ref => 1,
d047cd52 41 );
dd3b58b0 42
3a2ebbf4 43has 'readings' => (
44 isa => 'HashRef[Text::Tradition::Collation::Reading]',
45 traits => ['Hash'],
46 handles => {
47 reading => 'get',
48 _add_reading => 'set',
49 del_reading => 'delete',
50 has_reading => 'exists',
51 readings => 'values',
52 },
53 default => sub { {} },
54 );
910a0a6d 55
4a8828f0 56has 'wit_list_separator' => (
7854e12e 57 is => 'rw',
58 isa => 'Str',
59 default => ', ',
60 );
61
62has 'baselabel' => (
63 is => 'rw',
64 isa => 'Str',
65 default => 'base text',
66 );
4a8828f0 67
15d2d3df 68has 'linear' => (
69 is => 'rw',
70 isa => 'Bool',
71 default => 1,
72 );
c84275ff 73
ef9d481f 74has 'ac_label' => (
75 is => 'rw',
76 isa => 'Str',
77 default => ' (a.c.)',
78 );
3a2ebbf4 79
4e483aa5 80has 'wordsep' => (
81 is => 'rw',
82 isa => 'Str',
83 default => ' ',
84 );
85
3a2ebbf4 86has 'start' => (
87 is => 'ro',
88 isa => 'Text::Tradition::Collation::Reading',
89 writer => '_set_start',
90 weak_ref => 1,
91 );
92
93has 'end' => (
94 is => 'ro',
95 isa => 'Text::Tradition::Collation::Reading',
96 writer => '_set_end',
97 weak_ref => 1,
98 );
b365fbae 99
100has 'cached_svg' => (
101 is => 'rw',
102 isa => 'Str',
103 predicate => 'has_cached_svg',
104 clearer => 'wipe_svg',
105 );
1dd07bda 106
107has 'cached_table' => (
108 is => 'rw',
109 isa => 'HashRef',
110 predicate => 'has_cached_table',
111 clearer => 'wipe_table',
112 );
c1915ab9 113
114has '_graphcalc_done' => (
115 is => 'rw',
116 isa => 'Bool',
117 default => undef,
118 );
1f563ac3 119
4e5a7b2c 120=head1 NAME
121
122Text::Tradition::Collation - a software model for a text collation
123
124=head1 SYNOPSIS
125
126 use Text::Tradition;
127 my $t = Text::Tradition->new(
128 'name' => 'this is a text',
129 'input' => 'TEI',
130 'file' => '/path/to/tei_parallel_seg_file.xml' );
131
132 my $c = $t->collation;
133 my @readings = $c->readings;
134 my @paths = $c->paths;
135 my @relationships = $c->relationships;
136
137 my $svg_variant_graph = $t->collation->as_svg();
138
139=head1 DESCRIPTION
140
141Text::Tradition is a library for representation and analysis of collated
142texts, particularly medieval ones. The Collation is the central feature of
143a Tradition, where the text, its sequence of readings, and its relationships
144between readings are actually kept.
145
146=head1 CONSTRUCTOR
147
148=head2 new
149
150The constructor. Takes a hash or hashref of the following arguments:
151
152=over
153
154=item * tradition - The Text::Tradition object to which the collation
155belongs. Required.
156
157=item * linear - Whether the collation should be linear; that is, whether
158transposed readings should be treated as two linked readings rather than one,
159and therefore whether the collation graph is acyclic. Defaults to true.
160
4e5a7b2c 161=item * baselabel - The default label for the path taken by a base text
162(if any). Defaults to 'base text'.
163
164=item * wit_list_separator - The string to join a list of witnesses for
165purposes of making labels in display graphs. Defaults to ', '.
166
167=item * ac_label - The extra label to tack onto a witness sigil when
168representing another layer of path for the given witness - that is, when
169a text has more than one possible reading due to scribal corrections or
170the like. Defaults to ' (a.c.)'.
171
4e483aa5 172=item * wordsep - The string used to separate words in the original text.
173Defaults to ' '.
174
4e5a7b2c 175=back
176
177=head1 ACCESSORS
178
179=head2 tradition
180
181=head2 linear
182
4e5a7b2c 183=head2 wit_list_separator
184
185=head2 baselabel
186
187=head2 ac_label
188
4e483aa5 189=head2 wordsep
190
4e5a7b2c 191Simple accessors for collation attributes.
192
193=head2 start
194
195The meta-reading at the start of every witness path.
196
197=head2 end
198
199The meta-reading at the end of every witness path.
200
201=head2 readings
202
203Returns all Reading objects in the graph.
204
205=head2 reading( $id )
206
207Returns the Reading object corresponding to the given ID.
208
209=head2 add_reading( $reading_args )
210
211Adds a new reading object to the collation.
212See L<Text::Tradition::Collation::Reading> for the available arguments.
213
214=head2 del_reading( $object_or_id )
215
216Removes the given reading from the collation, implicitly removing its
217paths and relationships.
218
4e483aa5 219=head2 merge_readings( $main, $second, $concatenate, $with_str )
220
221Merges the $second reading into the $main one. If $concatenate is true, then
222the merged node will carry the text of both readings, concatenated with either
223$with_str (if specified) or a sensible default (the empty string if the
224appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 225
4e483aa5 226The first two arguments may be either readings or reading IDs.
4e5a7b2c 227
228=head2 has_reading( $id )
229
230Predicate to see whether a given reading ID is in the graph.
231
232=head2 reading_witnesses( $object_or_id )
233
234Returns a list of sigils whose witnesses contain the reading.
235
236=head2 paths
237
238Returns all reading paths within the document - that is, all edges in the
239collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
240
241=head2 add_path( $source, $target, $sigil )
242
243Links the given readings in the collation in sequence, under the given witness
244sigil. The readings may be specified by object or ID.
245
246=head2 del_path( $source, $target, $sigil )
247
248Links the given readings in the collation in sequence, under the given witness
249sigil. The readings may be specified by object or ID.
250
251=head2 has_path( $source, $target );
252
253Returns true if the two readings are linked in sequence in any witness.
254The readings may be specified by object or ID.
255
256=head2 relationships
257
258Returns all Relationship objects in the collation.
259
260=head2 add_relationship( $reading, $other_reading, $options )
261
262Adds a new relationship of the type given in $options between the two readings,
263which may be specified by object or ID. Returns a value of ( $status, @vectors)
264where $status is true on success, and @vectors is a list of relationship edges
265that were ultimately added.
266See L<Text::Tradition::Collation::Relationship> for the available options.
267
268=cut
dd3b58b0 269
d047cd52 270sub BUILD {
3a2ebbf4 271 my $self = shift;
22222af9 272 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 273 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
274 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 275}
784877d9 276
3a2ebbf4 277### Reading construct/destruct functions
278
279sub add_reading {
280 my( $self, $reading ) = @_;
281 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
282 my %args = %$reading;
283 $reading = Text::Tradition::Collation::Reading->new(
284 'collation' => $self,
285 %args );
286 }
287 # First check to see if a reading with this ID exists.
288 if( $self->reading( $reading->id ) ) {
63778331 289 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 290 }
c1915ab9 291 $self->_graphcalc_done(0);
3a2ebbf4 292 $self->_add_reading( $reading->id => $reading );
293 # Once the reading has been added, put it in both graphs.
294 $self->sequence->add_vertex( $reading->id );
22222af9 295 $self->relations->add_reading( $reading->id );
3a2ebbf4 296 return $reading;
eca16057 297};
298
3a2ebbf4 299around del_reading => sub {
300 my $orig = shift;
301 my $self = shift;
302 my $arg = shift;
303
304 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
305 $arg = $arg->id;
306 }
3a2ebbf4 307 # Remove the reading from the graphs.
c1915ab9 308 $self->_graphcalc_done(0);
4e483aa5 309 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 310 $self->sequence->delete_vertex( $arg );
22222af9 311 $self->relations->delete_reading( $arg );
3a2ebbf4 312
313 # Carry on.
314 $self->$orig( $arg );
315};
7854e12e 316
4e483aa5 317=begin testing
318
319use Text::Tradition;
320
321my $cxfile = 't/data/Collatex-16.xml';
322my $t = Text::Tradition->new(
323 'name' => 'inline',
324 'input' => 'CollateX',
325 'file' => $cxfile,
326 );
327my $c = $t->collation;
328
329my $rno = scalar $c->readings;
330# Split n21 for testing purposes
331my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
332my $old_r = $c->reading( 'n21' );
333$old_r->alter_text( 'to' );
334$c->del_path( 'n20', 'n21', 'A' );
335$c->add_path( 'n20', 'n21p0', 'A' );
336$c->add_path( 'n21p0', 'n21', 'A' );
337$c->flatten_ranks();
338ok( $c->reading( 'n21p0' ), "New reading exists" );
339is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
340
679f17e1 341# Combine n3 and n4 ( with his )
4e483aa5 342$c->merge_readings( 'n3', 'n4', 1 );
343ok( !$c->reading('n4'), "Reading n4 is gone" );
344is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
345
679f17e1 346# Collapse n9 and n10 ( rood / root )
347$c->merge_readings( 'n9', 'n10' );
348ok( !$c->reading('n10'), "Reading n10 is gone" );
349is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 350
351# Combine n21 and n21p0
352my $remaining = $c->reading('n21');
353$remaining ||= $c->reading('n22'); # one of these should still exist
354$c->merge_readings( 'n21p0', $remaining, 1 );
355ok( !$c->reading('n21'), "Reading $remaining is gone" );
356is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
357
358=end testing
359
360=cut
7854e12e 361
3a2ebbf4 362sub merge_readings {
363 my $self = shift;
364
365 # We only need the IDs for adding paths to the graph, not the reading
366 # objects themselves.
4e483aa5 367 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 368 $self->_graphcalc_done(0);
3a2ebbf4 369
370 # The kept reading should inherit the paths and the relationships
371 # of the deleted reading.
372 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
373 my @vector = ( $kept );
374 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
375 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 376 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 377 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
378 $self->sequence->add_edge( @vector );
379 my $fwits = $self->sequence->get_edge_attributes( @vector );
380 @wits{keys %$fwits} = values %$fwits;
381 $self->sequence->set_edge_attributes( @vector, \%wits );
382 }
22222af9 383 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 384
385 # Do the deletion deed.
4e483aa5 386 if( $combine ) {
49d4f2ac 387 my $kept_obj = $self->reading( $kept );
4e483aa5 388 my $del_obj = $self->reading( $deleted );
389 my $joinstr = $combine_char;
390 unless( defined $joinstr ) {
391 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
392 $joinstr = $self->wordsep unless defined $joinstr;
393 }
394 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 395 }
3a2ebbf4 396 $self->del_reading( $deleted );
397}
7854e12e 398
3265b0ce 399
3a2ebbf4 400# Helper function for manipulating the graph.
401sub _stringify_args {
4e483aa5 402 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 403 $first = $first->id
404 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
405 $second = $second->id
406 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 407 return( $first, $second, @args );
3a2ebbf4 408}
df6d9812 409
4e5a7b2c 410# Helper function for manipulating the graph.
411sub _objectify_args {
412 my( $self, $first, $second, $arg ) = @_;
413 $first = $self->reading( $first )
414 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
415 $second = $self->reading( $second )
416 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
417 return( $first, $second, $arg );
418}
3a2ebbf4 419### Path logic
420
421sub add_path {
422 my $self = shift;
423
424 # We only need the IDs for adding paths to the graph, not the reading
425 # objects themselves.
426 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
427
c1915ab9 428 $self->_graphcalc_done(0);
3a2ebbf4 429 # Connect the readings
430 $self->sequence->add_edge( $source, $target );
431 # Note the witness in question
432 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 433};
434
3a2ebbf4 435sub del_path {
436 my $self = shift;
49d4f2ac 437 my @args;
438 if( ref( $_[0] ) eq 'ARRAY' ) {
439 my $e = shift @_;
440 @args = ( @$e, @_ );
441 } else {
442 @args = @_;
443 }
3a2ebbf4 444
445 # We only need the IDs for adding paths to the graph, not the reading
446 # objects themselves.
49d4f2ac 447 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 448
c1915ab9 449 $self->_graphcalc_done(0);
3a2ebbf4 450 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 451 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 452 }
453 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
454 $self->sequence->delete_edge( $source, $target );
455 }
784877d9 456}
457
3a2ebbf4 458
15d2d3df 459# Extra graph-alike utility
460sub has_path {
3a2ebbf4 461 my $self = shift;
462 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
463 return undef unless $self->sequence->has_edge( $source, $target );
464 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 465}
466
4e5a7b2c 467=head2 clear_witness( @sigil_list )
3a2ebbf4 468
4e5a7b2c 469Clear the given witnesses out of the collation entirely, removing references
470to them in paths, and removing readings that belong only to them. Should only
471be called via $tradition->del_witness.
3a2ebbf4 472
473=cut
474
4e5a7b2c 475sub clear_witness {
476 my( $self, @sigils ) = @_;
477
c1915ab9 478 $self->_graphcalc_done(0);
4e5a7b2c 479 # Clear the witness(es) out of the paths
480 foreach my $e ( $self->paths ) {
481 foreach my $sig ( @sigils ) {
482 $self->del_path( $e, $sig );
483 }
484 }
485
486 # Clear out the newly unused readings
487 foreach my $r ( $self->readings ) {
488 unless( $self->reading_witnesses( $r ) ) {
489 $self->del_reading( $r );
490 }
491 }
492}
3a2ebbf4 493
494sub add_relationship {
495 my $self = shift;
22222af9 496 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 497 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 498 $self->reading( $source ), $target, $self->reading( $target ), $opts );
c1915ab9 499 $self->_graphcalc_done(0);
63778331 500 return @vectors;
22222af9 501}
ef9d481f 502
ca6e6095 503around qw/ get_relationship del_relationship / => sub {
504 my $orig = shift;
505 my $self = shift;
506 my @args = @_;
507 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
508 @args = @{$_[0]};
509 }
510 my( $source, $target ) = $self->_stringify_args( @args );
511 $self->$orig( $source, $target );
512};
513
22222af9 514=head2 reading_witnesses( $reading )
910a0a6d 515
22222af9 516Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 517
22222af9 518=cut
1d310495 519
1d310495 520sub reading_witnesses {
521 my( $self, $reading ) = @_;
522 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 523 # arbitrarily chosen "incoming". Thus, special-case the start node.
524 if( $reading eq $self->start ) {
525 return map { $_->sigil } $self->tradition->witnesses;
526 }
1d310495 527 my %all_witnesses;
528 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
529 my $wits = $self->sequence->get_edge_attributes( @$e );
530 @all_witnesses{ keys %$wits } = 1;
531 }
c12bb878 532 my $acstr = $self->ac_label;
533 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
534 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
535 }
1d310495 536 return keys %all_witnesses;
910a0a6d 537}
538
4e5a7b2c 539=head1 OUTPUT METHODS
8e1394aa 540
0ecb975c 541=head2 as_svg( \%options )
8e1394aa 542
0068967c 543Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 544See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 545
546=cut
547
548sub as_svg {
0ecb975c 549 my( $self, $opts ) = @_;
bfcbcecb 550 throw( "Need GraphViz installed to output SVG" )
551 unless File::Which::which( 'dot' );
e247aad1 552 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
c1915ab9 553 $self->calculate_ranks() unless $self->_graphcalc_done;
e247aad1 554 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 555 my @cmd = qw/dot -Tsvg/;
556 my( $svg, $err );
557 my $dotfile = File::Temp->new();
558 ## USE FOR DEBUGGING
559 # $dotfile->unlink_on_destroy(0);
560 binmode $dotfile, ':utf8';
561 print $dotfile $self->as_dot( $opts );
562 push( @cmd, $dotfile->filename );
563 run( \@cmd, ">", binary(), \$svg );
e247aad1 564 $svg = decode_utf8( $svg );
565 $self->cached_svg( $svg ) unless $want_subgraph;
566 return $svg;
567 } else {
568 return $self->cached_svg;
b365fbae 569 }
8e1394aa 570}
571
b22576c6 572
0ecb975c 573=head2 as_dot( \%options )
b22576c6 574
0ecb975c 575Returns a string that is the collation graph expressed in dot
576(i.e. GraphViz) format. Options include:
b22576c6 577
0ecb975c 578=over 4
b22576c6 579
0ecb975c 580=item * from
b22576c6 581
0ecb975c 582=item * to
df6d9812 583
0ecb975c 584=item * color_common
585
586=back
df6d9812 587
588=cut
589
590sub as_dot {
0ecb975c 591 my( $self, $opts ) = @_;
592 my $startrank = $opts->{'from'} if $opts;
593 my $endrank = $opts->{'to'} if $opts;
594 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 595 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
596 && $self->end->rank > 100;
6648ee3d 597 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 598
b22576c6 599 # Check the arguments
600 if( $startrank ) {
601 return if $endrank && $startrank > $endrank;
602 return if $startrank > $self->end->rank;
603 }
604 if( defined $endrank ) {
605 return if $endrank < 0;
f1b3b33a 606 $endrank = undef if $endrank == $self->end->rank;
b22576c6 607 }
608
67da8d6c 609 my $graph_name = $self->tradition->name;
610 $graph_name =~ s/[^\w\s]//g;
611 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 612
613 my %graph_attrs = (
614 'rankdir' => 'LR',
615 'bgcolor' => 'none',
616 );
617 my %node_attrs = (
b8990398 618 'fontsize' => 14,
f13b5582 619 'fillcolor' => 'white',
620 'style' => 'filled',
621 'shape' => 'ellipse'
622 );
623 my %edge_attrs = (
624 'arrowhead' => 'open',
625 'color' => '#000000',
626 'fontcolor' => '#000000',
627 );
628
67da8d6c 629 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 630 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
631 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 632
b22576c6 633 # Output substitute start/end readings if necessary
634 if( $startrank ) {
635 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
636 }
637 if( $endrank ) {
638 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
639 }
b365fbae 640 if( $STRAIGHTENHACK ) {
641 ## HACK part 1
6648ee3d 642 my $startlabel = $startrank ? 'SUBSTART' : 'START';
643 $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
b365fbae 644 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
645 }
b22576c6 646 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 647 # Sort the readings by rank if we have ranks; this speeds layout.
648 my @all_readings = $self->end->has_rank
649 ? sort { $a->rank <=> $b->rank } $self->readings
650 : $self->readings;
4633f9e4 651 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 652 foreach my $reading ( @all_readings ) {
b22576c6 653 # Only output readings within our rank range.
654 next if $startrank && $reading->rank < $startrank;
655 next if $endrank && $reading->rank > $endrank;
656 $used{$reading->id} = 1;
910a0a6d 657 # Need not output nodes without separate labels
3a2ebbf4 658 next if $reading->id eq $reading->text;
d4b75f44 659 my $rattrs;
30f0df34 660 my $label = $reading->text;
629e27b0 661 $label .= '-' if $reading->join_next;
662 $label = "-$label" if $reading->join_prior;
8f9cab7b 663 $label =~ s/\"/\\\"/g;
d4b75f44 664 $rattrs->{'label'} = $label;
0ecb975c 665 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 666 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 667 }
3a2ebbf4 668
30ddc24c 669 # Add the real edges. Need to weight one edge per rank jump, in a
670 # continuous line.
b365fbae 671 # my $weighted = $self->_add_edge_weights;
b22576c6 672 my @edges = $self->paths;
3bdec618 673 my( %substart, %subend );
b22576c6 674 foreach my $edge ( @edges ) {
675 # Do we need to output this edge?
508fd430 676 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 677 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 678 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 679
b22576c6 680 # Account for the rank gap if necessary
30ddc24c 681 my $rank0 = $self->reading( $edge->[0] )->rank
682 if $self->reading( $edge->[0] )->has_rank;
683 my $rank1 = $self->reading( $edge->[1] )->rank
684 if $self->reading( $edge->[1] )->has_rank;
685 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
686 $variables->{'minlen'} = $rank1 - $rank0;
687 }
688
689 # Add the calculated edge weights
b365fbae 690 # if( exists $weighted->{$edge->[0]}
e247aad1 691 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
692 # # $variables->{'color'} = 'red';
693 # $variables->{'weight'} = 3.0;
694 # }
30ddc24c 695
508fd430 696 # EXPERIMENTAL: make edge width reflect no. of witnesses
697 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
698 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
699
f13b5582 700 my $varopts = _dot_attr_string( $variables );
701 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
702 $edge->[0], $edge->[1], $varopts );
3bdec618 703 } elsif( $used{$edge->[0]} ) {
704 $subend{$edge->[0]} = 1;
705 } elsif( $used{$edge->[1]} ) {
706 $substart{$edge->[1]} = 1;
b22576c6 707 }
df6d9812 708 }
3bdec618 709 # Add substitute start and end edges if necessary
710 foreach my $node ( keys %substart ) {
027d819c 711 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 712 my $variables = { %edge_attrs, 'label' => $witstr };
713 my $varopts = _dot_attr_string( $variables );
714 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 715 }
716 foreach my $node ( keys %subend ) {
027d819c 717 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 718 my $variables = { %edge_attrs, 'label' => $witstr };
719 my $varopts = _dot_attr_string( $variables );
720 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 721 }
b365fbae 722 # HACK part 2
723 if( $STRAIGHTENHACK ) {
6648ee3d 724 my $endlabel = $endrank ? 'SUBEND' : 'END';
725 $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 726 }
30ddc24c 727
df6d9812 728 $dot .= "}\n";
729 return $dot;
730}
731
f13b5582 732sub _dot_attr_string {
733 my( $hash ) = @_;
734 my @attrs;
735 foreach my $k ( sort keys %$hash ) {
736 my $v = $hash->{$k};
737 push( @attrs, $k.'="'.$v.'"' );
738 }
739 return( '[ ' . join( ', ', @attrs ) . ' ]' );
740}
741
30ddc24c 742sub _add_edge_weights {
743 my $self = shift;
744 # Walk the graph from START to END, choosing the successor node with
745 # the largest number of witness paths each time.
746 my $weighted = {};
747 my $curr = $self->start->id;
008fc8a6 748 my $ranked = $self->end->has_rank;
30ddc24c 749 while( $curr ne $self->end->id ) {
008fc8a6 750 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 751 my @succ = sort { $self->path_witnesses( $curr, $a )
752 <=> $self->path_witnesses( $curr, $b ) }
753 $self->sequence->successors( $curr );
754 my $next = pop @succ;
008fc8a6 755 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 756 # Try to avoid lacunae in the weighted path.
008fc8a6 757 while( @succ &&
758 ( $self->reading( $next )->is_lacuna ||
759 $nextrank - $rank > 1 ) ){
30ddc24c 760 $next = pop @succ;
761 }
762 $weighted->{$curr} = $next;
763 $curr = $next;
764 }
765 return $weighted;
766}
767
027d819c 768=head2 path_witnesses( $edge )
769
770Returns the list of sigils whose witnesses are associated with the given edge.
771The edge can be passed as either an array or an arrayref of ( $source, $target ).
772
773=cut
774
3a2ebbf4 775sub path_witnesses {
776 my( $self, @edge ) = @_;
777 # If edge is an arrayref, cope.
778 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
779 my $e = shift @edge;
780 @edge = @$e;
781 }
782 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 783 return @wits;
3a2ebbf4 784}
785
7f9f05e8 786# Helper function. Make a display label for the given witnesses, showing a.c.
787# witnesses only where the main witness is not also in the list.
027d819c 788sub _path_display_label {
508fd430 789 my $self = shift;
7f9f05e8 790 my %wits;
791 map { $wits{$_} = 1 } @_;
792
793 # If an a.c. wit is listed, remove it if the main wit is also listed.
794 # Otherwise keep it for explicit listing.
795 my $aclabel = $self->ac_label;
796 my @disp_ac;
797 foreach my $w ( sort keys %wits ) {
798 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
799 if( exists $wits{$1} ) {
800 delete $wits{$w};
801 } else {
802 push( @disp_ac, $w );
803 }
804 }
805 }
806
807 # See if we are in a majority situation.
8f9cab7b 808 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 809 if( scalar keys %wits > $maj ) {
810 unshift( @disp_ac, 'majority' );
811 return join( ', ', @disp_ac );
8f9cab7b 812 } else {
7f9f05e8 813 return join( ', ', sort keys %wits );
8f9cab7b 814 }
815}
1dd07bda 816
bf6e338d 817=head2 readings_at_rank( $rank )
1dd07bda 818
bf6e338d 819Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 820
821=cut
822
bf6e338d 823sub readings_at_rank {
1dd07bda 824 my( $self, $rank ) = @_;
bf6e338d 825 my $table = $self->alignment_table;
826 # Table rank is real rank - 1.
827 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
828 my %readings;
829 foreach my $e ( @elements ) {
830 next unless ref( $e ) eq 'HASH';
831 next unless exists $e->{'t'};
832 $readings{$e->{'t'}->id} = $e->{'t'};
833 }
834 return values %readings;
1dd07bda 835}
8f9cab7b 836
4e5a7b2c 837=head2 as_graphml
8e1394aa 838
4e5a7b2c 839Returns a GraphML representation of the collation. The GraphML will contain
840two graphs. The first expresses the attributes of the readings and the witness
841paths that link them; the second expresses the relationships that link the
842readings. This is the native transfer format for a tradition.
8e1394aa 843
56eefa04 844=begin testing
845
846use Text::Tradition;
847
848my $READINGS = 311;
849my $PATHS = 361;
850
851my $datafile = 't/data/florilegium_tei_ps.xml';
852my $tradition = Text::Tradition->new( 'input' => 'TEI',
853 'name' => 'test0',
854 'file' => $datafile,
855 'linear' => 1 );
856
857ok( $tradition, "Got a tradition object" );
858is( scalar $tradition->witnesses, 13, "Found all witnesses" );
859ok( $tradition->collation, "Tradition has a collation" );
860
861my $c = $tradition->collation;
862is( scalar $c->readings, $READINGS, "Collation has all readings" );
863is( scalar $c->paths, $PATHS, "Collation has all paths" );
864is( scalar $c->relationships, 0, "Collation has all relationships" );
865
866# Add a few relationships
867$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
868$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
869$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
870
871# Now write it to GraphML and parse it again.
872
873my $graphml = $c->as_graphml;
874my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
875is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
876is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
877is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
878
879=end testing
880
8e1394aa 881=cut
882
883sub as_graphml {
3a2ebbf4 884 my( $self ) = @_;
3d14b48e 885 $self->calculate_ranks unless $self->_graphcalc_done;
886
8e1394aa 887 # Some namespaces
888 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
889 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
890 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 891 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 892
893 # Create the document and root node
894 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
895 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
896 $graphml->setDocumentElement( $root );
897 $root->setNamespace( $xsi_ns, 'xsi', 0 );
898 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 899
900 # List of attribute types to save on our objects and their corresponding
901 # GraphML types
902 my %save_types = (
903 'Str' => 'string',
904 'Int' => 'int',
905 'Bool' => 'boolean',
906 'RelationshipType' => 'string',
907 'RelationshipScope' => 'string',
908 );
909
910 # List of attribute names *not* to save on our objects.
911 # We will also not save any attribute beginning with _.
912 my %skipsave;
913 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 914
bbd064a9 915 # Add the data keys for the graph. Include an extra key 'version' for the
916 # GraphML output version.
e309421a 917 my %graph_data_keys;
918 my $gdi = 0;
bbd064a9 919 my %graph_attributes = ( 'version' => 'string' );
920 # Graph attributes include those of Tradition and those of Collation.
921 my %gattr_from;
922 my $tmeta = $self->tradition->meta;
923 my $cmeta = $self->meta;
924 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
925 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
926 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
927 next if $attr->name =~ /^_/;
928 next if $skipsave{$attr->name};
929 next unless $save_types{$attr->type_constraint->name};
930 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
931 }
932
933 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 934 $graph_data_keys{$datum} = 'dg'.$gdi++;
935 my $key = $root->addNewChild( $graphml_ns, 'key' );
936 $key->setAttribute( 'attr.name', $datum );
bbd064a9 937 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 938 $key->setAttribute( 'for', 'graph' );
939 $key->setAttribute( 'id', $graph_data_keys{$datum} );
940 }
f6066bac 941
bbd064a9 942 # Add the data keys for reading nodes
943 my %reading_attributes;
944 my $rmeta = Text::Tradition::Collation::Reading->meta;
945 foreach my $attr( $rmeta->get_all_attributes ) {
946 next if $attr->name =~ /^_/;
947 next if $skipsave{$attr->name};
948 next unless $save_types{$attr->type_constraint->name};
949 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
950 }
ef9d481f 951 my %node_data_keys;
952 my $ndi = 0;
bbd064a9 953 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 954 $node_data_keys{$datum} = 'dn'.$ndi++;
955 my $key = $root->addNewChild( $graphml_ns, 'key' );
956 $key->setAttribute( 'attr.name', $datum );
bbd064a9 957 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 958 $key->setAttribute( 'for', 'node' );
959 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 960 }
961
bbd064a9 962 # Add the data keys for edges, that is, paths and relationships. Path
963 # data does not come from a Moose class so is here manually.
ef9d481f 964 my $edi = 0;
965 my %edge_data_keys;
bbd064a9 966 my %edge_attributes = (
3a2ebbf4 967 witness => 'string', # ID/label for a path
3a2ebbf4 968 extra => 'boolean', # Path key
3a2ebbf4 969 );
bbd064a9 970 my @path_attributes = keys %edge_attributes; # track our manual additions
971 my $pmeta = Text::Tradition::Collation::Relationship->meta;
972 foreach my $attr( $pmeta->get_all_attributes ) {
973 next if $attr->name =~ /^_/;
974 next if $skipsave{$attr->name};
975 next unless $save_types{$attr->type_constraint->name};
976 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
977 }
978 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 979 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 980 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 981 $key->setAttribute( 'attr.name', $datum );
bbd064a9 982 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 983 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 984 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 985 }
3a2ebbf4 986
22222af9 987 # Add the collation graph itself
2c669bca 988 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
989 $sgraph->setAttribute( 'edgedefault', 'directed' );
990 $sgraph->setAttribute( 'id', $self->tradition->name );
991 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
992 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
993 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
994 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
995 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 996
e309421a 997 # Collation attribute data
bbd064a9 998 foreach my $datum ( keys %graph_attributes ) {
999 my $value;
1000 if( $datum eq 'version' ) {
1001 $value = '3.1';
1002 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1003 $value = $self->tradition->$datum;
1004 } else {
1005 $value = $self->$datum;
1006 }
2c669bca 1007 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1008 }
8e1394aa 1009
1010 my $node_ctr = 0;
1011 my %node_hash;
22222af9 1012 # Add our readings to the graph
3a2ebbf4 1013 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 1014 # Add to the main graph
1015 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1016 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1017 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1018 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1019 foreach my $d ( keys %reading_attributes ) {
255875b8 1020 my $nval = $n->$d;
1021 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1022 if defined $nval;
1023 }
b15511bf 1024 }
1025
2c669bca 1026 # Add the path edges to the sequence graph
df6d9812 1027 my $edge_ctr = 0;
3a2ebbf4 1028 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1029 # We add an edge in the graphml for every witness in $e.
508fd430 1030 foreach my $wit ( sort $self->path_witnesses( $e ) ) {
3a2ebbf4 1031 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1032 $node_hash{ $e->[0] },
1033 $node_hash{ $e->[1] } );
2c669bca 1034 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1035 $edge_el->setAttribute( 'source', $from );
1036 $edge_el->setAttribute( 'target', $to );
1037 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1038
1039 # It's a witness path, so add the witness
1040 my $base = $wit;
1041 my $key = $edge_data_keys{'witness'};
1042 # Is this an ante-corr witness?
1043 my $aclabel = $self->ac_label;
1044 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1045 # Keep the base witness
1046 $base = $1;
1047 # ...and record that this is an 'extra' reading path
1048 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1049 }
1050 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1051 }
1052 }
1053
22222af9 1054 # Add the relationship graph to the XML
bbd064a9 1055 map { delete $edge_data_keys{$_} } @path_attributes;
027d819c 1056 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
2626f709 1057 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1058
94c00c71 1059 # Save and return the thing
1060 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1061 return $result;
df6d9812 1062}
1063
b15511bf 1064sub _add_graphml_data {
1065 my( $el, $key, $value ) = @_;
b15511bf 1066 return unless defined $value;
c9bf3dbf 1067 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1068 $data_el->setAttribute( 'key', $key );
1069 $data_el->appendText( $value );
8e1394aa 1070}
1071
4e5a7b2c 1072=head2 as_csv
910a0a6d 1073
1074Returns a CSV alignment table representation of the collation graph, one
2c669bca 1075row per witness (or witness uncorrected.)
910a0a6d 1076
1077=cut
1078
1079sub as_csv {
3a2ebbf4 1080 my( $self ) = @_;
1dd07bda 1081 my $table = $self->alignment_table;
82fa4d57 1082 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1083 my @result;
2c669bca 1084 # Make the header row
1085 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1086 push( @result, decode_utf8( $csv->string ) );
1087 # Make the rest of the rows
1088 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1089 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1090 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1091 $csv->combine( @row );
910a0a6d 1092 push( @result, decode_utf8( $csv->string ) );
1093 }
3a2ebbf4 1094 return join( "\n", @result );
910a0a6d 1095}
1096
1dd07bda 1097=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1098
566f4595 1099Return a reference to an alignment table, in a slightly enhanced CollateX
1100format which looks like this:
1101
1102 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1103 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1104 { witness => "SIG2",
4e5a7b2c 1105 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1106 ... ],
1107 length => TEXTLEN };
1108
1109If $use_refs is set to 1, the reading object is returned in the table
1110instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1111
1112If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1113keys have a true hash value will be included.
2c669bca 1114
1115=cut
9f3ba6f7 1116
1dd07bda 1117sub alignment_table {
1118 my( $self ) = @_;
c1915ab9 1119 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1120 return $self->cached_table if $self->has_cached_table;
1121
0ecb975c 1122 # Make sure we can do this
1123 throw( "Need a linear graph in order to make an alignment table" )
1124 unless $self->linear;
1125 $self->calculate_ranks unless $self->end->has_rank;
1126
2c669bca 1127 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1128 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1129 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1130 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1131 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1132 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1133 push( @{$table->{'alignment'}},
1134 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1135 if( $wit->is_layered ) {
1136 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1137 $wit->sigil.$self->ac_label );
1dd07bda 1138 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1139 push( @{$table->{'alignment'}},
1140 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1141 }
1142 }
1dd07bda 1143 $self->cached_table( $table );
1144 return $table;
910a0a6d 1145}
1146
1147sub _make_witness_row {
1dd07bda 1148 my( $path, $positions ) = @_;
910a0a6d 1149 my %char_hash;
1150 map { $char_hash{$_} = undef } @$positions;
2c669bca 1151 my $debug = 0;
910a0a6d 1152 foreach my $rdg ( @$path ) {
eca16057 1153 my $rtext = $rdg->text;
1154 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1155 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1156 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1157 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1158 }
1159 my @row = map { $char_hash{$_} } @$positions;
eca16057 1160 # Fill in lacuna markers for undef spots in the row
1161 my $last_el = shift @row;
1162 my @filled_row = ( $last_el );
1163 foreach my $el ( @row ) {
0e476982 1164 # If we are using node reference, make the lacuna node appear many times
1165 # in the table. If not, use the lacuna tag.
1dd07bda 1166 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1167 $el = $last_el;
eca16057 1168 }
1169 push( @filled_row, $el );
1170 $last_el = $el;
1171 }
1172 return @filled_row;
910a0a6d 1173}
1174
4e5a7b2c 1175=head1 NAVIGATION METHODS
910a0a6d 1176
4e5a7b2c 1177=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1178
1179Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1180with $last, for the witness given in $sigil. If a $backup sigil is
1181specified (e.g. when walking a layered witness), it will be used wherever
1182no $sigil path exists. If there is a base text reading, that will be
1183used wherever no path exists for $sigil or $backup.
e2902068 1184
1185=cut
1186
910a0a6d 1187# TODO Think about returning some lazy-eval iterator.
b0b4421a 1188# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1189
e2902068 1190sub reading_sequence {
861c3e27 1191 my( $self, $start, $end, $witness ) = @_;
e2902068 1192
930ff666 1193 $witness = $self->baselabel unless $witness;
e2902068 1194 my @readings = ( $start );
1195 my %seen;
1196 my $n = $start;
3a2ebbf4 1197 while( $n && $n->id ne $end->id ) {
1198 if( exists( $seen{$n->id} ) ) {
63778331 1199 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1200 }
3a2ebbf4 1201 $seen{$n->id} = 1;
910a0a6d 1202
861c3e27 1203 my $next = $self->next_reading( $n, $witness );
44771cf2 1204 unless( $next ) {
63778331 1205 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1206 }
910a0a6d 1207 push( @readings, $next );
1208 $n = $next;
e2902068 1209 }
1210 # Check that the last reading is our end reading.
1211 my $last = $readings[$#readings];
63778331 1212 throw( "Last reading found from " . $start->text .
1213 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1214 unless $last->id eq $end->id;
e2902068 1215
1216 return @readings;
1217}
1218
4e5a7b2c 1219=head2 next_reading( $reading, $sigil );
8e1394aa 1220
4a8828f0 1221Returns the reading that follows the given reading along the given witness
930ff666 1222path.
8e1394aa 1223
1224=cut
1225
4a8828f0 1226sub next_reading {
e2902068 1227 # Return the successor via the corresponding path.
8e1394aa 1228 my $self = shift;
3a2ebbf4 1229 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1230 return undef unless $answer;
3a2ebbf4 1231 return $self->reading( $answer );
8e1394aa 1232}
1233
4e5a7b2c 1234=head2 prior_reading( $reading, $sigil )
8e1394aa 1235
4a8828f0 1236Returns the reading that precedes the given reading along the given witness
930ff666 1237path.
8e1394aa 1238
1239=cut
1240
4a8828f0 1241sub prior_reading {
e2902068 1242 # Return the predecessor via the corresponding path.
8e1394aa 1243 my $self = shift;
3a2ebbf4 1244 my $answer = $self->_find_linked_reading( 'prior', @_ );
1245 return $self->reading( $answer );
8e1394aa 1246}
1247
4a8828f0 1248sub _find_linked_reading {
861c3e27 1249 my( $self, $direction, $node, $path ) = @_;
1250
1251 # Get a backup if we are dealing with a layered witness
1252 my $alt_path;
1253 my $aclabel = $self->ac_label;
1254 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1255 $alt_path = $1;
1256 }
1257
e2902068 1258 my @linked_paths = $direction eq 'next'
3a2ebbf4 1259 ? $self->sequence->edges_from( $node )
1260 : $self->sequence->edges_to( $node );
e2902068 1261 return undef unless scalar( @linked_paths );
8e1394aa 1262
e2902068 1263 # We have to find the linked path that contains all of the
1264 # witnesses supplied in $path.
1265 my( @path_wits, @alt_path_wits );
4e5a7b2c 1266 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1267 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1268 my $base_le;
1269 my $alt_le;
1270 foreach my $le ( @linked_paths ) {
3a2ebbf4 1271 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1272 $base_le = $le;
910a0a6d 1273 }
508fd430 1274 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1275 if( _is_within( \@path_wits, \@le_wits ) ) {
1276 # This is the right path.
1277 return $direction eq 'next' ? $le->[1] : $le->[0];
1278 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1279 $alt_le = $le;
1280 }
8e1394aa 1281 }
e2902068 1282 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1283 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1284 if $alt_le;
e2902068 1285
1286 # Got this far? Return the base path if it exists.
3a2ebbf4 1287 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1288 if $base_le;
e2902068 1289
1290 # Got this far? We have no appropriate path.
2c669bca 1291 warn "Could not find $direction node from " . $node->id
910a0a6d 1292 . " along path $path";
8e1394aa 1293 return undef;
1294}
1295
4a8828f0 1296# Some set logic.
1297sub _is_within {
1298 my( $set1, $set2 ) = @_;
7854e12e 1299 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1300 foreach my $el ( @$set1 ) {
910a0a6d 1301 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1302 }
1303 return $ret;
1304}
1305
4e5a7b2c 1306# Return the string that joins together a list of witnesses for
1307# display on a single path.
1308sub _witnesses_of_label {
1309 my( $self, $label ) = @_;
1310 my $regex = $self->wit_list_separator;
1311 my @answer = split( /\Q$regex\E/, $label );
1312 return @answer;
b0b4421a 1313}
1314
d4b75f44 1315=head2 common_readings
1316
1317Returns the list of common readings in the graph (i.e. those readings that are
1318shared by all non-lacunose witnesses.)
1319
1320=cut
1321
1322sub common_readings {
1323 my $self = shift;
1324 my @common = grep { $_->is_common } $self->readings;
1325 return @common;
1326}
1327
fae52efd 1328=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1329
1330Returns the text of a witness (plus its backup, if we are using a layer)
1331as stored in the collation. The text is returned as a string, where the
1332individual readings are joined with spaces and the meta-readings (e.g.
1333lacunae) are omitted. Optional specification of $start and $end allows
1334the generation of a subset of the witness text.
4e5a7b2c 1335
b0b4421a 1336=cut
1337
1338sub path_text {
861c3e27 1339 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1340 $start = $self->start unless $start;
1341 $end = $self->end unless $end;
861c3e27 1342 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1343 my $pathtext = '';
1344 my $last;
1345 foreach my $r ( @path ) {
1346 if( $r->join_prior || !$last || $last->join_next ) {
1347 $pathtext .= $r->text;
1348 } else {
1349 $pathtext .= ' ' . $r->text;
1350 }
1351 $last = $r;
1352 }
1353 return $pathtext;
b0b4421a 1354}
4e5a7b2c 1355
1356=head1 INITIALIZATION METHODS
1357
1358These are mostly for use by parsers.
1359
1360=head2 make_witness_path( $witness )
1361
1362Link the array of readings contained in $witness->path (and in
1363$witness->uncorrected_path if it exists) into collation paths.
1364Clear out the arrays when finished.
de51424a 1365
4e5a7b2c 1366=head2 make_witness_paths
1367
1368Call make_witness_path for all witnesses in the tradition.
1369
1370=cut
930ff666 1371
7e450e44 1372# For use when a collation is constructed from a base text and an apparatus.
1373# We have the sequences of readings and just need to add path edges.
1f7aa795 1374# When we are done, clear out the witness path attributes, as they are no
1375# longer needed.
1376# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1377
6a222840 1378sub make_witness_paths {
1379 my( $self ) = @_;
910a0a6d 1380 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1381 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1382 $self->make_witness_path( $wit );
7854e12e 1383 }
7854e12e 1384}
1385
6a222840 1386sub make_witness_path {
7854e12e 1387 my( $self, $wit ) = @_;
1388 my @chain = @{$wit->path};
15d2d3df 1389 my $sig = $wit->sigil;
fae52efd 1390 # Add start and end if necessary
1391 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1392 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1393 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1394 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1395 }
1f7aa795 1396 if( $wit->is_layered ) {
d9e873d0 1397 @chain = @{$wit->uncorrected_path};
fae52efd 1398 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1399 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1400 foreach my $idx( 0 .. $#chain-1 ) {
1401 my $source = $chain[$idx];
1402 my $target = $chain[$idx+1];
1403 $self->add_path( $source, $target, $sig.$self->ac_label )
1404 unless $self->has_path( $source, $target, $sig );
1405 }
15d2d3df 1406 }
1f7aa795 1407 $wit->clear_path;
1408 $wit->clear_uncorrected_path;
e2902068 1409}
1410
4e5a7b2c 1411=head2 calculate_ranks
1412
1413Calculate the reading ranks (that is, their aligned positions relative
1414to each other) for the graph. This can only be called on linear collations.
1415
b365fbae 1416=begin testing
1417
1418use Text::Tradition;
1419
1420my $cxfile = 't/data/Collatex-16.xml';
1421my $t = Text::Tradition->new(
1422 'name' => 'inline',
1423 'input' => 'CollateX',
1424 'file' => $cxfile,
1425 );
1426my $c = $t->collation;
1427
1428# Make an svg
bfcbcecb 1429my $table = $c->alignment_table;
1430ok( $c->has_cached_table, "Alignment table was cached" );
1431is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1432$c->calculate_ranks;
bfcbcecb 1433is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1434$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1435isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1436
1437=end testing
1438
4e5a7b2c 1439=cut
1440
910a0a6d 1441sub calculate_ranks {
1442 my $self = shift;
b365fbae 1443 # Save the existing ranks, in case we need to invalidate the cached SVG.
1444 my %existing_ranks;
910a0a6d 1445 # Walk a version of the graph where every node linked by a relationship
1446 # edge is fundamentally the same node, and do a topological ranking on
1447 # the nodes in this graph.
c9bf3dbf 1448 my $topo_graph = Graph->new();
910a0a6d 1449 my %rel_containers;
1450 my $rel_ctr = 0;
1451 # Add the nodes
1452 foreach my $r ( $self->readings ) {
3a2ebbf4 1453 next if exists $rel_containers{$r->id};
910a0a6d 1454 my @rels = $r->related_readings( 'colocated' );
1455 if( @rels ) {
1456 # Make a relationship container.
1457 push( @rels, $r );
c9bf3dbf 1458 my $rn = 'rel_container_' . $rel_ctr++;
1459 $topo_graph->add_vertex( $rn );
910a0a6d 1460 foreach( @rels ) {
3a2ebbf4 1461 $rel_containers{$_->id} = $rn;
910a0a6d 1462 }
1463 } else {
1464 # Add a new node to mirror the old node.
3a2ebbf4 1465 $rel_containers{$r->id} = $r->id;
1466 $topo_graph->add_vertex( $r->id );
910a0a6d 1467 }
4a8828f0 1468 }
3a1f2523 1469
3a2ebbf4 1470 # Add the edges.
910a0a6d 1471 foreach my $r ( $self->readings ) {
b365fbae 1472 $existing_ranks{$r} = $r->rank;
3a2ebbf4 1473 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1474 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1475 $rel_containers{$n} );
4e5a7b2c 1476 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1477 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1478 }
1479 }
1480
1481 # Now do the rankings, starting with the start node.
3a2ebbf4 1482 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1483 my $node_ranks = { $topo_start => 0 };
910a0a6d 1484 my @curr_origin = ( $topo_start );
1485 # A little iterative function.
1486 while( @curr_origin ) {
c9bf3dbf 1487 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1488 }
1489 # Transfer our rankings from the topological graph to the real one.
1490 foreach my $r ( $self->readings ) {
3a2ebbf4 1491 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1492 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1493 } else {
63778331 1494 # Die. Find the last rank we calculated.
1495 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1496 <=> $node_ranks->{$rel_containers{$b->id}} }
1497 $self->readings;
1498 my $last = pop @all_defined;
1499 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1500 }
de51424a 1501 }
bfcbcecb 1502 # Do we need to invalidate the cached data?
1503 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1504 foreach my $r ( $self->readings ) {
7c293912 1505 next if defined( $existing_ranks{$r} )
1506 && $existing_ranks{$r} == $r->rank;
c1915ab9 1507 # Something has changed, so clear the cache
bfcbcecb 1508 $self->_clear_cache;
c1915ab9 1509 # ...and recalculate the common readings.
1510 $self->calculate_common_readings();
b365fbae 1511 last;
1512 }
1513 }
c1915ab9 1514 # The graph calculation information is now up to date.
1515 $self->_graphcalc_done(1);
8e1394aa 1516}
3a1f2523 1517
910a0a6d 1518sub _assign_rank {
c9bf3dbf 1519 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1520 # Look at each of the children of @current_nodes. If all the child's
1521 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1522 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1523 # parent gets a rank.
910a0a6d 1524 my @next_nodes;
1525 foreach my $c ( @current_nodes ) {
c9bf3dbf 1526 warn "Current reading $c has no rank!"
1527 unless exists $node_ranks->{$c};
1528 # print STDERR "Looking at child of node $c, rank "
1529 # . $node_ranks->{$c} . "\n";
1530 foreach my $child ( $graph->successors( $c ) ) {
1531 next if exists $node_ranks->{$child};
910a0a6d 1532 my $highest_rank = -1;
1533 my $skip = 0;
c9bf3dbf 1534 foreach my $parent ( $graph->predecessors( $child ) ) {
1535 if( exists $node_ranks->{$parent} ) {
1536 $highest_rank = $node_ranks->{$parent}
1537 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1538 } else {
1539 $skip = 1;
1540 last;
1541 }
1542 }
1543 next if $skip;
c9bf3dbf 1544 my $c_rank = $highest_rank + 1;
1545 # print STDERR "Assigning rank $c_rank to node $child \n";
1546 $node_ranks->{$child} = $c_rank;
910a0a6d 1547 push( @next_nodes, $child );
1548 }
1549 }
1550 return @next_nodes;
4cdd82f1 1551}
910a0a6d 1552
c1915ab9 1553sub _clear_cache {
1554 my $self = shift;
1555 $self->wipe_svg if $self->has_cached_svg;
1556 $self->wipe_table if $self->has_cached_table;
1557}
1558
1559
4e5a7b2c 1560=head2 flatten_ranks
1561
1562A convenience method for parsing collation data. Searches the graph for readings
1563with the same text at the same rank, and merges any that are found.
1564
1565=cut
1566
0e476982 1567sub flatten_ranks {
1568 my $self = shift;
1569 my %unique_rank_rdg;
bf6e338d 1570 my $changed;
0e476982 1571 foreach my $rdg ( $self->readings ) {
1572 next unless $rdg->has_rank;
1573 my $key = $rdg->rank . "||" . $rdg->text;
1574 if( exists $unique_rank_rdg{$key} ) {
1575 # Combine!
56eefa04 1576 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1577 $changed = 1;
0e476982 1578 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1579 # TODO see if this now makes a common point.
0e476982 1580 } else {
1581 $unique_rank_rdg{$key} = $rdg;
1582 }
1583 }
bf6e338d 1584 # If we merged readings, the ranks are still fine but the alignment
1585 # table is wrong. Wipe it.
1586 $self->wipe_table() if $changed;
0e476982 1587}
4633f9e4 1588
1589
d4b75f44 1590=head2 calculate_common_readings
1591
1592Goes through the graph identifying the readings that appear in every witness
1593(apart from those with lacunae at that spot.) Marks them as common and returns
1594the list.
1595
1596=begin testing
1597
1598use Text::Tradition;
1599
1600my $cxfile = 't/data/Collatex-16.xml';
1601my $t = Text::Tradition->new(
1602 'name' => 'inline',
1603 'input' => 'CollateX',
1604 'file' => $cxfile,
1605 );
1606my $c = $t->collation;
1607
1608my @common = $c->calculate_common_readings();
1609is( scalar @common, 8, "Found correct number of common readings" );
1610my @marked = sort $c->common_readings();
1611is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1612my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1613is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1614
1615=end testing
1616
1617=cut
1618
1619sub calculate_common_readings {
1620 my $self = shift;
1621 my @common;
c1915ab9 1622 map { $_->is_common( 0 ) } $self->readings;
1623 # Implicitly calls calculate_ranks
1dd07bda 1624 my $table = $self->alignment_table;
d4b75f44 1625 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1626 my @row = map { $_->{'tokens'}->[$idx]
1627 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1628 @{$table->{'alignment'}};
d4b75f44 1629 my %hash;
1630 foreach my $r ( @row ) {
1631 if( $r ) {
1632 $hash{$r->id} = $r unless $r->is_meta;
1633 } else {
1634 $hash{'UNDEF'} = $r;
1635 }
1636 }
1637 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1638 my( $r ) = values %hash;
1639 $r->is_common( 1 );
1640 push( @common, $r );
1641 }
1642 }
1643 return @common;
1644}
1645
861c3e27 1646=head2 text_from_paths
1647
1648Calculate the text array for all witnesses from the path, for later consistency
1649checking. Only to be used if there is no non-graph-based way to know the
1650original texts.
1651
1652=cut
1653
1654sub text_from_paths {
1655 my $self = shift;
1656 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1657 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1658 my @text;
1659 foreach my $r ( @readings ) {
1660 next if $r->is_meta;
1661 push( @text, $r->text );
1662 }
861c3e27 1663 $wit->text( \@text );
1664 if( $wit->is_layered ) {
5164a6f0 1665 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1666 $wit->sigil.$self->ac_label );
1667 my @uctext;
1668 foreach my $r ( @ucrdgs ) {
1669 next if $r->is_meta;
1670 push( @uctext, $r->text );
1671 }
1672 $wit->layertext( \@uctext );
861c3e27 1673 }
1674 }
1675}
0e476982 1676
4e5a7b2c 1677=head1 UTILITY FUNCTIONS
1678
1679=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1680
4e5a7b2c 1681Find the last reading that occurs in sequence before both the given readings.
1682
1683=head2 common_successor( $reading_a, $reading_b )
1684
1685Find the first reading that occurs in sequence after both the given readings.
1686
22222af9 1687=begin testing
1688
1689use Text::Tradition;
1690
1691my $cxfile = 't/data/Collatex-16.xml';
1692my $t = Text::Tradition->new(
1693 'name' => 'inline',
1694 'input' => 'CollateX',
1695 'file' => $cxfile,
1696 );
1697my $c = $t->collation;
1698
679f17e1 1699is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1700 'n20', "Found correct common predecessor" );
679f17e1 1701is( $c->common_successor( 'n24', 'n23' )->id,
22222af9 1702 '#END#', "Found correct common successor" );
1703
4e5a7b2c 1704is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1705 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1706is( $c->common_successor( 'n21', 'n10' )->id,
22222af9 1707 '#END#', "Found correct common successor for readings on same path" );
1708
1709=end testing
1710
1711=cut
1712
1713## Return the closest reading that is a predecessor of both the given readings.
1714sub common_predecessor {
1715 my $self = shift;
4e5a7b2c 1716 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1717 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1718}
1719
1720sub common_successor {
1721 my $self = shift;
4e5a7b2c 1722 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1723 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1724}
1725
027d819c 1726sub _common_in_path {
22222af9 1727 my( $self, $r1, $r2, $dir ) = @_;
1728 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1729 $iter = $self->end->rank - $iter if $dir eq 'successors';
1730 my @candidates;
1731 my @last_checked = ( $r1, $r2 );
1732 my %all_seen;
1733 while( !@candidates ) {
1734 my @new_lc;
1735 foreach my $lc ( @last_checked ) {
1736 foreach my $p ( $lc->$dir ) {
1737 if( $all_seen{$p->id} ) {
1738 push( @candidates, $p );
1739 } else {
1740 $all_seen{$p->id} = 1;
1741 push( @new_lc, $p );
1742 }
1743 }
1744 }
1745 @last_checked = @new_lc;
1746 }
1747 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1748 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1749}
1750
63778331 1751sub throw {
1752 Text::Tradition::Error->throw(
1753 'ident' => 'Collation error',
1754 'message' => $_[0],
1755 );
1756}
1757
dd3b58b0 1758no Moose;
1759__PACKAGE__->meta->make_immutable;
e867486f 1760
027d819c 1761=head1 LICENSE
e867486f 1762
027d819c 1763This package is free software and is provided "as is" without express
1764or implied warranty. You can redistribute it and/or modify it under
1765the same terms as Perl itself.
e867486f 1766
027d819c 1767=head1 AUTHOR
e867486f 1768
027d819c 1769Tara L Andrews E<lt>aurum@cpan.orgE<gt>