accessor method for fixing a database bug
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
910a0a6d 8use Text::CSV_XS;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
df6d9812 12use XML::LibXML;
a344ffcf 13use XML::LibXML::XPathContext;
dd3b58b0 14use Moose;
15
3a2ebbf4 16has 'sequence' => (
d047cd52 17 is => 'ro',
3a2ebbf4 18 isa => 'Graph',
19 default => sub { Graph->new() },
d047cd52 20 handles => {
3a2ebbf4 21 paths => 'edges',
d047cd52 22 },
d047cd52 23 );
3a2ebbf4 24
25has 'relations' => (
26 is => 'ro',
22222af9 27 isa => 'Text::Tradition::Collation::RelationshipStore',
28 handles => {
29 relationships => 'relationships',
30 related_readings => 'related_readings',
202ccb18 31 get_relationship => 'get_relationship',
ee801e17 32 del_relationship => 'del_relationship',
22222af9 33 },
34 writer => '_set_relations',
3a2ebbf4 35 );
dd3b58b0 36
3a2ebbf4 37has 'tradition' => (
38 is => 'ro',
d047cd52 39 isa => 'Text::Tradition',
8cfd99c4 40 writer => '_set_tradition',
8d9a1cd8 41 weak_ref => 1,
d047cd52 42 );
dd3b58b0 43
3a2ebbf4 44has 'readings' => (
45 isa => 'HashRef[Text::Tradition::Collation::Reading]',
46 traits => ['Hash'],
47 handles => {
48 reading => 'get',
49 _add_reading => 'set',
50 del_reading => 'delete',
51 has_reading => 'exists',
52 readings => 'values',
53 },
54 default => sub { {} },
55 );
910a0a6d 56
4a8828f0 57has 'wit_list_separator' => (
7854e12e 58 is => 'rw',
59 isa => 'Str',
60 default => ', ',
61 );
62
63has 'baselabel' => (
64 is => 'rw',
65 isa => 'Str',
66 default => 'base text',
67 );
4a8828f0 68
15d2d3df 69has 'linear' => (
70 is => 'rw',
71 isa => 'Bool',
72 default => 1,
73 );
c84275ff 74
ef9d481f 75has 'ac_label' => (
76 is => 'rw',
77 isa => 'Str',
78 default => ' (a.c.)',
79 );
3a2ebbf4 80
4e483aa5 81has 'wordsep' => (
82 is => 'rw',
83 isa => 'Str',
84 default => ' ',
85 );
86
3a2ebbf4 87has 'start' => (
88 is => 'ro',
89 isa => 'Text::Tradition::Collation::Reading',
90 writer => '_set_start',
91 weak_ref => 1,
92 );
93
94has 'end' => (
95 is => 'ro',
96 isa => 'Text::Tradition::Collation::Reading',
97 writer => '_set_end',
98 weak_ref => 1,
99 );
b365fbae 100
101has 'cached_svg' => (
102 is => 'rw',
103 isa => 'Str',
104 predicate => 'has_cached_svg',
105 clearer => 'wipe_svg',
106 );
1dd07bda 107
108has 'cached_table' => (
109 is => 'rw',
110 isa => 'HashRef',
111 predicate => 'has_cached_table',
112 clearer => 'wipe_table',
113 );
c1915ab9 114
115has '_graphcalc_done' => (
116 is => 'rw',
117 isa => 'Bool',
118 default => undef,
119 );
1f563ac3 120
4e5a7b2c 121=head1 NAME
122
123Text::Tradition::Collation - a software model for a text collation
124
125=head1 SYNOPSIS
126
127 use Text::Tradition;
128 my $t = Text::Tradition->new(
129 'name' => 'this is a text',
130 'input' => 'TEI',
131 'file' => '/path/to/tei_parallel_seg_file.xml' );
132
133 my $c = $t->collation;
134 my @readings = $c->readings;
135 my @paths = $c->paths;
136 my @relationships = $c->relationships;
137
138 my $svg_variant_graph = $t->collation->as_svg();
139
140=head1 DESCRIPTION
141
142Text::Tradition is a library for representation and analysis of collated
143texts, particularly medieval ones. The Collation is the central feature of
144a Tradition, where the text, its sequence of readings, and its relationships
145between readings are actually kept.
146
147=head1 CONSTRUCTOR
148
149=head2 new
150
151The constructor. Takes a hash or hashref of the following arguments:
152
153=over
154
155=item * tradition - The Text::Tradition object to which the collation
156belongs. Required.
157
158=item * linear - Whether the collation should be linear; that is, whether
159transposed readings should be treated as two linked readings rather than one,
160and therefore whether the collation graph is acyclic. Defaults to true.
161
4e5a7b2c 162=item * baselabel - The default label for the path taken by a base text
163(if any). Defaults to 'base text'.
164
165=item * wit_list_separator - The string to join a list of witnesses for
166purposes of making labels in display graphs. Defaults to ', '.
167
168=item * ac_label - The extra label to tack onto a witness sigil when
169representing another layer of path for the given witness - that is, when
170a text has more than one possible reading due to scribal corrections or
171the like. Defaults to ' (a.c.)'.
172
4e483aa5 173=item * wordsep - The string used to separate words in the original text.
174Defaults to ' '.
175
4e5a7b2c 176=back
177
178=head1 ACCESSORS
179
180=head2 tradition
181
182=head2 linear
183
4e5a7b2c 184=head2 wit_list_separator
185
186=head2 baselabel
187
188=head2 ac_label
189
4e483aa5 190=head2 wordsep
191
4e5a7b2c 192Simple accessors for collation attributes.
193
194=head2 start
195
196The meta-reading at the start of every witness path.
197
198=head2 end
199
200The meta-reading at the end of every witness path.
201
202=head2 readings
203
204Returns all Reading objects in the graph.
205
206=head2 reading( $id )
207
208Returns the Reading object corresponding to the given ID.
209
210=head2 add_reading( $reading_args )
211
212Adds a new reading object to the collation.
213See L<Text::Tradition::Collation::Reading> for the available arguments.
214
215=head2 del_reading( $object_or_id )
216
217Removes the given reading from the collation, implicitly removing its
218paths and relationships.
219
4e483aa5 220=head2 merge_readings( $main, $second, $concatenate, $with_str )
221
222Merges the $second reading into the $main one. If $concatenate is true, then
223the merged node will carry the text of both readings, concatenated with either
224$with_str (if specified) or a sensible default (the empty string if the
225appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 226
4e483aa5 227The first two arguments may be either readings or reading IDs.
4e5a7b2c 228
229=head2 has_reading( $id )
230
231Predicate to see whether a given reading ID is in the graph.
232
233=head2 reading_witnesses( $object_or_id )
234
235Returns a list of sigils whose witnesses contain the reading.
236
237=head2 paths
238
239Returns all reading paths within the document - that is, all edges in the
240collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
241
242=head2 add_path( $source, $target, $sigil )
243
244Links the given readings in the collation in sequence, under the given witness
245sigil. The readings may be specified by object or ID.
246
247=head2 del_path( $source, $target, $sigil )
248
249Links the given readings in the collation in sequence, under the given witness
250sigil. The readings may be specified by object or ID.
251
252=head2 has_path( $source, $target );
253
254Returns true if the two readings are linked in sequence in any witness.
255The readings may be specified by object or ID.
256
257=head2 relationships
258
259Returns all Relationship objects in the collation.
260
261=head2 add_relationship( $reading, $other_reading, $options )
262
263Adds a new relationship of the type given in $options between the two readings,
264which may be specified by object or ID. Returns a value of ( $status, @vectors)
265where $status is true on success, and @vectors is a list of relationship edges
266that were ultimately added.
267See L<Text::Tradition::Collation::Relationship> for the available options.
268
269=cut
dd3b58b0 270
d047cd52 271sub BUILD {
3a2ebbf4 272 my $self = shift;
22222af9 273 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 274 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
275 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 276}
784877d9 277
3a2ebbf4 278### Reading construct/destruct functions
279
280sub add_reading {
281 my( $self, $reading ) = @_;
282 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
283 my %args = %$reading;
284 $reading = Text::Tradition::Collation::Reading->new(
285 'collation' => $self,
286 %args );
287 }
288 # First check to see if a reading with this ID exists.
289 if( $self->reading( $reading->id ) ) {
63778331 290 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 291 }
c1915ab9 292 $self->_graphcalc_done(0);
3a2ebbf4 293 $self->_add_reading( $reading->id => $reading );
294 # Once the reading has been added, put it in both graphs.
295 $self->sequence->add_vertex( $reading->id );
22222af9 296 $self->relations->add_reading( $reading->id );
3a2ebbf4 297 return $reading;
eca16057 298};
299
3a2ebbf4 300around del_reading => sub {
301 my $orig = shift;
302 my $self = shift;
303 my $arg = shift;
304
305 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
306 $arg = $arg->id;
307 }
3a2ebbf4 308 # Remove the reading from the graphs.
c1915ab9 309 $self->_graphcalc_done(0);
4e483aa5 310 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 311 $self->sequence->delete_vertex( $arg );
22222af9 312 $self->relations->delete_reading( $arg );
3a2ebbf4 313
314 # Carry on.
315 $self->$orig( $arg );
316};
7854e12e 317
4e483aa5 318=begin testing
319
320use Text::Tradition;
321
322my $cxfile = 't/data/Collatex-16.xml';
323my $t = Text::Tradition->new(
324 'name' => 'inline',
325 'input' => 'CollateX',
326 'file' => $cxfile,
327 );
328my $c = $t->collation;
329
330my $rno = scalar $c->readings;
331# Split n21 for testing purposes
332my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
333my $old_r = $c->reading( 'n21' );
334$old_r->alter_text( 'to' );
335$c->del_path( 'n20', 'n21', 'A' );
336$c->add_path( 'n20', 'n21p0', 'A' );
337$c->add_path( 'n21p0', 'n21', 'A' );
338$c->flatten_ranks();
339ok( $c->reading( 'n21p0' ), "New reading exists" );
340is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
341
679f17e1 342# Combine n3 and n4 ( with his )
4e483aa5 343$c->merge_readings( 'n3', 'n4', 1 );
344ok( !$c->reading('n4'), "Reading n4 is gone" );
345is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
346
679f17e1 347# Collapse n9 and n10 ( rood / root )
348$c->merge_readings( 'n9', 'n10' );
349ok( !$c->reading('n10'), "Reading n10 is gone" );
350is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 351
352# Combine n21 and n21p0
353my $remaining = $c->reading('n21');
354$remaining ||= $c->reading('n22'); # one of these should still exist
355$c->merge_readings( 'n21p0', $remaining, 1 );
356ok( !$c->reading('n21'), "Reading $remaining is gone" );
357is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
358
359=end testing
360
361=cut
7854e12e 362
3a2ebbf4 363sub merge_readings {
364 my $self = shift;
365
366 # We only need the IDs for adding paths to the graph, not the reading
367 # objects themselves.
4e483aa5 368 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 369 $self->_graphcalc_done(0);
3a2ebbf4 370
371 # The kept reading should inherit the paths and the relationships
372 # of the deleted reading.
373 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
374 my @vector = ( $kept );
375 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
376 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 377 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 378 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
379 $self->sequence->add_edge( @vector );
380 my $fwits = $self->sequence->get_edge_attributes( @vector );
381 @wits{keys %$fwits} = values %$fwits;
382 $self->sequence->set_edge_attributes( @vector, \%wits );
383 }
22222af9 384 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 385
386 # Do the deletion deed.
4e483aa5 387 if( $combine ) {
49d4f2ac 388 my $kept_obj = $self->reading( $kept );
4e483aa5 389 my $del_obj = $self->reading( $deleted );
390 my $joinstr = $combine_char;
391 unless( defined $joinstr ) {
392 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
393 $joinstr = $self->wordsep unless defined $joinstr;
394 }
395 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 396 }
3a2ebbf4 397 $self->del_reading( $deleted );
398}
7854e12e 399
3265b0ce 400
3a2ebbf4 401# Helper function for manipulating the graph.
402sub _stringify_args {
4e483aa5 403 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 404 $first = $first->id
405 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
406 $second = $second->id
407 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 408 return( $first, $second, @args );
3a2ebbf4 409}
df6d9812 410
4e5a7b2c 411# Helper function for manipulating the graph.
412sub _objectify_args {
413 my( $self, $first, $second, $arg ) = @_;
414 $first = $self->reading( $first )
415 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
416 $second = $self->reading( $second )
417 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
418 return( $first, $second, $arg );
419}
3a2ebbf4 420### Path logic
421
422sub add_path {
423 my $self = shift;
424
425 # We only need the IDs for adding paths to the graph, not the reading
426 # objects themselves.
427 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
428
c1915ab9 429 $self->_graphcalc_done(0);
3a2ebbf4 430 # Connect the readings
431 $self->sequence->add_edge( $source, $target );
432 # Note the witness in question
433 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 434};
435
3a2ebbf4 436sub del_path {
437 my $self = shift;
49d4f2ac 438 my @args;
439 if( ref( $_[0] ) eq 'ARRAY' ) {
440 my $e = shift @_;
441 @args = ( @$e, @_ );
442 } else {
443 @args = @_;
444 }
3a2ebbf4 445
446 # We only need the IDs for adding paths to the graph, not the reading
447 # objects themselves.
49d4f2ac 448 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 449
c1915ab9 450 $self->_graphcalc_done(0);
3a2ebbf4 451 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 452 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 453 }
454 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
455 $self->sequence->delete_edge( $source, $target );
456 }
784877d9 457}
458
3a2ebbf4 459
15d2d3df 460# Extra graph-alike utility
461sub has_path {
3a2ebbf4 462 my $self = shift;
463 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
464 return undef unless $self->sequence->has_edge( $source, $target );
465 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 466}
467
4e5a7b2c 468=head2 clear_witness( @sigil_list )
3a2ebbf4 469
4e5a7b2c 470Clear the given witnesses out of the collation entirely, removing references
471to them in paths, and removing readings that belong only to them. Should only
472be called via $tradition->del_witness.
3a2ebbf4 473
474=cut
475
4e5a7b2c 476sub clear_witness {
477 my( $self, @sigils ) = @_;
478
c1915ab9 479 $self->_graphcalc_done(0);
4e5a7b2c 480 # Clear the witness(es) out of the paths
481 foreach my $e ( $self->paths ) {
482 foreach my $sig ( @sigils ) {
483 $self->del_path( $e, $sig );
484 }
485 }
486
487 # Clear out the newly unused readings
488 foreach my $r ( $self->readings ) {
489 unless( $self->reading_witnesses( $r ) ) {
490 $self->del_reading( $r );
491 }
492 }
493}
3a2ebbf4 494
495sub add_relationship {
496 my $self = shift;
22222af9 497 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 498 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 499 $self->reading( $source ), $target, $self->reading( $target ), $opts );
c1915ab9 500 $self->_graphcalc_done(0);
63778331 501 return @vectors;
22222af9 502}
ef9d481f 503
ca6e6095 504around qw/ get_relationship del_relationship / => sub {
505 my $orig = shift;
506 my $self = shift;
507 my @args = @_;
508 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
509 @args = @{$_[0]};
510 }
511 my( $source, $target ) = $self->_stringify_args( @args );
512 $self->$orig( $source, $target );
513};
514
22222af9 515=head2 reading_witnesses( $reading )
910a0a6d 516
22222af9 517Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 518
22222af9 519=cut
1d310495 520
1d310495 521sub reading_witnesses {
522 my( $self, $reading ) = @_;
523 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 524 # arbitrarily chosen "incoming". Thus, special-case the start node.
525 if( $reading eq $self->start ) {
526 return map { $_->sigil } $self->tradition->witnesses;
527 }
1d310495 528 my %all_witnesses;
529 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
530 my $wits = $self->sequence->get_edge_attributes( @$e );
531 @all_witnesses{ keys %$wits } = 1;
532 }
c12bb878 533 my $acstr = $self->ac_label;
534 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
535 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
536 }
1d310495 537 return keys %all_witnesses;
910a0a6d 538}
539
4e5a7b2c 540=head1 OUTPUT METHODS
8e1394aa 541
0ecb975c 542=head2 as_svg( \%options )
8e1394aa 543
0068967c 544Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 545See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 546
547=cut
548
549sub as_svg {
0ecb975c 550 my( $self, $opts ) = @_;
bfcbcecb 551 throw( "Need GraphViz installed to output SVG" )
552 unless File::Which::which( 'dot' );
e247aad1 553 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
c1915ab9 554 $self->calculate_ranks() unless $self->_graphcalc_done;
e247aad1 555 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 556 my @cmd = qw/dot -Tsvg/;
557 my( $svg, $err );
558 my $dotfile = File::Temp->new();
559 ## USE FOR DEBUGGING
560 # $dotfile->unlink_on_destroy(0);
561 binmode $dotfile, ':utf8';
562 print $dotfile $self->as_dot( $opts );
563 push( @cmd, $dotfile->filename );
564 run( \@cmd, ">", binary(), \$svg );
e247aad1 565 $svg = decode_utf8( $svg );
566 $self->cached_svg( $svg ) unless $want_subgraph;
567 return $svg;
568 } else {
569 return $self->cached_svg;
b365fbae 570 }
8e1394aa 571}
572
b22576c6 573
0ecb975c 574=head2 as_dot( \%options )
b22576c6 575
0ecb975c 576Returns a string that is the collation graph expressed in dot
577(i.e. GraphViz) format. Options include:
b22576c6 578
0ecb975c 579=over 4
b22576c6 580
0ecb975c 581=item * from
b22576c6 582
0ecb975c 583=item * to
df6d9812 584
0ecb975c 585=item * color_common
586
587=back
df6d9812 588
589=cut
590
591sub as_dot {
0ecb975c 592 my( $self, $opts ) = @_;
593 my $startrank = $opts->{'from'} if $opts;
594 my $endrank = $opts->{'to'} if $opts;
595 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 596 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
597 && $self->end->rank > 100;
6648ee3d 598 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 599
b22576c6 600 # Check the arguments
601 if( $startrank ) {
602 return if $endrank && $startrank > $endrank;
603 return if $startrank > $self->end->rank;
604 }
605 if( defined $endrank ) {
606 return if $endrank < 0;
f1b3b33a 607 $endrank = undef if $endrank == $self->end->rank;
b22576c6 608 }
609
67da8d6c 610 my $graph_name = $self->tradition->name;
611 $graph_name =~ s/[^\w\s]//g;
612 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 613
614 my %graph_attrs = (
615 'rankdir' => 'LR',
616 'bgcolor' => 'none',
617 );
618 my %node_attrs = (
b8990398 619 'fontsize' => 14,
f13b5582 620 'fillcolor' => 'white',
621 'style' => 'filled',
622 'shape' => 'ellipse'
623 );
624 my %edge_attrs = (
625 'arrowhead' => 'open',
626 'color' => '#000000',
627 'fontcolor' => '#000000',
628 );
629
67da8d6c 630 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 631 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
632 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 633
b22576c6 634 # Output substitute start/end readings if necessary
635 if( $startrank ) {
636 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
637 }
638 if( $endrank ) {
639 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
640 }
b365fbae 641 if( $STRAIGHTENHACK ) {
642 ## HACK part 1
6648ee3d 643 my $startlabel = $startrank ? 'SUBSTART' : 'START';
644 $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
b365fbae 645 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
646 }
b22576c6 647 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 648 # Sort the readings by rank if we have ranks; this speeds layout.
649 my @all_readings = $self->end->has_rank
650 ? sort { $a->rank <=> $b->rank } $self->readings
651 : $self->readings;
4633f9e4 652 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 653 foreach my $reading ( @all_readings ) {
b22576c6 654 # Only output readings within our rank range.
655 next if $startrank && $reading->rank < $startrank;
656 next if $endrank && $reading->rank > $endrank;
657 $used{$reading->id} = 1;
910a0a6d 658 # Need not output nodes without separate labels
3a2ebbf4 659 next if $reading->id eq $reading->text;
d4b75f44 660 my $rattrs;
30f0df34 661 my $label = $reading->text;
629e27b0 662 $label .= '-' if $reading->join_next;
663 $label = "-$label" if $reading->join_prior;
8f9cab7b 664 $label =~ s/\"/\\\"/g;
d4b75f44 665 $rattrs->{'label'} = $label;
0ecb975c 666 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 667 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 668 }
3a2ebbf4 669
30ddc24c 670 # Add the real edges. Need to weight one edge per rank jump, in a
671 # continuous line.
b365fbae 672 # my $weighted = $self->_add_edge_weights;
b22576c6 673 my @edges = $self->paths;
3bdec618 674 my( %substart, %subend );
b22576c6 675 foreach my $edge ( @edges ) {
676 # Do we need to output this edge?
508fd430 677 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 678 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 679 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 680
b22576c6 681 # Account for the rank gap if necessary
30ddc24c 682 my $rank0 = $self->reading( $edge->[0] )->rank
683 if $self->reading( $edge->[0] )->has_rank;
684 my $rank1 = $self->reading( $edge->[1] )->rank
685 if $self->reading( $edge->[1] )->has_rank;
686 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
687 $variables->{'minlen'} = $rank1 - $rank0;
688 }
689
690 # Add the calculated edge weights
b365fbae 691 # if( exists $weighted->{$edge->[0]}
e247aad1 692 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
693 # # $variables->{'color'} = 'red';
694 # $variables->{'weight'} = 3.0;
695 # }
30ddc24c 696
508fd430 697 # EXPERIMENTAL: make edge width reflect no. of witnesses
698 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
699 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
700
f13b5582 701 my $varopts = _dot_attr_string( $variables );
702 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
703 $edge->[0], $edge->[1], $varopts );
3bdec618 704 } elsif( $used{$edge->[0]} ) {
705 $subend{$edge->[0]} = 1;
706 } elsif( $used{$edge->[1]} ) {
707 $substart{$edge->[1]} = 1;
b22576c6 708 }
df6d9812 709 }
3bdec618 710 # Add substitute start and end edges if necessary
711 foreach my $node ( keys %substart ) {
027d819c 712 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 713 my $variables = { %edge_attrs, 'label' => $witstr };
714 my $varopts = _dot_attr_string( $variables );
715 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 716 }
717 foreach my $node ( keys %subend ) {
027d819c 718 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 719 my $variables = { %edge_attrs, 'label' => $witstr };
720 my $varopts = _dot_attr_string( $variables );
721 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 722 }
b365fbae 723 # HACK part 2
724 if( $STRAIGHTENHACK ) {
6648ee3d 725 my $endlabel = $endrank ? 'SUBEND' : 'END';
726 $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 727 }
30ddc24c 728
df6d9812 729 $dot .= "}\n";
730 return $dot;
731}
732
f13b5582 733sub _dot_attr_string {
734 my( $hash ) = @_;
735 my @attrs;
736 foreach my $k ( sort keys %$hash ) {
737 my $v = $hash->{$k};
738 push( @attrs, $k.'="'.$v.'"' );
739 }
740 return( '[ ' . join( ', ', @attrs ) . ' ]' );
741}
742
30ddc24c 743sub _add_edge_weights {
744 my $self = shift;
745 # Walk the graph from START to END, choosing the successor node with
746 # the largest number of witness paths each time.
747 my $weighted = {};
748 my $curr = $self->start->id;
008fc8a6 749 my $ranked = $self->end->has_rank;
30ddc24c 750 while( $curr ne $self->end->id ) {
008fc8a6 751 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 752 my @succ = sort { $self->path_witnesses( $curr, $a )
753 <=> $self->path_witnesses( $curr, $b ) }
754 $self->sequence->successors( $curr );
755 my $next = pop @succ;
008fc8a6 756 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 757 # Try to avoid lacunae in the weighted path.
008fc8a6 758 while( @succ &&
759 ( $self->reading( $next )->is_lacuna ||
760 $nextrank - $rank > 1 ) ){
30ddc24c 761 $next = pop @succ;
762 }
763 $weighted->{$curr} = $next;
764 $curr = $next;
765 }
766 return $weighted;
767}
768
027d819c 769=head2 path_witnesses( $edge )
770
771Returns the list of sigils whose witnesses are associated with the given edge.
772The edge can be passed as either an array or an arrayref of ( $source, $target ).
773
774=cut
775
3a2ebbf4 776sub path_witnesses {
777 my( $self, @edge ) = @_;
778 # If edge is an arrayref, cope.
779 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
780 my $e = shift @edge;
781 @edge = @$e;
782 }
783 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 784 return @wits;
3a2ebbf4 785}
786
7f9f05e8 787# Helper function. Make a display label for the given witnesses, showing a.c.
788# witnesses only where the main witness is not also in the list.
027d819c 789sub _path_display_label {
508fd430 790 my $self = shift;
7f9f05e8 791 my %wits;
792 map { $wits{$_} = 1 } @_;
793
794 # If an a.c. wit is listed, remove it if the main wit is also listed.
795 # Otherwise keep it for explicit listing.
796 my $aclabel = $self->ac_label;
797 my @disp_ac;
798 foreach my $w ( sort keys %wits ) {
799 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
800 if( exists $wits{$1} ) {
801 delete $wits{$w};
802 } else {
803 push( @disp_ac, $w );
804 }
805 }
806 }
807
808 # See if we are in a majority situation.
8f9cab7b 809 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 810 if( scalar keys %wits > $maj ) {
811 unshift( @disp_ac, 'majority' );
812 return join( ', ', @disp_ac );
8f9cab7b 813 } else {
7f9f05e8 814 return join( ', ', sort keys %wits );
8f9cab7b 815 }
816}
1dd07bda 817
bf6e338d 818=head2 readings_at_rank( $rank )
1dd07bda 819
bf6e338d 820Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 821
822=cut
823
bf6e338d 824sub readings_at_rank {
1dd07bda 825 my( $self, $rank ) = @_;
bf6e338d 826 my $table = $self->alignment_table;
827 # Table rank is real rank - 1.
828 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
829 my %readings;
830 foreach my $e ( @elements ) {
831 next unless ref( $e ) eq 'HASH';
832 next unless exists $e->{'t'};
833 $readings{$e->{'t'}->id} = $e->{'t'};
834 }
835 return values %readings;
1dd07bda 836}
8f9cab7b 837
4e5a7b2c 838=head2 as_graphml
8e1394aa 839
4e5a7b2c 840Returns a GraphML representation of the collation. The GraphML will contain
841two graphs. The first expresses the attributes of the readings and the witness
842paths that link them; the second expresses the relationships that link the
843readings. This is the native transfer format for a tradition.
8e1394aa 844
56eefa04 845=begin testing
846
847use Text::Tradition;
848
849my $READINGS = 311;
850my $PATHS = 361;
851
852my $datafile = 't/data/florilegium_tei_ps.xml';
853my $tradition = Text::Tradition->new( 'input' => 'TEI',
854 'name' => 'test0',
855 'file' => $datafile,
856 'linear' => 1 );
857
858ok( $tradition, "Got a tradition object" );
859is( scalar $tradition->witnesses, 13, "Found all witnesses" );
860ok( $tradition->collation, "Tradition has a collation" );
861
862my $c = $tradition->collation;
863is( scalar $c->readings, $READINGS, "Collation has all readings" );
864is( scalar $c->paths, $PATHS, "Collation has all paths" );
865is( scalar $c->relationships, 0, "Collation has all relationships" );
866
867# Add a few relationships
868$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
869$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
870$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
871
872# Now write it to GraphML and parse it again.
873
874my $graphml = $c->as_graphml;
875my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
876is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
877is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
878is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
879
880=end testing
881
8e1394aa 882=cut
883
884sub as_graphml {
3a2ebbf4 885 my( $self ) = @_;
3d14b48e 886 $self->calculate_ranks unless $self->_graphcalc_done;
887
8e1394aa 888 # Some namespaces
889 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
890 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
891 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 892 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 893
894 # Create the document and root node
895 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
896 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
897 $graphml->setDocumentElement( $root );
898 $root->setNamespace( $xsi_ns, 'xsi', 0 );
899 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 900
901 # List of attribute types to save on our objects and their corresponding
902 # GraphML types
903 my %save_types = (
904 'Str' => 'string',
905 'Int' => 'int',
906 'Bool' => 'boolean',
907 'RelationshipType' => 'string',
908 'RelationshipScope' => 'string',
909 );
910
911 # List of attribute names *not* to save on our objects.
912 # We will also not save any attribute beginning with _.
913 my %skipsave;
914 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 915
bbd064a9 916 # Add the data keys for the graph. Include an extra key 'version' for the
917 # GraphML output version.
e309421a 918 my %graph_data_keys;
919 my $gdi = 0;
bbd064a9 920 my %graph_attributes = ( 'version' => 'string' );
921 # Graph attributes include those of Tradition and those of Collation.
922 my %gattr_from;
923 my $tmeta = $self->tradition->meta;
924 my $cmeta = $self->meta;
925 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
926 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
927 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
928 next if $attr->name =~ /^_/;
929 next if $skipsave{$attr->name};
930 next unless $save_types{$attr->type_constraint->name};
931 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
932 }
933
934 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 935 $graph_data_keys{$datum} = 'dg'.$gdi++;
936 my $key = $root->addNewChild( $graphml_ns, 'key' );
937 $key->setAttribute( 'attr.name', $datum );
bbd064a9 938 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 939 $key->setAttribute( 'for', 'graph' );
940 $key->setAttribute( 'id', $graph_data_keys{$datum} );
941 }
f6066bac 942
bbd064a9 943 # Add the data keys for reading nodes
944 my %reading_attributes;
945 my $rmeta = Text::Tradition::Collation::Reading->meta;
946 foreach my $attr( $rmeta->get_all_attributes ) {
947 next if $attr->name =~ /^_/;
948 next if $skipsave{$attr->name};
949 next unless $save_types{$attr->type_constraint->name};
950 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
951 }
ef9d481f 952 my %node_data_keys;
953 my $ndi = 0;
bbd064a9 954 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 955 $node_data_keys{$datum} = 'dn'.$ndi++;
956 my $key = $root->addNewChild( $graphml_ns, 'key' );
957 $key->setAttribute( 'attr.name', $datum );
bbd064a9 958 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 959 $key->setAttribute( 'for', 'node' );
960 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 961 }
962
bbd064a9 963 # Add the data keys for edges, that is, paths and relationships. Path
964 # data does not come from a Moose class so is here manually.
ef9d481f 965 my $edi = 0;
966 my %edge_data_keys;
bbd064a9 967 my %edge_attributes = (
3a2ebbf4 968 witness => 'string', # ID/label for a path
3a2ebbf4 969 extra => 'boolean', # Path key
3a2ebbf4 970 );
bbd064a9 971 my @path_attributes = keys %edge_attributes; # track our manual additions
972 my $pmeta = Text::Tradition::Collation::Relationship->meta;
973 foreach my $attr( $pmeta->get_all_attributes ) {
974 next if $attr->name =~ /^_/;
975 next if $skipsave{$attr->name};
976 next unless $save_types{$attr->type_constraint->name};
977 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
978 }
979 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 980 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 981 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 982 $key->setAttribute( 'attr.name', $datum );
bbd064a9 983 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 984 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 985 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 986 }
3a2ebbf4 987
22222af9 988 # Add the collation graph itself
2c669bca 989 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
990 $sgraph->setAttribute( 'edgedefault', 'directed' );
991 $sgraph->setAttribute( 'id', $self->tradition->name );
992 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
993 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
994 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
995 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
996 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 997
e309421a 998 # Collation attribute data
bbd064a9 999 foreach my $datum ( keys %graph_attributes ) {
1000 my $value;
1001 if( $datum eq 'version' ) {
1002 $value = '3.1';
1003 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1004 $value = $self->tradition->$datum;
1005 } else {
1006 $value = $self->$datum;
1007 }
2c669bca 1008 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1009 }
8e1394aa 1010
1011 my $node_ctr = 0;
1012 my %node_hash;
22222af9 1013 # Add our readings to the graph
3a2ebbf4 1014 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 1015 # Add to the main graph
1016 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1017 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1018 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1019 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1020 foreach my $d ( keys %reading_attributes ) {
255875b8 1021 my $nval = $n->$d;
1022 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1023 if defined $nval;
1024 }
b15511bf 1025 }
1026
2c669bca 1027 # Add the path edges to the sequence graph
df6d9812 1028 my $edge_ctr = 0;
3a2ebbf4 1029 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1030 # We add an edge in the graphml for every witness in $e.
508fd430 1031 foreach my $wit ( sort $self->path_witnesses( $e ) ) {
3a2ebbf4 1032 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1033 $node_hash{ $e->[0] },
1034 $node_hash{ $e->[1] } );
2c669bca 1035 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1036 $edge_el->setAttribute( 'source', $from );
1037 $edge_el->setAttribute( 'target', $to );
1038 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1039
1040 # It's a witness path, so add the witness
1041 my $base = $wit;
1042 my $key = $edge_data_keys{'witness'};
1043 # Is this an ante-corr witness?
1044 my $aclabel = $self->ac_label;
1045 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1046 # Keep the base witness
1047 $base = $1;
1048 # ...and record that this is an 'extra' reading path
1049 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1050 }
1051 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1052 }
1053 }
1054
22222af9 1055 # Add the relationship graph to the XML
bbd064a9 1056 map { delete $edge_data_keys{$_} } @path_attributes;
027d819c 1057 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
2626f709 1058 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1059
94c00c71 1060 # Save and return the thing
1061 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1062 return $result;
df6d9812 1063}
1064
b15511bf 1065sub _add_graphml_data {
1066 my( $el, $key, $value ) = @_;
b15511bf 1067 return unless defined $value;
c9bf3dbf 1068 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1069 $data_el->setAttribute( 'key', $key );
1070 $data_el->appendText( $value );
8e1394aa 1071}
1072
4e5a7b2c 1073=head2 as_csv
910a0a6d 1074
1075Returns a CSV alignment table representation of the collation graph, one
2c669bca 1076row per witness (or witness uncorrected.)
910a0a6d 1077
1078=cut
1079
1080sub as_csv {
3a2ebbf4 1081 my( $self ) = @_;
1dd07bda 1082 my $table = $self->alignment_table;
910a0a6d 1083 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
1084 my @result;
2c669bca 1085 # Make the header row
1086 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1087 push( @result, decode_utf8( $csv->string ) );
1088 # Make the rest of the rows
1089 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1090 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1091 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1092 $csv->combine( @row );
910a0a6d 1093 push( @result, decode_utf8( $csv->string ) );
1094 }
3a2ebbf4 1095 return join( "\n", @result );
910a0a6d 1096}
1097
1dd07bda 1098=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1099
566f4595 1100Return a reference to an alignment table, in a slightly enhanced CollateX
1101format which looks like this:
1102
1103 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1104 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1105 { witness => "SIG2",
4e5a7b2c 1106 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1107 ... ],
1108 length => TEXTLEN };
1109
1110If $use_refs is set to 1, the reading object is returned in the table
1111instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1112
1113If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1114keys have a true hash value will be included.
2c669bca 1115
1116=cut
9f3ba6f7 1117
1dd07bda 1118sub alignment_table {
1119 my( $self ) = @_;
c1915ab9 1120 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1121 return $self->cached_table if $self->has_cached_table;
1122
0ecb975c 1123 # Make sure we can do this
1124 throw( "Need a linear graph in order to make an alignment table" )
1125 unless $self->linear;
1126 $self->calculate_ranks unless $self->end->has_rank;
1127
2c669bca 1128 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1129 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1130 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1131 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1132 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1133 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1134 push( @{$table->{'alignment'}},
1135 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1136 if( $wit->is_layered ) {
1137 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1138 $wit->sigil.$self->ac_label );
1dd07bda 1139 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1140 push( @{$table->{'alignment'}},
1141 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1142 }
1143 }
1dd07bda 1144 $self->cached_table( $table );
1145 return $table;
910a0a6d 1146}
1147
1148sub _make_witness_row {
1dd07bda 1149 my( $path, $positions ) = @_;
910a0a6d 1150 my %char_hash;
1151 map { $char_hash{$_} = undef } @$positions;
2c669bca 1152 my $debug = 0;
910a0a6d 1153 foreach my $rdg ( @$path ) {
eca16057 1154 my $rtext = $rdg->text;
1155 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1156 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1157 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1158 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1159 }
1160 my @row = map { $char_hash{$_} } @$positions;
eca16057 1161 # Fill in lacuna markers for undef spots in the row
1162 my $last_el = shift @row;
1163 my @filled_row = ( $last_el );
1164 foreach my $el ( @row ) {
0e476982 1165 # If we are using node reference, make the lacuna node appear many times
1166 # in the table. If not, use the lacuna tag.
1dd07bda 1167 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1168 $el = $last_el;
eca16057 1169 }
1170 push( @filled_row, $el );
1171 $last_el = $el;
1172 }
1173 return @filled_row;
910a0a6d 1174}
1175
4e5a7b2c 1176=head1 NAVIGATION METHODS
910a0a6d 1177
4e5a7b2c 1178=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1179
1180Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1181with $last, for the witness given in $sigil. If a $backup sigil is
1182specified (e.g. when walking a layered witness), it will be used wherever
1183no $sigil path exists. If there is a base text reading, that will be
1184used wherever no path exists for $sigil or $backup.
e2902068 1185
1186=cut
1187
910a0a6d 1188# TODO Think about returning some lazy-eval iterator.
b0b4421a 1189# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1190
e2902068 1191sub reading_sequence {
861c3e27 1192 my( $self, $start, $end, $witness ) = @_;
e2902068 1193
930ff666 1194 $witness = $self->baselabel unless $witness;
e2902068 1195 my @readings = ( $start );
1196 my %seen;
1197 my $n = $start;
3a2ebbf4 1198 while( $n && $n->id ne $end->id ) {
1199 if( exists( $seen{$n->id} ) ) {
63778331 1200 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1201 }
3a2ebbf4 1202 $seen{$n->id} = 1;
910a0a6d 1203
861c3e27 1204 my $next = $self->next_reading( $n, $witness );
44771cf2 1205 unless( $next ) {
63778331 1206 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1207 }
910a0a6d 1208 push( @readings, $next );
1209 $n = $next;
e2902068 1210 }
1211 # Check that the last reading is our end reading.
1212 my $last = $readings[$#readings];
63778331 1213 throw( "Last reading found from " . $start->text .
1214 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1215 unless $last->id eq $end->id;
e2902068 1216
1217 return @readings;
1218}
1219
4e5a7b2c 1220=head2 next_reading( $reading, $sigil );
8e1394aa 1221
4a8828f0 1222Returns the reading that follows the given reading along the given witness
930ff666 1223path.
8e1394aa 1224
1225=cut
1226
4a8828f0 1227sub next_reading {
e2902068 1228 # Return the successor via the corresponding path.
8e1394aa 1229 my $self = shift;
3a2ebbf4 1230 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1231 return undef unless $answer;
3a2ebbf4 1232 return $self->reading( $answer );
8e1394aa 1233}
1234
4e5a7b2c 1235=head2 prior_reading( $reading, $sigil )
8e1394aa 1236
4a8828f0 1237Returns the reading that precedes the given reading along the given witness
930ff666 1238path.
8e1394aa 1239
1240=cut
1241
4a8828f0 1242sub prior_reading {
e2902068 1243 # Return the predecessor via the corresponding path.
8e1394aa 1244 my $self = shift;
3a2ebbf4 1245 my $answer = $self->_find_linked_reading( 'prior', @_ );
1246 return $self->reading( $answer );
8e1394aa 1247}
1248
4a8828f0 1249sub _find_linked_reading {
861c3e27 1250 my( $self, $direction, $node, $path ) = @_;
1251
1252 # Get a backup if we are dealing with a layered witness
1253 my $alt_path;
1254 my $aclabel = $self->ac_label;
1255 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1256 $alt_path = $1;
1257 }
1258
e2902068 1259 my @linked_paths = $direction eq 'next'
3a2ebbf4 1260 ? $self->sequence->edges_from( $node )
1261 : $self->sequence->edges_to( $node );
e2902068 1262 return undef unless scalar( @linked_paths );
8e1394aa 1263
e2902068 1264 # We have to find the linked path that contains all of the
1265 # witnesses supplied in $path.
1266 my( @path_wits, @alt_path_wits );
4e5a7b2c 1267 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1268 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1269 my $base_le;
1270 my $alt_le;
1271 foreach my $le ( @linked_paths ) {
3a2ebbf4 1272 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1273 $base_le = $le;
910a0a6d 1274 }
508fd430 1275 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1276 if( _is_within( \@path_wits, \@le_wits ) ) {
1277 # This is the right path.
1278 return $direction eq 'next' ? $le->[1] : $le->[0];
1279 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1280 $alt_le = $le;
1281 }
8e1394aa 1282 }
e2902068 1283 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1284 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1285 if $alt_le;
e2902068 1286
1287 # Got this far? Return the base path if it exists.
3a2ebbf4 1288 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1289 if $base_le;
e2902068 1290
1291 # Got this far? We have no appropriate path.
2c669bca 1292 warn "Could not find $direction node from " . $node->id
910a0a6d 1293 . " along path $path";
8e1394aa 1294 return undef;
1295}
1296
4a8828f0 1297# Some set logic.
1298sub _is_within {
1299 my( $set1, $set2 ) = @_;
7854e12e 1300 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1301 foreach my $el ( @$set1 ) {
910a0a6d 1302 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1303 }
1304 return $ret;
1305}
1306
4e5a7b2c 1307# Return the string that joins together a list of witnesses for
1308# display on a single path.
1309sub _witnesses_of_label {
1310 my( $self, $label ) = @_;
1311 my $regex = $self->wit_list_separator;
1312 my @answer = split( /\Q$regex\E/, $label );
1313 return @answer;
b0b4421a 1314}
1315
d4b75f44 1316=head2 common_readings
1317
1318Returns the list of common readings in the graph (i.e. those readings that are
1319shared by all non-lacunose witnesses.)
1320
1321=cut
1322
1323sub common_readings {
1324 my $self = shift;
1325 my @common = grep { $_->is_common } $self->readings;
1326 return @common;
1327}
1328
b0b4421a 1329=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1330
1331Returns the text of a witness (plus its backup, if we are using a layer)
1332as stored in the collation. The text is returned as a string, where the
1333individual readings are joined with spaces and the meta-readings (e.g.
1334lacunae) are omitted. Optional specification of $start and $end allows
1335the generation of a subset of the witness text.
4e5a7b2c 1336
b0b4421a 1337=cut
1338
1339sub path_text {
861c3e27 1340 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1341 $start = $self->start unless $start;
1342 $end = $self->end unless $end;
861c3e27 1343 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1344 my $pathtext = '';
1345 my $last;
1346 foreach my $r ( @path ) {
1347 if( $r->join_prior || !$last || $last->join_next ) {
1348 $pathtext .= $r->text;
1349 } else {
1350 $pathtext .= ' ' . $r->text;
1351 }
1352 $last = $r;
1353 }
1354 return $pathtext;
b0b4421a 1355}
4e5a7b2c 1356
1357=head1 INITIALIZATION METHODS
1358
1359These are mostly for use by parsers.
1360
1361=head2 make_witness_path( $witness )
1362
1363Link the array of readings contained in $witness->path (and in
1364$witness->uncorrected_path if it exists) into collation paths.
1365Clear out the arrays when finished.
de51424a 1366
4e5a7b2c 1367=head2 make_witness_paths
1368
1369Call make_witness_path for all witnesses in the tradition.
1370
1371=cut
930ff666 1372
7e450e44 1373# For use when a collation is constructed from a base text and an apparatus.
1374# We have the sequences of readings and just need to add path edges.
1f7aa795 1375# When we are done, clear out the witness path attributes, as they are no
1376# longer needed.
1377# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1378
6a222840 1379sub make_witness_paths {
1380 my( $self ) = @_;
910a0a6d 1381 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1382 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1383 $self->make_witness_path( $wit );
7854e12e 1384 }
7854e12e 1385}
1386
6a222840 1387sub make_witness_path {
7854e12e 1388 my( $self, $wit ) = @_;
1389 my @chain = @{$wit->path};
15d2d3df 1390 my $sig = $wit->sigil;
7854e12e 1391 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1392 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1393 }
1f7aa795 1394 if( $wit->is_layered ) {
d9e873d0 1395 @chain = @{$wit->uncorrected_path};
1396 foreach my $idx( 0 .. $#chain-1 ) {
1397 my $source = $chain[$idx];
1398 my $target = $chain[$idx+1];
1399 $self->add_path( $source, $target, $sig.$self->ac_label )
1400 unless $self->has_path( $source, $target, $sig );
1401 }
15d2d3df 1402 }
1f7aa795 1403 $wit->clear_path;
1404 $wit->clear_uncorrected_path;
e2902068 1405}
1406
4e5a7b2c 1407=head2 calculate_ranks
1408
1409Calculate the reading ranks (that is, their aligned positions relative
1410to each other) for the graph. This can only be called on linear collations.
1411
b365fbae 1412=begin testing
1413
1414use Text::Tradition;
1415
1416my $cxfile = 't/data/Collatex-16.xml';
1417my $t = Text::Tradition->new(
1418 'name' => 'inline',
1419 'input' => 'CollateX',
1420 'file' => $cxfile,
1421 );
1422my $c = $t->collation;
1423
1424# Make an svg
bfcbcecb 1425my $table = $c->alignment_table;
1426ok( $c->has_cached_table, "Alignment table was cached" );
1427is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1428$c->calculate_ranks;
bfcbcecb 1429is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1430$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1431isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1432
1433=end testing
1434
4e5a7b2c 1435=cut
1436
910a0a6d 1437sub calculate_ranks {
1438 my $self = shift;
b365fbae 1439 # Save the existing ranks, in case we need to invalidate the cached SVG.
1440 my %existing_ranks;
910a0a6d 1441 # Walk a version of the graph where every node linked by a relationship
1442 # edge is fundamentally the same node, and do a topological ranking on
1443 # the nodes in this graph.
c9bf3dbf 1444 my $topo_graph = Graph->new();
910a0a6d 1445 my %rel_containers;
1446 my $rel_ctr = 0;
1447 # Add the nodes
1448 foreach my $r ( $self->readings ) {
3a2ebbf4 1449 next if exists $rel_containers{$r->id};
910a0a6d 1450 my @rels = $r->related_readings( 'colocated' );
1451 if( @rels ) {
1452 # Make a relationship container.
1453 push( @rels, $r );
c9bf3dbf 1454 my $rn = 'rel_container_' . $rel_ctr++;
1455 $topo_graph->add_vertex( $rn );
910a0a6d 1456 foreach( @rels ) {
3a2ebbf4 1457 $rel_containers{$_->id} = $rn;
910a0a6d 1458 }
1459 } else {
1460 # Add a new node to mirror the old node.
3a2ebbf4 1461 $rel_containers{$r->id} = $r->id;
1462 $topo_graph->add_vertex( $r->id );
910a0a6d 1463 }
4a8828f0 1464 }
3a1f2523 1465
3a2ebbf4 1466 # Add the edges.
910a0a6d 1467 foreach my $r ( $self->readings ) {
b365fbae 1468 $existing_ranks{$r} = $r->rank;
3a2ebbf4 1469 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1470 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1471 $rel_containers{$n} );
4e5a7b2c 1472 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1473 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1474 }
1475 }
1476
1477 # Now do the rankings, starting with the start node.
3a2ebbf4 1478 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1479 my $node_ranks = { $topo_start => 0 };
910a0a6d 1480 my @curr_origin = ( $topo_start );
1481 # A little iterative function.
1482 while( @curr_origin ) {
c9bf3dbf 1483 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1484 }
1485 # Transfer our rankings from the topological graph to the real one.
1486 foreach my $r ( $self->readings ) {
3a2ebbf4 1487 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1488 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1489 } else {
63778331 1490 # Die. Find the last rank we calculated.
1491 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1492 <=> $node_ranks->{$rel_containers{$b->id}} }
1493 $self->readings;
1494 my $last = pop @all_defined;
1495 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1496 }
de51424a 1497 }
bfcbcecb 1498 # Do we need to invalidate the cached data?
1499 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1500 foreach my $r ( $self->readings ) {
7c293912 1501 next if defined( $existing_ranks{$r} )
1502 && $existing_ranks{$r} == $r->rank;
c1915ab9 1503 # Something has changed, so clear the cache
bfcbcecb 1504 $self->_clear_cache;
c1915ab9 1505 # ...and recalculate the common readings.
1506 $self->calculate_common_readings();
b365fbae 1507 last;
1508 }
1509 }
c1915ab9 1510 # The graph calculation information is now up to date.
1511 $self->_graphcalc_done(1);
8e1394aa 1512}
3a1f2523 1513
910a0a6d 1514sub _assign_rank {
c9bf3dbf 1515 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1516 # Look at each of the children of @current_nodes. If all the child's
1517 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1518 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1519 # parent gets a rank.
910a0a6d 1520 my @next_nodes;
1521 foreach my $c ( @current_nodes ) {
c9bf3dbf 1522 warn "Current reading $c has no rank!"
1523 unless exists $node_ranks->{$c};
1524 # print STDERR "Looking at child of node $c, rank "
1525 # . $node_ranks->{$c} . "\n";
1526 foreach my $child ( $graph->successors( $c ) ) {
1527 next if exists $node_ranks->{$child};
910a0a6d 1528 my $highest_rank = -1;
1529 my $skip = 0;
c9bf3dbf 1530 foreach my $parent ( $graph->predecessors( $child ) ) {
1531 if( exists $node_ranks->{$parent} ) {
1532 $highest_rank = $node_ranks->{$parent}
1533 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1534 } else {
1535 $skip = 1;
1536 last;
1537 }
1538 }
1539 next if $skip;
c9bf3dbf 1540 my $c_rank = $highest_rank + 1;
1541 # print STDERR "Assigning rank $c_rank to node $child \n";
1542 $node_ranks->{$child} = $c_rank;
910a0a6d 1543 push( @next_nodes, $child );
1544 }
1545 }
1546 return @next_nodes;
4cdd82f1 1547}
910a0a6d 1548
c1915ab9 1549sub _clear_cache {
1550 my $self = shift;
1551 $self->wipe_svg if $self->has_cached_svg;
1552 $self->wipe_table if $self->has_cached_table;
1553}
1554
1555
4e5a7b2c 1556=head2 flatten_ranks
1557
1558A convenience method for parsing collation data. Searches the graph for readings
1559with the same text at the same rank, and merges any that are found.
1560
1561=cut
1562
0e476982 1563sub flatten_ranks {
1564 my $self = shift;
1565 my %unique_rank_rdg;
bf6e338d 1566 my $changed;
0e476982 1567 foreach my $rdg ( $self->readings ) {
1568 next unless $rdg->has_rank;
1569 my $key = $rdg->rank . "||" . $rdg->text;
1570 if( exists $unique_rank_rdg{$key} ) {
1571 # Combine!
56eefa04 1572 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1573 $changed = 1;
0e476982 1574 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1575 # TODO see if this now makes a common point.
0e476982 1576 } else {
1577 $unique_rank_rdg{$key} = $rdg;
1578 }
1579 }
bf6e338d 1580 # If we merged readings, the ranks are still fine but the alignment
1581 # table is wrong. Wipe it.
1582 $self->wipe_table() if $changed;
0e476982 1583}
4633f9e4 1584
1585
d4b75f44 1586=head2 calculate_common_readings
1587
1588Goes through the graph identifying the readings that appear in every witness
1589(apart from those with lacunae at that spot.) Marks them as common and returns
1590the list.
1591
1592=begin testing
1593
1594use Text::Tradition;
1595
1596my $cxfile = 't/data/Collatex-16.xml';
1597my $t = Text::Tradition->new(
1598 'name' => 'inline',
1599 'input' => 'CollateX',
1600 'file' => $cxfile,
1601 );
1602my $c = $t->collation;
1603
1604my @common = $c->calculate_common_readings();
1605is( scalar @common, 8, "Found correct number of common readings" );
1606my @marked = sort $c->common_readings();
1607is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1608my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1609is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1610
1611=end testing
1612
1613=cut
1614
1615sub calculate_common_readings {
1616 my $self = shift;
1617 my @common;
c1915ab9 1618 map { $_->is_common( 0 ) } $self->readings;
1619 # Implicitly calls calculate_ranks
1dd07bda 1620 my $table = $self->alignment_table;
d4b75f44 1621 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1622 my @row = map { $_->{'tokens'}->[$idx]
1623 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1624 @{$table->{'alignment'}};
d4b75f44 1625 my %hash;
1626 foreach my $r ( @row ) {
1627 if( $r ) {
1628 $hash{$r->id} = $r unless $r->is_meta;
1629 } else {
1630 $hash{'UNDEF'} = $r;
1631 }
1632 }
1633 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1634 my( $r ) = values %hash;
1635 $r->is_common( 1 );
1636 push( @common, $r );
1637 }
1638 }
1639 return @common;
1640}
1641
861c3e27 1642=head2 text_from_paths
1643
1644Calculate the text array for all witnesses from the path, for later consistency
1645checking. Only to be used if there is no non-graph-based way to know the
1646original texts.
1647
1648=cut
1649
1650sub text_from_paths {
1651 my $self = shift;
1652 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1653 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1654 my @text;
1655 foreach my $r ( @readings ) {
1656 next if $r->is_meta;
1657 push( @text, $r->text );
1658 }
861c3e27 1659 $wit->text( \@text );
1660 if( $wit->is_layered ) {
5164a6f0 1661 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1662 $wit->sigil.$self->ac_label );
1663 my @uctext;
1664 foreach my $r ( @ucrdgs ) {
1665 next if $r->is_meta;
1666 push( @uctext, $r->text );
1667 }
1668 $wit->layertext( \@uctext );
861c3e27 1669 }
1670 }
1671}
0e476982 1672
4e5a7b2c 1673=head1 UTILITY FUNCTIONS
1674
1675=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1676
4e5a7b2c 1677Find the last reading that occurs in sequence before both the given readings.
1678
1679=head2 common_successor( $reading_a, $reading_b )
1680
1681Find the first reading that occurs in sequence after both the given readings.
1682
22222af9 1683=begin testing
1684
1685use Text::Tradition;
1686
1687my $cxfile = 't/data/Collatex-16.xml';
1688my $t = Text::Tradition->new(
1689 'name' => 'inline',
1690 'input' => 'CollateX',
1691 'file' => $cxfile,
1692 );
1693my $c = $t->collation;
1694
679f17e1 1695is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1696 'n20', "Found correct common predecessor" );
679f17e1 1697is( $c->common_successor( 'n24', 'n23' )->id,
22222af9 1698 '#END#', "Found correct common successor" );
1699
4e5a7b2c 1700is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1701 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1702is( $c->common_successor( 'n21', 'n10' )->id,
22222af9 1703 '#END#', "Found correct common successor for readings on same path" );
1704
1705=end testing
1706
1707=cut
1708
1709## Return the closest reading that is a predecessor of both the given readings.
1710sub common_predecessor {
1711 my $self = shift;
4e5a7b2c 1712 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1713 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1714}
1715
1716sub common_successor {
1717 my $self = shift;
4e5a7b2c 1718 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1719 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1720}
1721
027d819c 1722sub _common_in_path {
22222af9 1723 my( $self, $r1, $r2, $dir ) = @_;
1724 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1725 $iter = $self->end->rank - $iter if $dir eq 'successors';
1726 my @candidates;
1727 my @last_checked = ( $r1, $r2 );
1728 my %all_seen;
1729 while( !@candidates ) {
1730 my @new_lc;
1731 foreach my $lc ( @last_checked ) {
1732 foreach my $p ( $lc->$dir ) {
1733 if( $all_seen{$p->id} ) {
1734 push( @candidates, $p );
1735 } else {
1736 $all_seen{$p->id} = 1;
1737 push( @new_lc, $p );
1738 }
1739 }
1740 }
1741 @last_checked = @new_lc;
1742 }
1743 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1744 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1745}
1746
63778331 1747sub throw {
1748 Text::Tradition::Error->throw(
1749 'ident' => 'Collation error',
1750 'message' => $_[0],
1751 );
1752}
1753
dd3b58b0 1754no Moose;
1755__PACKAGE__->meta->make_immutable;
e867486f 1756
027d819c 1757=head1 LICENSE
e867486f 1758
027d819c 1759This package is free software and is provided "as is" without express
1760or implied warranty. You can redistribute it and/or modify it under
1761the same terms as Perl itself.
e867486f 1762
027d819c 1763=head1 AUTHOR
e867486f 1764
027d819c 1765Tara L Andrews E<lt>aurum@cpan.orgE<gt>