add failing tests for relationship logic, next we fix them
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
df6d9812 12use XML::LibXML;
a344ffcf 13use XML::LibXML::XPathContext;
dd3b58b0 14use Moose;
15
3a2ebbf4 16has 'sequence' => (
d047cd52 17 is => 'ro',
3a2ebbf4 18 isa => 'Graph',
19 default => sub { Graph->new() },
d047cd52 20 handles => {
3a2ebbf4 21 paths => 'edges',
d047cd52 22 },
d047cd52 23 );
3a2ebbf4 24
25has 'relations' => (
26 is => 'ro',
22222af9 27 isa => 'Text::Tradition::Collation::RelationshipStore',
28 handles => {
29 relationships => 'relationships',
30 related_readings => 'related_readings',
202ccb18 31 get_relationship => 'get_relationship',
ee801e17 32 del_relationship => 'del_relationship',
22222af9 33 },
34 writer => '_set_relations',
3a2ebbf4 35 );
dd3b58b0 36
3a2ebbf4 37has 'tradition' => (
38 is => 'ro',
d047cd52 39 isa => 'Text::Tradition',
8cfd99c4 40 writer => '_set_tradition',
8d9a1cd8 41 weak_ref => 1,
d047cd52 42 );
dd3b58b0 43
3a2ebbf4 44has 'readings' => (
45 isa => 'HashRef[Text::Tradition::Collation::Reading]',
46 traits => ['Hash'],
47 handles => {
48 reading => 'get',
49 _add_reading => 'set',
50 del_reading => 'delete',
51 has_reading => 'exists',
52 readings => 'values',
53 },
54 default => sub { {} },
55 );
910a0a6d 56
4a8828f0 57has 'wit_list_separator' => (
7854e12e 58 is => 'rw',
59 isa => 'Str',
60 default => ', ',
61 );
62
63has 'baselabel' => (
64 is => 'rw',
65 isa => 'Str',
66 default => 'base text',
67 );
4a8828f0 68
15d2d3df 69has 'linear' => (
70 is => 'rw',
71 isa => 'Bool',
72 default => 1,
73 );
c84275ff 74
ef9d481f 75has 'ac_label' => (
76 is => 'rw',
77 isa => 'Str',
78 default => ' (a.c.)',
79 );
3a2ebbf4 80
4e483aa5 81has 'wordsep' => (
82 is => 'rw',
83 isa => 'Str',
84 default => ' ',
85 );
86
3a2ebbf4 87has 'start' => (
88 is => 'ro',
89 isa => 'Text::Tradition::Collation::Reading',
90 writer => '_set_start',
91 weak_ref => 1,
92 );
93
94has 'end' => (
95 is => 'ro',
96 isa => 'Text::Tradition::Collation::Reading',
97 writer => '_set_end',
98 weak_ref => 1,
99 );
b365fbae 100
101has 'cached_svg' => (
102 is => 'rw',
103 isa => 'Str',
104 predicate => 'has_cached_svg',
105 clearer => 'wipe_svg',
106 );
1dd07bda 107
108has 'cached_table' => (
109 is => 'rw',
110 isa => 'HashRef',
111 predicate => 'has_cached_table',
112 clearer => 'wipe_table',
113 );
c1915ab9 114
115has '_graphcalc_done' => (
116 is => 'rw',
117 isa => 'Bool',
118 default => undef,
119 );
1f563ac3 120
4e5a7b2c 121=head1 NAME
122
123Text::Tradition::Collation - a software model for a text collation
124
125=head1 SYNOPSIS
126
127 use Text::Tradition;
128 my $t = Text::Tradition->new(
129 'name' => 'this is a text',
130 'input' => 'TEI',
131 'file' => '/path/to/tei_parallel_seg_file.xml' );
132
133 my $c = $t->collation;
134 my @readings = $c->readings;
135 my @paths = $c->paths;
136 my @relationships = $c->relationships;
137
138 my $svg_variant_graph = $t->collation->as_svg();
139
140=head1 DESCRIPTION
141
142Text::Tradition is a library for representation and analysis of collated
143texts, particularly medieval ones. The Collation is the central feature of
144a Tradition, where the text, its sequence of readings, and its relationships
145between readings are actually kept.
146
147=head1 CONSTRUCTOR
148
149=head2 new
150
151The constructor. Takes a hash or hashref of the following arguments:
152
153=over
154
155=item * tradition - The Text::Tradition object to which the collation
156belongs. Required.
157
158=item * linear - Whether the collation should be linear; that is, whether
159transposed readings should be treated as two linked readings rather than one,
160and therefore whether the collation graph is acyclic. Defaults to true.
161
4e5a7b2c 162=item * baselabel - The default label for the path taken by a base text
163(if any). Defaults to 'base text'.
164
165=item * wit_list_separator - The string to join a list of witnesses for
166purposes of making labels in display graphs. Defaults to ', '.
167
168=item * ac_label - The extra label to tack onto a witness sigil when
169representing another layer of path for the given witness - that is, when
170a text has more than one possible reading due to scribal corrections or
171the like. Defaults to ' (a.c.)'.
172
4e483aa5 173=item * wordsep - The string used to separate words in the original text.
174Defaults to ' '.
175
4e5a7b2c 176=back
177
178=head1 ACCESSORS
179
180=head2 tradition
181
182=head2 linear
183
4e5a7b2c 184=head2 wit_list_separator
185
186=head2 baselabel
187
188=head2 ac_label
189
4e483aa5 190=head2 wordsep
191
4e5a7b2c 192Simple accessors for collation attributes.
193
194=head2 start
195
196The meta-reading at the start of every witness path.
197
198=head2 end
199
200The meta-reading at the end of every witness path.
201
202=head2 readings
203
204Returns all Reading objects in the graph.
205
206=head2 reading( $id )
207
208Returns the Reading object corresponding to the given ID.
209
210=head2 add_reading( $reading_args )
211
212Adds a new reading object to the collation.
213See L<Text::Tradition::Collation::Reading> for the available arguments.
214
215=head2 del_reading( $object_or_id )
216
217Removes the given reading from the collation, implicitly removing its
218paths and relationships.
219
4e483aa5 220=head2 merge_readings( $main, $second, $concatenate, $with_str )
221
222Merges the $second reading into the $main one. If $concatenate is true, then
223the merged node will carry the text of both readings, concatenated with either
224$with_str (if specified) or a sensible default (the empty string if the
225appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 226
4e483aa5 227The first two arguments may be either readings or reading IDs.
4e5a7b2c 228
229=head2 has_reading( $id )
230
231Predicate to see whether a given reading ID is in the graph.
232
233=head2 reading_witnesses( $object_or_id )
234
235Returns a list of sigils whose witnesses contain the reading.
236
237=head2 paths
238
239Returns all reading paths within the document - that is, all edges in the
240collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
241
242=head2 add_path( $source, $target, $sigil )
243
244Links the given readings in the collation in sequence, under the given witness
245sigil. The readings may be specified by object or ID.
246
247=head2 del_path( $source, $target, $sigil )
248
249Links the given readings in the collation in sequence, under the given witness
250sigil. The readings may be specified by object or ID.
251
252=head2 has_path( $source, $target );
253
254Returns true if the two readings are linked in sequence in any witness.
255The readings may be specified by object or ID.
256
257=head2 relationships
258
259Returns all Relationship objects in the collation.
260
261=head2 add_relationship( $reading, $other_reading, $options )
262
263Adds a new relationship of the type given in $options between the two readings,
264which may be specified by object or ID. Returns a value of ( $status, @vectors)
265where $status is true on success, and @vectors is a list of relationship edges
266that were ultimately added.
267See L<Text::Tradition::Collation::Relationship> for the available options.
268
269=cut
dd3b58b0 270
d047cd52 271sub BUILD {
3a2ebbf4 272 my $self = shift;
22222af9 273 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 274 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
275 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 276}
784877d9 277
3a2ebbf4 278### Reading construct/destruct functions
279
280sub add_reading {
281 my( $self, $reading ) = @_;
282 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
283 my %args = %$reading;
284 $reading = Text::Tradition::Collation::Reading->new(
285 'collation' => $self,
286 %args );
287 }
288 # First check to see if a reading with this ID exists.
289 if( $self->reading( $reading->id ) ) {
63778331 290 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 291 }
c1915ab9 292 $self->_graphcalc_done(0);
3a2ebbf4 293 $self->_add_reading( $reading->id => $reading );
294 # Once the reading has been added, put it in both graphs.
295 $self->sequence->add_vertex( $reading->id );
22222af9 296 $self->relations->add_reading( $reading->id );
3a2ebbf4 297 return $reading;
eca16057 298};
299
3a2ebbf4 300around del_reading => sub {
301 my $orig = shift;
302 my $self = shift;
303 my $arg = shift;
304
305 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
306 $arg = $arg->id;
307 }
3a2ebbf4 308 # Remove the reading from the graphs.
c1915ab9 309 $self->_graphcalc_done(0);
4e483aa5 310 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 311 $self->sequence->delete_vertex( $arg );
22222af9 312 $self->relations->delete_reading( $arg );
3a2ebbf4 313
314 # Carry on.
315 $self->$orig( $arg );
316};
7854e12e 317
4e483aa5 318=begin testing
319
320use Text::Tradition;
321
322my $cxfile = 't/data/Collatex-16.xml';
323my $t = Text::Tradition->new(
324 'name' => 'inline',
325 'input' => 'CollateX',
326 'file' => $cxfile,
327 );
328my $c = $t->collation;
329
330my $rno = scalar $c->readings;
331# Split n21 for testing purposes
332my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
333my $old_r = $c->reading( 'n21' );
334$old_r->alter_text( 'to' );
335$c->del_path( 'n20', 'n21', 'A' );
336$c->add_path( 'n20', 'n21p0', 'A' );
337$c->add_path( 'n21p0', 'n21', 'A' );
338$c->flatten_ranks();
339ok( $c->reading( 'n21p0' ), "New reading exists" );
340is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
341
679f17e1 342# Combine n3 and n4 ( with his )
4e483aa5 343$c->merge_readings( 'n3', 'n4', 1 );
344ok( !$c->reading('n4'), "Reading n4 is gone" );
345is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
346
679f17e1 347# Collapse n9 and n10 ( rood / root )
348$c->merge_readings( 'n9', 'n10' );
349ok( !$c->reading('n10'), "Reading n10 is gone" );
350is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 351
352# Combine n21 and n21p0
353my $remaining = $c->reading('n21');
354$remaining ||= $c->reading('n22'); # one of these should still exist
355$c->merge_readings( 'n21p0', $remaining, 1 );
356ok( !$c->reading('n21'), "Reading $remaining is gone" );
357is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
358
359=end testing
360
361=cut
7854e12e 362
3a2ebbf4 363sub merge_readings {
364 my $self = shift;
365
366 # We only need the IDs for adding paths to the graph, not the reading
367 # objects themselves.
4e483aa5 368 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 369 $self->_graphcalc_done(0);
3a2ebbf4 370
371 # The kept reading should inherit the paths and the relationships
372 # of the deleted reading.
373 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
374 my @vector = ( $kept );
375 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
376 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 377 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 378 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
379 $self->sequence->add_edge( @vector );
380 my $fwits = $self->sequence->get_edge_attributes( @vector );
381 @wits{keys %$fwits} = values %$fwits;
382 $self->sequence->set_edge_attributes( @vector, \%wits );
383 }
22222af9 384 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 385
386 # Do the deletion deed.
4e483aa5 387 if( $combine ) {
49d4f2ac 388 my $kept_obj = $self->reading( $kept );
4e483aa5 389 my $del_obj = $self->reading( $deleted );
390 my $joinstr = $combine_char;
391 unless( defined $joinstr ) {
392 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
393 $joinstr = $self->wordsep unless defined $joinstr;
394 }
395 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 396 }
3a2ebbf4 397 $self->del_reading( $deleted );
398}
7854e12e 399
3265b0ce 400
3a2ebbf4 401# Helper function for manipulating the graph.
402sub _stringify_args {
4e483aa5 403 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 404 $first = $first->id
405 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
406 $second = $second->id
407 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 408 return( $first, $second, @args );
3a2ebbf4 409}
df6d9812 410
4e5a7b2c 411# Helper function for manipulating the graph.
412sub _objectify_args {
413 my( $self, $first, $second, $arg ) = @_;
414 $first = $self->reading( $first )
415 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
416 $second = $self->reading( $second )
417 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
418 return( $first, $second, $arg );
419}
3a2ebbf4 420### Path logic
421
422sub add_path {
423 my $self = shift;
424
425 # We only need the IDs for adding paths to the graph, not the reading
426 # objects themselves.
427 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
428
c1915ab9 429 $self->_graphcalc_done(0);
3a2ebbf4 430 # Connect the readings
431 $self->sequence->add_edge( $source, $target );
432 # Note the witness in question
433 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 434};
435
3a2ebbf4 436sub del_path {
437 my $self = shift;
49d4f2ac 438 my @args;
439 if( ref( $_[0] ) eq 'ARRAY' ) {
440 my $e = shift @_;
441 @args = ( @$e, @_ );
442 } else {
443 @args = @_;
444 }
3a2ebbf4 445
446 # We only need the IDs for adding paths to the graph, not the reading
447 # objects themselves.
49d4f2ac 448 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 449
c1915ab9 450 $self->_graphcalc_done(0);
3a2ebbf4 451 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 452 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 453 }
454 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
455 $self->sequence->delete_edge( $source, $target );
456 }
784877d9 457}
458
3a2ebbf4 459
15d2d3df 460# Extra graph-alike utility
461sub has_path {
3a2ebbf4 462 my $self = shift;
463 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
464 return undef unless $self->sequence->has_edge( $source, $target );
465 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 466}
467
4e5a7b2c 468=head2 clear_witness( @sigil_list )
3a2ebbf4 469
4e5a7b2c 470Clear the given witnesses out of the collation entirely, removing references
471to them in paths, and removing readings that belong only to them. Should only
472be called via $tradition->del_witness.
3a2ebbf4 473
474=cut
475
4e5a7b2c 476sub clear_witness {
477 my( $self, @sigils ) = @_;
478
c1915ab9 479 $self->_graphcalc_done(0);
4e5a7b2c 480 # Clear the witness(es) out of the paths
481 foreach my $e ( $self->paths ) {
482 foreach my $sig ( @sigils ) {
483 $self->del_path( $e, $sig );
484 }
485 }
486
487 # Clear out the newly unused readings
488 foreach my $r ( $self->readings ) {
489 unless( $self->reading_witnesses( $r ) ) {
490 $self->del_reading( $r );
491 }
492 }
493}
3a2ebbf4 494
495sub add_relationship {
496 my $self = shift;
22222af9 497 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 498 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 499 $self->reading( $source ), $target, $self->reading( $target ), $opts );
c1915ab9 500 $self->_graphcalc_done(0);
63778331 501 return @vectors;
22222af9 502}
ef9d481f 503
ca6e6095 504around qw/ get_relationship del_relationship / => sub {
505 my $orig = shift;
506 my $self = shift;
507 my @args = @_;
508 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
509 @args = @{$_[0]};
510 }
511 my( $source, $target ) = $self->_stringify_args( @args );
512 $self->$orig( $source, $target );
513};
514
22222af9 515=head2 reading_witnesses( $reading )
910a0a6d 516
22222af9 517Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 518
22222af9 519=cut
1d310495 520
1d310495 521sub reading_witnesses {
522 my( $self, $reading ) = @_;
523 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 524 # arbitrarily chosen "incoming". Thus, special-case the start node.
525 if( $reading eq $self->start ) {
526 return map { $_->sigil } $self->tradition->witnesses;
527 }
1d310495 528 my %all_witnesses;
529 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
530 my $wits = $self->sequence->get_edge_attributes( @$e );
531 @all_witnesses{ keys %$wits } = 1;
532 }
c12bb878 533 my $acstr = $self->ac_label;
534 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
535 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
536 }
1d310495 537 return keys %all_witnesses;
910a0a6d 538}
539
4e5a7b2c 540=head1 OUTPUT METHODS
8e1394aa 541
0ecb975c 542=head2 as_svg( \%options )
8e1394aa 543
0068967c 544Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 545See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 546
547=cut
548
549sub as_svg {
0ecb975c 550 my( $self, $opts ) = @_;
bfcbcecb 551 throw( "Need GraphViz installed to output SVG" )
552 unless File::Which::which( 'dot' );
e247aad1 553 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
5f082716 554 $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
e247aad1 555 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 556 my @cmd = qw/dot -Tsvg/;
557 my( $svg, $err );
558 my $dotfile = File::Temp->new();
559 ## USE FOR DEBUGGING
560 # $dotfile->unlink_on_destroy(0);
561 binmode $dotfile, ':utf8';
562 print $dotfile $self->as_dot( $opts );
563 push( @cmd, $dotfile->filename );
564 run( \@cmd, ">", binary(), \$svg );
e247aad1 565 $svg = decode_utf8( $svg );
566 $self->cached_svg( $svg ) unless $want_subgraph;
567 return $svg;
568 } else {
569 return $self->cached_svg;
b365fbae 570 }
8e1394aa 571}
572
b22576c6 573
0ecb975c 574=head2 as_dot( \%options )
b22576c6 575
0ecb975c 576Returns a string that is the collation graph expressed in dot
577(i.e. GraphViz) format. Options include:
b22576c6 578
0ecb975c 579=over 4
b22576c6 580
0ecb975c 581=item * from
b22576c6 582
0ecb975c 583=item * to
df6d9812 584
0ecb975c 585=item * color_common
586
587=back
df6d9812 588
589=cut
590
591sub as_dot {
0ecb975c 592 my( $self, $opts ) = @_;
593 my $startrank = $opts->{'from'} if $opts;
594 my $endrank = $opts->{'to'} if $opts;
595 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 596 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
597 && $self->end->rank > 100;
6648ee3d 598 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 599
b22576c6 600 # Check the arguments
601 if( $startrank ) {
602 return if $endrank && $startrank > $endrank;
603 return if $startrank > $self->end->rank;
604 }
605 if( defined $endrank ) {
606 return if $endrank < 0;
f1b3b33a 607 $endrank = undef if $endrank == $self->end->rank;
b22576c6 608 }
609
67da8d6c 610 my $graph_name = $self->tradition->name;
611 $graph_name =~ s/[^\w\s]//g;
612 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 613
614 my %graph_attrs = (
615 'rankdir' => 'LR',
616 'bgcolor' => 'none',
617 );
618 my %node_attrs = (
b8990398 619 'fontsize' => 14,
f13b5582 620 'fillcolor' => 'white',
621 'style' => 'filled',
622 'shape' => 'ellipse'
623 );
624 my %edge_attrs = (
625 'arrowhead' => 'open',
626 'color' => '#000000',
627 'fontcolor' => '#000000',
628 );
629
67da8d6c 630 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 631 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
632 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 633
b22576c6 634 # Output substitute start/end readings if necessary
635 if( $startrank ) {
636 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
637 }
638 if( $endrank ) {
639 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
640 }
b365fbae 641 if( $STRAIGHTENHACK ) {
642 ## HACK part 1
6648ee3d 643 my $startlabel = $startrank ? 'SUBSTART' : 'START';
644 $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
b365fbae 645 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
646 }
b22576c6 647 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 648 # Sort the readings by rank if we have ranks; this speeds layout.
649 my @all_readings = $self->end->has_rank
650 ? sort { $a->rank <=> $b->rank } $self->readings
651 : $self->readings;
4633f9e4 652 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 653 foreach my $reading ( @all_readings ) {
b22576c6 654 # Only output readings within our rank range.
655 next if $startrank && $reading->rank < $startrank;
656 next if $endrank && $reading->rank > $endrank;
657 $used{$reading->id} = 1;
910a0a6d 658 # Need not output nodes without separate labels
3a2ebbf4 659 next if $reading->id eq $reading->text;
d4b75f44 660 my $rattrs;
30f0df34 661 my $label = $reading->text;
629e27b0 662 $label .= '-' if $reading->join_next;
663 $label = "-$label" if $reading->join_prior;
8f9cab7b 664 $label =~ s/\"/\\\"/g;
d4b75f44 665 $rattrs->{'label'} = $label;
0ecb975c 666 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 667 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 668 }
3a2ebbf4 669
30ddc24c 670 # Add the real edges. Need to weight one edge per rank jump, in a
671 # continuous line.
b365fbae 672 # my $weighted = $self->_add_edge_weights;
b22576c6 673 my @edges = $self->paths;
3bdec618 674 my( %substart, %subend );
b22576c6 675 foreach my $edge ( @edges ) {
676 # Do we need to output this edge?
508fd430 677 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 678 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 679 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 680
b22576c6 681 # Account for the rank gap if necessary
30ddc24c 682 my $rank0 = $self->reading( $edge->[0] )->rank
683 if $self->reading( $edge->[0] )->has_rank;
684 my $rank1 = $self->reading( $edge->[1] )->rank
685 if $self->reading( $edge->[1] )->has_rank;
686 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
687 $variables->{'minlen'} = $rank1 - $rank0;
688 }
689
690 # Add the calculated edge weights
b365fbae 691 # if( exists $weighted->{$edge->[0]}
e247aad1 692 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
693 # # $variables->{'color'} = 'red';
694 # $variables->{'weight'} = 3.0;
695 # }
30ddc24c 696
508fd430 697 # EXPERIMENTAL: make edge width reflect no. of witnesses
698 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
699 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
700
f13b5582 701 my $varopts = _dot_attr_string( $variables );
702 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
703 $edge->[0], $edge->[1], $varopts );
3bdec618 704 } elsif( $used{$edge->[0]} ) {
705 $subend{$edge->[0]} = 1;
706 } elsif( $used{$edge->[1]} ) {
707 $substart{$edge->[1]} = 1;
b22576c6 708 }
df6d9812 709 }
3bdec618 710 # Add substitute start and end edges if necessary
711 foreach my $node ( keys %substart ) {
027d819c 712 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 713 my $variables = { %edge_attrs, 'label' => $witstr };
714 my $varopts = _dot_attr_string( $variables );
715 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 716 }
717 foreach my $node ( keys %subend ) {
027d819c 718 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 719 my $variables = { %edge_attrs, 'label' => $witstr };
720 my $varopts = _dot_attr_string( $variables );
721 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 722 }
b365fbae 723 # HACK part 2
724 if( $STRAIGHTENHACK ) {
6648ee3d 725 my $endlabel = $endrank ? 'SUBEND' : 'END';
726 $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 727 }
30ddc24c 728
df6d9812 729 $dot .= "}\n";
730 return $dot;
731}
732
f13b5582 733sub _dot_attr_string {
734 my( $hash ) = @_;
735 my @attrs;
736 foreach my $k ( sort keys %$hash ) {
737 my $v = $hash->{$k};
738 push( @attrs, $k.'="'.$v.'"' );
739 }
740 return( '[ ' . join( ', ', @attrs ) . ' ]' );
741}
742
30ddc24c 743sub _add_edge_weights {
744 my $self = shift;
745 # Walk the graph from START to END, choosing the successor node with
746 # the largest number of witness paths each time.
747 my $weighted = {};
748 my $curr = $self->start->id;
008fc8a6 749 my $ranked = $self->end->has_rank;
30ddc24c 750 while( $curr ne $self->end->id ) {
008fc8a6 751 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 752 my @succ = sort { $self->path_witnesses( $curr, $a )
753 <=> $self->path_witnesses( $curr, $b ) }
754 $self->sequence->successors( $curr );
755 my $next = pop @succ;
008fc8a6 756 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 757 # Try to avoid lacunae in the weighted path.
008fc8a6 758 while( @succ &&
759 ( $self->reading( $next )->is_lacuna ||
760 $nextrank - $rank > 1 ) ){
30ddc24c 761 $next = pop @succ;
762 }
763 $weighted->{$curr} = $next;
764 $curr = $next;
765 }
766 return $weighted;
767}
768
027d819c 769=head2 path_witnesses( $edge )
770
771Returns the list of sigils whose witnesses are associated with the given edge.
772The edge can be passed as either an array or an arrayref of ( $source, $target ).
773
774=cut
775
3a2ebbf4 776sub path_witnesses {
777 my( $self, @edge ) = @_;
778 # If edge is an arrayref, cope.
779 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
780 my $e = shift @edge;
781 @edge = @$e;
782 }
783 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 784 return @wits;
3a2ebbf4 785}
786
7f9f05e8 787# Helper function. Make a display label for the given witnesses, showing a.c.
788# witnesses only where the main witness is not also in the list.
027d819c 789sub _path_display_label {
508fd430 790 my $self = shift;
7f9f05e8 791 my %wits;
792 map { $wits{$_} = 1 } @_;
793
794 # If an a.c. wit is listed, remove it if the main wit is also listed.
795 # Otherwise keep it for explicit listing.
796 my $aclabel = $self->ac_label;
797 my @disp_ac;
798 foreach my $w ( sort keys %wits ) {
799 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
800 if( exists $wits{$1} ) {
801 delete $wits{$w};
802 } else {
803 push( @disp_ac, $w );
804 }
805 }
806 }
807
808 # See if we are in a majority situation.
8f9cab7b 809 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 810 if( scalar keys %wits > $maj ) {
811 unshift( @disp_ac, 'majority' );
812 return join( ', ', @disp_ac );
8f9cab7b 813 } else {
7f9f05e8 814 return join( ', ', sort keys %wits );
8f9cab7b 815 }
816}
1dd07bda 817
bf6e338d 818=head2 readings_at_rank( $rank )
1dd07bda 819
bf6e338d 820Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 821
822=cut
823
bf6e338d 824sub readings_at_rank {
1dd07bda 825 my( $self, $rank ) = @_;
bf6e338d 826 my $table = $self->alignment_table;
827 # Table rank is real rank - 1.
828 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
829 my %readings;
830 foreach my $e ( @elements ) {
831 next unless ref( $e ) eq 'HASH';
832 next unless exists $e->{'t'};
833 $readings{$e->{'t'}->id} = $e->{'t'};
834 }
835 return values %readings;
1dd07bda 836}
8f9cab7b 837
4e5a7b2c 838=head2 as_graphml
8e1394aa 839
4e5a7b2c 840Returns a GraphML representation of the collation. The GraphML will contain
841two graphs. The first expresses the attributes of the readings and the witness
842paths that link them; the second expresses the relationships that link the
843readings. This is the native transfer format for a tradition.
8e1394aa 844
56eefa04 845=begin testing
846
847use Text::Tradition;
848
849my $READINGS = 311;
850my $PATHS = 361;
851
852my $datafile = 't/data/florilegium_tei_ps.xml';
853my $tradition = Text::Tradition->new( 'input' => 'TEI',
854 'name' => 'test0',
855 'file' => $datafile,
856 'linear' => 1 );
857
858ok( $tradition, "Got a tradition object" );
859is( scalar $tradition->witnesses, 13, "Found all witnesses" );
860ok( $tradition->collation, "Tradition has a collation" );
861
862my $c = $tradition->collation;
863is( scalar $c->readings, $READINGS, "Collation has all readings" );
864is( scalar $c->paths, $PATHS, "Collation has all paths" );
865is( scalar $c->relationships, 0, "Collation has all relationships" );
866
867# Add a few relationships
868$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
869$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
870$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
871
872# Now write it to GraphML and parse it again.
873
874my $graphml = $c->as_graphml;
875my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
876is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
877is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
878is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
879
880=end testing
881
8e1394aa 882=cut
883
884sub as_graphml {
a30ca502 885 my( $self, $options ) = @_;
3d14b48e 886 $self->calculate_ranks unless $self->_graphcalc_done;
887
a30ca502 888 my $start = $options->{'from'}
889 ? $self->reading( $options->{'from'} ) : $self->start;
890 my $end = $options->{'to'}
891 ? $self->reading( $options->{'to'} ) : $self->end;
892 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
893 throw( 'Start node must be before end node' );
894 }
895 # The readings need to be ranked for this to work.
896 $start = $self->start unless $start->has_rank;
897 $end = $self->end unless $end->has_rank;
898 my %use_readings;
899
8e1394aa 900 # Some namespaces
901 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
902 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
903 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 904 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 905
906 # Create the document and root node
907 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
908 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
909 $graphml->setDocumentElement( $root );
910 $root->setNamespace( $xsi_ns, 'xsi', 0 );
911 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 912
913 # List of attribute types to save on our objects and their corresponding
914 # GraphML types
915 my %save_types = (
916 'Str' => 'string',
917 'Int' => 'int',
918 'Bool' => 'boolean',
919 'RelationshipType' => 'string',
920 'RelationshipScope' => 'string',
921 );
922
923 # List of attribute names *not* to save on our objects.
924 # We will also not save any attribute beginning with _.
925 my %skipsave;
926 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 927
bbd064a9 928 # Add the data keys for the graph. Include an extra key 'version' for the
929 # GraphML output version.
e309421a 930 my %graph_data_keys;
931 my $gdi = 0;
bbd064a9 932 my %graph_attributes = ( 'version' => 'string' );
933 # Graph attributes include those of Tradition and those of Collation.
934 my %gattr_from;
935 my $tmeta = $self->tradition->meta;
936 my $cmeta = $self->meta;
937 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
938 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
939 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
940 next if $attr->name =~ /^_/;
941 next if $skipsave{$attr->name};
942 next unless $save_types{$attr->type_constraint->name};
943 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
944 }
945
946 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 947 $graph_data_keys{$datum} = 'dg'.$gdi++;
948 my $key = $root->addNewChild( $graphml_ns, 'key' );
949 $key->setAttribute( 'attr.name', $datum );
bbd064a9 950 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 951 $key->setAttribute( 'for', 'graph' );
952 $key->setAttribute( 'id', $graph_data_keys{$datum} );
953 }
f6066bac 954
bbd064a9 955 # Add the data keys for reading nodes
956 my %reading_attributes;
957 my $rmeta = Text::Tradition::Collation::Reading->meta;
958 foreach my $attr( $rmeta->get_all_attributes ) {
959 next if $attr->name =~ /^_/;
960 next if $skipsave{$attr->name};
961 next unless $save_types{$attr->type_constraint->name};
962 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
963 }
ef9d481f 964 my %node_data_keys;
965 my $ndi = 0;
bbd064a9 966 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 967 $node_data_keys{$datum} = 'dn'.$ndi++;
968 my $key = $root->addNewChild( $graphml_ns, 'key' );
969 $key->setAttribute( 'attr.name', $datum );
bbd064a9 970 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 971 $key->setAttribute( 'for', 'node' );
972 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 973 }
974
bbd064a9 975 # Add the data keys for edges, that is, paths and relationships. Path
976 # data does not come from a Moose class so is here manually.
ef9d481f 977 my $edi = 0;
978 my %edge_data_keys;
bbd064a9 979 my %edge_attributes = (
3a2ebbf4 980 witness => 'string', # ID/label for a path
3a2ebbf4 981 extra => 'boolean', # Path key
3a2ebbf4 982 );
bbd064a9 983 my @path_attributes = keys %edge_attributes; # track our manual additions
984 my $pmeta = Text::Tradition::Collation::Relationship->meta;
985 foreach my $attr( $pmeta->get_all_attributes ) {
986 next if $attr->name =~ /^_/;
987 next if $skipsave{$attr->name};
988 next unless $save_types{$attr->type_constraint->name};
989 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
990 }
991 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 992 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 993 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 994 $key->setAttribute( 'attr.name', $datum );
bbd064a9 995 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 996 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 997 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 998 }
3a2ebbf4 999
22222af9 1000 # Add the collation graph itself
2c669bca 1001 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1002 $sgraph->setAttribute( 'edgedefault', 'directed' );
1003 $sgraph->setAttribute( 'id', $self->tradition->name );
1004 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1005 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
1006 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1007 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
1008 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1009
e309421a 1010 # Collation attribute data
bbd064a9 1011 foreach my $datum ( keys %graph_attributes ) {
1012 my $value;
1013 if( $datum eq 'version' ) {
1014 $value = '3.1';
1015 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1016 $value = $self->tradition->$datum;
1017 } else {
1018 $value = $self->$datum;
1019 }
2c669bca 1020 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1021 }
8e1394aa 1022
1023 my $node_ctr = 0;
1024 my %node_hash;
22222af9 1025 # Add our readings to the graph
3a2ebbf4 1026 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1027 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1028 ( $n->rank < $start->rank || $n->rank > $end->rank );
1029 $use_readings{$n->id} = 1;
2c669bca 1030 # Add to the main graph
1031 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1032 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1033 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1034 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1035 foreach my $d ( keys %reading_attributes ) {
255875b8 1036 my $nval = $n->$d;
1037 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1038 if defined $nval;
1039 }
b15511bf 1040 }
1041
2c669bca 1042 # Add the path edges to the sequence graph
df6d9812 1043 my $edge_ctr = 0;
3a2ebbf4 1044 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1045 # We add an edge in the graphml for every witness in $e.
a30ca502 1046 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1047 my @edge_wits = sort $self->path_witnesses( $e );
1048 $e->[0] = $self->start unless $use_readings{$e->[0]};
1049 $e->[1] = $self->end unless $use_readings{$e->[1]};
1050 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1051 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1052 $node_hash{ $e->[0] },
1053 $node_hash{ $e->[1] } );
2c669bca 1054 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1055 $edge_el->setAttribute( 'source', $from );
1056 $edge_el->setAttribute( 'target', $to );
1057 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1058
1059 # It's a witness path, so add the witness
1060 my $base = $wit;
1061 my $key = $edge_data_keys{'witness'};
1062 # Is this an ante-corr witness?
1063 my $aclabel = $self->ac_label;
1064 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1065 # Keep the base witness
1066 $base = $1;
1067 # ...and record that this is an 'extra' reading path
1068 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1069 }
1070 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1071 }
1072 }
1073
22222af9 1074 # Add the relationship graph to the XML
bbd064a9 1075 map { delete $edge_data_keys{$_} } @path_attributes;
027d819c 1076 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
2626f709 1077 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1078
94c00c71 1079 # Save and return the thing
1080 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1081 return $result;
df6d9812 1082}
1083
b15511bf 1084sub _add_graphml_data {
1085 my( $el, $key, $value ) = @_;
b15511bf 1086 return unless defined $value;
c9bf3dbf 1087 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1088 $data_el->setAttribute( 'key', $key );
1089 $data_el->appendText( $value );
8e1394aa 1090}
1091
4e5a7b2c 1092=head2 as_csv
910a0a6d 1093
1094Returns a CSV alignment table representation of the collation graph, one
2c669bca 1095row per witness (or witness uncorrected.)
910a0a6d 1096
1097=cut
1098
1099sub as_csv {
3a2ebbf4 1100 my( $self ) = @_;
1dd07bda 1101 my $table = $self->alignment_table;
82fa4d57 1102 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1103 my @result;
2c669bca 1104 # Make the header row
1105 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1106 push( @result, decode_utf8( $csv->string ) );
1107 # Make the rest of the rows
1108 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1109 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1110 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1111 $csv->combine( @row );
910a0a6d 1112 push( @result, decode_utf8( $csv->string ) );
1113 }
3a2ebbf4 1114 return join( "\n", @result );
910a0a6d 1115}
1116
1dd07bda 1117=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1118
566f4595 1119Return a reference to an alignment table, in a slightly enhanced CollateX
1120format which looks like this:
1121
1122 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1123 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1124 { witness => "SIG2",
4e5a7b2c 1125 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1126 ... ],
1127 length => TEXTLEN };
1128
1129If $use_refs is set to 1, the reading object is returned in the table
1130instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1131
1132If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1133keys have a true hash value will be included.
2c669bca 1134
1135=cut
9f3ba6f7 1136
1dd07bda 1137sub alignment_table {
1138 my( $self ) = @_;
c1915ab9 1139 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1140 return $self->cached_table if $self->has_cached_table;
1141
0ecb975c 1142 # Make sure we can do this
1143 throw( "Need a linear graph in order to make an alignment table" )
1144 unless $self->linear;
1145 $self->calculate_ranks unless $self->end->has_rank;
1146
2c669bca 1147 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1148 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1149 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1150 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1151 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1152 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1153 push( @{$table->{'alignment'}},
1154 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1155 if( $wit->is_layered ) {
1156 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1157 $wit->sigil.$self->ac_label );
1dd07bda 1158 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1159 push( @{$table->{'alignment'}},
1160 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1161 }
1162 }
1dd07bda 1163 $self->cached_table( $table );
1164 return $table;
910a0a6d 1165}
1166
1167sub _make_witness_row {
1dd07bda 1168 my( $path, $positions ) = @_;
910a0a6d 1169 my %char_hash;
1170 map { $char_hash{$_} = undef } @$positions;
2c669bca 1171 my $debug = 0;
910a0a6d 1172 foreach my $rdg ( @$path ) {
eca16057 1173 my $rtext = $rdg->text;
1174 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1175 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1176 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1177 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1178 }
1179 my @row = map { $char_hash{$_} } @$positions;
eca16057 1180 # Fill in lacuna markers for undef spots in the row
1181 my $last_el = shift @row;
1182 my @filled_row = ( $last_el );
1183 foreach my $el ( @row ) {
0e476982 1184 # If we are using node reference, make the lacuna node appear many times
1185 # in the table. If not, use the lacuna tag.
1dd07bda 1186 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1187 $el = $last_el;
eca16057 1188 }
1189 push( @filled_row, $el );
1190 $last_el = $el;
1191 }
1192 return @filled_row;
910a0a6d 1193}
1194
4e5a7b2c 1195=head1 NAVIGATION METHODS
910a0a6d 1196
4e5a7b2c 1197=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1198
1199Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1200with $last, for the witness given in $sigil. If a $backup sigil is
1201specified (e.g. when walking a layered witness), it will be used wherever
1202no $sigil path exists. If there is a base text reading, that will be
1203used wherever no path exists for $sigil or $backup.
e2902068 1204
1205=cut
1206
910a0a6d 1207# TODO Think about returning some lazy-eval iterator.
b0b4421a 1208# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1209
e2902068 1210sub reading_sequence {
861c3e27 1211 my( $self, $start, $end, $witness ) = @_;
e2902068 1212
930ff666 1213 $witness = $self->baselabel unless $witness;
e2902068 1214 my @readings = ( $start );
1215 my %seen;
1216 my $n = $start;
3a2ebbf4 1217 while( $n && $n->id ne $end->id ) {
1218 if( exists( $seen{$n->id} ) ) {
63778331 1219 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1220 }
3a2ebbf4 1221 $seen{$n->id} = 1;
910a0a6d 1222
861c3e27 1223 my $next = $self->next_reading( $n, $witness );
44771cf2 1224 unless( $next ) {
63778331 1225 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1226 }
910a0a6d 1227 push( @readings, $next );
1228 $n = $next;
e2902068 1229 }
1230 # Check that the last reading is our end reading.
1231 my $last = $readings[$#readings];
63778331 1232 throw( "Last reading found from " . $start->text .
1233 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1234 unless $last->id eq $end->id;
e2902068 1235
1236 return @readings;
1237}
1238
4e5a7b2c 1239=head2 next_reading( $reading, $sigil );
8e1394aa 1240
4a8828f0 1241Returns the reading that follows the given reading along the given witness
930ff666 1242path.
8e1394aa 1243
1244=cut
1245
4a8828f0 1246sub next_reading {
e2902068 1247 # Return the successor via the corresponding path.
8e1394aa 1248 my $self = shift;
3a2ebbf4 1249 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1250 return undef unless $answer;
3a2ebbf4 1251 return $self->reading( $answer );
8e1394aa 1252}
1253
4e5a7b2c 1254=head2 prior_reading( $reading, $sigil )
8e1394aa 1255
4a8828f0 1256Returns the reading that precedes the given reading along the given witness
930ff666 1257path.
8e1394aa 1258
1259=cut
1260
4a8828f0 1261sub prior_reading {
e2902068 1262 # Return the predecessor via the corresponding path.
8e1394aa 1263 my $self = shift;
3a2ebbf4 1264 my $answer = $self->_find_linked_reading( 'prior', @_ );
1265 return $self->reading( $answer );
8e1394aa 1266}
1267
4a8828f0 1268sub _find_linked_reading {
861c3e27 1269 my( $self, $direction, $node, $path ) = @_;
1270
1271 # Get a backup if we are dealing with a layered witness
1272 my $alt_path;
1273 my $aclabel = $self->ac_label;
1274 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1275 $alt_path = $1;
1276 }
1277
e2902068 1278 my @linked_paths = $direction eq 'next'
3a2ebbf4 1279 ? $self->sequence->edges_from( $node )
1280 : $self->sequence->edges_to( $node );
e2902068 1281 return undef unless scalar( @linked_paths );
8e1394aa 1282
e2902068 1283 # We have to find the linked path that contains all of the
1284 # witnesses supplied in $path.
1285 my( @path_wits, @alt_path_wits );
4e5a7b2c 1286 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1287 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1288 my $base_le;
1289 my $alt_le;
1290 foreach my $le ( @linked_paths ) {
3a2ebbf4 1291 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1292 $base_le = $le;
910a0a6d 1293 }
508fd430 1294 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1295 if( _is_within( \@path_wits, \@le_wits ) ) {
1296 # This is the right path.
1297 return $direction eq 'next' ? $le->[1] : $le->[0];
1298 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1299 $alt_le = $le;
1300 }
8e1394aa 1301 }
e2902068 1302 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1303 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1304 if $alt_le;
e2902068 1305
1306 # Got this far? Return the base path if it exists.
3a2ebbf4 1307 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1308 if $base_le;
e2902068 1309
1310 # Got this far? We have no appropriate path.
2c669bca 1311 warn "Could not find $direction node from " . $node->id
910a0a6d 1312 . " along path $path";
8e1394aa 1313 return undef;
1314}
1315
4a8828f0 1316# Some set logic.
1317sub _is_within {
1318 my( $set1, $set2 ) = @_;
7854e12e 1319 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1320 foreach my $el ( @$set1 ) {
910a0a6d 1321 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1322 }
1323 return $ret;
1324}
1325
4e5a7b2c 1326# Return the string that joins together a list of witnesses for
1327# display on a single path.
1328sub _witnesses_of_label {
1329 my( $self, $label ) = @_;
1330 my $regex = $self->wit_list_separator;
1331 my @answer = split( /\Q$regex\E/, $label );
1332 return @answer;
b0b4421a 1333}
1334
d4b75f44 1335=head2 common_readings
1336
1337Returns the list of common readings in the graph (i.e. those readings that are
1338shared by all non-lacunose witnesses.)
1339
1340=cut
1341
1342sub common_readings {
1343 my $self = shift;
1344 my @common = grep { $_->is_common } $self->readings;
1345 return @common;
1346}
1347
fae52efd 1348=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1349
1350Returns the text of a witness (plus its backup, if we are using a layer)
1351as stored in the collation. The text is returned as a string, where the
1352individual readings are joined with spaces and the meta-readings (e.g.
1353lacunae) are omitted. Optional specification of $start and $end allows
1354the generation of a subset of the witness text.
4e5a7b2c 1355
b0b4421a 1356=cut
1357
1358sub path_text {
861c3e27 1359 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1360 $start = $self->start unless $start;
1361 $end = $self->end unless $end;
861c3e27 1362 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1363 my $pathtext = '';
1364 my $last;
1365 foreach my $r ( @path ) {
1366 if( $r->join_prior || !$last || $last->join_next ) {
1367 $pathtext .= $r->text;
1368 } else {
1369 $pathtext .= ' ' . $r->text;
1370 }
1371 $last = $r;
1372 }
1373 return $pathtext;
b0b4421a 1374}
4e5a7b2c 1375
1376=head1 INITIALIZATION METHODS
1377
1378These are mostly for use by parsers.
1379
1380=head2 make_witness_path( $witness )
1381
1382Link the array of readings contained in $witness->path (and in
1383$witness->uncorrected_path if it exists) into collation paths.
1384Clear out the arrays when finished.
de51424a 1385
4e5a7b2c 1386=head2 make_witness_paths
1387
1388Call make_witness_path for all witnesses in the tradition.
1389
1390=cut
930ff666 1391
7e450e44 1392# For use when a collation is constructed from a base text and an apparatus.
1393# We have the sequences of readings and just need to add path edges.
1f7aa795 1394# When we are done, clear out the witness path attributes, as they are no
1395# longer needed.
1396# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1397
6a222840 1398sub make_witness_paths {
1399 my( $self ) = @_;
910a0a6d 1400 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1401 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1402 $self->make_witness_path( $wit );
7854e12e 1403 }
7854e12e 1404}
1405
6a222840 1406sub make_witness_path {
7854e12e 1407 my( $self, $wit ) = @_;
1408 my @chain = @{$wit->path};
15d2d3df 1409 my $sig = $wit->sigil;
fae52efd 1410 # Add start and end if necessary
1411 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1412 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1413 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1414 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1415 }
1f7aa795 1416 if( $wit->is_layered ) {
d9e873d0 1417 @chain = @{$wit->uncorrected_path};
fae52efd 1418 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1419 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1420 foreach my $idx( 0 .. $#chain-1 ) {
1421 my $source = $chain[$idx];
1422 my $target = $chain[$idx+1];
1423 $self->add_path( $source, $target, $sig.$self->ac_label )
1424 unless $self->has_path( $source, $target, $sig );
1425 }
15d2d3df 1426 }
1f7aa795 1427 $wit->clear_path;
1428 $wit->clear_uncorrected_path;
e2902068 1429}
1430
4e5a7b2c 1431=head2 calculate_ranks
1432
1433Calculate the reading ranks (that is, their aligned positions relative
1434to each other) for the graph. This can only be called on linear collations.
1435
b365fbae 1436=begin testing
1437
1438use Text::Tradition;
1439
1440my $cxfile = 't/data/Collatex-16.xml';
1441my $t = Text::Tradition->new(
1442 'name' => 'inline',
1443 'input' => 'CollateX',
1444 'file' => $cxfile,
1445 );
1446my $c = $t->collation;
1447
1448# Make an svg
bfcbcecb 1449my $table = $c->alignment_table;
1450ok( $c->has_cached_table, "Alignment table was cached" );
1451is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1452$c->calculate_ranks;
bfcbcecb 1453is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1454$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1455isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1456
1457=end testing
1458
4e5a7b2c 1459=cut
1460
910a0a6d 1461sub calculate_ranks {
1462 my $self = shift;
b365fbae 1463 # Save the existing ranks, in case we need to invalidate the cached SVG.
1464 my %existing_ranks;
910a0a6d 1465 # Walk a version of the graph where every node linked by a relationship
1466 # edge is fundamentally the same node, and do a topological ranking on
1467 # the nodes in this graph.
c9bf3dbf 1468 my $topo_graph = Graph->new();
910a0a6d 1469 my %rel_containers;
1470 my $rel_ctr = 0;
1471 # Add the nodes
1472 foreach my $r ( $self->readings ) {
3a2ebbf4 1473 next if exists $rel_containers{$r->id};
910a0a6d 1474 my @rels = $r->related_readings( 'colocated' );
1475 if( @rels ) {
1476 # Make a relationship container.
1477 push( @rels, $r );
c9bf3dbf 1478 my $rn = 'rel_container_' . $rel_ctr++;
1479 $topo_graph->add_vertex( $rn );
910a0a6d 1480 foreach( @rels ) {
3a2ebbf4 1481 $rel_containers{$_->id} = $rn;
910a0a6d 1482 }
1483 } else {
1484 # Add a new node to mirror the old node.
3a2ebbf4 1485 $rel_containers{$r->id} = $r->id;
1486 $topo_graph->add_vertex( $r->id );
910a0a6d 1487 }
4a8828f0 1488 }
3a1f2523 1489
3a2ebbf4 1490 # Add the edges.
910a0a6d 1491 foreach my $r ( $self->readings ) {
b365fbae 1492 $existing_ranks{$r} = $r->rank;
3a2ebbf4 1493 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1494 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1495 $rel_containers{$n} );
4e5a7b2c 1496 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1497 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1498 }
1499 }
1500
1501 # Now do the rankings, starting with the start node.
3a2ebbf4 1502 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1503 my $node_ranks = { $topo_start => 0 };
910a0a6d 1504 my @curr_origin = ( $topo_start );
1505 # A little iterative function.
1506 while( @curr_origin ) {
c9bf3dbf 1507 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1508 }
1509 # Transfer our rankings from the topological graph to the real one.
1510 foreach my $r ( $self->readings ) {
3a2ebbf4 1511 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1512 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1513 } else {
63778331 1514 # Die. Find the last rank we calculated.
7ec89b0e 1515 my @all_defined = sort { ( $node_ranks->{$rel_containers{$a->id}}||-1 )
1516 <=> ( $node_ranks->{$rel_containers{$b->id}}||-1 ) }
63778331 1517 $self->readings;
1518 my $last = pop @all_defined;
1519 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1520 }
de51424a 1521 }
bfcbcecb 1522 # Do we need to invalidate the cached data?
1523 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1524 foreach my $r ( $self->readings ) {
7c293912 1525 next if defined( $existing_ranks{$r} )
1526 && $existing_ranks{$r} == $r->rank;
c1915ab9 1527 # Something has changed, so clear the cache
bfcbcecb 1528 $self->_clear_cache;
c1915ab9 1529 # ...and recalculate the common readings.
1530 $self->calculate_common_readings();
b365fbae 1531 last;
1532 }
1533 }
c1915ab9 1534 # The graph calculation information is now up to date.
1535 $self->_graphcalc_done(1);
8e1394aa 1536}
3a1f2523 1537
910a0a6d 1538sub _assign_rank {
c9bf3dbf 1539 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1540 # Look at each of the children of @current_nodes. If all the child's
1541 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1542 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1543 # parent gets a rank.
910a0a6d 1544 my @next_nodes;
1545 foreach my $c ( @current_nodes ) {
c9bf3dbf 1546 warn "Current reading $c has no rank!"
1547 unless exists $node_ranks->{$c};
1548 # print STDERR "Looking at child of node $c, rank "
1549 # . $node_ranks->{$c} . "\n";
1550 foreach my $child ( $graph->successors( $c ) ) {
1551 next if exists $node_ranks->{$child};
910a0a6d 1552 my $highest_rank = -1;
1553 my $skip = 0;
c9bf3dbf 1554 foreach my $parent ( $graph->predecessors( $child ) ) {
1555 if( exists $node_ranks->{$parent} ) {
1556 $highest_rank = $node_ranks->{$parent}
1557 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1558 } else {
1559 $skip = 1;
1560 last;
1561 }
1562 }
1563 next if $skip;
c9bf3dbf 1564 my $c_rank = $highest_rank + 1;
1565 # print STDERR "Assigning rank $c_rank to node $child \n";
1566 $node_ranks->{$child} = $c_rank;
910a0a6d 1567 push( @next_nodes, $child );
1568 }
1569 }
1570 return @next_nodes;
4cdd82f1 1571}
910a0a6d 1572
c1915ab9 1573sub _clear_cache {
1574 my $self = shift;
1575 $self->wipe_svg if $self->has_cached_svg;
1576 $self->wipe_table if $self->has_cached_table;
1577}
1578
1579
4e5a7b2c 1580=head2 flatten_ranks
1581
1582A convenience method for parsing collation data. Searches the graph for readings
1583with the same text at the same rank, and merges any that are found.
1584
1585=cut
1586
0e476982 1587sub flatten_ranks {
1588 my $self = shift;
1589 my %unique_rank_rdg;
bf6e338d 1590 my $changed;
0e476982 1591 foreach my $rdg ( $self->readings ) {
1592 next unless $rdg->has_rank;
1593 my $key = $rdg->rank . "||" . $rdg->text;
1594 if( exists $unique_rank_rdg{$key} ) {
1595 # Combine!
56eefa04 1596 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1597 $changed = 1;
0e476982 1598 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1599 # TODO see if this now makes a common point.
0e476982 1600 } else {
1601 $unique_rank_rdg{$key} = $rdg;
1602 }
1603 }
bf6e338d 1604 # If we merged readings, the ranks are still fine but the alignment
1605 # table is wrong. Wipe it.
1606 $self->wipe_table() if $changed;
0e476982 1607}
4633f9e4 1608
1609
d4b75f44 1610=head2 calculate_common_readings
1611
1612Goes through the graph identifying the readings that appear in every witness
1613(apart from those with lacunae at that spot.) Marks them as common and returns
1614the list.
1615
1616=begin testing
1617
1618use Text::Tradition;
1619
1620my $cxfile = 't/data/Collatex-16.xml';
1621my $t = Text::Tradition->new(
1622 'name' => 'inline',
1623 'input' => 'CollateX',
1624 'file' => $cxfile,
1625 );
1626my $c = $t->collation;
1627
1628my @common = $c->calculate_common_readings();
1629is( scalar @common, 8, "Found correct number of common readings" );
1630my @marked = sort $c->common_readings();
1631is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1632my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1633is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1634
1635=end testing
1636
1637=cut
1638
1639sub calculate_common_readings {
1640 my $self = shift;
1641 my @common;
c1915ab9 1642 map { $_->is_common( 0 ) } $self->readings;
1643 # Implicitly calls calculate_ranks
1dd07bda 1644 my $table = $self->alignment_table;
d4b75f44 1645 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1646 my @row = map { $_->{'tokens'}->[$idx]
1647 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1648 @{$table->{'alignment'}};
d4b75f44 1649 my %hash;
1650 foreach my $r ( @row ) {
1651 if( $r ) {
1652 $hash{$r->id} = $r unless $r->is_meta;
1653 } else {
1654 $hash{'UNDEF'} = $r;
1655 }
1656 }
1657 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1658 my( $r ) = values %hash;
1659 $r->is_common( 1 );
1660 push( @common, $r );
1661 }
1662 }
1663 return @common;
1664}
1665
861c3e27 1666=head2 text_from_paths
1667
1668Calculate the text array for all witnesses from the path, for later consistency
1669checking. Only to be used if there is no non-graph-based way to know the
1670original texts.
1671
1672=cut
1673
1674sub text_from_paths {
1675 my $self = shift;
1676 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1677 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1678 my @text;
1679 foreach my $r ( @readings ) {
1680 next if $r->is_meta;
1681 push( @text, $r->text );
1682 }
861c3e27 1683 $wit->text( \@text );
1684 if( $wit->is_layered ) {
5164a6f0 1685 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1686 $wit->sigil.$self->ac_label );
1687 my @uctext;
1688 foreach my $r ( @ucrdgs ) {
1689 next if $r->is_meta;
1690 push( @uctext, $r->text );
1691 }
1692 $wit->layertext( \@uctext );
861c3e27 1693 }
1694 }
1695}
0e476982 1696
4e5a7b2c 1697=head1 UTILITY FUNCTIONS
1698
1699=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1700
4e5a7b2c 1701Find the last reading that occurs in sequence before both the given readings.
1702
1703=head2 common_successor( $reading_a, $reading_b )
1704
1705Find the first reading that occurs in sequence after both the given readings.
1706
22222af9 1707=begin testing
1708
1709use Text::Tradition;
1710
1711my $cxfile = 't/data/Collatex-16.xml';
1712my $t = Text::Tradition->new(
1713 'name' => 'inline',
1714 'input' => 'CollateX',
1715 'file' => $cxfile,
1716 );
1717my $c = $t->collation;
1718
679f17e1 1719is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1720 'n20', "Found correct common predecessor" );
679f17e1 1721is( $c->common_successor( 'n24', 'n23' )->id,
22222af9 1722 '#END#', "Found correct common successor" );
1723
4e5a7b2c 1724is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1725 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1726is( $c->common_successor( 'n21', 'n10' )->id,
22222af9 1727 '#END#', "Found correct common successor for readings on same path" );
1728
1729=end testing
1730
1731=cut
1732
1733## Return the closest reading that is a predecessor of both the given readings.
1734sub common_predecessor {
1735 my $self = shift;
4e5a7b2c 1736 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1737 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1738}
1739
1740sub common_successor {
1741 my $self = shift;
4e5a7b2c 1742 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1743 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1744}
1745
027d819c 1746sub _common_in_path {
22222af9 1747 my( $self, $r1, $r2, $dir ) = @_;
1748 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1749 $iter = $self->end->rank - $iter if $dir eq 'successors';
1750 my @candidates;
1751 my @last_checked = ( $r1, $r2 );
1752 my %all_seen;
1753 while( !@candidates ) {
1754 my @new_lc;
1755 foreach my $lc ( @last_checked ) {
1756 foreach my $p ( $lc->$dir ) {
1757 if( $all_seen{$p->id} ) {
1758 push( @candidates, $p );
1759 } else {
1760 $all_seen{$p->id} = 1;
1761 push( @new_lc, $p );
1762 }
1763 }
1764 }
1765 @last_checked = @new_lc;
1766 }
1767 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1768 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1769}
1770
63778331 1771sub throw {
1772 Text::Tradition::Error->throw(
1773 'ident' => 'Collation error',
1774 'message' => $_[0],
1775 );
1776}
1777
dd3b58b0 1778no Moose;
1779__PACKAGE__->meta->make_immutable;
e867486f 1780
027d819c 1781=head1 LICENSE
e867486f 1782
027d819c 1783This package is free software and is provided "as is" without express
1784or implied warranty. You can redistribute it and/or modify it under
1785the same terms as Perl itself.
e867486f 1786
027d819c 1787=head1 AUTHOR
e867486f 1788
027d819c 1789Tara L Andrews E<lt>aurum@cpan.orgE<gt>