clean up script dir a little
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 277 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
278 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 279}
784877d9 280
3a2ebbf4 281### Reading construct/destruct functions
282
283sub add_reading {
284 my( $self, $reading ) = @_;
285 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286 my %args = %$reading;
287 $reading = Text::Tradition::Collation::Reading->new(
288 'collation' => $self,
289 %args );
290 }
291 # First check to see if a reading with this ID exists.
292 if( $self->reading( $reading->id ) ) {
63778331 293 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 294 }
c1915ab9 295 $self->_graphcalc_done(0);
3a2ebbf4 296 $self->_add_reading( $reading->id => $reading );
297 # Once the reading has been added, put it in both graphs.
298 $self->sequence->add_vertex( $reading->id );
22222af9 299 $self->relations->add_reading( $reading->id );
3a2ebbf4 300 return $reading;
eca16057 301};
302
3a2ebbf4 303around del_reading => sub {
304 my $orig = shift;
305 my $self = shift;
306 my $arg = shift;
307
308 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
309 $arg = $arg->id;
310 }
3a2ebbf4 311 # Remove the reading from the graphs.
c1915ab9 312 $self->_graphcalc_done(0);
4e483aa5 313 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 314 $self->sequence->delete_vertex( $arg );
22222af9 315 $self->relations->delete_reading( $arg );
3a2ebbf4 316
317 # Carry on.
318 $self->$orig( $arg );
319};
7854e12e 320
4e483aa5 321=begin testing
322
323use Text::Tradition;
324
325my $cxfile = 't/data/Collatex-16.xml';
326my $t = Text::Tradition->new(
327 'name' => 'inline',
328 'input' => 'CollateX',
329 'file' => $cxfile,
330 );
331my $c = $t->collation;
332
333my $rno = scalar $c->readings;
334# Split n21 for testing purposes
335my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
336my $old_r = $c->reading( 'n21' );
337$old_r->alter_text( 'to' );
338$c->del_path( 'n20', 'n21', 'A' );
339$c->add_path( 'n20', 'n21p0', 'A' );
340$c->add_path( 'n21p0', 'n21', 'A' );
341$c->flatten_ranks();
342ok( $c->reading( 'n21p0' ), "New reading exists" );
343is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
344
679f17e1 345# Combine n3 and n4 ( with his )
4e483aa5 346$c->merge_readings( 'n3', 'n4', 1 );
347ok( !$c->reading('n4'), "Reading n4 is gone" );
348is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
349
679f17e1 350# Collapse n9 and n10 ( rood / root )
351$c->merge_readings( 'n9', 'n10' );
352ok( !$c->reading('n10'), "Reading n10 is gone" );
353is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 354
355# Combine n21 and n21p0
356my $remaining = $c->reading('n21');
357$remaining ||= $c->reading('n22'); # one of these should still exist
358$c->merge_readings( 'n21p0', $remaining, 1 );
359ok( !$c->reading('n21'), "Reading $remaining is gone" );
360is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
361
362=end testing
363
364=cut
7854e12e 365
3a2ebbf4 366sub merge_readings {
367 my $self = shift;
368
369 # We only need the IDs for adding paths to the graph, not the reading
370 # objects themselves.
4e483aa5 371 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 372 $self->_graphcalc_done(0);
3a2ebbf4 373
374 # The kept reading should inherit the paths and the relationships
375 # of the deleted reading.
376 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
377 my @vector = ( $kept );
378 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
379 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 380 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 381 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
382 $self->sequence->add_edge( @vector );
383 my $fwits = $self->sequence->get_edge_attributes( @vector );
384 @wits{keys %$fwits} = values %$fwits;
385 $self->sequence->set_edge_attributes( @vector, \%wits );
386 }
22222af9 387 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 388
389 # Do the deletion deed.
4e483aa5 390 if( $combine ) {
49d4f2ac 391 my $kept_obj = $self->reading( $kept );
4e483aa5 392 my $del_obj = $self->reading( $deleted );
393 my $joinstr = $combine_char;
394 unless( defined $joinstr ) {
395 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
396 $joinstr = $self->wordsep unless defined $joinstr;
397 }
398 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 399 }
3a2ebbf4 400 $self->del_reading( $deleted );
401}
7854e12e 402
3265b0ce 403
3a2ebbf4 404# Helper function for manipulating the graph.
405sub _stringify_args {
4e483aa5 406 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 407 $first = $first->id
408 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
409 $second = $second->id
410 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 411 return( $first, $second, @args );
3a2ebbf4 412}
df6d9812 413
4e5a7b2c 414# Helper function for manipulating the graph.
415sub _objectify_args {
416 my( $self, $first, $second, $arg ) = @_;
417 $first = $self->reading( $first )
418 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
419 $second = $self->reading( $second )
420 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
421 return( $first, $second, $arg );
422}
3a2ebbf4 423### Path logic
424
425sub add_path {
426 my $self = shift;
427
428 # We only need the IDs for adding paths to the graph, not the reading
429 # objects themselves.
430 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
431
c1915ab9 432 $self->_graphcalc_done(0);
3a2ebbf4 433 # Connect the readings
359944f7 434 unless( $self->sequence->has_edge( $source, $target ) ) {
435 $self->sequence->add_edge( $source, $target );
436 $self->relations->add_equivalence_edge( $source, $target );
437 }
3a2ebbf4 438 # Note the witness in question
439 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 440}
b15511bf 441
3a2ebbf4 442sub del_path {
443 my $self = shift;
49d4f2ac 444 my @args;
445 if( ref( $_[0] ) eq 'ARRAY' ) {
446 my $e = shift @_;
447 @args = ( @$e, @_ );
448 } else {
449 @args = @_;
450 }
3a2ebbf4 451
452 # We only need the IDs for adding paths to the graph, not the reading
453 # objects themselves.
49d4f2ac 454 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 455
c1915ab9 456 $self->_graphcalc_done(0);
3a2ebbf4 457 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 458 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 459 }
460 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
461 $self->sequence->delete_edge( $source, $target );
359944f7 462 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 463 }
784877d9 464}
465
3a2ebbf4 466
15d2d3df 467# Extra graph-alike utility
468sub has_path {
3a2ebbf4 469 my $self = shift;
470 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
471 return undef unless $self->sequence->has_edge( $source, $target );
472 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 473}
474
4e5a7b2c 475=head2 clear_witness( @sigil_list )
3a2ebbf4 476
4e5a7b2c 477Clear the given witnesses out of the collation entirely, removing references
478to them in paths, and removing readings that belong only to them. Should only
479be called via $tradition->del_witness.
3a2ebbf4 480
481=cut
482
4e5a7b2c 483sub clear_witness {
484 my( $self, @sigils ) = @_;
485
c1915ab9 486 $self->_graphcalc_done(0);
4e5a7b2c 487 # Clear the witness(es) out of the paths
488 foreach my $e ( $self->paths ) {
489 foreach my $sig ( @sigils ) {
490 $self->del_path( $e, $sig );
491 }
492 }
493
494 # Clear out the newly unused readings
495 foreach my $r ( $self->readings ) {
496 unless( $self->reading_witnesses( $r ) ) {
497 $self->del_reading( $r );
498 }
499 }
500}
3a2ebbf4 501
502sub add_relationship {
503 my $self = shift;
22222af9 504 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 505 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 506 $self->_graphcalc_done(0);
63778331 507 return @vectors;
22222af9 508}
ef9d481f 509
ca6e6095 510around qw/ get_relationship del_relationship / => sub {
511 my $orig = shift;
512 my $self = shift;
513 my @args = @_;
514 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
515 @args = @{$_[0]};
516 }
517 my( $source, $target ) = $self->_stringify_args( @args );
518 $self->$orig( $source, $target );
519};
520
22222af9 521=head2 reading_witnesses( $reading )
910a0a6d 522
22222af9 523Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 524
22222af9 525=cut
1d310495 526
1d310495 527sub reading_witnesses {
528 my( $self, $reading ) = @_;
529 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 530 # arbitrarily chosen "incoming". Thus, special-case the start node.
531 if( $reading eq $self->start ) {
532 return map { $_->sigil } $self->tradition->witnesses;
533 }
1d310495 534 my %all_witnesses;
535 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
536 my $wits = $self->sequence->get_edge_attributes( @$e );
537 @all_witnesses{ keys %$wits } = 1;
538 }
c12bb878 539 my $acstr = $self->ac_label;
540 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
541 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
542 }
1d310495 543 return keys %all_witnesses;
910a0a6d 544}
545
4e5a7b2c 546=head1 OUTPUT METHODS
8e1394aa 547
0ecb975c 548=head2 as_svg( \%options )
8e1394aa 549
0068967c 550Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 551See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 552
553=cut
554
555sub as_svg {
0ecb975c 556 my( $self, $opts ) = @_;
bfcbcecb 557 throw( "Need GraphViz installed to output SVG" )
558 unless File::Which::which( 'dot' );
e247aad1 559 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
5f082716 560 $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
e247aad1 561 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 562 my @cmd = qw/dot -Tsvg/;
563 my( $svg, $err );
564 my $dotfile = File::Temp->new();
565 ## USE FOR DEBUGGING
566 # $dotfile->unlink_on_destroy(0);
567 binmode $dotfile, ':utf8';
568 print $dotfile $self->as_dot( $opts );
569 push( @cmd, $dotfile->filename );
570 run( \@cmd, ">", binary(), \$svg );
e247aad1 571 $svg = decode_utf8( $svg );
572 $self->cached_svg( $svg ) unless $want_subgraph;
573 return $svg;
574 } else {
575 return $self->cached_svg;
b365fbae 576 }
8e1394aa 577}
578
b22576c6 579
0ecb975c 580=head2 as_dot( \%options )
b22576c6 581
0ecb975c 582Returns a string that is the collation graph expressed in dot
583(i.e. GraphViz) format. Options include:
b22576c6 584
0ecb975c 585=over 4
b22576c6 586
0ecb975c 587=item * from
b22576c6 588
0ecb975c 589=item * to
df6d9812 590
0ecb975c 591=item * color_common
592
593=back
df6d9812 594
595=cut
596
597sub as_dot {
0ecb975c 598 my( $self, $opts ) = @_;
599 my $startrank = $opts->{'from'} if $opts;
600 my $endrank = $opts->{'to'} if $opts;
601 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 602 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
603 && $self->end->rank > 100;
6648ee3d 604 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 605
b22576c6 606 # Check the arguments
607 if( $startrank ) {
608 return if $endrank && $startrank > $endrank;
609 return if $startrank > $self->end->rank;
610 }
611 if( defined $endrank ) {
612 return if $endrank < 0;
f1b3b33a 613 $endrank = undef if $endrank == $self->end->rank;
b22576c6 614 }
615
67da8d6c 616 my $graph_name = $self->tradition->name;
617 $graph_name =~ s/[^\w\s]//g;
618 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 619
620 my %graph_attrs = (
621 'rankdir' => 'LR',
622 'bgcolor' => 'none',
623 );
624 my %node_attrs = (
b8990398 625 'fontsize' => 14,
f13b5582 626 'fillcolor' => 'white',
627 'style' => 'filled',
628 'shape' => 'ellipse'
629 );
630 my %edge_attrs = (
631 'arrowhead' => 'open',
632 'color' => '#000000',
633 'fontcolor' => '#000000',
634 );
635
67da8d6c 636 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 637 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
638 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 639
b22576c6 640 # Output substitute start/end readings if necessary
641 if( $startrank ) {
642 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
643 }
644 if( $endrank ) {
645 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
646 }
b365fbae 647 if( $STRAIGHTENHACK ) {
648 ## HACK part 1
6648ee3d 649 my $startlabel = $startrank ? 'SUBSTART' : 'START';
650 $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
b365fbae 651 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
652 }
b22576c6 653 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 654 # Sort the readings by rank if we have ranks; this speeds layout.
655 my @all_readings = $self->end->has_rank
656 ? sort { $a->rank <=> $b->rank } $self->readings
657 : $self->readings;
4633f9e4 658 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 659 foreach my $reading ( @all_readings ) {
b22576c6 660 # Only output readings within our rank range.
661 next if $startrank && $reading->rank < $startrank;
662 next if $endrank && $reading->rank > $endrank;
663 $used{$reading->id} = 1;
910a0a6d 664 # Need not output nodes without separate labels
3a2ebbf4 665 next if $reading->id eq $reading->text;
d4b75f44 666 my $rattrs;
30f0df34 667 my $label = $reading->text;
629e27b0 668 $label .= '-' if $reading->join_next;
669 $label = "-$label" if $reading->join_prior;
8f9cab7b 670 $label =~ s/\"/\\\"/g;
d4b75f44 671 $rattrs->{'label'} = $label;
0ecb975c 672 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 673 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 674 }
3a2ebbf4 675
30ddc24c 676 # Add the real edges. Need to weight one edge per rank jump, in a
677 # continuous line.
b365fbae 678 # my $weighted = $self->_add_edge_weights;
b22576c6 679 my @edges = $self->paths;
3bdec618 680 my( %substart, %subend );
b22576c6 681 foreach my $edge ( @edges ) {
682 # Do we need to output this edge?
508fd430 683 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 684 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 685 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 686
b22576c6 687 # Account for the rank gap if necessary
30ddc24c 688 my $rank0 = $self->reading( $edge->[0] )->rank
689 if $self->reading( $edge->[0] )->has_rank;
690 my $rank1 = $self->reading( $edge->[1] )->rank
691 if $self->reading( $edge->[1] )->has_rank;
692 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
693 $variables->{'minlen'} = $rank1 - $rank0;
694 }
695
696 # Add the calculated edge weights
b365fbae 697 # if( exists $weighted->{$edge->[0]}
e247aad1 698 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
699 # # $variables->{'color'} = 'red';
700 # $variables->{'weight'} = 3.0;
701 # }
30ddc24c 702
508fd430 703 # EXPERIMENTAL: make edge width reflect no. of witnesses
704 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
705 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
706
f13b5582 707 my $varopts = _dot_attr_string( $variables );
708 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
709 $edge->[0], $edge->[1], $varopts );
3bdec618 710 } elsif( $used{$edge->[0]} ) {
711 $subend{$edge->[0]} = 1;
712 } elsif( $used{$edge->[1]} ) {
713 $substart{$edge->[1]} = 1;
b22576c6 714 }
df6d9812 715 }
3bdec618 716 # Add substitute start and end edges if necessary
717 foreach my $node ( keys %substart ) {
027d819c 718 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 719 my $variables = { %edge_attrs, 'label' => $witstr };
720 my $varopts = _dot_attr_string( $variables );
721 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 722 }
723 foreach my $node ( keys %subend ) {
027d819c 724 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 725 my $variables = { %edge_attrs, 'label' => $witstr };
726 my $varopts = _dot_attr_string( $variables );
727 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 728 }
b365fbae 729 # HACK part 2
730 if( $STRAIGHTENHACK ) {
6648ee3d 731 my $endlabel = $endrank ? 'SUBEND' : 'END';
732 $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 733 }
30ddc24c 734
df6d9812 735 $dot .= "}\n";
736 return $dot;
737}
738
f13b5582 739sub _dot_attr_string {
740 my( $hash ) = @_;
741 my @attrs;
742 foreach my $k ( sort keys %$hash ) {
743 my $v = $hash->{$k};
744 push( @attrs, $k.'="'.$v.'"' );
745 }
746 return( '[ ' . join( ', ', @attrs ) . ' ]' );
747}
748
30ddc24c 749sub _add_edge_weights {
750 my $self = shift;
751 # Walk the graph from START to END, choosing the successor node with
752 # the largest number of witness paths each time.
753 my $weighted = {};
754 my $curr = $self->start->id;
008fc8a6 755 my $ranked = $self->end->has_rank;
30ddc24c 756 while( $curr ne $self->end->id ) {
008fc8a6 757 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 758 my @succ = sort { $self->path_witnesses( $curr, $a )
759 <=> $self->path_witnesses( $curr, $b ) }
760 $self->sequence->successors( $curr );
761 my $next = pop @succ;
008fc8a6 762 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 763 # Try to avoid lacunae in the weighted path.
008fc8a6 764 while( @succ &&
765 ( $self->reading( $next )->is_lacuna ||
766 $nextrank - $rank > 1 ) ){
30ddc24c 767 $next = pop @succ;
768 }
769 $weighted->{$curr} = $next;
770 $curr = $next;
771 }
772 return $weighted;
773}
774
027d819c 775=head2 path_witnesses( $edge )
776
777Returns the list of sigils whose witnesses are associated with the given edge.
778The edge can be passed as either an array or an arrayref of ( $source, $target ).
779
780=cut
781
3a2ebbf4 782sub path_witnesses {
783 my( $self, @edge ) = @_;
784 # If edge is an arrayref, cope.
785 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
786 my $e = shift @edge;
787 @edge = @$e;
788 }
789 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 790 return @wits;
3a2ebbf4 791}
792
7f9f05e8 793# Helper function. Make a display label for the given witnesses, showing a.c.
794# witnesses only where the main witness is not also in the list.
027d819c 795sub _path_display_label {
508fd430 796 my $self = shift;
7f9f05e8 797 my %wits;
798 map { $wits{$_} = 1 } @_;
799
800 # If an a.c. wit is listed, remove it if the main wit is also listed.
801 # Otherwise keep it for explicit listing.
802 my $aclabel = $self->ac_label;
803 my @disp_ac;
804 foreach my $w ( sort keys %wits ) {
805 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
806 if( exists $wits{$1} ) {
807 delete $wits{$w};
808 } else {
809 push( @disp_ac, $w );
810 }
811 }
812 }
813
814 # See if we are in a majority situation.
8f9cab7b 815 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 816 if( scalar keys %wits > $maj ) {
817 unshift( @disp_ac, 'majority' );
818 return join( ', ', @disp_ac );
8f9cab7b 819 } else {
7f9f05e8 820 return join( ', ', sort keys %wits );
8f9cab7b 821 }
822}
1dd07bda 823
bf6e338d 824=head2 readings_at_rank( $rank )
1dd07bda 825
bf6e338d 826Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 827
828=cut
829
bf6e338d 830sub readings_at_rank {
1dd07bda 831 my( $self, $rank ) = @_;
bf6e338d 832 my $table = $self->alignment_table;
833 # Table rank is real rank - 1.
834 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
835 my %readings;
836 foreach my $e ( @elements ) {
837 next unless ref( $e ) eq 'HASH';
838 next unless exists $e->{'t'};
839 $readings{$e->{'t'}->id} = $e->{'t'};
840 }
841 return values %readings;
1dd07bda 842}
8f9cab7b 843
4e5a7b2c 844=head2 as_graphml
8e1394aa 845
4e5a7b2c 846Returns a GraphML representation of the collation. The GraphML will contain
847two graphs. The first expresses the attributes of the readings and the witness
848paths that link them; the second expresses the relationships that link the
849readings. This is the native transfer format for a tradition.
8e1394aa 850
56eefa04 851=begin testing
852
853use Text::Tradition;
854
855my $READINGS = 311;
856my $PATHS = 361;
857
858my $datafile = 't/data/florilegium_tei_ps.xml';
859my $tradition = Text::Tradition->new( 'input' => 'TEI',
860 'name' => 'test0',
861 'file' => $datafile,
862 'linear' => 1 );
863
864ok( $tradition, "Got a tradition object" );
865is( scalar $tradition->witnesses, 13, "Found all witnesses" );
866ok( $tradition->collation, "Tradition has a collation" );
867
868my $c = $tradition->collation;
869is( scalar $c->readings, $READINGS, "Collation has all readings" );
870is( scalar $c->paths, $PATHS, "Collation has all paths" );
871is( scalar $c->relationships, 0, "Collation has all relationships" );
872
873# Add a few relationships
874$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
875$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
876$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
877
878# Now write it to GraphML and parse it again.
879
880my $graphml = $c->as_graphml;
881my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
882is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
883is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
884is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
885
886=end testing
887
8e1394aa 888=cut
889
890sub as_graphml {
a30ca502 891 my( $self, $options ) = @_;
3d14b48e 892 $self->calculate_ranks unless $self->_graphcalc_done;
893
a30ca502 894 my $start = $options->{'from'}
895 ? $self->reading( $options->{'from'} ) : $self->start;
896 my $end = $options->{'to'}
897 ? $self->reading( $options->{'to'} ) : $self->end;
898 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
899 throw( 'Start node must be before end node' );
900 }
901 # The readings need to be ranked for this to work.
902 $start = $self->start unless $start->has_rank;
903 $end = $self->end unless $end->has_rank;
414cc046 904 my $rankoffset = 0;
905 unless( $start eq $self->start ) {
906 $rankoffset = $start->rank - 1;
907 }
a30ca502 908 my %use_readings;
909
8e1394aa 910 # Some namespaces
911 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
912 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
913 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 914 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 915
916 # Create the document and root node
917 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
918 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
919 $graphml->setDocumentElement( $root );
920 $root->setNamespace( $xsi_ns, 'xsi', 0 );
921 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 922
923 # List of attribute types to save on our objects and their corresponding
924 # GraphML types
925 my %save_types = (
926 'Str' => 'string',
927 'Int' => 'int',
928 'Bool' => 'boolean',
929 'RelationshipType' => 'string',
930 'RelationshipScope' => 'string',
931 );
932
933 # List of attribute names *not* to save on our objects.
934 # We will also not save any attribute beginning with _.
935 my %skipsave;
936 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 937
bbd064a9 938 # Add the data keys for the graph. Include an extra key 'version' for the
939 # GraphML output version.
e309421a 940 my %graph_data_keys;
941 my $gdi = 0;
bbd064a9 942 my %graph_attributes = ( 'version' => 'string' );
943 # Graph attributes include those of Tradition and those of Collation.
944 my %gattr_from;
945 my $tmeta = $self->tradition->meta;
946 my $cmeta = $self->meta;
947 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
948 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
949 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
950 next if $attr->name =~ /^_/;
951 next if $skipsave{$attr->name};
952 next unless $save_types{$attr->type_constraint->name};
953 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
954 }
955
956 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 957 $graph_data_keys{$datum} = 'dg'.$gdi++;
958 my $key = $root->addNewChild( $graphml_ns, 'key' );
959 $key->setAttribute( 'attr.name', $datum );
bbd064a9 960 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 961 $key->setAttribute( 'for', 'graph' );
962 $key->setAttribute( 'id', $graph_data_keys{$datum} );
963 }
f6066bac 964
bbd064a9 965 # Add the data keys for reading nodes
966 my %reading_attributes;
967 my $rmeta = Text::Tradition::Collation::Reading->meta;
968 foreach my $attr( $rmeta->get_all_attributes ) {
969 next if $attr->name =~ /^_/;
970 next if $skipsave{$attr->name};
971 next unless $save_types{$attr->type_constraint->name};
972 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
973 }
ef9d481f 974 my %node_data_keys;
975 my $ndi = 0;
bbd064a9 976 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 977 $node_data_keys{$datum} = 'dn'.$ndi++;
978 my $key = $root->addNewChild( $graphml_ns, 'key' );
979 $key->setAttribute( 'attr.name', $datum );
bbd064a9 980 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 981 $key->setAttribute( 'for', 'node' );
982 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 983 }
984
bbd064a9 985 # Add the data keys for edges, that is, paths and relationships. Path
986 # data does not come from a Moose class so is here manually.
ef9d481f 987 my $edi = 0;
988 my %edge_data_keys;
bbd064a9 989 my %edge_attributes = (
3a2ebbf4 990 witness => 'string', # ID/label for a path
3a2ebbf4 991 extra => 'boolean', # Path key
3a2ebbf4 992 );
bbd064a9 993 my @path_attributes = keys %edge_attributes; # track our manual additions
994 my $pmeta = Text::Tradition::Collation::Relationship->meta;
995 foreach my $attr( $pmeta->get_all_attributes ) {
996 next if $attr->name =~ /^_/;
997 next if $skipsave{$attr->name};
998 next unless $save_types{$attr->type_constraint->name};
999 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1000 }
1001 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1002 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1003 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1004 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1005 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1006 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1007 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1008 }
3a2ebbf4 1009
cc31ebaa 1010 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1011 my $xmlidname = $self->tradition->name;
1012 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1013 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1014 $xmlidname = '_'.$xmlidname;
1015 }
2c669bca 1016 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1017 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1018 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1019 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1020 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1021 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1022 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1023 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1024
e309421a 1025 # Collation attribute data
bbd064a9 1026 foreach my $datum ( keys %graph_attributes ) {
1027 my $value;
1028 if( $datum eq 'version' ) {
1029 $value = '3.1';
1030 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1031 $value = $self->tradition->$datum;
1032 } else {
1033 $value = $self->$datum;
1034 }
2c669bca 1035 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1036 }
8e1394aa 1037
1038 my $node_ctr = 0;
1039 my %node_hash;
22222af9 1040 # Add our readings to the graph
3a2ebbf4 1041 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1042 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1043 ( $n->rank < $start->rank || $n->rank > $end->rank );
1044 $use_readings{$n->id} = 1;
2c669bca 1045 # Add to the main graph
1046 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1047 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1048 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1049 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1050 foreach my $d ( keys %reading_attributes ) {
255875b8 1051 my $nval = $n->$d;
cc31ebaa 1052 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1053 # Adjust the ranks within the subgraph.
cc31ebaa 1054 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1055 : $nval - $rankoffset;
414cc046 1056 }
255875b8 1057 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1058 if defined $nval;
1059 }
b15511bf 1060 }
1061
2c669bca 1062 # Add the path edges to the sequence graph
df6d9812 1063 my $edge_ctr = 0;
3a2ebbf4 1064 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1065 # We add an edge in the graphml for every witness in $e.
a30ca502 1066 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1067 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1068 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1069 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1070 # Skip any path from start to end; that witness is not in the subgraph.
1071 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1072 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1073 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1074 $node_hash{ $e->[0] },
1075 $node_hash{ $e->[1] } );
2c669bca 1076 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1077 $edge_el->setAttribute( 'source', $from );
1078 $edge_el->setAttribute( 'target', $to );
1079 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1080
1081 # It's a witness path, so add the witness
1082 my $base = $wit;
1083 my $key = $edge_data_keys{'witness'};
1084 # Is this an ante-corr witness?
1085 my $aclabel = $self->ac_label;
1086 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1087 # Keep the base witness
1088 $base = $1;
1089 # ...and record that this is an 'extra' reading path
1090 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1091 }
1092 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1093 }
1094 }
1095
cc31ebaa 1096 # Report the actual number of nodes and edges that went in
1097 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1098 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1099
22222af9 1100 # Add the relationship graph to the XML
bbd064a9 1101 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1102 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1103 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1104
94c00c71 1105 # Save and return the thing
1106 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1107 return $result;
df6d9812 1108}
1109
b15511bf 1110sub _add_graphml_data {
1111 my( $el, $key, $value ) = @_;
b15511bf 1112 return unless defined $value;
c9bf3dbf 1113 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1114 $data_el->setAttribute( 'key', $key );
1115 $data_el->appendText( $value );
8e1394aa 1116}
1117
4e5a7b2c 1118=head2 as_csv
910a0a6d 1119
1120Returns a CSV alignment table representation of the collation graph, one
2c669bca 1121row per witness (or witness uncorrected.)
910a0a6d 1122
1123=cut
1124
1125sub as_csv {
3a2ebbf4 1126 my( $self ) = @_;
1dd07bda 1127 my $table = $self->alignment_table;
82fa4d57 1128 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1129 my @result;
2c669bca 1130 # Make the header row
1131 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1132 push( @result, decode_utf8( $csv->string ) );
1133 # Make the rest of the rows
1134 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1135 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1136 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1137 $csv->combine( @row );
910a0a6d 1138 push( @result, decode_utf8( $csv->string ) );
1139 }
3a2ebbf4 1140 return join( "\n", @result );
910a0a6d 1141}
1142
1dd07bda 1143=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1144
566f4595 1145Return a reference to an alignment table, in a slightly enhanced CollateX
1146format which looks like this:
1147
1148 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1149 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1150 { witness => "SIG2",
4e5a7b2c 1151 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1152 ... ],
1153 length => TEXTLEN };
1154
1155If $use_refs is set to 1, the reading object is returned in the table
1156instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1157
1158If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1159keys have a true hash value will be included.
2c669bca 1160
1161=cut
9f3ba6f7 1162
1dd07bda 1163sub alignment_table {
1164 my( $self ) = @_;
c1915ab9 1165 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1166 return $self->cached_table if $self->has_cached_table;
1167
0ecb975c 1168 # Make sure we can do this
1169 throw( "Need a linear graph in order to make an alignment table" )
1170 unless $self->linear;
1171 $self->calculate_ranks unless $self->end->has_rank;
1172
2c669bca 1173 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1174 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1175 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1176 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1177 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1178 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1179 push( @{$table->{'alignment'}},
1180 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1181 if( $wit->is_layered ) {
1182 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1183 $wit->sigil.$self->ac_label );
1dd07bda 1184 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1185 push( @{$table->{'alignment'}},
1186 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1187 }
1188 }
1dd07bda 1189 $self->cached_table( $table );
1190 return $table;
910a0a6d 1191}
1192
1193sub _make_witness_row {
1dd07bda 1194 my( $path, $positions ) = @_;
910a0a6d 1195 my %char_hash;
1196 map { $char_hash{$_} = undef } @$positions;
2c669bca 1197 my $debug = 0;
910a0a6d 1198 foreach my $rdg ( @$path ) {
eca16057 1199 my $rtext = $rdg->text;
1200 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1201 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1202 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1203 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1204 }
1205 my @row = map { $char_hash{$_} } @$positions;
eca16057 1206 # Fill in lacuna markers for undef spots in the row
1207 my $last_el = shift @row;
1208 my @filled_row = ( $last_el );
1209 foreach my $el ( @row ) {
0e476982 1210 # If we are using node reference, make the lacuna node appear many times
1211 # in the table. If not, use the lacuna tag.
1dd07bda 1212 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1213 $el = $last_el;
eca16057 1214 }
1215 push( @filled_row, $el );
1216 $last_el = $el;
1217 }
1218 return @filled_row;
910a0a6d 1219}
1220
4e5a7b2c 1221=head1 NAVIGATION METHODS
910a0a6d 1222
4e5a7b2c 1223=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1224
1225Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1226with $last, for the witness given in $sigil. If a $backup sigil is
1227specified (e.g. when walking a layered witness), it will be used wherever
1228no $sigil path exists. If there is a base text reading, that will be
1229used wherever no path exists for $sigil or $backup.
e2902068 1230
1231=cut
1232
910a0a6d 1233# TODO Think about returning some lazy-eval iterator.
b0b4421a 1234# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1235
e2902068 1236sub reading_sequence {
861c3e27 1237 my( $self, $start, $end, $witness ) = @_;
e2902068 1238
930ff666 1239 $witness = $self->baselabel unless $witness;
e2902068 1240 my @readings = ( $start );
1241 my %seen;
1242 my $n = $start;
3a2ebbf4 1243 while( $n && $n->id ne $end->id ) {
1244 if( exists( $seen{$n->id} ) ) {
63778331 1245 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1246 }
3a2ebbf4 1247 $seen{$n->id} = 1;
910a0a6d 1248
861c3e27 1249 my $next = $self->next_reading( $n, $witness );
44771cf2 1250 unless( $next ) {
63778331 1251 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1252 }
910a0a6d 1253 push( @readings, $next );
1254 $n = $next;
e2902068 1255 }
1256 # Check that the last reading is our end reading.
1257 my $last = $readings[$#readings];
63778331 1258 throw( "Last reading found from " . $start->text .
1259 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1260 unless $last->id eq $end->id;
e2902068 1261
1262 return @readings;
1263}
1264
4e5a7b2c 1265=head2 next_reading( $reading, $sigil );
8e1394aa 1266
4a8828f0 1267Returns the reading that follows the given reading along the given witness
930ff666 1268path.
8e1394aa 1269
1270=cut
1271
4a8828f0 1272sub next_reading {
e2902068 1273 # Return the successor via the corresponding path.
8e1394aa 1274 my $self = shift;
3a2ebbf4 1275 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1276 return undef unless $answer;
3a2ebbf4 1277 return $self->reading( $answer );
8e1394aa 1278}
1279
4e5a7b2c 1280=head2 prior_reading( $reading, $sigil )
8e1394aa 1281
4a8828f0 1282Returns the reading that precedes the given reading along the given witness
930ff666 1283path.
8e1394aa 1284
1285=cut
1286
4a8828f0 1287sub prior_reading {
e2902068 1288 # Return the predecessor via the corresponding path.
8e1394aa 1289 my $self = shift;
3a2ebbf4 1290 my $answer = $self->_find_linked_reading( 'prior', @_ );
1291 return $self->reading( $answer );
8e1394aa 1292}
1293
4a8828f0 1294sub _find_linked_reading {
861c3e27 1295 my( $self, $direction, $node, $path ) = @_;
1296
1297 # Get a backup if we are dealing with a layered witness
1298 my $alt_path;
1299 my $aclabel = $self->ac_label;
1300 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1301 $alt_path = $1;
1302 }
1303
e2902068 1304 my @linked_paths = $direction eq 'next'
3a2ebbf4 1305 ? $self->sequence->edges_from( $node )
1306 : $self->sequence->edges_to( $node );
e2902068 1307 return undef unless scalar( @linked_paths );
8e1394aa 1308
e2902068 1309 # We have to find the linked path that contains all of the
1310 # witnesses supplied in $path.
1311 my( @path_wits, @alt_path_wits );
4e5a7b2c 1312 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1313 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1314 my $base_le;
1315 my $alt_le;
1316 foreach my $le ( @linked_paths ) {
3a2ebbf4 1317 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1318 $base_le = $le;
910a0a6d 1319 }
508fd430 1320 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1321 if( _is_within( \@path_wits, \@le_wits ) ) {
1322 # This is the right path.
1323 return $direction eq 'next' ? $le->[1] : $le->[0];
1324 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1325 $alt_le = $le;
1326 }
8e1394aa 1327 }
e2902068 1328 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1329 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1330 if $alt_le;
e2902068 1331
1332 # Got this far? Return the base path if it exists.
3a2ebbf4 1333 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1334 if $base_le;
e2902068 1335
1336 # Got this far? We have no appropriate path.
2c669bca 1337 warn "Could not find $direction node from " . $node->id
910a0a6d 1338 . " along path $path";
8e1394aa 1339 return undef;
1340}
1341
4a8828f0 1342# Some set logic.
1343sub _is_within {
1344 my( $set1, $set2 ) = @_;
7854e12e 1345 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1346 foreach my $el ( @$set1 ) {
910a0a6d 1347 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1348 }
1349 return $ret;
1350}
1351
4e5a7b2c 1352# Return the string that joins together a list of witnesses for
1353# display on a single path.
1354sub _witnesses_of_label {
1355 my( $self, $label ) = @_;
1356 my $regex = $self->wit_list_separator;
1357 my @answer = split( /\Q$regex\E/, $label );
1358 return @answer;
b0b4421a 1359}
1360
d4b75f44 1361=head2 common_readings
1362
1363Returns the list of common readings in the graph (i.e. those readings that are
1364shared by all non-lacunose witnesses.)
1365
1366=cut
1367
1368sub common_readings {
1369 my $self = shift;
1370 my @common = grep { $_->is_common } $self->readings;
1371 return @common;
1372}
1373
fae52efd 1374=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1375
1376Returns the text of a witness (plus its backup, if we are using a layer)
1377as stored in the collation. The text is returned as a string, where the
1378individual readings are joined with spaces and the meta-readings (e.g.
1379lacunae) are omitted. Optional specification of $start and $end allows
1380the generation of a subset of the witness text.
4e5a7b2c 1381
b0b4421a 1382=cut
1383
1384sub path_text {
861c3e27 1385 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1386 $start = $self->start unless $start;
1387 $end = $self->end unless $end;
861c3e27 1388 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
91a190c3 1389 return $self->_text_from_path( @path );
d3e7842a 1390}
1391
1392# Utility function so that we can cheat and use it when we need both the path
1393# and its text.
1394sub _text_from_path {
1395 my( $self, @path ) = @_;
629e27b0 1396 my $pathtext = '';
1397 my $last;
1398 foreach my $r ( @path ) {
1399 if( $r->join_prior || !$last || $last->join_next ) {
1400 $pathtext .= $r->text;
1401 } else {
1402 $pathtext .= ' ' . $r->text;
1403 }
1404 $last = $r;
1405 }
1406 return $pathtext;
b0b4421a 1407}
4e5a7b2c 1408
1409=head1 INITIALIZATION METHODS
1410
1411These are mostly for use by parsers.
1412
1413=head2 make_witness_path( $witness )
1414
1415Link the array of readings contained in $witness->path (and in
1416$witness->uncorrected_path if it exists) into collation paths.
1417Clear out the arrays when finished.
de51424a 1418
4e5a7b2c 1419=head2 make_witness_paths
1420
1421Call make_witness_path for all witnesses in the tradition.
1422
1423=cut
930ff666 1424
7e450e44 1425# For use when a collation is constructed from a base text and an apparatus.
1426# We have the sequences of readings and just need to add path edges.
1f7aa795 1427# When we are done, clear out the witness path attributes, as they are no
1428# longer needed.
1429# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1430
6a222840 1431sub make_witness_paths {
1432 my( $self ) = @_;
910a0a6d 1433 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1434 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1435 $self->make_witness_path( $wit );
7854e12e 1436 }
7854e12e 1437}
1438
6a222840 1439sub make_witness_path {
7854e12e 1440 my( $self, $wit ) = @_;
1441 my @chain = @{$wit->path};
15d2d3df 1442 my $sig = $wit->sigil;
fae52efd 1443 # Add start and end if necessary
1444 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1445 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1446 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1447 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1448 }
1f7aa795 1449 if( $wit->is_layered ) {
d9e873d0 1450 @chain = @{$wit->uncorrected_path};
fae52efd 1451 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1452 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1453 foreach my $idx( 0 .. $#chain-1 ) {
1454 my $source = $chain[$idx];
1455 my $target = $chain[$idx+1];
1456 $self->add_path( $source, $target, $sig.$self->ac_label )
1457 unless $self->has_path( $source, $target, $sig );
1458 }
15d2d3df 1459 }
1f7aa795 1460 $wit->clear_path;
1461 $wit->clear_uncorrected_path;
e2902068 1462}
1463
4e5a7b2c 1464=head2 calculate_ranks
1465
1466Calculate the reading ranks (that is, their aligned positions relative
1467to each other) for the graph. This can only be called on linear collations.
1468
b365fbae 1469=begin testing
1470
1471use Text::Tradition;
1472
1473my $cxfile = 't/data/Collatex-16.xml';
1474my $t = Text::Tradition->new(
1475 'name' => 'inline',
1476 'input' => 'CollateX',
1477 'file' => $cxfile,
1478 );
1479my $c = $t->collation;
1480
1481# Make an svg
bfcbcecb 1482my $table = $c->alignment_table;
1483ok( $c->has_cached_table, "Alignment table was cached" );
1484is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1485$c->calculate_ranks;
bfcbcecb 1486is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1487$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1488isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1489
1490=end testing
1491
4e5a7b2c 1492=cut
1493
910a0a6d 1494sub calculate_ranks {
1495 my $self = shift;
b365fbae 1496 # Save the existing ranks, in case we need to invalidate the cached SVG.
1497 my %existing_ranks;
ac4d7ac2 1498 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1499
1500 # Do the rankings based on the relationship equivalence graph, starting
1501 # with the start node.
1502 my $topo_start = $self->equivalence( $self->start->id );
c9bf3dbf 1503 my $node_ranks = { $topo_start => 0 };
910a0a6d 1504 my @curr_origin = ( $topo_start );
1505 # A little iterative function.
1506 while( @curr_origin ) {
359944f7 1507 @curr_origin = _assign_rank( $self->equivalence_graph,
1508 $node_ranks, @curr_origin );
910a0a6d 1509 }
1510 # Transfer our rankings from the topological graph to the real one.
1511 foreach my $r ( $self->readings ) {
359944f7 1512 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1513 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1514 } else {
63778331 1515 # Die. Find the last rank we calculated.
359944f7 1516 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1517 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1518 $self->readings;
1519 my $last = pop @all_defined;
1520 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1521 }
de51424a 1522 }
bfcbcecb 1523 # Do we need to invalidate the cached data?
1524 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1525 foreach my $r ( $self->readings ) {
7c293912 1526 next if defined( $existing_ranks{$r} )
1527 && $existing_ranks{$r} == $r->rank;
c1915ab9 1528 # Something has changed, so clear the cache
bfcbcecb 1529 $self->_clear_cache;
c1915ab9 1530 # ...and recalculate the common readings.
1531 $self->calculate_common_readings();
b365fbae 1532 last;
1533 }
1534 }
c1915ab9 1535 # The graph calculation information is now up to date.
1536 $self->_graphcalc_done(1);
8e1394aa 1537}
3a1f2523 1538
910a0a6d 1539sub _assign_rank {
c9bf3dbf 1540 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1541 # Look at each of the children of @current_nodes. If all the child's
1542 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1543 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1544 # parent gets a rank.
910a0a6d 1545 my @next_nodes;
1546 foreach my $c ( @current_nodes ) {
c9bf3dbf 1547 warn "Current reading $c has no rank!"
1548 unless exists $node_ranks->{$c};
1549 # print STDERR "Looking at child of node $c, rank "
1550 # . $node_ranks->{$c} . "\n";
1551 foreach my $child ( $graph->successors( $c ) ) {
1552 next if exists $node_ranks->{$child};
910a0a6d 1553 my $highest_rank = -1;
1554 my $skip = 0;
c9bf3dbf 1555 foreach my $parent ( $graph->predecessors( $child ) ) {
1556 if( exists $node_ranks->{$parent} ) {
1557 $highest_rank = $node_ranks->{$parent}
1558 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1559 } else {
1560 $skip = 1;
1561 last;
1562 }
1563 }
1564 next if $skip;
c9bf3dbf 1565 my $c_rank = $highest_rank + 1;
1566 # print STDERR "Assigning rank $c_rank to node $child \n";
1567 $node_ranks->{$child} = $c_rank;
910a0a6d 1568 push( @next_nodes, $child );
1569 }
1570 }
1571 return @next_nodes;
4cdd82f1 1572}
910a0a6d 1573
c1915ab9 1574sub _clear_cache {
1575 my $self = shift;
1576 $self->wipe_svg if $self->has_cached_svg;
1577 $self->wipe_table if $self->has_cached_table;
1578}
1579
1580
4e5a7b2c 1581=head2 flatten_ranks
1582
1583A convenience method for parsing collation data. Searches the graph for readings
1584with the same text at the same rank, and merges any that are found.
1585
1586=cut
1587
0e476982 1588sub flatten_ranks {
1589 my $self = shift;
1590 my %unique_rank_rdg;
bf6e338d 1591 my $changed;
0e476982 1592 foreach my $rdg ( $self->readings ) {
1593 next unless $rdg->has_rank;
1594 my $key = $rdg->rank . "||" . $rdg->text;
1595 if( exists $unique_rank_rdg{$key} ) {
1596 # Combine!
56eefa04 1597 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1598 $changed = 1;
0e476982 1599 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1600 # TODO see if this now makes a common point.
0e476982 1601 } else {
1602 $unique_rank_rdg{$key} = $rdg;
1603 }
1604 }
bf6e338d 1605 # If we merged readings, the ranks are still fine but the alignment
1606 # table is wrong. Wipe it.
1607 $self->wipe_table() if $changed;
0e476982 1608}
4633f9e4 1609
1610
d4b75f44 1611=head2 calculate_common_readings
1612
1613Goes through the graph identifying the readings that appear in every witness
1614(apart from those with lacunae at that spot.) Marks them as common and returns
1615the list.
1616
1617=begin testing
1618
1619use Text::Tradition;
1620
1621my $cxfile = 't/data/Collatex-16.xml';
1622my $t = Text::Tradition->new(
1623 'name' => 'inline',
1624 'input' => 'CollateX',
1625 'file' => $cxfile,
1626 );
1627my $c = $t->collation;
1628
1629my @common = $c->calculate_common_readings();
1630is( scalar @common, 8, "Found correct number of common readings" );
1631my @marked = sort $c->common_readings();
1632is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1633my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1634is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1635
1636=end testing
1637
1638=cut
1639
1640sub calculate_common_readings {
1641 my $self = shift;
1642 my @common;
c1915ab9 1643 map { $_->is_common( 0 ) } $self->readings;
1644 # Implicitly calls calculate_ranks
1dd07bda 1645 my $table = $self->alignment_table;
d4b75f44 1646 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1647 my @row = map { $_->{'tokens'}->[$idx]
1648 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1649 @{$table->{'alignment'}};
d4b75f44 1650 my %hash;
1651 foreach my $r ( @row ) {
1652 if( $r ) {
1653 $hash{$r->id} = $r unless $r->is_meta;
1654 } else {
1655 $hash{'UNDEF'} = $r;
1656 }
1657 }
1658 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1659 my( $r ) = values %hash;
1660 $r->is_common( 1 );
1661 push( @common, $r );
1662 }
1663 }
1664 return @common;
1665}
1666
861c3e27 1667=head2 text_from_paths
1668
1669Calculate the text array for all witnesses from the path, for later consistency
1670checking. Only to be used if there is no non-graph-based way to know the
1671original texts.
1672
1673=cut
1674
1675sub text_from_paths {
1676 my $self = shift;
1677 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1678 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1679 my @text;
1680 foreach my $r ( @readings ) {
1681 next if $r->is_meta;
1682 push( @text, $r->text );
1683 }
861c3e27 1684 $wit->text( \@text );
1685 if( $wit->is_layered ) {
5164a6f0 1686 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1687 $wit->sigil.$self->ac_label );
1688 my @uctext;
1689 foreach my $r ( @ucrdgs ) {
1690 next if $r->is_meta;
1691 push( @uctext, $r->text );
1692 }
1693 $wit->layertext( \@uctext );
861c3e27 1694 }
1695 }
1696}
0e476982 1697
4e5a7b2c 1698=head1 UTILITY FUNCTIONS
1699
1700=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1701
4e5a7b2c 1702Find the last reading that occurs in sequence before both the given readings.
414cc046 1703At the very least this should be $self->start.
4e5a7b2c 1704
1705=head2 common_successor( $reading_a, $reading_b )
1706
1707Find the first reading that occurs in sequence after both the given readings.
414cc046 1708At the very least this should be $self->end.
4e5a7b2c 1709
22222af9 1710=begin testing
1711
1712use Text::Tradition;
1713
1714my $cxfile = 't/data/Collatex-16.xml';
1715my $t = Text::Tradition->new(
1716 'name' => 'inline',
1717 'input' => 'CollateX',
1718 'file' => $cxfile,
1719 );
1720my $c = $t->collation;
1721
679f17e1 1722is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1723 'n20', "Found correct common predecessor" );
679f17e1 1724is( $c->common_successor( 'n24', 'n23' )->id,
22222af9 1725 '#END#', "Found correct common successor" );
1726
4e5a7b2c 1727is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1728 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1729is( $c->common_successor( 'n21', 'n10' )->id,
22222af9 1730 '#END#', "Found correct common successor for readings on same path" );
1731
1732=end testing
1733
1734=cut
1735
1736## Return the closest reading that is a predecessor of both the given readings.
1737sub common_predecessor {
1738 my $self = shift;
4e5a7b2c 1739 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1740 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1741}
1742
1743sub common_successor {
1744 my $self = shift;
4e5a7b2c 1745 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1746 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1747}
1748
414cc046 1749
1750# TODO think about how to do this without ranks...
027d819c 1751sub _common_in_path {
22222af9 1752 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1753 my $iter = $self->end->rank;
22222af9 1754 my @candidates;
414cc046 1755 my @last_r1 = ( $r1 );
1756 my @last_r2 = ( $r2 );
1757 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1758 my %all_seen;
414cc046 1759 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1760 while( !@candidates ) {
414cc046 1761 last unless $iter--; # Avoid looping infinitely
1762 # Iterate separately down the graph from r1 and r2
1763 my( @new_lc1, @new_lc2 );
1764 foreach my $lc ( @last_r1 ) {
1765 foreach my $p ( $lc->$dir ) {
1766 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1767 # print STDERR "Path candidate $p from $lc\n";
1768 push( @candidates, $p );
002e3600 1769 } elsif( !$all_seen{$p->id} ) {
414cc046 1770 $all_seen{$p->id} = 'r1';
1771 push( @new_lc1, $p );
1772 }
1773 }
1774 }
1775 foreach my $lc ( @last_r2 ) {
22222af9 1776 foreach my $p ( $lc->$dir ) {
414cc046 1777 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1778 # print STDERR "Path candidate $p from $lc\n";
22222af9 1779 push( @candidates, $p );
002e3600 1780 } elsif( !$all_seen{$p->id} ) {
414cc046 1781 $all_seen{$p->id} = 'r2';
1782 push( @new_lc2, $p );
22222af9 1783 }
1784 }
1785 }
414cc046 1786 @last_r1 = @new_lc1;
1787 @last_r2 = @new_lc2;
22222af9 1788 }
1789 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1790 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1791}
1792
63778331 1793sub throw {
1794 Text::Tradition::Error->throw(
1795 'ident' => 'Collation error',
1796 'message' => $_[0],
1797 );
1798}
1799
dd3b58b0 1800no Moose;
1801__PACKAGE__->meta->make_immutable;
e867486f 1802
027d819c 1803=head1 LICENSE
e867486f 1804
027d819c 1805This package is free software and is provided "as is" without express
1806or implied warranty. You can redistribute it and/or modify it under
1807the same terms as Perl itself.
e867486f 1808
027d819c 1809=head1 AUTHOR
e867486f 1810
027d819c 1811Tara L Andrews E<lt>aurum@cpan.orgE<gt>