make French morphology use Lingua objects; add tests
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 277 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
278 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 279}
784877d9 280
3a2ebbf4 281### Reading construct/destruct functions
282
283sub add_reading {
284 my( $self, $reading ) = @_;
285 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286 my %args = %$reading;
6ad2ce78 287 if( $self->tradition->has_language && !exists $args{'language'} ) {
288 $args{'language'} = $self->tradition->language;
289 }
3a2ebbf4 290 $reading = Text::Tradition::Collation::Reading->new(
291 'collation' => $self,
292 %args );
293 }
294 # First check to see if a reading with this ID exists.
295 if( $self->reading( $reading->id ) ) {
63778331 296 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 297 }
c1915ab9 298 $self->_graphcalc_done(0);
3a2ebbf4 299 $self->_add_reading( $reading->id => $reading );
300 # Once the reading has been added, put it in both graphs.
301 $self->sequence->add_vertex( $reading->id );
22222af9 302 $self->relations->add_reading( $reading->id );
3a2ebbf4 303 return $reading;
eca16057 304};
305
3a2ebbf4 306around del_reading => sub {
307 my $orig = shift;
308 my $self = shift;
309 my $arg = shift;
310
311 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
312 $arg = $arg->id;
313 }
3a2ebbf4 314 # Remove the reading from the graphs.
c1915ab9 315 $self->_graphcalc_done(0);
4e483aa5 316 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 317 $self->sequence->delete_vertex( $arg );
22222af9 318 $self->relations->delete_reading( $arg );
3a2ebbf4 319
320 # Carry on.
321 $self->$orig( $arg );
322};
7854e12e 323
4e483aa5 324=begin testing
325
326use Text::Tradition;
327
328my $cxfile = 't/data/Collatex-16.xml';
329my $t = Text::Tradition->new(
330 'name' => 'inline',
331 'input' => 'CollateX',
332 'file' => $cxfile,
333 );
334my $c = $t->collation;
335
336my $rno = scalar $c->readings;
337# Split n21 for testing purposes
338my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
339my $old_r = $c->reading( 'n21' );
340$old_r->alter_text( 'to' );
341$c->del_path( 'n20', 'n21', 'A' );
342$c->add_path( 'n20', 'n21p0', 'A' );
343$c->add_path( 'n21p0', 'n21', 'A' );
344$c->flatten_ranks();
345ok( $c->reading( 'n21p0' ), "New reading exists" );
346is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
347
679f17e1 348# Combine n3 and n4 ( with his )
4e483aa5 349$c->merge_readings( 'n3', 'n4', 1 );
350ok( !$c->reading('n4'), "Reading n4 is gone" );
351is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
352
679f17e1 353# Collapse n9 and n10 ( rood / root )
354$c->merge_readings( 'n9', 'n10' );
355ok( !$c->reading('n10'), "Reading n10 is gone" );
356is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 357
358# Combine n21 and n21p0
359my $remaining = $c->reading('n21');
360$remaining ||= $c->reading('n22'); # one of these should still exist
361$c->merge_readings( 'n21p0', $remaining, 1 );
362ok( !$c->reading('n21'), "Reading $remaining is gone" );
363is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
364
365=end testing
366
367=cut
7854e12e 368
3a2ebbf4 369sub merge_readings {
370 my $self = shift;
371
372 # We only need the IDs for adding paths to the graph, not the reading
373 # objects themselves.
4e483aa5 374 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 375 $self->_graphcalc_done(0);
3a2ebbf4 376
377 # The kept reading should inherit the paths and the relationships
378 # of the deleted reading.
379 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
380 my @vector = ( $kept );
381 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
382 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 383 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 384 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
385 $self->sequence->add_edge( @vector );
386 my $fwits = $self->sequence->get_edge_attributes( @vector );
387 @wits{keys %$fwits} = values %$fwits;
388 $self->sequence->set_edge_attributes( @vector, \%wits );
389 }
22222af9 390 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 391
392 # Do the deletion deed.
4e483aa5 393 if( $combine ) {
49d4f2ac 394 my $kept_obj = $self->reading( $kept );
4e483aa5 395 my $del_obj = $self->reading( $deleted );
396 my $joinstr = $combine_char;
397 unless( defined $joinstr ) {
398 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
399 $joinstr = $self->wordsep unless defined $joinstr;
400 }
401 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 402 }
3a2ebbf4 403 $self->del_reading( $deleted );
404}
7854e12e 405
3265b0ce 406
3a2ebbf4 407# Helper function for manipulating the graph.
408sub _stringify_args {
4e483aa5 409 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 410 $first = $first->id
411 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
412 $second = $second->id
413 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 414 return( $first, $second, @args );
3a2ebbf4 415}
df6d9812 416
4e5a7b2c 417# Helper function for manipulating the graph.
418sub _objectify_args {
419 my( $self, $first, $second, $arg ) = @_;
420 $first = $self->reading( $first )
421 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
422 $second = $self->reading( $second )
423 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
424 return( $first, $second, $arg );
425}
3a2ebbf4 426### Path logic
427
428sub add_path {
429 my $self = shift;
430
431 # We only need the IDs for adding paths to the graph, not the reading
432 # objects themselves.
433 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
434
c1915ab9 435 $self->_graphcalc_done(0);
3a2ebbf4 436 # Connect the readings
359944f7 437 unless( $self->sequence->has_edge( $source, $target ) ) {
438 $self->sequence->add_edge( $source, $target );
439 $self->relations->add_equivalence_edge( $source, $target );
440 }
3a2ebbf4 441 # Note the witness in question
442 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 443}
b15511bf 444
3a2ebbf4 445sub del_path {
446 my $self = shift;
49d4f2ac 447 my @args;
448 if( ref( $_[0] ) eq 'ARRAY' ) {
449 my $e = shift @_;
450 @args = ( @$e, @_ );
451 } else {
452 @args = @_;
453 }
3a2ebbf4 454
455 # We only need the IDs for adding paths to the graph, not the reading
456 # objects themselves.
49d4f2ac 457 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 458
c1915ab9 459 $self->_graphcalc_done(0);
3a2ebbf4 460 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 461 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 462 }
463 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
464 $self->sequence->delete_edge( $source, $target );
359944f7 465 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 466 }
784877d9 467}
468
3a2ebbf4 469
15d2d3df 470# Extra graph-alike utility
471sub has_path {
3a2ebbf4 472 my $self = shift;
473 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
474 return undef unless $self->sequence->has_edge( $source, $target );
475 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 476}
477
4e5a7b2c 478=head2 clear_witness( @sigil_list )
3a2ebbf4 479
4e5a7b2c 480Clear the given witnesses out of the collation entirely, removing references
481to them in paths, and removing readings that belong only to them. Should only
482be called via $tradition->del_witness.
3a2ebbf4 483
484=cut
485
4e5a7b2c 486sub clear_witness {
487 my( $self, @sigils ) = @_;
488
c1915ab9 489 $self->_graphcalc_done(0);
4e5a7b2c 490 # Clear the witness(es) out of the paths
491 foreach my $e ( $self->paths ) {
492 foreach my $sig ( @sigils ) {
493 $self->del_path( $e, $sig );
494 }
495 }
496
497 # Clear out the newly unused readings
498 foreach my $r ( $self->readings ) {
499 unless( $self->reading_witnesses( $r ) ) {
500 $self->del_reading( $r );
501 }
502 }
503}
3a2ebbf4 504
505sub add_relationship {
506 my $self = shift;
22222af9 507 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 508 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 509 $self->_graphcalc_done(0);
63778331 510 return @vectors;
22222af9 511}
ef9d481f 512
ca6e6095 513around qw/ get_relationship del_relationship / => sub {
514 my $orig = shift;
515 my $self = shift;
516 my @args = @_;
517 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
518 @args = @{$_[0]};
519 }
520 my( $source, $target ) = $self->_stringify_args( @args );
521 $self->$orig( $source, $target );
522};
523
22222af9 524=head2 reading_witnesses( $reading )
910a0a6d 525
22222af9 526Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 527
22222af9 528=cut
1d310495 529
1d310495 530sub reading_witnesses {
531 my( $self, $reading ) = @_;
532 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 533 # arbitrarily chosen "incoming". Thus, special-case the start node.
534 if( $reading eq $self->start ) {
535 return map { $_->sigil } $self->tradition->witnesses;
536 }
1d310495 537 my %all_witnesses;
538 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
539 my $wits = $self->sequence->get_edge_attributes( @$e );
540 @all_witnesses{ keys %$wits } = 1;
541 }
c12bb878 542 my $acstr = $self->ac_label;
543 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
544 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
545 }
1d310495 546 return keys %all_witnesses;
910a0a6d 547}
548
4e5a7b2c 549=head1 OUTPUT METHODS
8e1394aa 550
0ecb975c 551=head2 as_svg( \%options )
8e1394aa 552
0068967c 553Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 554See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 555
556=cut
557
558sub as_svg {
0ecb975c 559 my( $self, $opts ) = @_;
bfcbcecb 560 throw( "Need GraphViz installed to output SVG" )
561 unless File::Which::which( 'dot' );
e247aad1 562 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
5f082716 563 $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
e247aad1 564 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 565 my @cmd = qw/dot -Tsvg/;
566 my( $svg, $err );
567 my $dotfile = File::Temp->new();
568 ## USE FOR DEBUGGING
569 # $dotfile->unlink_on_destroy(0);
570 binmode $dotfile, ':utf8';
571 print $dotfile $self->as_dot( $opts );
572 push( @cmd, $dotfile->filename );
573 run( \@cmd, ">", binary(), \$svg );
e247aad1 574 $svg = decode_utf8( $svg );
575 $self->cached_svg( $svg ) unless $want_subgraph;
576 return $svg;
577 } else {
578 return $self->cached_svg;
b365fbae 579 }
8e1394aa 580}
581
b22576c6 582
0ecb975c 583=head2 as_dot( \%options )
b22576c6 584
0ecb975c 585Returns a string that is the collation graph expressed in dot
586(i.e. GraphViz) format. Options include:
b22576c6 587
0ecb975c 588=over 4
b22576c6 589
0ecb975c 590=item * from
b22576c6 591
0ecb975c 592=item * to
df6d9812 593
0ecb975c 594=item * color_common
595
596=back
df6d9812 597
598=cut
599
600sub as_dot {
0ecb975c 601 my( $self, $opts ) = @_;
602 my $startrank = $opts->{'from'} if $opts;
603 my $endrank = $opts->{'to'} if $opts;
604 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 605 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
606 && $self->end->rank > 100;
6648ee3d 607 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 608
b22576c6 609 # Check the arguments
610 if( $startrank ) {
611 return if $endrank && $startrank > $endrank;
612 return if $startrank > $self->end->rank;
613 }
614 if( defined $endrank ) {
615 return if $endrank < 0;
f1b3b33a 616 $endrank = undef if $endrank == $self->end->rank;
b22576c6 617 }
618
67da8d6c 619 my $graph_name = $self->tradition->name;
620 $graph_name =~ s/[^\w\s]//g;
621 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 622
623 my %graph_attrs = (
624 'rankdir' => 'LR',
625 'bgcolor' => 'none',
626 );
627 my %node_attrs = (
b8990398 628 'fontsize' => 14,
f13b5582 629 'fillcolor' => 'white',
630 'style' => 'filled',
631 'shape' => 'ellipse'
632 );
633 my %edge_attrs = (
634 'arrowhead' => 'open',
635 'color' => '#000000',
636 'fontcolor' => '#000000',
637 );
638
67da8d6c 639 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 640 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
641 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 642
b22576c6 643 # Output substitute start/end readings if necessary
644 if( $startrank ) {
645 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
646 }
647 if( $endrank ) {
648 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
649 }
b365fbae 650 if( $STRAIGHTENHACK ) {
651 ## HACK part 1
6648ee3d 652 my $startlabel = $startrank ? 'SUBSTART' : 'START';
653 $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
b365fbae 654 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
655 }
b22576c6 656 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 657 # Sort the readings by rank if we have ranks; this speeds layout.
658 my @all_readings = $self->end->has_rank
659 ? sort { $a->rank <=> $b->rank } $self->readings
660 : $self->readings;
4633f9e4 661 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 662 foreach my $reading ( @all_readings ) {
b22576c6 663 # Only output readings within our rank range.
664 next if $startrank && $reading->rank < $startrank;
665 next if $endrank && $reading->rank > $endrank;
666 $used{$reading->id} = 1;
910a0a6d 667 # Need not output nodes without separate labels
3a2ebbf4 668 next if $reading->id eq $reading->text;
d4b75f44 669 my $rattrs;
30f0df34 670 my $label = $reading->text;
629e27b0 671 $label .= '-' if $reading->join_next;
672 $label = "-$label" if $reading->join_prior;
8f9cab7b 673 $label =~ s/\"/\\\"/g;
d4b75f44 674 $rattrs->{'label'} = $label;
0ecb975c 675 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 676 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 677 }
3a2ebbf4 678
30ddc24c 679 # Add the real edges. Need to weight one edge per rank jump, in a
680 # continuous line.
b365fbae 681 # my $weighted = $self->_add_edge_weights;
b22576c6 682 my @edges = $self->paths;
3bdec618 683 my( %substart, %subend );
b22576c6 684 foreach my $edge ( @edges ) {
685 # Do we need to output this edge?
508fd430 686 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 687 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 688 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 689
b22576c6 690 # Account for the rank gap if necessary
30ddc24c 691 my $rank0 = $self->reading( $edge->[0] )->rank
692 if $self->reading( $edge->[0] )->has_rank;
693 my $rank1 = $self->reading( $edge->[1] )->rank
694 if $self->reading( $edge->[1] )->has_rank;
695 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
696 $variables->{'minlen'} = $rank1 - $rank0;
697 }
698
699 # Add the calculated edge weights
b365fbae 700 # if( exists $weighted->{$edge->[0]}
e247aad1 701 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
702 # # $variables->{'color'} = 'red';
703 # $variables->{'weight'} = 3.0;
704 # }
30ddc24c 705
508fd430 706 # EXPERIMENTAL: make edge width reflect no. of witnesses
707 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
708 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
709
f13b5582 710 my $varopts = _dot_attr_string( $variables );
711 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
712 $edge->[0], $edge->[1], $varopts );
3bdec618 713 } elsif( $used{$edge->[0]} ) {
714 $subend{$edge->[0]} = 1;
715 } elsif( $used{$edge->[1]} ) {
716 $substart{$edge->[1]} = 1;
b22576c6 717 }
df6d9812 718 }
3bdec618 719 # Add substitute start and end edges if necessary
720 foreach my $node ( keys %substart ) {
027d819c 721 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 722 my $variables = { %edge_attrs, 'label' => $witstr };
723 my $varopts = _dot_attr_string( $variables );
724 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 725 }
726 foreach my $node ( keys %subend ) {
027d819c 727 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 728 my $variables = { %edge_attrs, 'label' => $witstr };
729 my $varopts = _dot_attr_string( $variables );
730 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 731 }
b365fbae 732 # HACK part 2
733 if( $STRAIGHTENHACK ) {
6648ee3d 734 my $endlabel = $endrank ? 'SUBEND' : 'END';
735 $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 736 }
30ddc24c 737
df6d9812 738 $dot .= "}\n";
739 return $dot;
740}
741
f13b5582 742sub _dot_attr_string {
743 my( $hash ) = @_;
744 my @attrs;
745 foreach my $k ( sort keys %$hash ) {
746 my $v = $hash->{$k};
747 push( @attrs, $k.'="'.$v.'"' );
748 }
749 return( '[ ' . join( ', ', @attrs ) . ' ]' );
750}
751
30ddc24c 752sub _add_edge_weights {
753 my $self = shift;
754 # Walk the graph from START to END, choosing the successor node with
755 # the largest number of witness paths each time.
756 my $weighted = {};
757 my $curr = $self->start->id;
008fc8a6 758 my $ranked = $self->end->has_rank;
30ddc24c 759 while( $curr ne $self->end->id ) {
008fc8a6 760 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 761 my @succ = sort { $self->path_witnesses( $curr, $a )
762 <=> $self->path_witnesses( $curr, $b ) }
763 $self->sequence->successors( $curr );
764 my $next = pop @succ;
008fc8a6 765 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 766 # Try to avoid lacunae in the weighted path.
008fc8a6 767 while( @succ &&
768 ( $self->reading( $next )->is_lacuna ||
769 $nextrank - $rank > 1 ) ){
30ddc24c 770 $next = pop @succ;
771 }
772 $weighted->{$curr} = $next;
773 $curr = $next;
774 }
775 return $weighted;
776}
777
027d819c 778=head2 path_witnesses( $edge )
779
780Returns the list of sigils whose witnesses are associated with the given edge.
781The edge can be passed as either an array or an arrayref of ( $source, $target ).
782
783=cut
784
3a2ebbf4 785sub path_witnesses {
786 my( $self, @edge ) = @_;
787 # If edge is an arrayref, cope.
788 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
789 my $e = shift @edge;
790 @edge = @$e;
791 }
792 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 793 return @wits;
3a2ebbf4 794}
795
7f9f05e8 796# Helper function. Make a display label for the given witnesses, showing a.c.
797# witnesses only where the main witness is not also in the list.
027d819c 798sub _path_display_label {
508fd430 799 my $self = shift;
7f9f05e8 800 my %wits;
801 map { $wits{$_} = 1 } @_;
802
803 # If an a.c. wit is listed, remove it if the main wit is also listed.
804 # Otherwise keep it for explicit listing.
805 my $aclabel = $self->ac_label;
806 my @disp_ac;
807 foreach my $w ( sort keys %wits ) {
808 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
809 if( exists $wits{$1} ) {
810 delete $wits{$w};
811 } else {
812 push( @disp_ac, $w );
813 }
814 }
815 }
816
817 # See if we are in a majority situation.
8f9cab7b 818 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 819 if( scalar keys %wits > $maj ) {
820 unshift( @disp_ac, 'majority' );
821 return join( ', ', @disp_ac );
8f9cab7b 822 } else {
7f9f05e8 823 return join( ', ', sort keys %wits );
8f9cab7b 824 }
825}
1dd07bda 826
bf6e338d 827=head2 readings_at_rank( $rank )
1dd07bda 828
bf6e338d 829Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 830
831=cut
832
bf6e338d 833sub readings_at_rank {
1dd07bda 834 my( $self, $rank ) = @_;
bf6e338d 835 my $table = $self->alignment_table;
836 # Table rank is real rank - 1.
837 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
838 my %readings;
839 foreach my $e ( @elements ) {
840 next unless ref( $e ) eq 'HASH';
841 next unless exists $e->{'t'};
842 $readings{$e->{'t'}->id} = $e->{'t'};
843 }
844 return values %readings;
1dd07bda 845}
8f9cab7b 846
4e5a7b2c 847=head2 as_graphml
8e1394aa 848
4e5a7b2c 849Returns a GraphML representation of the collation. The GraphML will contain
850two graphs. The first expresses the attributes of the readings and the witness
851paths that link them; the second expresses the relationships that link the
852readings. This is the native transfer format for a tradition.
8e1394aa 853
56eefa04 854=begin testing
855
856use Text::Tradition;
857
858my $READINGS = 311;
859my $PATHS = 361;
860
861my $datafile = 't/data/florilegium_tei_ps.xml';
862my $tradition = Text::Tradition->new( 'input' => 'TEI',
863 'name' => 'test0',
864 'file' => $datafile,
865 'linear' => 1 );
866
867ok( $tradition, "Got a tradition object" );
868is( scalar $tradition->witnesses, 13, "Found all witnesses" );
869ok( $tradition->collation, "Tradition has a collation" );
870
871my $c = $tradition->collation;
872is( scalar $c->readings, $READINGS, "Collation has all readings" );
873is( scalar $c->paths, $PATHS, "Collation has all paths" );
874is( scalar $c->relationships, 0, "Collation has all relationships" );
875
876# Add a few relationships
877$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
878$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
879$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
880
881# Now write it to GraphML and parse it again.
882
883my $graphml = $c->as_graphml;
884my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
885is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
886is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
887is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
888
889=end testing
890
8e1394aa 891=cut
892
893sub as_graphml {
a30ca502 894 my( $self, $options ) = @_;
3d14b48e 895 $self->calculate_ranks unless $self->_graphcalc_done;
896
a30ca502 897 my $start = $options->{'from'}
898 ? $self->reading( $options->{'from'} ) : $self->start;
899 my $end = $options->{'to'}
900 ? $self->reading( $options->{'to'} ) : $self->end;
901 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
902 throw( 'Start node must be before end node' );
903 }
904 # The readings need to be ranked for this to work.
905 $start = $self->start unless $start->has_rank;
906 $end = $self->end unless $end->has_rank;
414cc046 907 my $rankoffset = 0;
908 unless( $start eq $self->start ) {
909 $rankoffset = $start->rank - 1;
910 }
a30ca502 911 my %use_readings;
912
8e1394aa 913 # Some namespaces
914 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
915 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
916 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 917 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 918
919 # Create the document and root node
920 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
921 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
922 $graphml->setDocumentElement( $root );
923 $root->setNamespace( $xsi_ns, 'xsi', 0 );
924 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 925
926 # List of attribute types to save on our objects and their corresponding
927 # GraphML types
928 my %save_types = (
929 'Str' => 'string',
930 'Int' => 'int',
931 'Bool' => 'boolean',
932 'RelationshipType' => 'string',
933 'RelationshipScope' => 'string',
934 );
935
936 # List of attribute names *not* to save on our objects.
937 # We will also not save any attribute beginning with _.
938 my %skipsave;
939 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 940
bbd064a9 941 # Add the data keys for the graph. Include an extra key 'version' for the
942 # GraphML output version.
e309421a 943 my %graph_data_keys;
944 my $gdi = 0;
bbd064a9 945 my %graph_attributes = ( 'version' => 'string' );
946 # Graph attributes include those of Tradition and those of Collation.
947 my %gattr_from;
948 my $tmeta = $self->tradition->meta;
949 my $cmeta = $self->meta;
950 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
951 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
952 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
953 next if $attr->name =~ /^_/;
954 next if $skipsave{$attr->name};
955 next unless $save_types{$attr->type_constraint->name};
956 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
957 }
958
959 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 960 $graph_data_keys{$datum} = 'dg'.$gdi++;
961 my $key = $root->addNewChild( $graphml_ns, 'key' );
962 $key->setAttribute( 'attr.name', $datum );
bbd064a9 963 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 964 $key->setAttribute( 'for', 'graph' );
965 $key->setAttribute( 'id', $graph_data_keys{$datum} );
966 }
f6066bac 967
bbd064a9 968 # Add the data keys for reading nodes
969 my %reading_attributes;
970 my $rmeta = Text::Tradition::Collation::Reading->meta;
971 foreach my $attr( $rmeta->get_all_attributes ) {
972 next if $attr->name =~ /^_/;
973 next if $skipsave{$attr->name};
974 next unless $save_types{$attr->type_constraint->name};
975 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
976 }
ef9d481f 977 my %node_data_keys;
978 my $ndi = 0;
bbd064a9 979 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 980 $node_data_keys{$datum} = 'dn'.$ndi++;
981 my $key = $root->addNewChild( $graphml_ns, 'key' );
982 $key->setAttribute( 'attr.name', $datum );
bbd064a9 983 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 984 $key->setAttribute( 'for', 'node' );
985 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 986 }
987
bbd064a9 988 # Add the data keys for edges, that is, paths and relationships. Path
989 # data does not come from a Moose class so is here manually.
ef9d481f 990 my $edi = 0;
991 my %edge_data_keys;
bbd064a9 992 my %edge_attributes = (
3a2ebbf4 993 witness => 'string', # ID/label for a path
3a2ebbf4 994 extra => 'boolean', # Path key
3a2ebbf4 995 );
bbd064a9 996 my @path_attributes = keys %edge_attributes; # track our manual additions
997 my $pmeta = Text::Tradition::Collation::Relationship->meta;
998 foreach my $attr( $pmeta->get_all_attributes ) {
999 next if $attr->name =~ /^_/;
1000 next if $skipsave{$attr->name};
1001 next unless $save_types{$attr->type_constraint->name};
1002 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1003 }
1004 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1005 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1006 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1007 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1008 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1009 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1010 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1011 }
3a2ebbf4 1012
cc31ebaa 1013 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1014 my $xmlidname = $self->tradition->name;
1015 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1016 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1017 $xmlidname = '_'.$xmlidname;
1018 }
2c669bca 1019 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1020 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1021 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1022 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1023 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1024 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1025 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1026 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1027
e309421a 1028 # Collation attribute data
bbd064a9 1029 foreach my $datum ( keys %graph_attributes ) {
1030 my $value;
1031 if( $datum eq 'version' ) {
1032 $value = '3.1';
1033 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1034 $value = $self->tradition->$datum;
1035 } else {
1036 $value = $self->$datum;
1037 }
2c669bca 1038 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1039 }
8e1394aa 1040
1041 my $node_ctr = 0;
1042 my %node_hash;
22222af9 1043 # Add our readings to the graph
3a2ebbf4 1044 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1045 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1046 ( $n->rank < $start->rank || $n->rank > $end->rank );
1047 $use_readings{$n->id} = 1;
2c669bca 1048 # Add to the main graph
1049 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1050 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1051 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1052 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1053 foreach my $d ( keys %reading_attributes ) {
255875b8 1054 my $nval = $n->$d;
cc31ebaa 1055 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1056 # Adjust the ranks within the subgraph.
cc31ebaa 1057 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1058 : $nval - $rankoffset;
414cc046 1059 }
255875b8 1060 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1061 if defined $nval;
1062 }
b15511bf 1063 }
1064
2c669bca 1065 # Add the path edges to the sequence graph
df6d9812 1066 my $edge_ctr = 0;
3a2ebbf4 1067 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1068 # We add an edge in the graphml for every witness in $e.
a30ca502 1069 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1070 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1071 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1072 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1073 # Skip any path from start to end; that witness is not in the subgraph.
1074 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1075 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1076 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1077 $node_hash{ $e->[0] },
1078 $node_hash{ $e->[1] } );
2c669bca 1079 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1080 $edge_el->setAttribute( 'source', $from );
1081 $edge_el->setAttribute( 'target', $to );
1082 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1083
1084 # It's a witness path, so add the witness
1085 my $base = $wit;
1086 my $key = $edge_data_keys{'witness'};
1087 # Is this an ante-corr witness?
1088 my $aclabel = $self->ac_label;
1089 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1090 # Keep the base witness
1091 $base = $1;
1092 # ...and record that this is an 'extra' reading path
1093 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1094 }
1095 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1096 }
1097 }
1098
cc31ebaa 1099 # Report the actual number of nodes and edges that went in
1100 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1101 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1102
22222af9 1103 # Add the relationship graph to the XML
bbd064a9 1104 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1105 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1106 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1107
94c00c71 1108 # Save and return the thing
1109 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1110 return $result;
df6d9812 1111}
1112
b15511bf 1113sub _add_graphml_data {
1114 my( $el, $key, $value ) = @_;
b15511bf 1115 return unless defined $value;
c9bf3dbf 1116 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1117 $data_el->setAttribute( 'key', $key );
1118 $data_el->appendText( $value );
8e1394aa 1119}
1120
4e5a7b2c 1121=head2 as_csv
910a0a6d 1122
1123Returns a CSV alignment table representation of the collation graph, one
2c669bca 1124row per witness (or witness uncorrected.)
910a0a6d 1125
1126=cut
1127
1128sub as_csv {
3a2ebbf4 1129 my( $self ) = @_;
1dd07bda 1130 my $table = $self->alignment_table;
82fa4d57 1131 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1132 my @result;
2c669bca 1133 # Make the header row
1134 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1135 push( @result, decode_utf8( $csv->string ) );
1136 # Make the rest of the rows
1137 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1138 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1139 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1140 $csv->combine( @row );
910a0a6d 1141 push( @result, decode_utf8( $csv->string ) );
1142 }
3a2ebbf4 1143 return join( "\n", @result );
910a0a6d 1144}
1145
1dd07bda 1146=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1147
566f4595 1148Return a reference to an alignment table, in a slightly enhanced CollateX
1149format which looks like this:
1150
1151 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1152 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1153 { witness => "SIG2",
4e5a7b2c 1154 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1155 ... ],
1156 length => TEXTLEN };
1157
1158If $use_refs is set to 1, the reading object is returned in the table
1159instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1160
1161If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1162keys have a true hash value will be included.
2c669bca 1163
1164=cut
9f3ba6f7 1165
1dd07bda 1166sub alignment_table {
1167 my( $self ) = @_;
c1915ab9 1168 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1169 return $self->cached_table if $self->has_cached_table;
1170
0ecb975c 1171 # Make sure we can do this
1172 throw( "Need a linear graph in order to make an alignment table" )
1173 unless $self->linear;
1174 $self->calculate_ranks unless $self->end->has_rank;
1175
2c669bca 1176 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1177 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1178 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1179 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1180 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1181 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1182 push( @{$table->{'alignment'}},
1183 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1184 if( $wit->is_layered ) {
1185 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1186 $wit->sigil.$self->ac_label );
1dd07bda 1187 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1188 push( @{$table->{'alignment'}},
1189 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1190 }
1191 }
1dd07bda 1192 $self->cached_table( $table );
1193 return $table;
910a0a6d 1194}
1195
1196sub _make_witness_row {
1dd07bda 1197 my( $path, $positions ) = @_;
910a0a6d 1198 my %char_hash;
1199 map { $char_hash{$_} = undef } @$positions;
2c669bca 1200 my $debug = 0;
910a0a6d 1201 foreach my $rdg ( @$path ) {
eca16057 1202 my $rtext = $rdg->text;
1203 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1204 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1205 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1206 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1207 }
1208 my @row = map { $char_hash{$_} } @$positions;
eca16057 1209 # Fill in lacuna markers for undef spots in the row
1210 my $last_el = shift @row;
1211 my @filled_row = ( $last_el );
1212 foreach my $el ( @row ) {
0e476982 1213 # If we are using node reference, make the lacuna node appear many times
1214 # in the table. If not, use the lacuna tag.
1dd07bda 1215 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1216 $el = $last_el;
eca16057 1217 }
1218 push( @filled_row, $el );
1219 $last_el = $el;
1220 }
1221 return @filled_row;
910a0a6d 1222}
1223
4e5a7b2c 1224=head1 NAVIGATION METHODS
910a0a6d 1225
4e5a7b2c 1226=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1227
1228Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1229with $last, for the witness given in $sigil. If a $backup sigil is
1230specified (e.g. when walking a layered witness), it will be used wherever
1231no $sigil path exists. If there is a base text reading, that will be
1232used wherever no path exists for $sigil or $backup.
e2902068 1233
1234=cut
1235
910a0a6d 1236# TODO Think about returning some lazy-eval iterator.
b0b4421a 1237# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1238
e2902068 1239sub reading_sequence {
861c3e27 1240 my( $self, $start, $end, $witness ) = @_;
e2902068 1241
930ff666 1242 $witness = $self->baselabel unless $witness;
e2902068 1243 my @readings = ( $start );
1244 my %seen;
1245 my $n = $start;
3a2ebbf4 1246 while( $n && $n->id ne $end->id ) {
1247 if( exists( $seen{$n->id} ) ) {
63778331 1248 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1249 }
3a2ebbf4 1250 $seen{$n->id} = 1;
910a0a6d 1251
861c3e27 1252 my $next = $self->next_reading( $n, $witness );
44771cf2 1253 unless( $next ) {
63778331 1254 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1255 }
910a0a6d 1256 push( @readings, $next );
1257 $n = $next;
e2902068 1258 }
1259 # Check that the last reading is our end reading.
1260 my $last = $readings[$#readings];
63778331 1261 throw( "Last reading found from " . $start->text .
1262 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1263 unless $last->id eq $end->id;
e2902068 1264
1265 return @readings;
1266}
1267
4e5a7b2c 1268=head2 next_reading( $reading, $sigil );
8e1394aa 1269
4a8828f0 1270Returns the reading that follows the given reading along the given witness
930ff666 1271path.
8e1394aa 1272
1273=cut
1274
4a8828f0 1275sub next_reading {
e2902068 1276 # Return the successor via the corresponding path.
8e1394aa 1277 my $self = shift;
3a2ebbf4 1278 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1279 return undef unless $answer;
3a2ebbf4 1280 return $self->reading( $answer );
8e1394aa 1281}
1282
4e5a7b2c 1283=head2 prior_reading( $reading, $sigil )
8e1394aa 1284
4a8828f0 1285Returns the reading that precedes the given reading along the given witness
930ff666 1286path.
8e1394aa 1287
1288=cut
1289
4a8828f0 1290sub prior_reading {
e2902068 1291 # Return the predecessor via the corresponding path.
8e1394aa 1292 my $self = shift;
3a2ebbf4 1293 my $answer = $self->_find_linked_reading( 'prior', @_ );
1294 return $self->reading( $answer );
8e1394aa 1295}
1296
4a8828f0 1297sub _find_linked_reading {
861c3e27 1298 my( $self, $direction, $node, $path ) = @_;
1299
1300 # Get a backup if we are dealing with a layered witness
1301 my $alt_path;
1302 my $aclabel = $self->ac_label;
1303 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1304 $alt_path = $1;
1305 }
1306
e2902068 1307 my @linked_paths = $direction eq 'next'
3a2ebbf4 1308 ? $self->sequence->edges_from( $node )
1309 : $self->sequence->edges_to( $node );
e2902068 1310 return undef unless scalar( @linked_paths );
8e1394aa 1311
e2902068 1312 # We have to find the linked path that contains all of the
1313 # witnesses supplied in $path.
1314 my( @path_wits, @alt_path_wits );
4e5a7b2c 1315 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1316 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1317 my $base_le;
1318 my $alt_le;
1319 foreach my $le ( @linked_paths ) {
3a2ebbf4 1320 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1321 $base_le = $le;
910a0a6d 1322 }
508fd430 1323 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1324 if( _is_within( \@path_wits, \@le_wits ) ) {
1325 # This is the right path.
1326 return $direction eq 'next' ? $le->[1] : $le->[0];
1327 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1328 $alt_le = $le;
1329 }
8e1394aa 1330 }
e2902068 1331 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1332 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1333 if $alt_le;
e2902068 1334
1335 # Got this far? Return the base path if it exists.
3a2ebbf4 1336 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1337 if $base_le;
e2902068 1338
1339 # Got this far? We have no appropriate path.
2c669bca 1340 warn "Could not find $direction node from " . $node->id
910a0a6d 1341 . " along path $path";
8e1394aa 1342 return undef;
1343}
1344
4a8828f0 1345# Some set logic.
1346sub _is_within {
1347 my( $set1, $set2 ) = @_;
7854e12e 1348 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1349 foreach my $el ( @$set1 ) {
910a0a6d 1350 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1351 }
1352 return $ret;
1353}
1354
4e5a7b2c 1355# Return the string that joins together a list of witnesses for
1356# display on a single path.
1357sub _witnesses_of_label {
1358 my( $self, $label ) = @_;
1359 my $regex = $self->wit_list_separator;
1360 my @answer = split( /\Q$regex\E/, $label );
1361 return @answer;
b0b4421a 1362}
1363
d4b75f44 1364=head2 common_readings
1365
1366Returns the list of common readings in the graph (i.e. those readings that are
1367shared by all non-lacunose witnesses.)
1368
1369=cut
1370
1371sub common_readings {
1372 my $self = shift;
1373 my @common = grep { $_->is_common } $self->readings;
1374 return @common;
1375}
1376
fae52efd 1377=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1378
1379Returns the text of a witness (plus its backup, if we are using a layer)
1380as stored in the collation. The text is returned as a string, where the
1381individual readings are joined with spaces and the meta-readings (e.g.
1382lacunae) are omitted. Optional specification of $start and $end allows
1383the generation of a subset of the witness text.
4e5a7b2c 1384
b0b4421a 1385=cut
1386
1387sub path_text {
861c3e27 1388 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1389 $start = $self->start unless $start;
1390 $end = $self->end unless $end;
861c3e27 1391 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1392 my $pathtext = '';
1393 my $last;
1394 foreach my $r ( @path ) {
6ad2ce78 1395 unless ( $r->join_prior || !$last || $last->join_next ) {
1396 $pathtext .= ' ';
1397 }
1398 $pathtext .= $r->text;
629e27b0 1399 $last = $r;
1400 }
1401 return $pathtext;
b0b4421a 1402}
4e5a7b2c 1403
1404=head1 INITIALIZATION METHODS
1405
1406These are mostly for use by parsers.
1407
1408=head2 make_witness_path( $witness )
1409
1410Link the array of readings contained in $witness->path (and in
1411$witness->uncorrected_path if it exists) into collation paths.
1412Clear out the arrays when finished.
de51424a 1413
4e5a7b2c 1414=head2 make_witness_paths
1415
1416Call make_witness_path for all witnesses in the tradition.
1417
1418=cut
930ff666 1419
7e450e44 1420# For use when a collation is constructed from a base text and an apparatus.
1421# We have the sequences of readings and just need to add path edges.
1f7aa795 1422# When we are done, clear out the witness path attributes, as they are no
1423# longer needed.
1424# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1425
6a222840 1426sub make_witness_paths {
1427 my( $self ) = @_;
910a0a6d 1428 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1429 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1430 $self->make_witness_path( $wit );
7854e12e 1431 }
7854e12e 1432}
1433
6a222840 1434sub make_witness_path {
7854e12e 1435 my( $self, $wit ) = @_;
1436 my @chain = @{$wit->path};
15d2d3df 1437 my $sig = $wit->sigil;
fae52efd 1438 # Add start and end if necessary
1439 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1440 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1441 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1442 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1443 }
1f7aa795 1444 if( $wit->is_layered ) {
d9e873d0 1445 @chain = @{$wit->uncorrected_path};
fae52efd 1446 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1447 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1448 foreach my $idx( 0 .. $#chain-1 ) {
1449 my $source = $chain[$idx];
1450 my $target = $chain[$idx+1];
1451 $self->add_path( $source, $target, $sig.$self->ac_label )
1452 unless $self->has_path( $source, $target, $sig );
1453 }
15d2d3df 1454 }
1f7aa795 1455 $wit->clear_path;
1456 $wit->clear_uncorrected_path;
e2902068 1457}
1458
4e5a7b2c 1459=head2 calculate_ranks
1460
1461Calculate the reading ranks (that is, their aligned positions relative
1462to each other) for the graph. This can only be called on linear collations.
1463
b365fbae 1464=begin testing
1465
1466use Text::Tradition;
1467
1468my $cxfile = 't/data/Collatex-16.xml';
1469my $t = Text::Tradition->new(
1470 'name' => 'inline',
1471 'input' => 'CollateX',
1472 'file' => $cxfile,
1473 );
1474my $c = $t->collation;
1475
1476# Make an svg
bfcbcecb 1477my $table = $c->alignment_table;
1478ok( $c->has_cached_table, "Alignment table was cached" );
1479is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1480$c->calculate_ranks;
bfcbcecb 1481is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1482$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1483isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1484
1485=end testing
1486
4e5a7b2c 1487=cut
1488
910a0a6d 1489sub calculate_ranks {
1490 my $self = shift;
b365fbae 1491 # Save the existing ranks, in case we need to invalidate the cached SVG.
1492 my %existing_ranks;
ac4d7ac2 1493 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1494
1495 # Do the rankings based on the relationship equivalence graph, starting
1496 # with the start node.
1497 my $topo_start = $self->equivalence( $self->start->id );
c9bf3dbf 1498 my $node_ranks = { $topo_start => 0 };
910a0a6d 1499 my @curr_origin = ( $topo_start );
1500 # A little iterative function.
1501 while( @curr_origin ) {
359944f7 1502 @curr_origin = _assign_rank( $self->equivalence_graph,
1503 $node_ranks, @curr_origin );
910a0a6d 1504 }
1505 # Transfer our rankings from the topological graph to the real one.
1506 foreach my $r ( $self->readings ) {
359944f7 1507 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1508 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1509 } else {
63778331 1510 # Die. Find the last rank we calculated.
359944f7 1511 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1512 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1513 $self->readings;
1514 my $last = pop @all_defined;
1515 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1516 }
de51424a 1517 }
bfcbcecb 1518 # Do we need to invalidate the cached data?
1519 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1520 foreach my $r ( $self->readings ) {
7c293912 1521 next if defined( $existing_ranks{$r} )
1522 && $existing_ranks{$r} == $r->rank;
c1915ab9 1523 # Something has changed, so clear the cache
bfcbcecb 1524 $self->_clear_cache;
c1915ab9 1525 # ...and recalculate the common readings.
1526 $self->calculate_common_readings();
b365fbae 1527 last;
1528 }
1529 }
c1915ab9 1530 # The graph calculation information is now up to date.
1531 $self->_graphcalc_done(1);
8e1394aa 1532}
3a1f2523 1533
910a0a6d 1534sub _assign_rank {
c9bf3dbf 1535 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1536 # Look at each of the children of @current_nodes. If all the child's
1537 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1538 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1539 # parent gets a rank.
910a0a6d 1540 my @next_nodes;
1541 foreach my $c ( @current_nodes ) {
c9bf3dbf 1542 warn "Current reading $c has no rank!"
1543 unless exists $node_ranks->{$c};
1544 # print STDERR "Looking at child of node $c, rank "
1545 # . $node_ranks->{$c} . "\n";
1546 foreach my $child ( $graph->successors( $c ) ) {
1547 next if exists $node_ranks->{$child};
910a0a6d 1548 my $highest_rank = -1;
1549 my $skip = 0;
c9bf3dbf 1550 foreach my $parent ( $graph->predecessors( $child ) ) {
1551 if( exists $node_ranks->{$parent} ) {
1552 $highest_rank = $node_ranks->{$parent}
1553 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1554 } else {
1555 $skip = 1;
1556 last;
1557 }
1558 }
1559 next if $skip;
c9bf3dbf 1560 my $c_rank = $highest_rank + 1;
1561 # print STDERR "Assigning rank $c_rank to node $child \n";
1562 $node_ranks->{$child} = $c_rank;
910a0a6d 1563 push( @next_nodes, $child );
1564 }
1565 }
1566 return @next_nodes;
4cdd82f1 1567}
910a0a6d 1568
c1915ab9 1569sub _clear_cache {
1570 my $self = shift;
1571 $self->wipe_svg if $self->has_cached_svg;
1572 $self->wipe_table if $self->has_cached_table;
1573}
1574
1575
4e5a7b2c 1576=head2 flatten_ranks
1577
1578A convenience method for parsing collation data. Searches the graph for readings
1579with the same text at the same rank, and merges any that are found.
1580
1581=cut
1582
0e476982 1583sub flatten_ranks {
1584 my $self = shift;
1585 my %unique_rank_rdg;
bf6e338d 1586 my $changed;
0e476982 1587 foreach my $rdg ( $self->readings ) {
1588 next unless $rdg->has_rank;
1589 my $key = $rdg->rank . "||" . $rdg->text;
1590 if( exists $unique_rank_rdg{$key} ) {
1591 # Combine!
56eefa04 1592 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1593 $changed = 1;
0e476982 1594 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1595 # TODO see if this now makes a common point.
0e476982 1596 } else {
1597 $unique_rank_rdg{$key} = $rdg;
1598 }
1599 }
bf6e338d 1600 # If we merged readings, the ranks are still fine but the alignment
1601 # table is wrong. Wipe it.
1602 $self->wipe_table() if $changed;
0e476982 1603}
4633f9e4 1604
1605
d4b75f44 1606=head2 calculate_common_readings
1607
1608Goes through the graph identifying the readings that appear in every witness
1609(apart from those with lacunae at that spot.) Marks them as common and returns
1610the list.
1611
1612=begin testing
1613
1614use Text::Tradition;
1615
1616my $cxfile = 't/data/Collatex-16.xml';
1617my $t = Text::Tradition->new(
1618 'name' => 'inline',
1619 'input' => 'CollateX',
1620 'file' => $cxfile,
1621 );
1622my $c = $t->collation;
1623
1624my @common = $c->calculate_common_readings();
1625is( scalar @common, 8, "Found correct number of common readings" );
1626my @marked = sort $c->common_readings();
1627is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1628my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1629is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1630
1631=end testing
1632
1633=cut
1634
1635sub calculate_common_readings {
1636 my $self = shift;
1637 my @common;
c1915ab9 1638 map { $_->is_common( 0 ) } $self->readings;
1639 # Implicitly calls calculate_ranks
1dd07bda 1640 my $table = $self->alignment_table;
d4b75f44 1641 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1642 my @row = map { $_->{'tokens'}->[$idx]
1643 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1644 @{$table->{'alignment'}};
d4b75f44 1645 my %hash;
1646 foreach my $r ( @row ) {
1647 if( $r ) {
1648 $hash{$r->id} = $r unless $r->is_meta;
1649 } else {
1650 $hash{'UNDEF'} = $r;
1651 }
1652 }
1653 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1654 my( $r ) = values %hash;
1655 $r->is_common( 1 );
1656 push( @common, $r );
1657 }
1658 }
1659 return @common;
1660}
1661
861c3e27 1662=head2 text_from_paths
1663
1664Calculate the text array for all witnesses from the path, for later consistency
1665checking. Only to be used if there is no non-graph-based way to know the
1666original texts.
1667
1668=cut
1669
1670sub text_from_paths {
1671 my $self = shift;
1672 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1673 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1674 my @text;
1675 foreach my $r ( @readings ) {
1676 next if $r->is_meta;
1677 push( @text, $r->text );
1678 }
861c3e27 1679 $wit->text( \@text );
1680 if( $wit->is_layered ) {
5164a6f0 1681 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1682 $wit->sigil.$self->ac_label );
1683 my @uctext;
1684 foreach my $r ( @ucrdgs ) {
1685 next if $r->is_meta;
1686 push( @uctext, $r->text );
1687 }
1688 $wit->layertext( \@uctext );
861c3e27 1689 }
1690 }
1691}
0e476982 1692
4e5a7b2c 1693=head1 UTILITY FUNCTIONS
1694
1695=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1696
4e5a7b2c 1697Find the last reading that occurs in sequence before both the given readings.
414cc046 1698At the very least this should be $self->start.
4e5a7b2c 1699
1700=head2 common_successor( $reading_a, $reading_b )
1701
1702Find the first reading that occurs in sequence after both the given readings.
414cc046 1703At the very least this should be $self->end.
4e5a7b2c 1704
22222af9 1705=begin testing
1706
1707use Text::Tradition;
1708
1709my $cxfile = 't/data/Collatex-16.xml';
1710my $t = Text::Tradition->new(
1711 'name' => 'inline',
1712 'input' => 'CollateX',
1713 'file' => $cxfile,
1714 );
1715my $c = $t->collation;
1716
679f17e1 1717is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1718 'n20', "Found correct common predecessor" );
679f17e1 1719is( $c->common_successor( 'n24', 'n23' )->id,
22222af9 1720 '#END#', "Found correct common successor" );
1721
4e5a7b2c 1722is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1723 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1724is( $c->common_successor( 'n21', 'n10' )->id,
22222af9 1725 '#END#', "Found correct common successor for readings on same path" );
1726
1727=end testing
1728
1729=cut
1730
1731## Return the closest reading that is a predecessor of both the given readings.
1732sub common_predecessor {
1733 my $self = shift;
4e5a7b2c 1734 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1735 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1736}
1737
1738sub common_successor {
1739 my $self = shift;
4e5a7b2c 1740 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1741 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1742}
1743
414cc046 1744
1745# TODO think about how to do this without ranks...
027d819c 1746sub _common_in_path {
22222af9 1747 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1748 my $iter = $self->end->rank;
22222af9 1749 my @candidates;
414cc046 1750 my @last_r1 = ( $r1 );
1751 my @last_r2 = ( $r2 );
1752 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1753 my %all_seen;
414cc046 1754 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1755 while( !@candidates ) {
414cc046 1756 last unless $iter--; # Avoid looping infinitely
1757 # Iterate separately down the graph from r1 and r2
1758 my( @new_lc1, @new_lc2 );
1759 foreach my $lc ( @last_r1 ) {
1760 foreach my $p ( $lc->$dir ) {
1761 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1762 # print STDERR "Path candidate $p from $lc\n";
1763 push( @candidates, $p );
002e3600 1764 } elsif( !$all_seen{$p->id} ) {
414cc046 1765 $all_seen{$p->id} = 'r1';
1766 push( @new_lc1, $p );
1767 }
1768 }
1769 }
1770 foreach my $lc ( @last_r2 ) {
22222af9 1771 foreach my $p ( $lc->$dir ) {
414cc046 1772 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1773 # print STDERR "Path candidate $p from $lc\n";
22222af9 1774 push( @candidates, $p );
002e3600 1775 } elsif( !$all_seen{$p->id} ) {
414cc046 1776 $all_seen{$p->id} = 'r2';
1777 push( @new_lc2, $p );
22222af9 1778 }
1779 }
1780 }
414cc046 1781 @last_r1 = @new_lc1;
1782 @last_r2 = @new_lc2;
22222af9 1783 }
1784 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1785 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1786}
1787
63778331 1788sub throw {
1789 Text::Tradition::Error->throw(
1790 'ident' => 'Collation error',
1791 'message' => $_[0],
1792 );
1793}
1794
dd3b58b0 1795no Moose;
1796__PACKAGE__->meta->make_immutable;
e867486f 1797
027d819c 1798=head1 LICENSE
e867486f 1799
027d819c 1800This package is free software and is provided "as is" without express
1801or implied warranty. You can redistribute it and/or modify it under
1802the same terms as Perl itself.
e867486f 1803
027d819c 1804=head1 AUTHOR
e867486f 1805
027d819c 1806Tara L Andrews E<lt>aurum@cpan.orgE<gt>