fix exponential loop in common_in_path
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
df6d9812 12use XML::LibXML;
a344ffcf 13use XML::LibXML::XPathContext;
dd3b58b0 14use Moose;
15
3a2ebbf4 16has 'sequence' => (
d047cd52 17 is => 'ro',
3a2ebbf4 18 isa => 'Graph',
19 default => sub { Graph->new() },
d047cd52 20 handles => {
3a2ebbf4 21 paths => 'edges',
d047cd52 22 },
d047cd52 23 );
3a2ebbf4 24
25has 'relations' => (
26 is => 'ro',
22222af9 27 isa => 'Text::Tradition::Collation::RelationshipStore',
28 handles => {
29 relationships => 'relationships',
30 related_readings => 'related_readings',
202ccb18 31 get_relationship => 'get_relationship',
ee801e17 32 del_relationship => 'del_relationship',
22222af9 33 },
34 writer => '_set_relations',
3a2ebbf4 35 );
dd3b58b0 36
3a2ebbf4 37has 'tradition' => (
38 is => 'ro',
d047cd52 39 isa => 'Text::Tradition',
8cfd99c4 40 writer => '_set_tradition',
8d9a1cd8 41 weak_ref => 1,
d047cd52 42 );
dd3b58b0 43
3a2ebbf4 44has 'readings' => (
45 isa => 'HashRef[Text::Tradition::Collation::Reading]',
46 traits => ['Hash'],
47 handles => {
48 reading => 'get',
49 _add_reading => 'set',
50 del_reading => 'delete',
51 has_reading => 'exists',
52 readings => 'values',
53 },
54 default => sub { {} },
55 );
910a0a6d 56
4a8828f0 57has 'wit_list_separator' => (
7854e12e 58 is => 'rw',
59 isa => 'Str',
60 default => ', ',
61 );
62
63has 'baselabel' => (
64 is => 'rw',
65 isa => 'Str',
66 default => 'base text',
67 );
4a8828f0 68
15d2d3df 69has 'linear' => (
70 is => 'rw',
71 isa => 'Bool',
72 default => 1,
73 );
c84275ff 74
ef9d481f 75has 'ac_label' => (
76 is => 'rw',
77 isa => 'Str',
78 default => ' (a.c.)',
79 );
3a2ebbf4 80
4e483aa5 81has 'wordsep' => (
82 is => 'rw',
83 isa => 'Str',
84 default => ' ',
85 );
86
3a2ebbf4 87has 'start' => (
88 is => 'ro',
89 isa => 'Text::Tradition::Collation::Reading',
90 writer => '_set_start',
91 weak_ref => 1,
92 );
93
94has 'end' => (
95 is => 'ro',
96 isa => 'Text::Tradition::Collation::Reading',
97 writer => '_set_end',
98 weak_ref => 1,
99 );
b365fbae 100
101has 'cached_svg' => (
102 is => 'rw',
103 isa => 'Str',
104 predicate => 'has_cached_svg',
105 clearer => 'wipe_svg',
106 );
1dd07bda 107
108has 'cached_table' => (
109 is => 'rw',
110 isa => 'HashRef',
111 predicate => 'has_cached_table',
112 clearer => 'wipe_table',
113 );
c1915ab9 114
115has '_graphcalc_done' => (
116 is => 'rw',
117 isa => 'Bool',
118 default => undef,
119 );
1f563ac3 120
4e5a7b2c 121=head1 NAME
122
123Text::Tradition::Collation - a software model for a text collation
124
125=head1 SYNOPSIS
126
127 use Text::Tradition;
128 my $t = Text::Tradition->new(
129 'name' => 'this is a text',
130 'input' => 'TEI',
131 'file' => '/path/to/tei_parallel_seg_file.xml' );
132
133 my $c = $t->collation;
134 my @readings = $c->readings;
135 my @paths = $c->paths;
136 my @relationships = $c->relationships;
137
138 my $svg_variant_graph = $t->collation->as_svg();
139
140=head1 DESCRIPTION
141
142Text::Tradition is a library for representation and analysis of collated
143texts, particularly medieval ones. The Collation is the central feature of
144a Tradition, where the text, its sequence of readings, and its relationships
145between readings are actually kept.
146
147=head1 CONSTRUCTOR
148
149=head2 new
150
151The constructor. Takes a hash or hashref of the following arguments:
152
153=over
154
155=item * tradition - The Text::Tradition object to which the collation
156belongs. Required.
157
158=item * linear - Whether the collation should be linear; that is, whether
159transposed readings should be treated as two linked readings rather than one,
160and therefore whether the collation graph is acyclic. Defaults to true.
161
4e5a7b2c 162=item * baselabel - The default label for the path taken by a base text
163(if any). Defaults to 'base text'.
164
165=item * wit_list_separator - The string to join a list of witnesses for
166purposes of making labels in display graphs. Defaults to ', '.
167
168=item * ac_label - The extra label to tack onto a witness sigil when
169representing another layer of path for the given witness - that is, when
170a text has more than one possible reading due to scribal corrections or
171the like. Defaults to ' (a.c.)'.
172
4e483aa5 173=item * wordsep - The string used to separate words in the original text.
174Defaults to ' '.
175
4e5a7b2c 176=back
177
178=head1 ACCESSORS
179
180=head2 tradition
181
182=head2 linear
183
4e5a7b2c 184=head2 wit_list_separator
185
186=head2 baselabel
187
188=head2 ac_label
189
4e483aa5 190=head2 wordsep
191
4e5a7b2c 192Simple accessors for collation attributes.
193
194=head2 start
195
196The meta-reading at the start of every witness path.
197
198=head2 end
199
200The meta-reading at the end of every witness path.
201
202=head2 readings
203
204Returns all Reading objects in the graph.
205
206=head2 reading( $id )
207
208Returns the Reading object corresponding to the given ID.
209
210=head2 add_reading( $reading_args )
211
212Adds a new reading object to the collation.
213See L<Text::Tradition::Collation::Reading> for the available arguments.
214
215=head2 del_reading( $object_or_id )
216
217Removes the given reading from the collation, implicitly removing its
218paths and relationships.
219
4e483aa5 220=head2 merge_readings( $main, $second, $concatenate, $with_str )
221
222Merges the $second reading into the $main one. If $concatenate is true, then
223the merged node will carry the text of both readings, concatenated with either
224$with_str (if specified) or a sensible default (the empty string if the
225appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 226
4e483aa5 227The first two arguments may be either readings or reading IDs.
4e5a7b2c 228
229=head2 has_reading( $id )
230
231Predicate to see whether a given reading ID is in the graph.
232
233=head2 reading_witnesses( $object_or_id )
234
235Returns a list of sigils whose witnesses contain the reading.
236
237=head2 paths
238
239Returns all reading paths within the document - that is, all edges in the
240collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
241
242=head2 add_path( $source, $target, $sigil )
243
244Links the given readings in the collation in sequence, under the given witness
245sigil. The readings may be specified by object or ID.
246
247=head2 del_path( $source, $target, $sigil )
248
249Links the given readings in the collation in sequence, under the given witness
250sigil. The readings may be specified by object or ID.
251
252=head2 has_path( $source, $target );
253
254Returns true if the two readings are linked in sequence in any witness.
255The readings may be specified by object or ID.
256
257=head2 relationships
258
259Returns all Relationship objects in the collation.
260
261=head2 add_relationship( $reading, $other_reading, $options )
262
263Adds a new relationship of the type given in $options between the two readings,
264which may be specified by object or ID. Returns a value of ( $status, @vectors)
265where $status is true on success, and @vectors is a list of relationship edges
266that were ultimately added.
267See L<Text::Tradition::Collation::Relationship> for the available options.
268
269=cut
dd3b58b0 270
d047cd52 271sub BUILD {
3a2ebbf4 272 my $self = shift;
22222af9 273 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 274 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
275 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 276}
784877d9 277
3a2ebbf4 278### Reading construct/destruct functions
279
280sub add_reading {
281 my( $self, $reading ) = @_;
282 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
283 my %args = %$reading;
284 $reading = Text::Tradition::Collation::Reading->new(
285 'collation' => $self,
286 %args );
287 }
288 # First check to see if a reading with this ID exists.
289 if( $self->reading( $reading->id ) ) {
63778331 290 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 291 }
c1915ab9 292 $self->_graphcalc_done(0);
3a2ebbf4 293 $self->_add_reading( $reading->id => $reading );
294 # Once the reading has been added, put it in both graphs.
295 $self->sequence->add_vertex( $reading->id );
22222af9 296 $self->relations->add_reading( $reading->id );
3a2ebbf4 297 return $reading;
eca16057 298};
299
3a2ebbf4 300around del_reading => sub {
301 my $orig = shift;
302 my $self = shift;
303 my $arg = shift;
304
305 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
306 $arg = $arg->id;
307 }
3a2ebbf4 308 # Remove the reading from the graphs.
c1915ab9 309 $self->_graphcalc_done(0);
4e483aa5 310 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 311 $self->sequence->delete_vertex( $arg );
22222af9 312 $self->relations->delete_reading( $arg );
3a2ebbf4 313
314 # Carry on.
315 $self->$orig( $arg );
316};
7854e12e 317
4e483aa5 318=begin testing
319
320use Text::Tradition;
321
322my $cxfile = 't/data/Collatex-16.xml';
323my $t = Text::Tradition->new(
324 'name' => 'inline',
325 'input' => 'CollateX',
326 'file' => $cxfile,
327 );
328my $c = $t->collation;
329
330my $rno = scalar $c->readings;
331# Split n21 for testing purposes
332my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
333my $old_r = $c->reading( 'n21' );
334$old_r->alter_text( 'to' );
335$c->del_path( 'n20', 'n21', 'A' );
336$c->add_path( 'n20', 'n21p0', 'A' );
337$c->add_path( 'n21p0', 'n21', 'A' );
338$c->flatten_ranks();
339ok( $c->reading( 'n21p0' ), "New reading exists" );
340is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
341
679f17e1 342# Combine n3 and n4 ( with his )
4e483aa5 343$c->merge_readings( 'n3', 'n4', 1 );
344ok( !$c->reading('n4'), "Reading n4 is gone" );
345is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
346
679f17e1 347# Collapse n9 and n10 ( rood / root )
348$c->merge_readings( 'n9', 'n10' );
349ok( !$c->reading('n10'), "Reading n10 is gone" );
350is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 351
352# Combine n21 and n21p0
353my $remaining = $c->reading('n21');
354$remaining ||= $c->reading('n22'); # one of these should still exist
355$c->merge_readings( 'n21p0', $remaining, 1 );
356ok( !$c->reading('n21'), "Reading $remaining is gone" );
357is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
358
359=end testing
360
361=cut
7854e12e 362
3a2ebbf4 363sub merge_readings {
364 my $self = shift;
365
366 # We only need the IDs for adding paths to the graph, not the reading
367 # objects themselves.
4e483aa5 368 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 369 $self->_graphcalc_done(0);
3a2ebbf4 370
371 # The kept reading should inherit the paths and the relationships
372 # of the deleted reading.
373 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
374 my @vector = ( $kept );
375 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
376 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 377 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 378 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
379 $self->sequence->add_edge( @vector );
380 my $fwits = $self->sequence->get_edge_attributes( @vector );
381 @wits{keys %$fwits} = values %$fwits;
382 $self->sequence->set_edge_attributes( @vector, \%wits );
383 }
22222af9 384 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 385
386 # Do the deletion deed.
4e483aa5 387 if( $combine ) {
49d4f2ac 388 my $kept_obj = $self->reading( $kept );
4e483aa5 389 my $del_obj = $self->reading( $deleted );
390 my $joinstr = $combine_char;
391 unless( defined $joinstr ) {
392 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
393 $joinstr = $self->wordsep unless defined $joinstr;
394 }
395 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 396 }
3a2ebbf4 397 $self->del_reading( $deleted );
398}
7854e12e 399
3265b0ce 400
3a2ebbf4 401# Helper function for manipulating the graph.
402sub _stringify_args {
4e483aa5 403 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 404 $first = $first->id
405 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
406 $second = $second->id
407 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 408 return( $first, $second, @args );
3a2ebbf4 409}
df6d9812 410
4e5a7b2c 411# Helper function for manipulating the graph.
412sub _objectify_args {
413 my( $self, $first, $second, $arg ) = @_;
414 $first = $self->reading( $first )
415 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
416 $second = $self->reading( $second )
417 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
418 return( $first, $second, $arg );
419}
3a2ebbf4 420### Path logic
421
422sub add_path {
423 my $self = shift;
424
425 # We only need the IDs for adding paths to the graph, not the reading
426 # objects themselves.
427 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
428
c1915ab9 429 $self->_graphcalc_done(0);
3a2ebbf4 430 # Connect the readings
431 $self->sequence->add_edge( $source, $target );
432 # Note the witness in question
433 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 434};
435
3a2ebbf4 436sub del_path {
437 my $self = shift;
49d4f2ac 438 my @args;
439 if( ref( $_[0] ) eq 'ARRAY' ) {
440 my $e = shift @_;
441 @args = ( @$e, @_ );
442 } else {
443 @args = @_;
444 }
3a2ebbf4 445
446 # We only need the IDs for adding paths to the graph, not the reading
447 # objects themselves.
49d4f2ac 448 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 449
c1915ab9 450 $self->_graphcalc_done(0);
3a2ebbf4 451 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 452 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 453 }
454 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
455 $self->sequence->delete_edge( $source, $target );
456 }
784877d9 457}
458
3a2ebbf4 459
15d2d3df 460# Extra graph-alike utility
461sub has_path {
3a2ebbf4 462 my $self = shift;
463 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
464 return undef unless $self->sequence->has_edge( $source, $target );
465 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 466}
467
4e5a7b2c 468=head2 clear_witness( @sigil_list )
3a2ebbf4 469
4e5a7b2c 470Clear the given witnesses out of the collation entirely, removing references
471to them in paths, and removing readings that belong only to them. Should only
472be called via $tradition->del_witness.
3a2ebbf4 473
474=cut
475
4e5a7b2c 476sub clear_witness {
477 my( $self, @sigils ) = @_;
478
c1915ab9 479 $self->_graphcalc_done(0);
4e5a7b2c 480 # Clear the witness(es) out of the paths
481 foreach my $e ( $self->paths ) {
482 foreach my $sig ( @sigils ) {
483 $self->del_path( $e, $sig );
484 }
485 }
486
487 # Clear out the newly unused readings
488 foreach my $r ( $self->readings ) {
489 unless( $self->reading_witnesses( $r ) ) {
490 $self->del_reading( $r );
491 }
492 }
493}
3a2ebbf4 494
495sub add_relationship {
496 my $self = shift;
22222af9 497 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 498 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 499 $self->_graphcalc_done(0);
63778331 500 return @vectors;
22222af9 501}
ef9d481f 502
ca6e6095 503around qw/ get_relationship del_relationship / => sub {
504 my $orig = shift;
505 my $self = shift;
506 my @args = @_;
507 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
508 @args = @{$_[0]};
509 }
510 my( $source, $target ) = $self->_stringify_args( @args );
511 $self->$orig( $source, $target );
512};
513
22222af9 514=head2 reading_witnesses( $reading )
910a0a6d 515
22222af9 516Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 517
22222af9 518=cut
1d310495 519
1d310495 520sub reading_witnesses {
521 my( $self, $reading ) = @_;
522 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 523 # arbitrarily chosen "incoming". Thus, special-case the start node.
524 if( $reading eq $self->start ) {
525 return map { $_->sigil } $self->tradition->witnesses;
526 }
1d310495 527 my %all_witnesses;
528 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
529 my $wits = $self->sequence->get_edge_attributes( @$e );
530 @all_witnesses{ keys %$wits } = 1;
531 }
c12bb878 532 my $acstr = $self->ac_label;
533 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
534 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
535 }
1d310495 536 return keys %all_witnesses;
910a0a6d 537}
538
4e5a7b2c 539=head1 OUTPUT METHODS
8e1394aa 540
0ecb975c 541=head2 as_svg( \%options )
8e1394aa 542
0068967c 543Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 544See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 545
546=cut
547
548sub as_svg {
0ecb975c 549 my( $self, $opts ) = @_;
bfcbcecb 550 throw( "Need GraphViz installed to output SVG" )
551 unless File::Which::which( 'dot' );
e247aad1 552 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
5f082716 553 $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
e247aad1 554 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 555 my @cmd = qw/dot -Tsvg/;
556 my( $svg, $err );
557 my $dotfile = File::Temp->new();
558 ## USE FOR DEBUGGING
559 # $dotfile->unlink_on_destroy(0);
560 binmode $dotfile, ':utf8';
561 print $dotfile $self->as_dot( $opts );
562 push( @cmd, $dotfile->filename );
563 run( \@cmd, ">", binary(), \$svg );
e247aad1 564 $svg = decode_utf8( $svg );
565 $self->cached_svg( $svg ) unless $want_subgraph;
566 return $svg;
567 } else {
568 return $self->cached_svg;
b365fbae 569 }
8e1394aa 570}
571
b22576c6 572
0ecb975c 573=head2 as_dot( \%options )
b22576c6 574
0ecb975c 575Returns a string that is the collation graph expressed in dot
576(i.e. GraphViz) format. Options include:
b22576c6 577
0ecb975c 578=over 4
b22576c6 579
0ecb975c 580=item * from
b22576c6 581
0ecb975c 582=item * to
df6d9812 583
0ecb975c 584=item * color_common
585
586=back
df6d9812 587
588=cut
589
590sub as_dot {
0ecb975c 591 my( $self, $opts ) = @_;
592 my $startrank = $opts->{'from'} if $opts;
593 my $endrank = $opts->{'to'} if $opts;
594 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 595 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
596 && $self->end->rank > 100;
6648ee3d 597 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 598
b22576c6 599 # Check the arguments
600 if( $startrank ) {
601 return if $endrank && $startrank > $endrank;
602 return if $startrank > $self->end->rank;
603 }
604 if( defined $endrank ) {
605 return if $endrank < 0;
f1b3b33a 606 $endrank = undef if $endrank == $self->end->rank;
b22576c6 607 }
608
67da8d6c 609 my $graph_name = $self->tradition->name;
610 $graph_name =~ s/[^\w\s]//g;
611 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 612
613 my %graph_attrs = (
614 'rankdir' => 'LR',
615 'bgcolor' => 'none',
616 );
617 my %node_attrs = (
b8990398 618 'fontsize' => 14,
f13b5582 619 'fillcolor' => 'white',
620 'style' => 'filled',
621 'shape' => 'ellipse'
622 );
623 my %edge_attrs = (
624 'arrowhead' => 'open',
625 'color' => '#000000',
626 'fontcolor' => '#000000',
627 );
628
67da8d6c 629 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 630 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
631 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 632
b22576c6 633 # Output substitute start/end readings if necessary
634 if( $startrank ) {
635 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
636 }
637 if( $endrank ) {
638 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
639 }
b365fbae 640 if( $STRAIGHTENHACK ) {
641 ## HACK part 1
6648ee3d 642 my $startlabel = $startrank ? 'SUBSTART' : 'START';
643 $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
b365fbae 644 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
645 }
b22576c6 646 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 647 # Sort the readings by rank if we have ranks; this speeds layout.
648 my @all_readings = $self->end->has_rank
649 ? sort { $a->rank <=> $b->rank } $self->readings
650 : $self->readings;
4633f9e4 651 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 652 foreach my $reading ( @all_readings ) {
b22576c6 653 # Only output readings within our rank range.
654 next if $startrank && $reading->rank < $startrank;
655 next if $endrank && $reading->rank > $endrank;
656 $used{$reading->id} = 1;
910a0a6d 657 # Need not output nodes without separate labels
3a2ebbf4 658 next if $reading->id eq $reading->text;
d4b75f44 659 my $rattrs;
30f0df34 660 my $label = $reading->text;
629e27b0 661 $label .= '-' if $reading->join_next;
662 $label = "-$label" if $reading->join_prior;
8f9cab7b 663 $label =~ s/\"/\\\"/g;
d4b75f44 664 $rattrs->{'label'} = $label;
0ecb975c 665 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 666 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 667 }
3a2ebbf4 668
30ddc24c 669 # Add the real edges. Need to weight one edge per rank jump, in a
670 # continuous line.
b365fbae 671 # my $weighted = $self->_add_edge_weights;
b22576c6 672 my @edges = $self->paths;
3bdec618 673 my( %substart, %subend );
b22576c6 674 foreach my $edge ( @edges ) {
675 # Do we need to output this edge?
508fd430 676 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 677 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 678 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 679
b22576c6 680 # Account for the rank gap if necessary
30ddc24c 681 my $rank0 = $self->reading( $edge->[0] )->rank
682 if $self->reading( $edge->[0] )->has_rank;
683 my $rank1 = $self->reading( $edge->[1] )->rank
684 if $self->reading( $edge->[1] )->has_rank;
685 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
686 $variables->{'minlen'} = $rank1 - $rank0;
687 }
688
689 # Add the calculated edge weights
b365fbae 690 # if( exists $weighted->{$edge->[0]}
e247aad1 691 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
692 # # $variables->{'color'} = 'red';
693 # $variables->{'weight'} = 3.0;
694 # }
30ddc24c 695
508fd430 696 # EXPERIMENTAL: make edge width reflect no. of witnesses
697 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
698 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
699
f13b5582 700 my $varopts = _dot_attr_string( $variables );
701 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
702 $edge->[0], $edge->[1], $varopts );
3bdec618 703 } elsif( $used{$edge->[0]} ) {
704 $subend{$edge->[0]} = 1;
705 } elsif( $used{$edge->[1]} ) {
706 $substart{$edge->[1]} = 1;
b22576c6 707 }
df6d9812 708 }
3bdec618 709 # Add substitute start and end edges if necessary
710 foreach my $node ( keys %substart ) {
027d819c 711 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 712 my $variables = { %edge_attrs, 'label' => $witstr };
713 my $varopts = _dot_attr_string( $variables );
714 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 715 }
716 foreach my $node ( keys %subend ) {
027d819c 717 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 718 my $variables = { %edge_attrs, 'label' => $witstr };
719 my $varopts = _dot_attr_string( $variables );
720 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 721 }
b365fbae 722 # HACK part 2
723 if( $STRAIGHTENHACK ) {
6648ee3d 724 my $endlabel = $endrank ? 'SUBEND' : 'END';
725 $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 726 }
30ddc24c 727
df6d9812 728 $dot .= "}\n";
729 return $dot;
730}
731
f13b5582 732sub _dot_attr_string {
733 my( $hash ) = @_;
734 my @attrs;
735 foreach my $k ( sort keys %$hash ) {
736 my $v = $hash->{$k};
737 push( @attrs, $k.'="'.$v.'"' );
738 }
739 return( '[ ' . join( ', ', @attrs ) . ' ]' );
740}
741
30ddc24c 742sub _add_edge_weights {
743 my $self = shift;
744 # Walk the graph from START to END, choosing the successor node with
745 # the largest number of witness paths each time.
746 my $weighted = {};
747 my $curr = $self->start->id;
008fc8a6 748 my $ranked = $self->end->has_rank;
30ddc24c 749 while( $curr ne $self->end->id ) {
008fc8a6 750 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 751 my @succ = sort { $self->path_witnesses( $curr, $a )
752 <=> $self->path_witnesses( $curr, $b ) }
753 $self->sequence->successors( $curr );
754 my $next = pop @succ;
008fc8a6 755 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 756 # Try to avoid lacunae in the weighted path.
008fc8a6 757 while( @succ &&
758 ( $self->reading( $next )->is_lacuna ||
759 $nextrank - $rank > 1 ) ){
30ddc24c 760 $next = pop @succ;
761 }
762 $weighted->{$curr} = $next;
763 $curr = $next;
764 }
765 return $weighted;
766}
767
027d819c 768=head2 path_witnesses( $edge )
769
770Returns the list of sigils whose witnesses are associated with the given edge.
771The edge can be passed as either an array or an arrayref of ( $source, $target ).
772
773=cut
774
3a2ebbf4 775sub path_witnesses {
776 my( $self, @edge ) = @_;
777 # If edge is an arrayref, cope.
778 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
779 my $e = shift @edge;
780 @edge = @$e;
781 }
782 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 783 return @wits;
3a2ebbf4 784}
785
7f9f05e8 786# Helper function. Make a display label for the given witnesses, showing a.c.
787# witnesses only where the main witness is not also in the list.
027d819c 788sub _path_display_label {
508fd430 789 my $self = shift;
7f9f05e8 790 my %wits;
791 map { $wits{$_} = 1 } @_;
792
793 # If an a.c. wit is listed, remove it if the main wit is also listed.
794 # Otherwise keep it for explicit listing.
795 my $aclabel = $self->ac_label;
796 my @disp_ac;
797 foreach my $w ( sort keys %wits ) {
798 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
799 if( exists $wits{$1} ) {
800 delete $wits{$w};
801 } else {
802 push( @disp_ac, $w );
803 }
804 }
805 }
806
807 # See if we are in a majority situation.
8f9cab7b 808 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 809 if( scalar keys %wits > $maj ) {
810 unshift( @disp_ac, 'majority' );
811 return join( ', ', @disp_ac );
8f9cab7b 812 } else {
7f9f05e8 813 return join( ', ', sort keys %wits );
8f9cab7b 814 }
815}
1dd07bda 816
bf6e338d 817=head2 readings_at_rank( $rank )
1dd07bda 818
bf6e338d 819Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 820
821=cut
822
bf6e338d 823sub readings_at_rank {
1dd07bda 824 my( $self, $rank ) = @_;
bf6e338d 825 my $table = $self->alignment_table;
826 # Table rank is real rank - 1.
827 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
828 my %readings;
829 foreach my $e ( @elements ) {
830 next unless ref( $e ) eq 'HASH';
831 next unless exists $e->{'t'};
832 $readings{$e->{'t'}->id} = $e->{'t'};
833 }
834 return values %readings;
1dd07bda 835}
8f9cab7b 836
4e5a7b2c 837=head2 as_graphml
8e1394aa 838
4e5a7b2c 839Returns a GraphML representation of the collation. The GraphML will contain
840two graphs. The first expresses the attributes of the readings and the witness
841paths that link them; the second expresses the relationships that link the
842readings. This is the native transfer format for a tradition.
8e1394aa 843
56eefa04 844=begin testing
845
846use Text::Tradition;
847
848my $READINGS = 311;
849my $PATHS = 361;
850
851my $datafile = 't/data/florilegium_tei_ps.xml';
852my $tradition = Text::Tradition->new( 'input' => 'TEI',
853 'name' => 'test0',
854 'file' => $datafile,
855 'linear' => 1 );
856
857ok( $tradition, "Got a tradition object" );
858is( scalar $tradition->witnesses, 13, "Found all witnesses" );
859ok( $tradition->collation, "Tradition has a collation" );
860
861my $c = $tradition->collation;
862is( scalar $c->readings, $READINGS, "Collation has all readings" );
863is( scalar $c->paths, $PATHS, "Collation has all paths" );
864is( scalar $c->relationships, 0, "Collation has all relationships" );
865
866# Add a few relationships
867$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
868$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
869$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
870
871# Now write it to GraphML and parse it again.
872
873my $graphml = $c->as_graphml;
874my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
875is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
876is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
877is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
878
879=end testing
880
8e1394aa 881=cut
882
883sub as_graphml {
a30ca502 884 my( $self, $options ) = @_;
3d14b48e 885 $self->calculate_ranks unless $self->_graphcalc_done;
886
a30ca502 887 my $start = $options->{'from'}
888 ? $self->reading( $options->{'from'} ) : $self->start;
889 my $end = $options->{'to'}
890 ? $self->reading( $options->{'to'} ) : $self->end;
891 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
892 throw( 'Start node must be before end node' );
893 }
894 # The readings need to be ranked for this to work.
895 $start = $self->start unless $start->has_rank;
896 $end = $self->end unless $end->has_rank;
414cc046 897 my $rankoffset = 0;
898 unless( $start eq $self->start ) {
899 $rankoffset = $start->rank - 1;
900 }
a30ca502 901 my %use_readings;
902
8e1394aa 903 # Some namespaces
904 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
905 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
906 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 907 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 908
909 # Create the document and root node
910 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
911 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
912 $graphml->setDocumentElement( $root );
913 $root->setNamespace( $xsi_ns, 'xsi', 0 );
914 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 915
916 # List of attribute types to save on our objects and their corresponding
917 # GraphML types
918 my %save_types = (
919 'Str' => 'string',
920 'Int' => 'int',
921 'Bool' => 'boolean',
922 'RelationshipType' => 'string',
923 'RelationshipScope' => 'string',
924 );
925
926 # List of attribute names *not* to save on our objects.
927 # We will also not save any attribute beginning with _.
928 my %skipsave;
929 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 930
bbd064a9 931 # Add the data keys for the graph. Include an extra key 'version' for the
932 # GraphML output version.
e309421a 933 my %graph_data_keys;
934 my $gdi = 0;
bbd064a9 935 my %graph_attributes = ( 'version' => 'string' );
936 # Graph attributes include those of Tradition and those of Collation.
937 my %gattr_from;
938 my $tmeta = $self->tradition->meta;
939 my $cmeta = $self->meta;
940 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
941 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
942 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
943 next if $attr->name =~ /^_/;
944 next if $skipsave{$attr->name};
945 next unless $save_types{$attr->type_constraint->name};
946 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
947 }
948
949 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 950 $graph_data_keys{$datum} = 'dg'.$gdi++;
951 my $key = $root->addNewChild( $graphml_ns, 'key' );
952 $key->setAttribute( 'attr.name', $datum );
bbd064a9 953 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 954 $key->setAttribute( 'for', 'graph' );
955 $key->setAttribute( 'id', $graph_data_keys{$datum} );
956 }
f6066bac 957
bbd064a9 958 # Add the data keys for reading nodes
959 my %reading_attributes;
960 my $rmeta = Text::Tradition::Collation::Reading->meta;
961 foreach my $attr( $rmeta->get_all_attributes ) {
962 next if $attr->name =~ /^_/;
963 next if $skipsave{$attr->name};
964 next unless $save_types{$attr->type_constraint->name};
965 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
966 }
ef9d481f 967 my %node_data_keys;
968 my $ndi = 0;
bbd064a9 969 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 970 $node_data_keys{$datum} = 'dn'.$ndi++;
971 my $key = $root->addNewChild( $graphml_ns, 'key' );
972 $key->setAttribute( 'attr.name', $datum );
bbd064a9 973 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 974 $key->setAttribute( 'for', 'node' );
975 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 976 }
977
bbd064a9 978 # Add the data keys for edges, that is, paths and relationships. Path
979 # data does not come from a Moose class so is here manually.
ef9d481f 980 my $edi = 0;
981 my %edge_data_keys;
bbd064a9 982 my %edge_attributes = (
3a2ebbf4 983 witness => 'string', # ID/label for a path
3a2ebbf4 984 extra => 'boolean', # Path key
3a2ebbf4 985 );
bbd064a9 986 my @path_attributes = keys %edge_attributes; # track our manual additions
987 my $pmeta = Text::Tradition::Collation::Relationship->meta;
988 foreach my $attr( $pmeta->get_all_attributes ) {
989 next if $attr->name =~ /^_/;
990 next if $skipsave{$attr->name};
991 next unless $save_types{$attr->type_constraint->name};
992 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
993 }
994 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 995 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 996 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 997 $key->setAttribute( 'attr.name', $datum );
bbd064a9 998 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 999 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1000 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1001 }
3a2ebbf4 1002
22222af9 1003 # Add the collation graph itself
2c669bca 1004 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1005 $sgraph->setAttribute( 'edgedefault', 'directed' );
1006 $sgraph->setAttribute( 'id', $self->tradition->name );
1007 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
1008 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
1009 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
1010 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
1011 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1012
e309421a 1013 # Collation attribute data
bbd064a9 1014 foreach my $datum ( keys %graph_attributes ) {
1015 my $value;
1016 if( $datum eq 'version' ) {
1017 $value = '3.1';
1018 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1019 $value = $self->tradition->$datum;
1020 } else {
1021 $value = $self->$datum;
1022 }
2c669bca 1023 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1024 }
8e1394aa 1025
1026 my $node_ctr = 0;
1027 my %node_hash;
22222af9 1028 # Add our readings to the graph
3a2ebbf4 1029 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1030 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1031 ( $n->rank < $start->rank || $n->rank > $end->rank );
1032 $use_readings{$n->id} = 1;
2c669bca 1033 # Add to the main graph
1034 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1035 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1036 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1037 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1038 foreach my $d ( keys %reading_attributes ) {
255875b8 1039 my $nval = $n->$d;
414cc046 1040 if( $rankoffset && $d eq 'rank' ) {
1041 # Adjust the ranks within the subgraph.
1042 $nval = $n eq $self->end ? $end->rank + 1 : $nval - $rankoffset;
1043 }
255875b8 1044 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1045 if defined $nval;
1046 }
b15511bf 1047 }
1048
2c669bca 1049 # Add the path edges to the sequence graph
df6d9812 1050 my $edge_ctr = 0;
3a2ebbf4 1051 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1052 # We add an edge in the graphml for every witness in $e.
a30ca502 1053 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1054 my @edge_wits = sort $self->path_witnesses( $e );
1055 $e->[0] = $self->start unless $use_readings{$e->[0]};
1056 $e->[1] = $self->end unless $use_readings{$e->[1]};
1057 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1058 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1059 $node_hash{ $e->[0] },
1060 $node_hash{ $e->[1] } );
2c669bca 1061 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1062 $edge_el->setAttribute( 'source', $from );
1063 $edge_el->setAttribute( 'target', $to );
1064 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1065
1066 # It's a witness path, so add the witness
1067 my $base = $wit;
1068 my $key = $edge_data_keys{'witness'};
1069 # Is this an ante-corr witness?
1070 my $aclabel = $self->ac_label;
1071 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1072 # Keep the base witness
1073 $base = $1;
1074 # ...and record that this is an 'extra' reading path
1075 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1076 }
1077 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1078 }
1079 }
1080
22222af9 1081 # Add the relationship graph to the XML
bbd064a9 1082 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1083 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1084 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1085
94c00c71 1086 # Save and return the thing
1087 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1088 return $result;
df6d9812 1089}
1090
b15511bf 1091sub _add_graphml_data {
1092 my( $el, $key, $value ) = @_;
b15511bf 1093 return unless defined $value;
c9bf3dbf 1094 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1095 $data_el->setAttribute( 'key', $key );
1096 $data_el->appendText( $value );
8e1394aa 1097}
1098
4e5a7b2c 1099=head2 as_csv
910a0a6d 1100
1101Returns a CSV alignment table representation of the collation graph, one
2c669bca 1102row per witness (or witness uncorrected.)
910a0a6d 1103
1104=cut
1105
1106sub as_csv {
3a2ebbf4 1107 my( $self ) = @_;
1dd07bda 1108 my $table = $self->alignment_table;
82fa4d57 1109 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1110 my @result;
2c669bca 1111 # Make the header row
1112 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1113 push( @result, decode_utf8( $csv->string ) );
1114 # Make the rest of the rows
1115 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1116 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1117 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1118 $csv->combine( @row );
910a0a6d 1119 push( @result, decode_utf8( $csv->string ) );
1120 }
3a2ebbf4 1121 return join( "\n", @result );
910a0a6d 1122}
1123
1dd07bda 1124=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1125
566f4595 1126Return a reference to an alignment table, in a slightly enhanced CollateX
1127format which looks like this:
1128
1129 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1130 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1131 { witness => "SIG2",
4e5a7b2c 1132 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1133 ... ],
1134 length => TEXTLEN };
1135
1136If $use_refs is set to 1, the reading object is returned in the table
1137instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1138
1139If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1140keys have a true hash value will be included.
2c669bca 1141
1142=cut
9f3ba6f7 1143
1dd07bda 1144sub alignment_table {
1145 my( $self ) = @_;
c1915ab9 1146 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1147 return $self->cached_table if $self->has_cached_table;
1148
0ecb975c 1149 # Make sure we can do this
1150 throw( "Need a linear graph in order to make an alignment table" )
1151 unless $self->linear;
1152 $self->calculate_ranks unless $self->end->has_rank;
1153
2c669bca 1154 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1155 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1156 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1157 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1158 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1159 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1160 push( @{$table->{'alignment'}},
1161 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1162 if( $wit->is_layered ) {
1163 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1164 $wit->sigil.$self->ac_label );
1dd07bda 1165 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1166 push( @{$table->{'alignment'}},
1167 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1168 }
1169 }
1dd07bda 1170 $self->cached_table( $table );
1171 return $table;
910a0a6d 1172}
1173
1174sub _make_witness_row {
1dd07bda 1175 my( $path, $positions ) = @_;
910a0a6d 1176 my %char_hash;
1177 map { $char_hash{$_} = undef } @$positions;
2c669bca 1178 my $debug = 0;
910a0a6d 1179 foreach my $rdg ( @$path ) {
eca16057 1180 my $rtext = $rdg->text;
1181 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1182 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1183 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1184 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1185 }
1186 my @row = map { $char_hash{$_} } @$positions;
eca16057 1187 # Fill in lacuna markers for undef spots in the row
1188 my $last_el = shift @row;
1189 my @filled_row = ( $last_el );
1190 foreach my $el ( @row ) {
0e476982 1191 # If we are using node reference, make the lacuna node appear many times
1192 # in the table. If not, use the lacuna tag.
1dd07bda 1193 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1194 $el = $last_el;
eca16057 1195 }
1196 push( @filled_row, $el );
1197 $last_el = $el;
1198 }
1199 return @filled_row;
910a0a6d 1200}
1201
4e5a7b2c 1202=head1 NAVIGATION METHODS
910a0a6d 1203
4e5a7b2c 1204=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1205
1206Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1207with $last, for the witness given in $sigil. If a $backup sigil is
1208specified (e.g. when walking a layered witness), it will be used wherever
1209no $sigil path exists. If there is a base text reading, that will be
1210used wherever no path exists for $sigil or $backup.
e2902068 1211
1212=cut
1213
910a0a6d 1214# TODO Think about returning some lazy-eval iterator.
b0b4421a 1215# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1216
e2902068 1217sub reading_sequence {
861c3e27 1218 my( $self, $start, $end, $witness ) = @_;
e2902068 1219
930ff666 1220 $witness = $self->baselabel unless $witness;
e2902068 1221 my @readings = ( $start );
1222 my %seen;
1223 my $n = $start;
3a2ebbf4 1224 while( $n && $n->id ne $end->id ) {
1225 if( exists( $seen{$n->id} ) ) {
63778331 1226 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1227 }
3a2ebbf4 1228 $seen{$n->id} = 1;
910a0a6d 1229
861c3e27 1230 my $next = $self->next_reading( $n, $witness );
44771cf2 1231 unless( $next ) {
63778331 1232 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1233 }
910a0a6d 1234 push( @readings, $next );
1235 $n = $next;
e2902068 1236 }
1237 # Check that the last reading is our end reading.
1238 my $last = $readings[$#readings];
63778331 1239 throw( "Last reading found from " . $start->text .
1240 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1241 unless $last->id eq $end->id;
e2902068 1242
1243 return @readings;
1244}
1245
4e5a7b2c 1246=head2 next_reading( $reading, $sigil );
8e1394aa 1247
4a8828f0 1248Returns the reading that follows the given reading along the given witness
930ff666 1249path.
8e1394aa 1250
1251=cut
1252
4a8828f0 1253sub next_reading {
e2902068 1254 # Return the successor via the corresponding path.
8e1394aa 1255 my $self = shift;
3a2ebbf4 1256 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1257 return undef unless $answer;
3a2ebbf4 1258 return $self->reading( $answer );
8e1394aa 1259}
1260
4e5a7b2c 1261=head2 prior_reading( $reading, $sigil )
8e1394aa 1262
4a8828f0 1263Returns the reading that precedes the given reading along the given witness
930ff666 1264path.
8e1394aa 1265
1266=cut
1267
4a8828f0 1268sub prior_reading {
e2902068 1269 # Return the predecessor via the corresponding path.
8e1394aa 1270 my $self = shift;
3a2ebbf4 1271 my $answer = $self->_find_linked_reading( 'prior', @_ );
1272 return $self->reading( $answer );
8e1394aa 1273}
1274
4a8828f0 1275sub _find_linked_reading {
861c3e27 1276 my( $self, $direction, $node, $path ) = @_;
1277
1278 # Get a backup if we are dealing with a layered witness
1279 my $alt_path;
1280 my $aclabel = $self->ac_label;
1281 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1282 $alt_path = $1;
1283 }
1284
e2902068 1285 my @linked_paths = $direction eq 'next'
3a2ebbf4 1286 ? $self->sequence->edges_from( $node )
1287 : $self->sequence->edges_to( $node );
e2902068 1288 return undef unless scalar( @linked_paths );
8e1394aa 1289
e2902068 1290 # We have to find the linked path that contains all of the
1291 # witnesses supplied in $path.
1292 my( @path_wits, @alt_path_wits );
4e5a7b2c 1293 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1294 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1295 my $base_le;
1296 my $alt_le;
1297 foreach my $le ( @linked_paths ) {
3a2ebbf4 1298 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1299 $base_le = $le;
910a0a6d 1300 }
508fd430 1301 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1302 if( _is_within( \@path_wits, \@le_wits ) ) {
1303 # This is the right path.
1304 return $direction eq 'next' ? $le->[1] : $le->[0];
1305 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1306 $alt_le = $le;
1307 }
8e1394aa 1308 }
e2902068 1309 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1310 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1311 if $alt_le;
e2902068 1312
1313 # Got this far? Return the base path if it exists.
3a2ebbf4 1314 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1315 if $base_le;
e2902068 1316
1317 # Got this far? We have no appropriate path.
2c669bca 1318 warn "Could not find $direction node from " . $node->id
910a0a6d 1319 . " along path $path";
8e1394aa 1320 return undef;
1321}
1322
4a8828f0 1323# Some set logic.
1324sub _is_within {
1325 my( $set1, $set2 ) = @_;
7854e12e 1326 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1327 foreach my $el ( @$set1 ) {
910a0a6d 1328 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1329 }
1330 return $ret;
1331}
1332
4e5a7b2c 1333# Return the string that joins together a list of witnesses for
1334# display on a single path.
1335sub _witnesses_of_label {
1336 my( $self, $label ) = @_;
1337 my $regex = $self->wit_list_separator;
1338 my @answer = split( /\Q$regex\E/, $label );
1339 return @answer;
b0b4421a 1340}
1341
d4b75f44 1342=head2 common_readings
1343
1344Returns the list of common readings in the graph (i.e. those readings that are
1345shared by all non-lacunose witnesses.)
1346
1347=cut
1348
1349sub common_readings {
1350 my $self = shift;
1351 my @common = grep { $_->is_common } $self->readings;
1352 return @common;
1353}
1354
fae52efd 1355=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1356
1357Returns the text of a witness (plus its backup, if we are using a layer)
1358as stored in the collation. The text is returned as a string, where the
1359individual readings are joined with spaces and the meta-readings (e.g.
1360lacunae) are omitted. Optional specification of $start and $end allows
1361the generation of a subset of the witness text.
4e5a7b2c 1362
b0b4421a 1363=cut
1364
1365sub path_text {
861c3e27 1366 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1367 $start = $self->start unless $start;
1368 $end = $self->end unless $end;
861c3e27 1369 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1370 my $pathtext = '';
1371 my $last;
1372 foreach my $r ( @path ) {
1373 if( $r->join_prior || !$last || $last->join_next ) {
1374 $pathtext .= $r->text;
1375 } else {
1376 $pathtext .= ' ' . $r->text;
1377 }
1378 $last = $r;
1379 }
1380 return $pathtext;
b0b4421a 1381}
4e5a7b2c 1382
1383=head1 INITIALIZATION METHODS
1384
1385These are mostly for use by parsers.
1386
1387=head2 make_witness_path( $witness )
1388
1389Link the array of readings contained in $witness->path (and in
1390$witness->uncorrected_path if it exists) into collation paths.
1391Clear out the arrays when finished.
de51424a 1392
4e5a7b2c 1393=head2 make_witness_paths
1394
1395Call make_witness_path for all witnesses in the tradition.
1396
1397=cut
930ff666 1398
7e450e44 1399# For use when a collation is constructed from a base text and an apparatus.
1400# We have the sequences of readings and just need to add path edges.
1f7aa795 1401# When we are done, clear out the witness path attributes, as they are no
1402# longer needed.
1403# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1404
6a222840 1405sub make_witness_paths {
1406 my( $self ) = @_;
910a0a6d 1407 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1408 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1409 $self->make_witness_path( $wit );
7854e12e 1410 }
7854e12e 1411}
1412
6a222840 1413sub make_witness_path {
7854e12e 1414 my( $self, $wit ) = @_;
1415 my @chain = @{$wit->path};
15d2d3df 1416 my $sig = $wit->sigil;
fae52efd 1417 # Add start and end if necessary
1418 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1419 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1420 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1421 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1422 }
1f7aa795 1423 if( $wit->is_layered ) {
d9e873d0 1424 @chain = @{$wit->uncorrected_path};
fae52efd 1425 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1426 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1427 foreach my $idx( 0 .. $#chain-1 ) {
1428 my $source = $chain[$idx];
1429 my $target = $chain[$idx+1];
1430 $self->add_path( $source, $target, $sig.$self->ac_label )
1431 unless $self->has_path( $source, $target, $sig );
1432 }
15d2d3df 1433 }
1f7aa795 1434 $wit->clear_path;
1435 $wit->clear_uncorrected_path;
e2902068 1436}
1437
414cc046 1438=head2 equivalence_graph( \%readingmap, $startrank, $endrank, @testvector )
ac4d7ac2 1439
1440Returns an equivalence graph of the collation, in which all readings
1441related via a 'colocated' relationship are transformed into a single
1442vertex. Can be used to determine the validity of a new relationship. The
1443mapping between equivalence vertices and reading IDs will be stored in the
1444hash whose reference is passed as readingmap. For a subset of the graph,
1445pass in a start and/or an ending rank (this only works if L<calculate_ranks>
1446has been called at least once.)
1447
414cc046 1448It is also possible to pass in a test relationship in @testvector, and get
1449the resulting equivalence graph before the relationship has been made.
1450
ac4d7ac2 1451=cut
1452
1453sub equivalence_graph {
414cc046 1454 my( $self, $map, $start, $end, @newvector ) = @_;
ac4d7ac2 1455 $start = undef unless $self->end->has_rank;
1456 $end = undef unless $self->end->has_rank;
ac4d7ac2 1457 my $eqgraph = Graph->new();
1458 my $rel_ctr = 0;
1459 # Add the nodes
1460 foreach my $r ( $self->readings ) {
1461 unless( $r eq $self->start || $r eq $self->end ) {
1462 next if $start && $r->rank < $start;
1463 next if $end && $r->rank > $end;
1464 }
1465 next if exists $map->{$r->id};
414cc046 1466 my @rels = $self->related_readings( $r->id, 'colocated' );
1467 push( @rels, $r->id );
1468 # Make an equivalence vertex
1469 my $rn = 'equivalence_' . $rel_ctr++;
1470 $eqgraph->add_vertex( $rn );
1471 # Note which readings belong to this vertex.
1472 push( @rels, $r->id );
1473 foreach( @rels ) {
1474 $map->{$_} = $rn;
1475 }
ac4d7ac2 1476 }
1477
1478 # Add the edges.
1479 foreach my $p ( $self->paths ) {
1480 my $efrom = exists $map->{$p->[0]} ? $map->{$p->[0]}
1481 : $map->{$self->start->id};
1482 my $eto = exists $map->{$p->[1]} ? $map->{$p->[1]}
1483 : $map->{$self->end->id};
1484 $eqgraph->add_edge( $efrom, $eto );
1485 }
414cc046 1486
1487 # Collapse the vertices in @newvector if applicable.
1488 if( @newvector ) {
1489 my( $eqs, $eqt ) = map { $map->{$_} } @newvector;
1490 $DB::single = 1 unless $eqs && $eqt;
1491 unless( $eqs eq $eqt ) {
1492 # Combine the vertices.
1493 map { $eqgraph->add_edge( $eqs, $_ ) } $eqgraph->successors( $eqt );
1494 map { $eqgraph->add_edge( $_, $eqs ) } $eqgraph->predecessors( $eqt );
1495 $eqgraph->delete_vertex( $eqt );
1496 }
1497 }
ac4d7ac2 1498 return $eqgraph;
1499}
1500
4e5a7b2c 1501=head2 calculate_ranks
1502
1503Calculate the reading ranks (that is, their aligned positions relative
1504to each other) for the graph. This can only be called on linear collations.
1505
b365fbae 1506=begin testing
1507
1508use Text::Tradition;
1509
1510my $cxfile = 't/data/Collatex-16.xml';
1511my $t = Text::Tradition->new(
1512 'name' => 'inline',
1513 'input' => 'CollateX',
1514 'file' => $cxfile,
1515 );
1516my $c = $t->collation;
1517
1518# Make an svg
bfcbcecb 1519my $table = $c->alignment_table;
1520ok( $c->has_cached_table, "Alignment table was cached" );
1521is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1522$c->calculate_ranks;
bfcbcecb 1523is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1524$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1525isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1526
1527=end testing
1528
4e5a7b2c 1529=cut
1530
910a0a6d 1531sub calculate_ranks {
1532 my $self = shift;
b365fbae 1533 # Save the existing ranks, in case we need to invalidate the cached SVG.
1534 my %existing_ranks;
ac4d7ac2 1535 map { $existing_ranks{$_} = $_->rank } $self->readings;
910a0a6d 1536 # Walk a version of the graph where every node linked by a relationship
1537 # edge is fundamentally the same node, and do a topological ranking on
1538 # the nodes in this graph.
910a0a6d 1539 my %rel_containers;
ac4d7ac2 1540 my $topo_graph = $self->equivalence_graph( \%rel_containers );
910a0a6d 1541
1542 # Now do the rankings, starting with the start node.
3a2ebbf4 1543 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1544 my $node_ranks = { $topo_start => 0 };
910a0a6d 1545 my @curr_origin = ( $topo_start );
1546 # A little iterative function.
1547 while( @curr_origin ) {
c9bf3dbf 1548 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1549 }
1550 # Transfer our rankings from the topological graph to the real one.
1551 foreach my $r ( $self->readings ) {
3a2ebbf4 1552 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1553 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1554 } else {
63778331 1555 # Die. Find the last rank we calculated.
7ec89b0e 1556 my @all_defined = sort { ( $node_ranks->{$rel_containers{$a->id}}||-1 )
1557 <=> ( $node_ranks->{$rel_containers{$b->id}}||-1 ) }
63778331 1558 $self->readings;
1559 my $last = pop @all_defined;
1560 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1561 }
de51424a 1562 }
bfcbcecb 1563 # Do we need to invalidate the cached data?
1564 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1565 foreach my $r ( $self->readings ) {
7c293912 1566 next if defined( $existing_ranks{$r} )
1567 && $existing_ranks{$r} == $r->rank;
c1915ab9 1568 # Something has changed, so clear the cache
bfcbcecb 1569 $self->_clear_cache;
c1915ab9 1570 # ...and recalculate the common readings.
1571 $self->calculate_common_readings();
b365fbae 1572 last;
1573 }
1574 }
c1915ab9 1575 # The graph calculation information is now up to date.
1576 $self->_graphcalc_done(1);
8e1394aa 1577}
3a1f2523 1578
910a0a6d 1579sub _assign_rank {
c9bf3dbf 1580 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1581 # Look at each of the children of @current_nodes. If all the child's
1582 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1583 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1584 # parent gets a rank.
910a0a6d 1585 my @next_nodes;
1586 foreach my $c ( @current_nodes ) {
c9bf3dbf 1587 warn "Current reading $c has no rank!"
1588 unless exists $node_ranks->{$c};
1589 # print STDERR "Looking at child of node $c, rank "
1590 # . $node_ranks->{$c} . "\n";
1591 foreach my $child ( $graph->successors( $c ) ) {
1592 next if exists $node_ranks->{$child};
910a0a6d 1593 my $highest_rank = -1;
1594 my $skip = 0;
c9bf3dbf 1595 foreach my $parent ( $graph->predecessors( $child ) ) {
1596 if( exists $node_ranks->{$parent} ) {
1597 $highest_rank = $node_ranks->{$parent}
1598 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1599 } else {
1600 $skip = 1;
1601 last;
1602 }
1603 }
1604 next if $skip;
c9bf3dbf 1605 my $c_rank = $highest_rank + 1;
1606 # print STDERR "Assigning rank $c_rank to node $child \n";
1607 $node_ranks->{$child} = $c_rank;
910a0a6d 1608 push( @next_nodes, $child );
1609 }
1610 }
1611 return @next_nodes;
4cdd82f1 1612}
910a0a6d 1613
c1915ab9 1614sub _clear_cache {
1615 my $self = shift;
1616 $self->wipe_svg if $self->has_cached_svg;
1617 $self->wipe_table if $self->has_cached_table;
1618}
1619
1620
4e5a7b2c 1621=head2 flatten_ranks
1622
1623A convenience method for parsing collation data. Searches the graph for readings
1624with the same text at the same rank, and merges any that are found.
1625
1626=cut
1627
0e476982 1628sub flatten_ranks {
1629 my $self = shift;
1630 my %unique_rank_rdg;
bf6e338d 1631 my $changed;
0e476982 1632 foreach my $rdg ( $self->readings ) {
1633 next unless $rdg->has_rank;
1634 my $key = $rdg->rank . "||" . $rdg->text;
1635 if( exists $unique_rank_rdg{$key} ) {
1636 # Combine!
56eefa04 1637 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1638 $changed = 1;
0e476982 1639 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1640 # TODO see if this now makes a common point.
0e476982 1641 } else {
1642 $unique_rank_rdg{$key} = $rdg;
1643 }
1644 }
bf6e338d 1645 # If we merged readings, the ranks are still fine but the alignment
1646 # table is wrong. Wipe it.
1647 $self->wipe_table() if $changed;
0e476982 1648}
4633f9e4 1649
1650
d4b75f44 1651=head2 calculate_common_readings
1652
1653Goes through the graph identifying the readings that appear in every witness
1654(apart from those with lacunae at that spot.) Marks them as common and returns
1655the list.
1656
1657=begin testing
1658
1659use Text::Tradition;
1660
1661my $cxfile = 't/data/Collatex-16.xml';
1662my $t = Text::Tradition->new(
1663 'name' => 'inline',
1664 'input' => 'CollateX',
1665 'file' => $cxfile,
1666 );
1667my $c = $t->collation;
1668
1669my @common = $c->calculate_common_readings();
1670is( scalar @common, 8, "Found correct number of common readings" );
1671my @marked = sort $c->common_readings();
1672is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1673my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1674is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1675
1676=end testing
1677
1678=cut
1679
1680sub calculate_common_readings {
1681 my $self = shift;
1682 my @common;
c1915ab9 1683 map { $_->is_common( 0 ) } $self->readings;
1684 # Implicitly calls calculate_ranks
1dd07bda 1685 my $table = $self->alignment_table;
d4b75f44 1686 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1687 my @row = map { $_->{'tokens'}->[$idx]
1688 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1689 @{$table->{'alignment'}};
d4b75f44 1690 my %hash;
1691 foreach my $r ( @row ) {
1692 if( $r ) {
1693 $hash{$r->id} = $r unless $r->is_meta;
1694 } else {
1695 $hash{'UNDEF'} = $r;
1696 }
1697 }
1698 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1699 my( $r ) = values %hash;
1700 $r->is_common( 1 );
1701 push( @common, $r );
1702 }
1703 }
1704 return @common;
1705}
1706
861c3e27 1707=head2 text_from_paths
1708
1709Calculate the text array for all witnesses from the path, for later consistency
1710checking. Only to be used if there is no non-graph-based way to know the
1711original texts.
1712
1713=cut
1714
1715sub text_from_paths {
1716 my $self = shift;
1717 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1718 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1719 my @text;
1720 foreach my $r ( @readings ) {
1721 next if $r->is_meta;
1722 push( @text, $r->text );
1723 }
861c3e27 1724 $wit->text( \@text );
1725 if( $wit->is_layered ) {
5164a6f0 1726 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1727 $wit->sigil.$self->ac_label );
1728 my @uctext;
1729 foreach my $r ( @ucrdgs ) {
1730 next if $r->is_meta;
1731 push( @uctext, $r->text );
1732 }
1733 $wit->layertext( \@uctext );
861c3e27 1734 }
1735 }
1736}
0e476982 1737
4e5a7b2c 1738=head1 UTILITY FUNCTIONS
1739
1740=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1741
4e5a7b2c 1742Find the last reading that occurs in sequence before both the given readings.
414cc046 1743At the very least this should be $self->start.
4e5a7b2c 1744
1745=head2 common_successor( $reading_a, $reading_b )
1746
1747Find the first reading that occurs in sequence after both the given readings.
414cc046 1748At the very least this should be $self->end.
4e5a7b2c 1749
22222af9 1750=begin testing
1751
1752use Text::Tradition;
1753
1754my $cxfile = 't/data/Collatex-16.xml';
1755my $t = Text::Tradition->new(
1756 'name' => 'inline',
1757 'input' => 'CollateX',
1758 'file' => $cxfile,
1759 );
1760my $c = $t->collation;
1761
679f17e1 1762is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1763 'n20', "Found correct common predecessor" );
679f17e1 1764is( $c->common_successor( 'n24', 'n23' )->id,
22222af9 1765 '#END#', "Found correct common successor" );
1766
4e5a7b2c 1767is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1768 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1769is( $c->common_successor( 'n21', 'n10' )->id,
22222af9 1770 '#END#', "Found correct common successor for readings on same path" );
1771
1772=end testing
1773
1774=cut
1775
1776## Return the closest reading that is a predecessor of both the given readings.
1777sub common_predecessor {
1778 my $self = shift;
4e5a7b2c 1779 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1780 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1781}
1782
1783sub common_successor {
1784 my $self = shift;
4e5a7b2c 1785 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1786 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1787}
1788
414cc046 1789
1790# TODO think about how to do this without ranks...
027d819c 1791sub _common_in_path {
22222af9 1792 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1793 my $iter = $self->end->rank;
22222af9 1794 my @candidates;
414cc046 1795 my @last_r1 = ( $r1 );
1796 my @last_r2 = ( $r2 );
1797 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1798 my %all_seen;
414cc046 1799 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1800 while( !@candidates ) {
414cc046 1801 last unless $iter--; # Avoid looping infinitely
1802 # Iterate separately down the graph from r1 and r2
1803 my( @new_lc1, @new_lc2 );
1804 foreach my $lc ( @last_r1 ) {
1805 foreach my $p ( $lc->$dir ) {
1806 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1807 # print STDERR "Path candidate $p from $lc\n";
1808 push( @candidates, $p );
002e3600 1809 } elsif( !$all_seen{$p->id} ) {
414cc046 1810 $all_seen{$p->id} = 'r1';
1811 push( @new_lc1, $p );
1812 }
1813 }
1814 }
1815 foreach my $lc ( @last_r2 ) {
22222af9 1816 foreach my $p ( $lc->$dir ) {
414cc046 1817 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1818 # print STDERR "Path candidate $p from $lc\n";
22222af9 1819 push( @candidates, $p );
002e3600 1820 } elsif( !$all_seen{$p->id} ) {
414cc046 1821 $all_seen{$p->id} = 'r2';
1822 push( @new_lc2, $p );
22222af9 1823 }
1824 }
1825 }
414cc046 1826 @last_r1 = @new_lc1;
1827 @last_r2 = @new_lc2;
22222af9 1828 }
1829 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1830 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1831}
1832
63778331 1833sub throw {
1834 Text::Tradition::Error->throw(
1835 'ident' => 'Collation error',
1836 'message' => $_[0],
1837 );
1838}
1839
dd3b58b0 1840no Moose;
1841__PACKAGE__->meta->make_immutable;
e867486f 1842
027d819c 1843=head1 LICENSE
e867486f 1844
027d819c 1845This package is free software and is provided "as is" without express
1846or implied warranty. You can redistribute it and/or modify it under
1847the same terms as Perl itself.
e867486f 1848
027d819c 1849=head1 AUTHOR
e867486f 1850
027d819c 1851Tara L Andrews E<lt>aurum@cpan.orgE<gt>