fix graphml subgraph generation; fix tab parsing to only set 'collated' where needed...
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
22222af9 34 },
35 writer => '_set_relations',
3a2ebbf4 36 );
dd3b58b0 37
3a2ebbf4 38has 'tradition' => (
39 is => 'ro',
d047cd52 40 isa => 'Text::Tradition',
8cfd99c4 41 writer => '_set_tradition',
8d9a1cd8 42 weak_ref => 1,
d047cd52 43 );
dd3b58b0 44
3a2ebbf4 45has 'readings' => (
46 isa => 'HashRef[Text::Tradition::Collation::Reading]',
47 traits => ['Hash'],
48 handles => {
49 reading => 'get',
50 _add_reading => 'set',
51 del_reading => 'delete',
52 has_reading => 'exists',
53 readings => 'values',
54 },
55 default => sub { {} },
56 );
910a0a6d 57
4a8828f0 58has 'wit_list_separator' => (
7854e12e 59 is => 'rw',
60 isa => 'Str',
61 default => ', ',
62 );
63
64has 'baselabel' => (
65 is => 'rw',
66 isa => 'Str',
67 default => 'base text',
68 );
4a8828f0 69
15d2d3df 70has 'linear' => (
71 is => 'rw',
72 isa => 'Bool',
73 default => 1,
74 );
c84275ff 75
ef9d481f 76has 'ac_label' => (
77 is => 'rw',
78 isa => 'Str',
79 default => ' (a.c.)',
80 );
3a2ebbf4 81
4e483aa5 82has 'wordsep' => (
83 is => 'rw',
84 isa => 'Str',
85 default => ' ',
86 );
87
3a2ebbf4 88has 'start' => (
89 is => 'ro',
90 isa => 'Text::Tradition::Collation::Reading',
91 writer => '_set_start',
92 weak_ref => 1,
93 );
94
95has 'end' => (
96 is => 'ro',
97 isa => 'Text::Tradition::Collation::Reading',
98 writer => '_set_end',
99 weak_ref => 1,
100 );
b365fbae 101
102has 'cached_svg' => (
103 is => 'rw',
104 isa => 'Str',
105 predicate => 'has_cached_svg',
106 clearer => 'wipe_svg',
107 );
1dd07bda 108
109has 'cached_table' => (
110 is => 'rw',
111 isa => 'HashRef',
112 predicate => 'has_cached_table',
113 clearer => 'wipe_table',
114 );
c1915ab9 115
116has '_graphcalc_done' => (
117 is => 'rw',
118 isa => 'Bool',
119 default => undef,
120 );
1f563ac3 121
4e5a7b2c 122=head1 NAME
123
124Text::Tradition::Collation - a software model for a text collation
125
126=head1 SYNOPSIS
127
128 use Text::Tradition;
129 my $t = Text::Tradition->new(
130 'name' => 'this is a text',
131 'input' => 'TEI',
132 'file' => '/path/to/tei_parallel_seg_file.xml' );
133
134 my $c = $t->collation;
135 my @readings = $c->readings;
136 my @paths = $c->paths;
137 my @relationships = $c->relationships;
138
139 my $svg_variant_graph = $t->collation->as_svg();
140
141=head1 DESCRIPTION
142
143Text::Tradition is a library for representation and analysis of collated
144texts, particularly medieval ones. The Collation is the central feature of
145a Tradition, where the text, its sequence of readings, and its relationships
146between readings are actually kept.
147
148=head1 CONSTRUCTOR
149
150=head2 new
151
152The constructor. Takes a hash or hashref of the following arguments:
153
154=over
155
156=item * tradition - The Text::Tradition object to which the collation
157belongs. Required.
158
159=item * linear - Whether the collation should be linear; that is, whether
160transposed readings should be treated as two linked readings rather than one,
161and therefore whether the collation graph is acyclic. Defaults to true.
162
4e5a7b2c 163=item * baselabel - The default label for the path taken by a base text
164(if any). Defaults to 'base text'.
165
166=item * wit_list_separator - The string to join a list of witnesses for
167purposes of making labels in display graphs. Defaults to ', '.
168
169=item * ac_label - The extra label to tack onto a witness sigil when
170representing another layer of path for the given witness - that is, when
171a text has more than one possible reading due to scribal corrections or
172the like. Defaults to ' (a.c.)'.
173
4e483aa5 174=item * wordsep - The string used to separate words in the original text.
175Defaults to ' '.
176
4e5a7b2c 177=back
178
179=head1 ACCESSORS
180
181=head2 tradition
182
183=head2 linear
184
4e5a7b2c 185=head2 wit_list_separator
186
187=head2 baselabel
188
189=head2 ac_label
190
4e483aa5 191=head2 wordsep
192
4e5a7b2c 193Simple accessors for collation attributes.
194
195=head2 start
196
197The meta-reading at the start of every witness path.
198
199=head2 end
200
201The meta-reading at the end of every witness path.
202
203=head2 readings
204
205Returns all Reading objects in the graph.
206
207=head2 reading( $id )
208
209Returns the Reading object corresponding to the given ID.
210
211=head2 add_reading( $reading_args )
212
213Adds a new reading object to the collation.
214See L<Text::Tradition::Collation::Reading> for the available arguments.
215
216=head2 del_reading( $object_or_id )
217
218Removes the given reading from the collation, implicitly removing its
219paths and relationships.
220
4e483aa5 221=head2 merge_readings( $main, $second, $concatenate, $with_str )
222
223Merges the $second reading into the $main one. If $concatenate is true, then
224the merged node will carry the text of both readings, concatenated with either
225$with_str (if specified) or a sensible default (the empty string if the
226appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 227
4e483aa5 228The first two arguments may be either readings or reading IDs.
4e5a7b2c 229
230=head2 has_reading( $id )
231
232Predicate to see whether a given reading ID is in the graph.
233
234=head2 reading_witnesses( $object_or_id )
235
236Returns a list of sigils whose witnesses contain the reading.
237
238=head2 paths
239
240Returns all reading paths within the document - that is, all edges in the
241collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
242
243=head2 add_path( $source, $target, $sigil )
244
245Links the given readings in the collation in sequence, under the given witness
246sigil. The readings may be specified by object or ID.
247
248=head2 del_path( $source, $target, $sigil )
249
250Links the given readings in the collation in sequence, under the given witness
251sigil. The readings may be specified by object or ID.
252
253=head2 has_path( $source, $target );
254
255Returns true if the two readings are linked in sequence in any witness.
256The readings may be specified by object or ID.
257
258=head2 relationships
259
260Returns all Relationship objects in the collation.
261
262=head2 add_relationship( $reading, $other_reading, $options )
263
264Adds a new relationship of the type given in $options between the two readings,
265which may be specified by object or ID. Returns a value of ( $status, @vectors)
266where $status is true on success, and @vectors is a list of relationship edges
267that were ultimately added.
268See L<Text::Tradition::Collation::Relationship> for the available options.
269
270=cut
dd3b58b0 271
d047cd52 272sub BUILD {
3a2ebbf4 273 my $self = shift;
22222af9 274 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 275 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
276 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 277}
784877d9 278
3a2ebbf4 279### Reading construct/destruct functions
280
281sub add_reading {
282 my( $self, $reading ) = @_;
283 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
284 my %args = %$reading;
285 $reading = Text::Tradition::Collation::Reading->new(
286 'collation' => $self,
287 %args );
288 }
289 # First check to see if a reading with this ID exists.
290 if( $self->reading( $reading->id ) ) {
63778331 291 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 292 }
c1915ab9 293 $self->_graphcalc_done(0);
3a2ebbf4 294 $self->_add_reading( $reading->id => $reading );
295 # Once the reading has been added, put it in both graphs.
296 $self->sequence->add_vertex( $reading->id );
22222af9 297 $self->relations->add_reading( $reading->id );
3a2ebbf4 298 return $reading;
eca16057 299};
300
3a2ebbf4 301around del_reading => sub {
302 my $orig = shift;
303 my $self = shift;
304 my $arg = shift;
305
306 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
307 $arg = $arg->id;
308 }
3a2ebbf4 309 # Remove the reading from the graphs.
c1915ab9 310 $self->_graphcalc_done(0);
4e483aa5 311 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 312 $self->sequence->delete_vertex( $arg );
22222af9 313 $self->relations->delete_reading( $arg );
3a2ebbf4 314
315 # Carry on.
316 $self->$orig( $arg );
317};
7854e12e 318
4e483aa5 319=begin testing
320
321use Text::Tradition;
322
323my $cxfile = 't/data/Collatex-16.xml';
324my $t = Text::Tradition->new(
325 'name' => 'inline',
326 'input' => 'CollateX',
327 'file' => $cxfile,
328 );
329my $c = $t->collation;
330
331my $rno = scalar $c->readings;
332# Split n21 for testing purposes
333my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
334my $old_r = $c->reading( 'n21' );
335$old_r->alter_text( 'to' );
336$c->del_path( 'n20', 'n21', 'A' );
337$c->add_path( 'n20', 'n21p0', 'A' );
338$c->add_path( 'n21p0', 'n21', 'A' );
339$c->flatten_ranks();
340ok( $c->reading( 'n21p0' ), "New reading exists" );
341is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
342
679f17e1 343# Combine n3 and n4 ( with his )
4e483aa5 344$c->merge_readings( 'n3', 'n4', 1 );
345ok( !$c->reading('n4'), "Reading n4 is gone" );
346is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
347
679f17e1 348# Collapse n9 and n10 ( rood / root )
349$c->merge_readings( 'n9', 'n10' );
350ok( !$c->reading('n10'), "Reading n10 is gone" );
351is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 352
353# Combine n21 and n21p0
354my $remaining = $c->reading('n21');
355$remaining ||= $c->reading('n22'); # one of these should still exist
356$c->merge_readings( 'n21p0', $remaining, 1 );
357ok( !$c->reading('n21'), "Reading $remaining is gone" );
358is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
359
360=end testing
361
362=cut
7854e12e 363
3a2ebbf4 364sub merge_readings {
365 my $self = shift;
366
367 # We only need the IDs for adding paths to the graph, not the reading
368 # objects themselves.
4e483aa5 369 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 370 $self->_graphcalc_done(0);
3a2ebbf4 371
372 # The kept reading should inherit the paths and the relationships
373 # of the deleted reading.
374 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
375 my @vector = ( $kept );
376 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
377 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 378 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 379 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
380 $self->sequence->add_edge( @vector );
381 my $fwits = $self->sequence->get_edge_attributes( @vector );
382 @wits{keys %$fwits} = values %$fwits;
383 $self->sequence->set_edge_attributes( @vector, \%wits );
384 }
22222af9 385 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 386
387 # Do the deletion deed.
4e483aa5 388 if( $combine ) {
49d4f2ac 389 my $kept_obj = $self->reading( $kept );
4e483aa5 390 my $del_obj = $self->reading( $deleted );
391 my $joinstr = $combine_char;
392 unless( defined $joinstr ) {
393 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
394 $joinstr = $self->wordsep unless defined $joinstr;
395 }
396 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 397 }
3a2ebbf4 398 $self->del_reading( $deleted );
399}
7854e12e 400
3265b0ce 401
3a2ebbf4 402# Helper function for manipulating the graph.
403sub _stringify_args {
4e483aa5 404 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 405 $first = $first->id
406 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
407 $second = $second->id
408 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 409 return( $first, $second, @args );
3a2ebbf4 410}
df6d9812 411
4e5a7b2c 412# Helper function for manipulating the graph.
413sub _objectify_args {
414 my( $self, $first, $second, $arg ) = @_;
415 $first = $self->reading( $first )
416 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
417 $second = $self->reading( $second )
418 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
419 return( $first, $second, $arg );
420}
3a2ebbf4 421### Path logic
422
423sub add_path {
424 my $self = shift;
425
426 # We only need the IDs for adding paths to the graph, not the reading
427 # objects themselves.
428 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
429
c1915ab9 430 $self->_graphcalc_done(0);
3a2ebbf4 431 # Connect the readings
432 $self->sequence->add_edge( $source, $target );
433 # Note the witness in question
434 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 435};
436
3a2ebbf4 437sub del_path {
438 my $self = shift;
49d4f2ac 439 my @args;
440 if( ref( $_[0] ) eq 'ARRAY' ) {
441 my $e = shift @_;
442 @args = ( @$e, @_ );
443 } else {
444 @args = @_;
445 }
3a2ebbf4 446
447 # We only need the IDs for adding paths to the graph, not the reading
448 # objects themselves.
49d4f2ac 449 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 450
c1915ab9 451 $self->_graphcalc_done(0);
3a2ebbf4 452 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 453 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 454 }
455 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
456 $self->sequence->delete_edge( $source, $target );
457 }
784877d9 458}
459
3a2ebbf4 460
15d2d3df 461# Extra graph-alike utility
462sub has_path {
3a2ebbf4 463 my $self = shift;
464 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
465 return undef unless $self->sequence->has_edge( $source, $target );
466 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 467}
468
4e5a7b2c 469=head2 clear_witness( @sigil_list )
3a2ebbf4 470
4e5a7b2c 471Clear the given witnesses out of the collation entirely, removing references
472to them in paths, and removing readings that belong only to them. Should only
473be called via $tradition->del_witness.
3a2ebbf4 474
475=cut
476
4e5a7b2c 477sub clear_witness {
478 my( $self, @sigils ) = @_;
479
c1915ab9 480 $self->_graphcalc_done(0);
4e5a7b2c 481 # Clear the witness(es) out of the paths
482 foreach my $e ( $self->paths ) {
483 foreach my $sig ( @sigils ) {
484 $self->del_path( $e, $sig );
485 }
486 }
487
488 # Clear out the newly unused readings
489 foreach my $r ( $self->readings ) {
490 unless( $self->reading_witnesses( $r ) ) {
491 $self->del_reading( $r );
492 }
493 }
494}
3a2ebbf4 495
496sub add_relationship {
497 my $self = shift;
22222af9 498 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 499 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 500 $self->_graphcalc_done(0);
63778331 501 return @vectors;
22222af9 502}
ef9d481f 503
ca6e6095 504around qw/ get_relationship del_relationship / => sub {
505 my $orig = shift;
506 my $self = shift;
507 my @args = @_;
508 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
509 @args = @{$_[0]};
510 }
511 my( $source, $target ) = $self->_stringify_args( @args );
512 $self->$orig( $source, $target );
513};
514
22222af9 515=head2 reading_witnesses( $reading )
910a0a6d 516
22222af9 517Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 518
22222af9 519=cut
1d310495 520
1d310495 521sub reading_witnesses {
522 my( $self, $reading ) = @_;
523 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 524 # arbitrarily chosen "incoming". Thus, special-case the start node.
525 if( $reading eq $self->start ) {
526 return map { $_->sigil } $self->tradition->witnesses;
527 }
1d310495 528 my %all_witnesses;
529 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
530 my $wits = $self->sequence->get_edge_attributes( @$e );
531 @all_witnesses{ keys %$wits } = 1;
532 }
c12bb878 533 my $acstr = $self->ac_label;
534 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
535 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
536 }
1d310495 537 return keys %all_witnesses;
910a0a6d 538}
539
4e5a7b2c 540=head1 OUTPUT METHODS
8e1394aa 541
0ecb975c 542=head2 as_svg( \%options )
8e1394aa 543
0068967c 544Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 545See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 546
547=cut
548
549sub as_svg {
0ecb975c 550 my( $self, $opts ) = @_;
bfcbcecb 551 throw( "Need GraphViz installed to output SVG" )
552 unless File::Which::which( 'dot' );
e247aad1 553 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
5f082716 554 $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
e247aad1 555 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 556 my @cmd = qw/dot -Tsvg/;
557 my( $svg, $err );
558 my $dotfile = File::Temp->new();
559 ## USE FOR DEBUGGING
560 # $dotfile->unlink_on_destroy(0);
561 binmode $dotfile, ':utf8';
562 print $dotfile $self->as_dot( $opts );
563 push( @cmd, $dotfile->filename );
564 run( \@cmd, ">", binary(), \$svg );
e247aad1 565 $svg = decode_utf8( $svg );
566 $self->cached_svg( $svg ) unless $want_subgraph;
567 return $svg;
568 } else {
569 return $self->cached_svg;
b365fbae 570 }
8e1394aa 571}
572
b22576c6 573
0ecb975c 574=head2 as_dot( \%options )
b22576c6 575
0ecb975c 576Returns a string that is the collation graph expressed in dot
577(i.e. GraphViz) format. Options include:
b22576c6 578
0ecb975c 579=over 4
b22576c6 580
0ecb975c 581=item * from
b22576c6 582
0ecb975c 583=item * to
df6d9812 584
0ecb975c 585=item * color_common
586
587=back
df6d9812 588
589=cut
590
591sub as_dot {
0ecb975c 592 my( $self, $opts ) = @_;
593 my $startrank = $opts->{'from'} if $opts;
594 my $endrank = $opts->{'to'} if $opts;
595 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 596 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
597 && $self->end->rank > 100;
6648ee3d 598 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 599
b22576c6 600 # Check the arguments
601 if( $startrank ) {
602 return if $endrank && $startrank > $endrank;
603 return if $startrank > $self->end->rank;
604 }
605 if( defined $endrank ) {
606 return if $endrank < 0;
f1b3b33a 607 $endrank = undef if $endrank == $self->end->rank;
b22576c6 608 }
609
67da8d6c 610 my $graph_name = $self->tradition->name;
611 $graph_name =~ s/[^\w\s]//g;
612 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 613
614 my %graph_attrs = (
615 'rankdir' => 'LR',
616 'bgcolor' => 'none',
617 );
618 my %node_attrs = (
b8990398 619 'fontsize' => 14,
f13b5582 620 'fillcolor' => 'white',
621 'style' => 'filled',
622 'shape' => 'ellipse'
623 );
624 my %edge_attrs = (
625 'arrowhead' => 'open',
626 'color' => '#000000',
627 'fontcolor' => '#000000',
628 );
629
67da8d6c 630 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 631 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
632 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 633
b22576c6 634 # Output substitute start/end readings if necessary
635 if( $startrank ) {
636 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
637 }
638 if( $endrank ) {
639 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
640 }
b365fbae 641 if( $STRAIGHTENHACK ) {
642 ## HACK part 1
6648ee3d 643 my $startlabel = $startrank ? 'SUBSTART' : 'START';
644 $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
b365fbae 645 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
646 }
b22576c6 647 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 648 # Sort the readings by rank if we have ranks; this speeds layout.
649 my @all_readings = $self->end->has_rank
650 ? sort { $a->rank <=> $b->rank } $self->readings
651 : $self->readings;
4633f9e4 652 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 653 foreach my $reading ( @all_readings ) {
b22576c6 654 # Only output readings within our rank range.
655 next if $startrank && $reading->rank < $startrank;
656 next if $endrank && $reading->rank > $endrank;
657 $used{$reading->id} = 1;
910a0a6d 658 # Need not output nodes without separate labels
3a2ebbf4 659 next if $reading->id eq $reading->text;
d4b75f44 660 my $rattrs;
30f0df34 661 my $label = $reading->text;
629e27b0 662 $label .= '-' if $reading->join_next;
663 $label = "-$label" if $reading->join_prior;
8f9cab7b 664 $label =~ s/\"/\\\"/g;
d4b75f44 665 $rattrs->{'label'} = $label;
0ecb975c 666 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 667 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 668 }
3a2ebbf4 669
30ddc24c 670 # Add the real edges. Need to weight one edge per rank jump, in a
671 # continuous line.
b365fbae 672 # my $weighted = $self->_add_edge_weights;
b22576c6 673 my @edges = $self->paths;
3bdec618 674 my( %substart, %subend );
b22576c6 675 foreach my $edge ( @edges ) {
676 # Do we need to output this edge?
508fd430 677 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 678 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 679 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 680
b22576c6 681 # Account for the rank gap if necessary
30ddc24c 682 my $rank0 = $self->reading( $edge->[0] )->rank
683 if $self->reading( $edge->[0] )->has_rank;
684 my $rank1 = $self->reading( $edge->[1] )->rank
685 if $self->reading( $edge->[1] )->has_rank;
686 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
687 $variables->{'minlen'} = $rank1 - $rank0;
688 }
689
690 # Add the calculated edge weights
b365fbae 691 # if( exists $weighted->{$edge->[0]}
e247aad1 692 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
693 # # $variables->{'color'} = 'red';
694 # $variables->{'weight'} = 3.0;
695 # }
30ddc24c 696
508fd430 697 # EXPERIMENTAL: make edge width reflect no. of witnesses
698 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
699 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
700
f13b5582 701 my $varopts = _dot_attr_string( $variables );
702 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
703 $edge->[0], $edge->[1], $varopts );
3bdec618 704 } elsif( $used{$edge->[0]} ) {
705 $subend{$edge->[0]} = 1;
706 } elsif( $used{$edge->[1]} ) {
707 $substart{$edge->[1]} = 1;
b22576c6 708 }
df6d9812 709 }
3bdec618 710 # Add substitute start and end edges if necessary
711 foreach my $node ( keys %substart ) {
027d819c 712 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 713 my $variables = { %edge_attrs, 'label' => $witstr };
714 my $varopts = _dot_attr_string( $variables );
715 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 716 }
717 foreach my $node ( keys %subend ) {
027d819c 718 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 719 my $variables = { %edge_attrs, 'label' => $witstr };
720 my $varopts = _dot_attr_string( $variables );
721 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 722 }
b365fbae 723 # HACK part 2
724 if( $STRAIGHTENHACK ) {
6648ee3d 725 my $endlabel = $endrank ? 'SUBEND' : 'END';
726 $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 727 }
30ddc24c 728
df6d9812 729 $dot .= "}\n";
730 return $dot;
731}
732
f13b5582 733sub _dot_attr_string {
734 my( $hash ) = @_;
735 my @attrs;
736 foreach my $k ( sort keys %$hash ) {
737 my $v = $hash->{$k};
738 push( @attrs, $k.'="'.$v.'"' );
739 }
740 return( '[ ' . join( ', ', @attrs ) . ' ]' );
741}
742
30ddc24c 743sub _add_edge_weights {
744 my $self = shift;
745 # Walk the graph from START to END, choosing the successor node with
746 # the largest number of witness paths each time.
747 my $weighted = {};
748 my $curr = $self->start->id;
008fc8a6 749 my $ranked = $self->end->has_rank;
30ddc24c 750 while( $curr ne $self->end->id ) {
008fc8a6 751 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 752 my @succ = sort { $self->path_witnesses( $curr, $a )
753 <=> $self->path_witnesses( $curr, $b ) }
754 $self->sequence->successors( $curr );
755 my $next = pop @succ;
008fc8a6 756 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 757 # Try to avoid lacunae in the weighted path.
008fc8a6 758 while( @succ &&
759 ( $self->reading( $next )->is_lacuna ||
760 $nextrank - $rank > 1 ) ){
30ddc24c 761 $next = pop @succ;
762 }
763 $weighted->{$curr} = $next;
764 $curr = $next;
765 }
766 return $weighted;
767}
768
027d819c 769=head2 path_witnesses( $edge )
770
771Returns the list of sigils whose witnesses are associated with the given edge.
772The edge can be passed as either an array or an arrayref of ( $source, $target ).
773
774=cut
775
3a2ebbf4 776sub path_witnesses {
777 my( $self, @edge ) = @_;
778 # If edge is an arrayref, cope.
779 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
780 my $e = shift @edge;
781 @edge = @$e;
782 }
783 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 784 return @wits;
3a2ebbf4 785}
786
7f9f05e8 787# Helper function. Make a display label for the given witnesses, showing a.c.
788# witnesses only where the main witness is not also in the list.
027d819c 789sub _path_display_label {
508fd430 790 my $self = shift;
7f9f05e8 791 my %wits;
792 map { $wits{$_} = 1 } @_;
793
794 # If an a.c. wit is listed, remove it if the main wit is also listed.
795 # Otherwise keep it for explicit listing.
796 my $aclabel = $self->ac_label;
797 my @disp_ac;
798 foreach my $w ( sort keys %wits ) {
799 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
800 if( exists $wits{$1} ) {
801 delete $wits{$w};
802 } else {
803 push( @disp_ac, $w );
804 }
805 }
806 }
807
808 # See if we are in a majority situation.
8f9cab7b 809 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 810 if( scalar keys %wits > $maj ) {
811 unshift( @disp_ac, 'majority' );
812 return join( ', ', @disp_ac );
8f9cab7b 813 } else {
7f9f05e8 814 return join( ', ', sort keys %wits );
8f9cab7b 815 }
816}
1dd07bda 817
bf6e338d 818=head2 readings_at_rank( $rank )
1dd07bda 819
bf6e338d 820Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 821
822=cut
823
bf6e338d 824sub readings_at_rank {
1dd07bda 825 my( $self, $rank ) = @_;
bf6e338d 826 my $table = $self->alignment_table;
827 # Table rank is real rank - 1.
828 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
829 my %readings;
830 foreach my $e ( @elements ) {
831 next unless ref( $e ) eq 'HASH';
832 next unless exists $e->{'t'};
833 $readings{$e->{'t'}->id} = $e->{'t'};
834 }
835 return values %readings;
1dd07bda 836}
8f9cab7b 837
4e5a7b2c 838=head2 as_graphml
8e1394aa 839
4e5a7b2c 840Returns a GraphML representation of the collation. The GraphML will contain
841two graphs. The first expresses the attributes of the readings and the witness
842paths that link them; the second expresses the relationships that link the
843readings. This is the native transfer format for a tradition.
8e1394aa 844
56eefa04 845=begin testing
846
847use Text::Tradition;
848
849my $READINGS = 311;
850my $PATHS = 361;
851
852my $datafile = 't/data/florilegium_tei_ps.xml';
853my $tradition = Text::Tradition->new( 'input' => 'TEI',
854 'name' => 'test0',
855 'file' => $datafile,
856 'linear' => 1 );
857
858ok( $tradition, "Got a tradition object" );
859is( scalar $tradition->witnesses, 13, "Found all witnesses" );
860ok( $tradition->collation, "Tradition has a collation" );
861
862my $c = $tradition->collation;
863is( scalar $c->readings, $READINGS, "Collation has all readings" );
864is( scalar $c->paths, $PATHS, "Collation has all paths" );
865is( scalar $c->relationships, 0, "Collation has all relationships" );
866
867# Add a few relationships
868$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
869$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
870$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
871
872# Now write it to GraphML and parse it again.
873
874my $graphml = $c->as_graphml;
875my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
876is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
877is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
878is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
879
880=end testing
881
8e1394aa 882=cut
883
884sub as_graphml {
a30ca502 885 my( $self, $options ) = @_;
3d14b48e 886 $self->calculate_ranks unless $self->_graphcalc_done;
887
a30ca502 888 my $start = $options->{'from'}
889 ? $self->reading( $options->{'from'} ) : $self->start;
890 my $end = $options->{'to'}
891 ? $self->reading( $options->{'to'} ) : $self->end;
892 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
893 throw( 'Start node must be before end node' );
894 }
895 # The readings need to be ranked for this to work.
896 $start = $self->start unless $start->has_rank;
897 $end = $self->end unless $end->has_rank;
414cc046 898 my $rankoffset = 0;
899 unless( $start eq $self->start ) {
900 $rankoffset = $start->rank - 1;
901 }
a30ca502 902 my %use_readings;
903
8e1394aa 904 # Some namespaces
905 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
906 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
907 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 908 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 909
910 # Create the document and root node
911 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
912 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
913 $graphml->setDocumentElement( $root );
914 $root->setNamespace( $xsi_ns, 'xsi', 0 );
915 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 916
917 # List of attribute types to save on our objects and their corresponding
918 # GraphML types
919 my %save_types = (
920 'Str' => 'string',
921 'Int' => 'int',
922 'Bool' => 'boolean',
923 'RelationshipType' => 'string',
924 'RelationshipScope' => 'string',
925 );
926
927 # List of attribute names *not* to save on our objects.
928 # We will also not save any attribute beginning with _.
929 my %skipsave;
930 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 931
bbd064a9 932 # Add the data keys for the graph. Include an extra key 'version' for the
933 # GraphML output version.
e309421a 934 my %graph_data_keys;
935 my $gdi = 0;
bbd064a9 936 my %graph_attributes = ( 'version' => 'string' );
937 # Graph attributes include those of Tradition and those of Collation.
938 my %gattr_from;
939 my $tmeta = $self->tradition->meta;
940 my $cmeta = $self->meta;
941 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
942 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
943 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
944 next if $attr->name =~ /^_/;
945 next if $skipsave{$attr->name};
946 next unless $save_types{$attr->type_constraint->name};
947 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
948 }
949
950 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 951 $graph_data_keys{$datum} = 'dg'.$gdi++;
952 my $key = $root->addNewChild( $graphml_ns, 'key' );
953 $key->setAttribute( 'attr.name', $datum );
bbd064a9 954 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 955 $key->setAttribute( 'for', 'graph' );
956 $key->setAttribute( 'id', $graph_data_keys{$datum} );
957 }
f6066bac 958
bbd064a9 959 # Add the data keys for reading nodes
960 my %reading_attributes;
961 my $rmeta = Text::Tradition::Collation::Reading->meta;
962 foreach my $attr( $rmeta->get_all_attributes ) {
963 next if $attr->name =~ /^_/;
964 next if $skipsave{$attr->name};
965 next unless $save_types{$attr->type_constraint->name};
966 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
967 }
ef9d481f 968 my %node_data_keys;
969 my $ndi = 0;
bbd064a9 970 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 971 $node_data_keys{$datum} = 'dn'.$ndi++;
972 my $key = $root->addNewChild( $graphml_ns, 'key' );
973 $key->setAttribute( 'attr.name', $datum );
bbd064a9 974 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 975 $key->setAttribute( 'for', 'node' );
976 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 977 }
978
bbd064a9 979 # Add the data keys for edges, that is, paths and relationships. Path
980 # data does not come from a Moose class so is here manually.
ef9d481f 981 my $edi = 0;
982 my %edge_data_keys;
bbd064a9 983 my %edge_attributes = (
3a2ebbf4 984 witness => 'string', # ID/label for a path
3a2ebbf4 985 extra => 'boolean', # Path key
3a2ebbf4 986 );
bbd064a9 987 my @path_attributes = keys %edge_attributes; # track our manual additions
988 my $pmeta = Text::Tradition::Collation::Relationship->meta;
989 foreach my $attr( $pmeta->get_all_attributes ) {
990 next if $attr->name =~ /^_/;
991 next if $skipsave{$attr->name};
992 next unless $save_types{$attr->type_constraint->name};
993 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
994 }
995 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 996 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 997 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 998 $key->setAttribute( 'attr.name', $datum );
bbd064a9 999 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1000 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1001 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1002 }
3a2ebbf4 1003
cc31ebaa 1004 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1005 my $xmlidname = $self->tradition->name;
1006 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1007 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1008 $xmlidname = '_'.$xmlidname;
1009 }
2c669bca 1010 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1011 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1012 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1013 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1014 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1015 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1016 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1017 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1018
e309421a 1019 # Collation attribute data
bbd064a9 1020 foreach my $datum ( keys %graph_attributes ) {
1021 my $value;
1022 if( $datum eq 'version' ) {
1023 $value = '3.1';
1024 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1025 $value = $self->tradition->$datum;
1026 } else {
1027 $value = $self->$datum;
1028 }
2c669bca 1029 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1030 }
8e1394aa 1031
1032 my $node_ctr = 0;
1033 my %node_hash;
22222af9 1034 # Add our readings to the graph
3a2ebbf4 1035 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1036 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1037 ( $n->rank < $start->rank || $n->rank > $end->rank );
1038 $use_readings{$n->id} = 1;
2c669bca 1039 # Add to the main graph
1040 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1041 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1042 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1043 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1044 foreach my $d ( keys %reading_attributes ) {
255875b8 1045 my $nval = $n->$d;
cc31ebaa 1046 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1047 # Adjust the ranks within the subgraph.
cc31ebaa 1048 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1049 : $nval - $rankoffset;
414cc046 1050 }
255875b8 1051 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1052 if defined $nval;
1053 }
b15511bf 1054 }
1055
2c669bca 1056 # Add the path edges to the sequence graph
df6d9812 1057 my $edge_ctr = 0;
cc31ebaa 1058 $DB::single = 1;
3a2ebbf4 1059 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1060 # We add an edge in the graphml for every witness in $e.
a30ca502 1061 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1062 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1063 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1064 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1065 # Skip any path from start to end; that witness is not in the subgraph.
1066 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1067 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1068 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1069 $node_hash{ $e->[0] },
1070 $node_hash{ $e->[1] } );
2c669bca 1071 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1072 $edge_el->setAttribute( 'source', $from );
1073 $edge_el->setAttribute( 'target', $to );
1074 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1075
1076 # It's a witness path, so add the witness
1077 my $base = $wit;
1078 my $key = $edge_data_keys{'witness'};
1079 # Is this an ante-corr witness?
1080 my $aclabel = $self->ac_label;
1081 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1082 # Keep the base witness
1083 $base = $1;
1084 # ...and record that this is an 'extra' reading path
1085 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1086 }
1087 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1088 }
1089 }
1090
cc31ebaa 1091 # Report the actual number of nodes and edges that went in
1092 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1093 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1094
22222af9 1095 # Add the relationship graph to the XML
bbd064a9 1096 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1097 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1098 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1099
94c00c71 1100 # Save and return the thing
1101 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1102 return $result;
df6d9812 1103}
1104
b15511bf 1105sub _add_graphml_data {
1106 my( $el, $key, $value ) = @_;
b15511bf 1107 return unless defined $value;
c9bf3dbf 1108 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1109 $data_el->setAttribute( 'key', $key );
1110 $data_el->appendText( $value );
8e1394aa 1111}
1112
4e5a7b2c 1113=head2 as_csv
910a0a6d 1114
1115Returns a CSV alignment table representation of the collation graph, one
2c669bca 1116row per witness (or witness uncorrected.)
910a0a6d 1117
1118=cut
1119
1120sub as_csv {
3a2ebbf4 1121 my( $self ) = @_;
1dd07bda 1122 my $table = $self->alignment_table;
82fa4d57 1123 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1124 my @result;
2c669bca 1125 # Make the header row
1126 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1127 push( @result, decode_utf8( $csv->string ) );
1128 # Make the rest of the rows
1129 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1130 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1131 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1132 $csv->combine( @row );
910a0a6d 1133 push( @result, decode_utf8( $csv->string ) );
1134 }
3a2ebbf4 1135 return join( "\n", @result );
910a0a6d 1136}
1137
1dd07bda 1138=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1139
566f4595 1140Return a reference to an alignment table, in a slightly enhanced CollateX
1141format which looks like this:
1142
1143 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1144 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1145 { witness => "SIG2",
4e5a7b2c 1146 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1147 ... ],
1148 length => TEXTLEN };
1149
1150If $use_refs is set to 1, the reading object is returned in the table
1151instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1152
1153If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1154keys have a true hash value will be included.
2c669bca 1155
1156=cut
9f3ba6f7 1157
1dd07bda 1158sub alignment_table {
1159 my( $self ) = @_;
c1915ab9 1160 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1161 return $self->cached_table if $self->has_cached_table;
1162
0ecb975c 1163 # Make sure we can do this
1164 throw( "Need a linear graph in order to make an alignment table" )
1165 unless $self->linear;
1166 $self->calculate_ranks unless $self->end->has_rank;
1167
2c669bca 1168 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1169 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1170 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1171 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1172 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1173 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1174 push( @{$table->{'alignment'}},
1175 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1176 if( $wit->is_layered ) {
1177 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1178 $wit->sigil.$self->ac_label );
1dd07bda 1179 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1180 push( @{$table->{'alignment'}},
1181 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1182 }
1183 }
1dd07bda 1184 $self->cached_table( $table );
1185 return $table;
910a0a6d 1186}
1187
1188sub _make_witness_row {
1dd07bda 1189 my( $path, $positions ) = @_;
910a0a6d 1190 my %char_hash;
1191 map { $char_hash{$_} = undef } @$positions;
2c669bca 1192 my $debug = 0;
910a0a6d 1193 foreach my $rdg ( @$path ) {
eca16057 1194 my $rtext = $rdg->text;
1195 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1196 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1197 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1198 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1199 }
1200 my @row = map { $char_hash{$_} } @$positions;
eca16057 1201 # Fill in lacuna markers for undef spots in the row
1202 my $last_el = shift @row;
1203 my @filled_row = ( $last_el );
1204 foreach my $el ( @row ) {
0e476982 1205 # If we are using node reference, make the lacuna node appear many times
1206 # in the table. If not, use the lacuna tag.
1dd07bda 1207 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1208 $el = $last_el;
eca16057 1209 }
1210 push( @filled_row, $el );
1211 $last_el = $el;
1212 }
1213 return @filled_row;
910a0a6d 1214}
1215
4e5a7b2c 1216=head1 NAVIGATION METHODS
910a0a6d 1217
4e5a7b2c 1218=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1219
1220Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1221with $last, for the witness given in $sigil. If a $backup sigil is
1222specified (e.g. when walking a layered witness), it will be used wherever
1223no $sigil path exists. If there is a base text reading, that will be
1224used wherever no path exists for $sigil or $backup.
e2902068 1225
1226=cut
1227
910a0a6d 1228# TODO Think about returning some lazy-eval iterator.
b0b4421a 1229# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1230
e2902068 1231sub reading_sequence {
861c3e27 1232 my( $self, $start, $end, $witness ) = @_;
e2902068 1233
930ff666 1234 $witness = $self->baselabel unless $witness;
e2902068 1235 my @readings = ( $start );
1236 my %seen;
1237 my $n = $start;
3a2ebbf4 1238 while( $n && $n->id ne $end->id ) {
1239 if( exists( $seen{$n->id} ) ) {
63778331 1240 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1241 }
3a2ebbf4 1242 $seen{$n->id} = 1;
910a0a6d 1243
861c3e27 1244 my $next = $self->next_reading( $n, $witness );
44771cf2 1245 unless( $next ) {
63778331 1246 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1247 }
910a0a6d 1248 push( @readings, $next );
1249 $n = $next;
e2902068 1250 }
1251 # Check that the last reading is our end reading.
1252 my $last = $readings[$#readings];
63778331 1253 throw( "Last reading found from " . $start->text .
1254 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1255 unless $last->id eq $end->id;
e2902068 1256
1257 return @readings;
1258}
1259
4e5a7b2c 1260=head2 next_reading( $reading, $sigil );
8e1394aa 1261
4a8828f0 1262Returns the reading that follows the given reading along the given witness
930ff666 1263path.
8e1394aa 1264
1265=cut
1266
4a8828f0 1267sub next_reading {
e2902068 1268 # Return the successor via the corresponding path.
8e1394aa 1269 my $self = shift;
3a2ebbf4 1270 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1271 return undef unless $answer;
3a2ebbf4 1272 return $self->reading( $answer );
8e1394aa 1273}
1274
4e5a7b2c 1275=head2 prior_reading( $reading, $sigil )
8e1394aa 1276
4a8828f0 1277Returns the reading that precedes the given reading along the given witness
930ff666 1278path.
8e1394aa 1279
1280=cut
1281
4a8828f0 1282sub prior_reading {
e2902068 1283 # Return the predecessor via the corresponding path.
8e1394aa 1284 my $self = shift;
3a2ebbf4 1285 my $answer = $self->_find_linked_reading( 'prior', @_ );
1286 return $self->reading( $answer );
8e1394aa 1287}
1288
4a8828f0 1289sub _find_linked_reading {
861c3e27 1290 my( $self, $direction, $node, $path ) = @_;
1291
1292 # Get a backup if we are dealing with a layered witness
1293 my $alt_path;
1294 my $aclabel = $self->ac_label;
1295 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1296 $alt_path = $1;
1297 }
1298
e2902068 1299 my @linked_paths = $direction eq 'next'
3a2ebbf4 1300 ? $self->sequence->edges_from( $node )
1301 : $self->sequence->edges_to( $node );
e2902068 1302 return undef unless scalar( @linked_paths );
8e1394aa 1303
e2902068 1304 # We have to find the linked path that contains all of the
1305 # witnesses supplied in $path.
1306 my( @path_wits, @alt_path_wits );
4e5a7b2c 1307 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1308 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1309 my $base_le;
1310 my $alt_le;
1311 foreach my $le ( @linked_paths ) {
3a2ebbf4 1312 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1313 $base_le = $le;
910a0a6d 1314 }
508fd430 1315 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1316 if( _is_within( \@path_wits, \@le_wits ) ) {
1317 # This is the right path.
1318 return $direction eq 'next' ? $le->[1] : $le->[0];
1319 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1320 $alt_le = $le;
1321 }
8e1394aa 1322 }
e2902068 1323 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1324 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1325 if $alt_le;
e2902068 1326
1327 # Got this far? Return the base path if it exists.
3a2ebbf4 1328 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1329 if $base_le;
e2902068 1330
1331 # Got this far? We have no appropriate path.
2c669bca 1332 warn "Could not find $direction node from " . $node->id
910a0a6d 1333 . " along path $path";
8e1394aa 1334 return undef;
1335}
1336
4a8828f0 1337# Some set logic.
1338sub _is_within {
1339 my( $set1, $set2 ) = @_;
7854e12e 1340 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1341 foreach my $el ( @$set1 ) {
910a0a6d 1342 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1343 }
1344 return $ret;
1345}
1346
4e5a7b2c 1347# Return the string that joins together a list of witnesses for
1348# display on a single path.
1349sub _witnesses_of_label {
1350 my( $self, $label ) = @_;
1351 my $regex = $self->wit_list_separator;
1352 my @answer = split( /\Q$regex\E/, $label );
1353 return @answer;
b0b4421a 1354}
1355
d4b75f44 1356=head2 common_readings
1357
1358Returns the list of common readings in the graph (i.e. those readings that are
1359shared by all non-lacunose witnesses.)
1360
1361=cut
1362
1363sub common_readings {
1364 my $self = shift;
1365 my @common = grep { $_->is_common } $self->readings;
1366 return @common;
1367}
1368
fae52efd 1369=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1370
1371Returns the text of a witness (plus its backup, if we are using a layer)
1372as stored in the collation. The text is returned as a string, where the
1373individual readings are joined with spaces and the meta-readings (e.g.
1374lacunae) are omitted. Optional specification of $start and $end allows
1375the generation of a subset of the witness text.
4e5a7b2c 1376
b0b4421a 1377=cut
1378
1379sub path_text {
861c3e27 1380 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1381 $start = $self->start unless $start;
1382 $end = $self->end unless $end;
861c3e27 1383 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1384 my $pathtext = '';
1385 my $last;
1386 foreach my $r ( @path ) {
1387 if( $r->join_prior || !$last || $last->join_next ) {
1388 $pathtext .= $r->text;
1389 } else {
1390 $pathtext .= ' ' . $r->text;
1391 }
1392 $last = $r;
1393 }
1394 return $pathtext;
b0b4421a 1395}
4e5a7b2c 1396
1397=head1 INITIALIZATION METHODS
1398
1399These are mostly for use by parsers.
1400
1401=head2 make_witness_path( $witness )
1402
1403Link the array of readings contained in $witness->path (and in
1404$witness->uncorrected_path if it exists) into collation paths.
1405Clear out the arrays when finished.
de51424a 1406
4e5a7b2c 1407=head2 make_witness_paths
1408
1409Call make_witness_path for all witnesses in the tradition.
1410
1411=cut
930ff666 1412
7e450e44 1413# For use when a collation is constructed from a base text and an apparatus.
1414# We have the sequences of readings and just need to add path edges.
1f7aa795 1415# When we are done, clear out the witness path attributes, as they are no
1416# longer needed.
1417# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1418
6a222840 1419sub make_witness_paths {
1420 my( $self ) = @_;
910a0a6d 1421 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1422 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1423 $self->make_witness_path( $wit );
7854e12e 1424 }
7854e12e 1425}
1426
6a222840 1427sub make_witness_path {
7854e12e 1428 my( $self, $wit ) = @_;
1429 my @chain = @{$wit->path};
15d2d3df 1430 my $sig = $wit->sigil;
fae52efd 1431 # Add start and end if necessary
1432 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1433 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1434 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1435 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1436 }
1f7aa795 1437 if( $wit->is_layered ) {
d9e873d0 1438 @chain = @{$wit->uncorrected_path};
fae52efd 1439 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1440 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1441 foreach my $idx( 0 .. $#chain-1 ) {
1442 my $source = $chain[$idx];
1443 my $target = $chain[$idx+1];
1444 $self->add_path( $source, $target, $sig.$self->ac_label )
1445 unless $self->has_path( $source, $target, $sig );
1446 }
15d2d3df 1447 }
1f7aa795 1448 $wit->clear_path;
1449 $wit->clear_uncorrected_path;
e2902068 1450}
1451
414cc046 1452=head2 equivalence_graph( \%readingmap, $startrank, $endrank, @testvector )
ac4d7ac2 1453
1454Returns an equivalence graph of the collation, in which all readings
1455related via a 'colocated' relationship are transformed into a single
1456vertex. Can be used to determine the validity of a new relationship. The
1457mapping between equivalence vertices and reading IDs will be stored in the
1458hash whose reference is passed as readingmap. For a subset of the graph,
1459pass in a start and/or an ending rank (this only works if L<calculate_ranks>
1460has been called at least once.)
1461
414cc046 1462It is also possible to pass in a test relationship in @testvector, and get
1463the resulting equivalence graph before the relationship has been made.
1464
ac4d7ac2 1465=cut
1466
1467sub equivalence_graph {
414cc046 1468 my( $self, $map, $start, $end, @newvector ) = @_;
ac4d7ac2 1469 $start = undef unless $self->end->has_rank;
1470 $end = undef unless $self->end->has_rank;
ac4d7ac2 1471 my $eqgraph = Graph->new();
1472 my $rel_ctr = 0;
1473 # Add the nodes
1474 foreach my $r ( $self->readings ) {
1475 unless( $r eq $self->start || $r eq $self->end ) {
1476 next if $start && $r->rank < $start;
1477 next if $end && $r->rank > $end;
1478 }
1479 next if exists $map->{$r->id};
414cc046 1480 my @rels = $self->related_readings( $r->id, 'colocated' );
1481 push( @rels, $r->id );
1482 # Make an equivalence vertex
1483 my $rn = 'equivalence_' . $rel_ctr++;
1484 $eqgraph->add_vertex( $rn );
1485 # Note which readings belong to this vertex.
1486 push( @rels, $r->id );
1487 foreach( @rels ) {
1488 $map->{$_} = $rn;
1489 }
ac4d7ac2 1490 }
1491
1492 # Add the edges.
1493 foreach my $p ( $self->paths ) {
1494 my $efrom = exists $map->{$p->[0]} ? $map->{$p->[0]}
1495 : $map->{$self->start->id};
1496 my $eto = exists $map->{$p->[1]} ? $map->{$p->[1]}
1497 : $map->{$self->end->id};
1498 $eqgraph->add_edge( $efrom, $eto );
1499 }
414cc046 1500
1501 # Collapse the vertices in @newvector if applicable.
1502 if( @newvector ) {
1503 my( $eqs, $eqt ) = map { $map->{$_} } @newvector;
1504 $DB::single = 1 unless $eqs && $eqt;
1505 unless( $eqs eq $eqt ) {
1506 # Combine the vertices.
1507 map { $eqgraph->add_edge( $eqs, $_ ) } $eqgraph->successors( $eqt );
1508 map { $eqgraph->add_edge( $_, $eqs ) } $eqgraph->predecessors( $eqt );
1509 $eqgraph->delete_vertex( $eqt );
1510 }
1511 }
ac4d7ac2 1512 return $eqgraph;
1513}
1514
4e5a7b2c 1515=head2 calculate_ranks
1516
1517Calculate the reading ranks (that is, their aligned positions relative
1518to each other) for the graph. This can only be called on linear collations.
1519
b365fbae 1520=begin testing
1521
1522use Text::Tradition;
1523
1524my $cxfile = 't/data/Collatex-16.xml';
1525my $t = Text::Tradition->new(
1526 'name' => 'inline',
1527 'input' => 'CollateX',
1528 'file' => $cxfile,
1529 );
1530my $c = $t->collation;
1531
1532# Make an svg
bfcbcecb 1533my $table = $c->alignment_table;
1534ok( $c->has_cached_table, "Alignment table was cached" );
1535is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1536$c->calculate_ranks;
bfcbcecb 1537is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1538$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1539isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1540
1541=end testing
1542
4e5a7b2c 1543=cut
1544
910a0a6d 1545sub calculate_ranks {
1546 my $self = shift;
b365fbae 1547 # Save the existing ranks, in case we need to invalidate the cached SVG.
1548 my %existing_ranks;
ac4d7ac2 1549 map { $existing_ranks{$_} = $_->rank } $self->readings;
910a0a6d 1550 # Walk a version of the graph where every node linked by a relationship
1551 # edge is fundamentally the same node, and do a topological ranking on
1552 # the nodes in this graph.
910a0a6d 1553 my %rel_containers;
ac4d7ac2 1554 my $topo_graph = $self->equivalence_graph( \%rel_containers );
910a0a6d 1555
1556 # Now do the rankings, starting with the start node.
3a2ebbf4 1557 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1558 my $node_ranks = { $topo_start => 0 };
910a0a6d 1559 my @curr_origin = ( $topo_start );
1560 # A little iterative function.
1561 while( @curr_origin ) {
c9bf3dbf 1562 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1563 }
1564 # Transfer our rankings from the topological graph to the real one.
1565 foreach my $r ( $self->readings ) {
3a2ebbf4 1566 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1567 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1568 } else {
63778331 1569 # Die. Find the last rank we calculated.
7ec89b0e 1570 my @all_defined = sort { ( $node_ranks->{$rel_containers{$a->id}}||-1 )
1571 <=> ( $node_ranks->{$rel_containers{$b->id}}||-1 ) }
63778331 1572 $self->readings;
1573 my $last = pop @all_defined;
1574 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1575 }
de51424a 1576 }
bfcbcecb 1577 # Do we need to invalidate the cached data?
1578 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1579 foreach my $r ( $self->readings ) {
7c293912 1580 next if defined( $existing_ranks{$r} )
1581 && $existing_ranks{$r} == $r->rank;
c1915ab9 1582 # Something has changed, so clear the cache
bfcbcecb 1583 $self->_clear_cache;
c1915ab9 1584 # ...and recalculate the common readings.
1585 $self->calculate_common_readings();
b365fbae 1586 last;
1587 }
1588 }
c1915ab9 1589 # The graph calculation information is now up to date.
1590 $self->_graphcalc_done(1);
8e1394aa 1591}
3a1f2523 1592
910a0a6d 1593sub _assign_rank {
c9bf3dbf 1594 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1595 # Look at each of the children of @current_nodes. If all the child's
1596 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1597 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1598 # parent gets a rank.
910a0a6d 1599 my @next_nodes;
1600 foreach my $c ( @current_nodes ) {
c9bf3dbf 1601 warn "Current reading $c has no rank!"
1602 unless exists $node_ranks->{$c};
1603 # print STDERR "Looking at child of node $c, rank "
1604 # . $node_ranks->{$c} . "\n";
1605 foreach my $child ( $graph->successors( $c ) ) {
1606 next if exists $node_ranks->{$child};
910a0a6d 1607 my $highest_rank = -1;
1608 my $skip = 0;
c9bf3dbf 1609 foreach my $parent ( $graph->predecessors( $child ) ) {
1610 if( exists $node_ranks->{$parent} ) {
1611 $highest_rank = $node_ranks->{$parent}
1612 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1613 } else {
1614 $skip = 1;
1615 last;
1616 }
1617 }
1618 next if $skip;
c9bf3dbf 1619 my $c_rank = $highest_rank + 1;
1620 # print STDERR "Assigning rank $c_rank to node $child \n";
1621 $node_ranks->{$child} = $c_rank;
910a0a6d 1622 push( @next_nodes, $child );
1623 }
1624 }
1625 return @next_nodes;
4cdd82f1 1626}
910a0a6d 1627
c1915ab9 1628sub _clear_cache {
1629 my $self = shift;
1630 $self->wipe_svg if $self->has_cached_svg;
1631 $self->wipe_table if $self->has_cached_table;
1632}
1633
1634
4e5a7b2c 1635=head2 flatten_ranks
1636
1637A convenience method for parsing collation data. Searches the graph for readings
1638with the same text at the same rank, and merges any that are found.
1639
1640=cut
1641
0e476982 1642sub flatten_ranks {
1643 my $self = shift;
1644 my %unique_rank_rdg;
bf6e338d 1645 my $changed;
0e476982 1646 foreach my $rdg ( $self->readings ) {
1647 next unless $rdg->has_rank;
1648 my $key = $rdg->rank . "||" . $rdg->text;
1649 if( exists $unique_rank_rdg{$key} ) {
1650 # Combine!
56eefa04 1651 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1652 $changed = 1;
0e476982 1653 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1654 # TODO see if this now makes a common point.
0e476982 1655 } else {
1656 $unique_rank_rdg{$key} = $rdg;
1657 }
1658 }
bf6e338d 1659 # If we merged readings, the ranks are still fine but the alignment
1660 # table is wrong. Wipe it.
1661 $self->wipe_table() if $changed;
0e476982 1662}
4633f9e4 1663
1664
d4b75f44 1665=head2 calculate_common_readings
1666
1667Goes through the graph identifying the readings that appear in every witness
1668(apart from those with lacunae at that spot.) Marks them as common and returns
1669the list.
1670
1671=begin testing
1672
1673use Text::Tradition;
1674
1675my $cxfile = 't/data/Collatex-16.xml';
1676my $t = Text::Tradition->new(
1677 'name' => 'inline',
1678 'input' => 'CollateX',
1679 'file' => $cxfile,
1680 );
1681my $c = $t->collation;
1682
1683my @common = $c->calculate_common_readings();
1684is( scalar @common, 8, "Found correct number of common readings" );
1685my @marked = sort $c->common_readings();
1686is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1687my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1688is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1689
1690=end testing
1691
1692=cut
1693
1694sub calculate_common_readings {
1695 my $self = shift;
1696 my @common;
c1915ab9 1697 map { $_->is_common( 0 ) } $self->readings;
1698 # Implicitly calls calculate_ranks
1dd07bda 1699 my $table = $self->alignment_table;
d4b75f44 1700 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1701 my @row = map { $_->{'tokens'}->[$idx]
1702 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1703 @{$table->{'alignment'}};
d4b75f44 1704 my %hash;
1705 foreach my $r ( @row ) {
1706 if( $r ) {
1707 $hash{$r->id} = $r unless $r->is_meta;
1708 } else {
1709 $hash{'UNDEF'} = $r;
1710 }
1711 }
1712 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1713 my( $r ) = values %hash;
1714 $r->is_common( 1 );
1715 push( @common, $r );
1716 }
1717 }
1718 return @common;
1719}
1720
861c3e27 1721=head2 text_from_paths
1722
1723Calculate the text array for all witnesses from the path, for later consistency
1724checking. Only to be used if there is no non-graph-based way to know the
1725original texts.
1726
1727=cut
1728
1729sub text_from_paths {
1730 my $self = shift;
1731 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1732 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1733 my @text;
1734 foreach my $r ( @readings ) {
1735 next if $r->is_meta;
1736 push( @text, $r->text );
1737 }
861c3e27 1738 $wit->text( \@text );
1739 if( $wit->is_layered ) {
5164a6f0 1740 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1741 $wit->sigil.$self->ac_label );
1742 my @uctext;
1743 foreach my $r ( @ucrdgs ) {
1744 next if $r->is_meta;
1745 push( @uctext, $r->text );
1746 }
1747 $wit->layertext( \@uctext );
861c3e27 1748 }
1749 }
1750}
0e476982 1751
4e5a7b2c 1752=head1 UTILITY FUNCTIONS
1753
1754=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1755
4e5a7b2c 1756Find the last reading that occurs in sequence before both the given readings.
414cc046 1757At the very least this should be $self->start.
4e5a7b2c 1758
1759=head2 common_successor( $reading_a, $reading_b )
1760
1761Find the first reading that occurs in sequence after both the given readings.
414cc046 1762At the very least this should be $self->end.
4e5a7b2c 1763
22222af9 1764=begin testing
1765
1766use Text::Tradition;
1767
1768my $cxfile = 't/data/Collatex-16.xml';
1769my $t = Text::Tradition->new(
1770 'name' => 'inline',
1771 'input' => 'CollateX',
1772 'file' => $cxfile,
1773 );
1774my $c = $t->collation;
1775
679f17e1 1776is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1777 'n20', "Found correct common predecessor" );
679f17e1 1778is( $c->common_successor( 'n24', 'n23' )->id,
22222af9 1779 '#END#', "Found correct common successor" );
1780
4e5a7b2c 1781is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1782 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1783is( $c->common_successor( 'n21', 'n10' )->id,
22222af9 1784 '#END#', "Found correct common successor for readings on same path" );
1785
1786=end testing
1787
1788=cut
1789
1790## Return the closest reading that is a predecessor of both the given readings.
1791sub common_predecessor {
1792 my $self = shift;
4e5a7b2c 1793 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1794 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1795}
1796
1797sub common_successor {
1798 my $self = shift;
4e5a7b2c 1799 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1800 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1801}
1802
414cc046 1803
1804# TODO think about how to do this without ranks...
027d819c 1805sub _common_in_path {
22222af9 1806 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1807 my $iter = $self->end->rank;
22222af9 1808 my @candidates;
414cc046 1809 my @last_r1 = ( $r1 );
1810 my @last_r2 = ( $r2 );
1811 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1812 my %all_seen;
414cc046 1813 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1814 while( !@candidates ) {
414cc046 1815 last unless $iter--; # Avoid looping infinitely
1816 # Iterate separately down the graph from r1 and r2
1817 my( @new_lc1, @new_lc2 );
1818 foreach my $lc ( @last_r1 ) {
1819 foreach my $p ( $lc->$dir ) {
1820 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1821 # print STDERR "Path candidate $p from $lc\n";
1822 push( @candidates, $p );
002e3600 1823 } elsif( !$all_seen{$p->id} ) {
414cc046 1824 $all_seen{$p->id} = 'r1';
1825 push( @new_lc1, $p );
1826 }
1827 }
1828 }
1829 foreach my $lc ( @last_r2 ) {
22222af9 1830 foreach my $p ( $lc->$dir ) {
414cc046 1831 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1832 # print STDERR "Path candidate $p from $lc\n";
22222af9 1833 push( @candidates, $p );
002e3600 1834 } elsif( !$all_seen{$p->id} ) {
414cc046 1835 $all_seen{$p->id} = 'r2';
1836 push( @new_lc2, $p );
22222af9 1837 }
1838 }
1839 }
414cc046 1840 @last_r1 = @new_lc1;
1841 @last_r2 = @new_lc2;
22222af9 1842 }
1843 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1844 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1845}
1846
63778331 1847sub throw {
1848 Text::Tradition::Error->throw(
1849 'ident' => 'Collation error',
1850 'message' => $_[0],
1851 );
1852}
1853
dd3b58b0 1854no Moose;
1855__PACKAGE__->meta->make_immutable;
e867486f 1856
027d819c 1857=head1 LICENSE
e867486f 1858
027d819c 1859This package is free software and is provided "as is" without express
1860or implied warranty. You can redistribute it and/or modify it under
1861the same terms as Perl itself.
e867486f 1862
027d819c 1863=head1 AUTHOR
e867486f 1864
027d819c 1865Tara L Andrews E<lt>aurum@cpan.orgE<gt>