try not to let collations interfere with relationship mapping
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
910a0a6d 8use Text::CSV_XS;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
df6d9812 12use XML::LibXML;
a344ffcf 13use XML::LibXML::XPathContext;
dd3b58b0 14use Moose;
15
3a2ebbf4 16has 'sequence' => (
d047cd52 17 is => 'ro',
3a2ebbf4 18 isa => 'Graph',
19 default => sub { Graph->new() },
d047cd52 20 handles => {
3a2ebbf4 21 paths => 'edges',
d047cd52 22 },
d047cd52 23 );
3a2ebbf4 24
25has 'relations' => (
26 is => 'ro',
22222af9 27 isa => 'Text::Tradition::Collation::RelationshipStore',
28 handles => {
29 relationships => 'relationships',
30 related_readings => 'related_readings',
202ccb18 31 get_relationship => 'get_relationship',
ee801e17 32 del_relationship => 'del_relationship',
22222af9 33 },
34 writer => '_set_relations',
3a2ebbf4 35 );
dd3b58b0 36
3a2ebbf4 37has 'tradition' => (
38 is => 'ro',
d047cd52 39 isa => 'Text::Tradition',
8d9a1cd8 40 weak_ref => 1,
d047cd52 41 );
dd3b58b0 42
3a2ebbf4 43has 'readings' => (
44 isa => 'HashRef[Text::Tradition::Collation::Reading]',
45 traits => ['Hash'],
46 handles => {
47 reading => 'get',
48 _add_reading => 'set',
49 del_reading => 'delete',
50 has_reading => 'exists',
51 readings => 'values',
52 },
53 default => sub { {} },
54 );
910a0a6d 55
4a8828f0 56has 'wit_list_separator' => (
7854e12e 57 is => 'rw',
58 isa => 'Str',
59 default => ', ',
60 );
61
62has 'baselabel' => (
63 is => 'rw',
64 isa => 'Str',
65 default => 'base text',
66 );
4a8828f0 67
15d2d3df 68has 'linear' => (
69 is => 'rw',
70 isa => 'Bool',
71 default => 1,
72 );
c84275ff 73
ef9d481f 74has 'ac_label' => (
75 is => 'rw',
76 isa => 'Str',
77 default => ' (a.c.)',
78 );
3a2ebbf4 79
4e483aa5 80has 'wordsep' => (
81 is => 'rw',
82 isa => 'Str',
83 default => ' ',
84 );
85
3a2ebbf4 86has 'start' => (
87 is => 'ro',
88 isa => 'Text::Tradition::Collation::Reading',
89 writer => '_set_start',
90 weak_ref => 1,
91 );
92
93has 'end' => (
94 is => 'ro',
95 isa => 'Text::Tradition::Collation::Reading',
96 writer => '_set_end',
97 weak_ref => 1,
98 );
b365fbae 99
100has 'cached_svg' => (
101 is => 'rw',
102 isa => 'Str',
103 predicate => 'has_cached_svg',
104 clearer => 'wipe_svg',
105 );
1dd07bda 106
107has 'cached_table' => (
108 is => 'rw',
109 isa => 'HashRef',
110 predicate => 'has_cached_table',
111 clearer => 'wipe_table',
112 );
c1915ab9 113
114has '_graphcalc_done' => (
115 is => 'rw',
116 isa => 'Bool',
117 default => undef,
118 );
1f563ac3 119
4e5a7b2c 120=head1 NAME
121
122Text::Tradition::Collation - a software model for a text collation
123
124=head1 SYNOPSIS
125
126 use Text::Tradition;
127 my $t = Text::Tradition->new(
128 'name' => 'this is a text',
129 'input' => 'TEI',
130 'file' => '/path/to/tei_parallel_seg_file.xml' );
131
132 my $c = $t->collation;
133 my @readings = $c->readings;
134 my @paths = $c->paths;
135 my @relationships = $c->relationships;
136
137 my $svg_variant_graph = $t->collation->as_svg();
138
139=head1 DESCRIPTION
140
141Text::Tradition is a library for representation and analysis of collated
142texts, particularly medieval ones. The Collation is the central feature of
143a Tradition, where the text, its sequence of readings, and its relationships
144between readings are actually kept.
145
146=head1 CONSTRUCTOR
147
148=head2 new
149
150The constructor. Takes a hash or hashref of the following arguments:
151
152=over
153
154=item * tradition - The Text::Tradition object to which the collation
155belongs. Required.
156
157=item * linear - Whether the collation should be linear; that is, whether
158transposed readings should be treated as two linked readings rather than one,
159and therefore whether the collation graph is acyclic. Defaults to true.
160
4e5a7b2c 161=item * baselabel - The default label for the path taken by a base text
162(if any). Defaults to 'base text'.
163
164=item * wit_list_separator - The string to join a list of witnesses for
165purposes of making labels in display graphs. Defaults to ', '.
166
167=item * ac_label - The extra label to tack onto a witness sigil when
168representing another layer of path for the given witness - that is, when
169a text has more than one possible reading due to scribal corrections or
170the like. Defaults to ' (a.c.)'.
171
4e483aa5 172=item * wordsep - The string used to separate words in the original text.
173Defaults to ' '.
174
4e5a7b2c 175=back
176
177=head1 ACCESSORS
178
179=head2 tradition
180
181=head2 linear
182
4e5a7b2c 183=head2 wit_list_separator
184
185=head2 baselabel
186
187=head2 ac_label
188
4e483aa5 189=head2 wordsep
190
4e5a7b2c 191Simple accessors for collation attributes.
192
193=head2 start
194
195The meta-reading at the start of every witness path.
196
197=head2 end
198
199The meta-reading at the end of every witness path.
200
201=head2 readings
202
203Returns all Reading objects in the graph.
204
205=head2 reading( $id )
206
207Returns the Reading object corresponding to the given ID.
208
209=head2 add_reading( $reading_args )
210
211Adds a new reading object to the collation.
212See L<Text::Tradition::Collation::Reading> for the available arguments.
213
214=head2 del_reading( $object_or_id )
215
216Removes the given reading from the collation, implicitly removing its
217paths and relationships.
218
4e483aa5 219=head2 merge_readings( $main, $second, $concatenate, $with_str )
220
221Merges the $second reading into the $main one. If $concatenate is true, then
222the merged node will carry the text of both readings, concatenated with either
223$with_str (if specified) or a sensible default (the empty string if the
224appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 225
4e483aa5 226The first two arguments may be either readings or reading IDs.
4e5a7b2c 227
228=head2 has_reading( $id )
229
230Predicate to see whether a given reading ID is in the graph.
231
232=head2 reading_witnesses( $object_or_id )
233
234Returns a list of sigils whose witnesses contain the reading.
235
236=head2 paths
237
238Returns all reading paths within the document - that is, all edges in the
239collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
240
241=head2 add_path( $source, $target, $sigil )
242
243Links the given readings in the collation in sequence, under the given witness
244sigil. The readings may be specified by object or ID.
245
246=head2 del_path( $source, $target, $sigil )
247
248Links the given readings in the collation in sequence, under the given witness
249sigil. The readings may be specified by object or ID.
250
251=head2 has_path( $source, $target );
252
253Returns true if the two readings are linked in sequence in any witness.
254The readings may be specified by object or ID.
255
256=head2 relationships
257
258Returns all Relationship objects in the collation.
259
260=head2 add_relationship( $reading, $other_reading, $options )
261
262Adds a new relationship of the type given in $options between the two readings,
263which may be specified by object or ID. Returns a value of ( $status, @vectors)
264where $status is true on success, and @vectors is a list of relationship edges
265that were ultimately added.
266See L<Text::Tradition::Collation::Relationship> for the available options.
267
268=cut
dd3b58b0 269
d047cd52 270sub BUILD {
3a2ebbf4 271 my $self = shift;
22222af9 272 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 273 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
274 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 275}
784877d9 276
3a2ebbf4 277### Reading construct/destruct functions
278
279sub add_reading {
280 my( $self, $reading ) = @_;
281 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
282 my %args = %$reading;
283 $reading = Text::Tradition::Collation::Reading->new(
284 'collation' => $self,
285 %args );
286 }
287 # First check to see if a reading with this ID exists.
288 if( $self->reading( $reading->id ) ) {
63778331 289 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 290 }
c1915ab9 291 $self->_graphcalc_done(0);
3a2ebbf4 292 $self->_add_reading( $reading->id => $reading );
293 # Once the reading has been added, put it in both graphs.
294 $self->sequence->add_vertex( $reading->id );
22222af9 295 $self->relations->add_reading( $reading->id );
3a2ebbf4 296 return $reading;
eca16057 297};
298
3a2ebbf4 299around del_reading => sub {
300 my $orig = shift;
301 my $self = shift;
302 my $arg = shift;
303
304 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
305 $arg = $arg->id;
306 }
3a2ebbf4 307 # Remove the reading from the graphs.
c1915ab9 308 $self->_graphcalc_done(0);
4e483aa5 309 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 310 $self->sequence->delete_vertex( $arg );
22222af9 311 $self->relations->delete_reading( $arg );
3a2ebbf4 312
313 # Carry on.
314 $self->$orig( $arg );
315};
7854e12e 316
4e483aa5 317=begin testing
318
319use Text::Tradition;
320
321my $cxfile = 't/data/Collatex-16.xml';
322my $t = Text::Tradition->new(
323 'name' => 'inline',
324 'input' => 'CollateX',
325 'file' => $cxfile,
326 );
327my $c = $t->collation;
328
329my $rno = scalar $c->readings;
330# Split n21 for testing purposes
331my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
332my $old_r = $c->reading( 'n21' );
333$old_r->alter_text( 'to' );
334$c->del_path( 'n20', 'n21', 'A' );
335$c->add_path( 'n20', 'n21p0', 'A' );
336$c->add_path( 'n21p0', 'n21', 'A' );
337$c->flatten_ranks();
338ok( $c->reading( 'n21p0' ), "New reading exists" );
339is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
340
341# Combine n3 and n4
342$c->merge_readings( 'n3', 'n4', 1 );
343ok( !$c->reading('n4'), "Reading n4 is gone" );
344is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
345
346# Collapse n25 and n26
347$c->merge_readings( 'n25', 'n26' );
348ok( !$c->reading('n26'), "Reading n26 is gone" );
349is( $c->reading('n25')->text, 'rood', "Reading n25 has an unchanged word" );
350
351# Combine n21 and n21p0
352my $remaining = $c->reading('n21');
353$remaining ||= $c->reading('n22'); # one of these should still exist
354$c->merge_readings( 'n21p0', $remaining, 1 );
355ok( !$c->reading('n21'), "Reading $remaining is gone" );
356is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
357
358=end testing
359
360=cut
7854e12e 361
3a2ebbf4 362sub merge_readings {
363 my $self = shift;
364
365 # We only need the IDs for adding paths to the graph, not the reading
366 # objects themselves.
4e483aa5 367 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 368 $self->_graphcalc_done(0);
3a2ebbf4 369
370 # The kept reading should inherit the paths and the relationships
371 # of the deleted reading.
372 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
373 my @vector = ( $kept );
374 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
375 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 376 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 377 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
378 $self->sequence->add_edge( @vector );
379 my $fwits = $self->sequence->get_edge_attributes( @vector );
380 @wits{keys %$fwits} = values %$fwits;
381 $self->sequence->set_edge_attributes( @vector, \%wits );
382 }
22222af9 383 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 384
385 # Do the deletion deed.
4e483aa5 386 if( $combine ) {
49d4f2ac 387 my $kept_obj = $self->reading( $kept );
4e483aa5 388 my $del_obj = $self->reading( $deleted );
389 my $joinstr = $combine_char;
390 unless( defined $joinstr ) {
391 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
392 $joinstr = $self->wordsep unless defined $joinstr;
393 }
394 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 395 }
3a2ebbf4 396 $self->del_reading( $deleted );
397}
7854e12e 398
3265b0ce 399
3a2ebbf4 400# Helper function for manipulating the graph.
401sub _stringify_args {
4e483aa5 402 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 403 $first = $first->id
404 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
405 $second = $second->id
406 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 407 return( $first, $second, @args );
3a2ebbf4 408}
df6d9812 409
4e5a7b2c 410# Helper function for manipulating the graph.
411sub _objectify_args {
412 my( $self, $first, $second, $arg ) = @_;
413 $first = $self->reading( $first )
414 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
415 $second = $self->reading( $second )
416 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
417 return( $first, $second, $arg );
418}
3a2ebbf4 419### Path logic
420
421sub add_path {
422 my $self = shift;
423
424 # We only need the IDs for adding paths to the graph, not the reading
425 # objects themselves.
426 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
427
c1915ab9 428 $self->_graphcalc_done(0);
3a2ebbf4 429 # Connect the readings
430 $self->sequence->add_edge( $source, $target );
431 # Note the witness in question
432 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 433};
434
3a2ebbf4 435sub del_path {
436 my $self = shift;
49d4f2ac 437 my @args;
438 if( ref( $_[0] ) eq 'ARRAY' ) {
439 my $e = shift @_;
440 @args = ( @$e, @_ );
441 } else {
442 @args = @_;
443 }
3a2ebbf4 444
445 # We only need the IDs for adding paths to the graph, not the reading
446 # objects themselves.
49d4f2ac 447 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 448
c1915ab9 449 $self->_graphcalc_done(0);
3a2ebbf4 450 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 451 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 452 }
453 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
454 $self->sequence->delete_edge( $source, $target );
455 }
784877d9 456}
457
3a2ebbf4 458
15d2d3df 459# Extra graph-alike utility
460sub has_path {
3a2ebbf4 461 my $self = shift;
462 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
463 return undef unless $self->sequence->has_edge( $source, $target );
464 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 465}
466
4e5a7b2c 467=head2 clear_witness( @sigil_list )
3a2ebbf4 468
4e5a7b2c 469Clear the given witnesses out of the collation entirely, removing references
470to them in paths, and removing readings that belong only to them. Should only
471be called via $tradition->del_witness.
3a2ebbf4 472
473=cut
474
4e5a7b2c 475sub clear_witness {
476 my( $self, @sigils ) = @_;
477
c1915ab9 478 $self->_graphcalc_done(0);
4e5a7b2c 479 # Clear the witness(es) out of the paths
480 foreach my $e ( $self->paths ) {
481 foreach my $sig ( @sigils ) {
482 $self->del_path( $e, $sig );
483 }
484 }
485
486 # Clear out the newly unused readings
487 foreach my $r ( $self->readings ) {
488 unless( $self->reading_witnesses( $r ) ) {
489 $self->del_reading( $r );
490 }
491 }
492}
3a2ebbf4 493
494sub add_relationship {
495 my $self = shift;
22222af9 496 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 497 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 498 $self->reading( $source ), $target, $self->reading( $target ), $opts );
c1915ab9 499 $self->_graphcalc_done(0);
63778331 500 return @vectors;
22222af9 501}
ef9d481f 502
ca6e6095 503around qw/ get_relationship del_relationship / => sub {
504 my $orig = shift;
505 my $self = shift;
506 my @args = @_;
507 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
508 @args = @{$_[0]};
509 }
510 my( $source, $target ) = $self->_stringify_args( @args );
511 $self->$orig( $source, $target );
512};
513
22222af9 514=head2 reading_witnesses( $reading )
910a0a6d 515
22222af9 516Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 517
22222af9 518=cut
1d310495 519
1d310495 520sub reading_witnesses {
521 my( $self, $reading ) = @_;
522 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 523 # arbitrarily chosen "incoming". Thus, special-case the start node.
524 if( $reading eq $self->start ) {
525 return map { $_->sigil } $self->tradition->witnesses;
526 }
1d310495 527 my %all_witnesses;
528 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
529 my $wits = $self->sequence->get_edge_attributes( @$e );
530 @all_witnesses{ keys %$wits } = 1;
531 }
c12bb878 532 my $acstr = $self->ac_label;
533 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
534 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
535 }
1d310495 536 return keys %all_witnesses;
910a0a6d 537}
538
4e5a7b2c 539=head1 OUTPUT METHODS
8e1394aa 540
0ecb975c 541=head2 as_svg( \%options )
8e1394aa 542
0068967c 543Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 544See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 545
546=cut
547
548sub as_svg {
0ecb975c 549 my( $self, $opts ) = @_;
bfcbcecb 550 throw( "Need GraphViz installed to output SVG" )
551 unless File::Which::which( 'dot' );
e247aad1 552 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
c1915ab9 553 $self->calculate_ranks() unless $self->_graphcalc_done;
e247aad1 554 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 555 my @cmd = qw/dot -Tsvg/;
556 my( $svg, $err );
557 my $dotfile = File::Temp->new();
558 ## USE FOR DEBUGGING
559 # $dotfile->unlink_on_destroy(0);
560 binmode $dotfile, ':utf8';
561 print $dotfile $self->as_dot( $opts );
562 push( @cmd, $dotfile->filename );
563 run( \@cmd, ">", binary(), \$svg );
e247aad1 564 $svg = decode_utf8( $svg );
565 $self->cached_svg( $svg ) unless $want_subgraph;
566 return $svg;
567 } else {
568 return $self->cached_svg;
b365fbae 569 }
8e1394aa 570}
571
b22576c6 572
0ecb975c 573=head2 as_dot( \%options )
b22576c6 574
0ecb975c 575Returns a string that is the collation graph expressed in dot
576(i.e. GraphViz) format. Options include:
b22576c6 577
0ecb975c 578=over 4
b22576c6 579
0ecb975c 580=item * from
b22576c6 581
0ecb975c 582=item * to
df6d9812 583
0ecb975c 584=item * color_common
585
586=back
df6d9812 587
588=cut
589
590sub as_dot {
0ecb975c 591 my( $self, $opts ) = @_;
592 my $startrank = $opts->{'from'} if $opts;
593 my $endrank = $opts->{'to'} if $opts;
594 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 595 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
596 && $self->end->rank > 100;
597
b22576c6 598 # Check the arguments
599 if( $startrank ) {
600 return if $endrank && $startrank > $endrank;
601 return if $startrank > $self->end->rank;
602 }
603 if( defined $endrank ) {
604 return if $endrank < 0;
f1b3b33a 605 $endrank = undef if $endrank == $self->end->rank;
b22576c6 606 }
607
67da8d6c 608 my $graph_name = $self->tradition->name;
609 $graph_name =~ s/[^\w\s]//g;
610 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 611
612 my %graph_attrs = (
613 'rankdir' => 'LR',
614 'bgcolor' => 'none',
615 );
616 my %node_attrs = (
b8990398 617 'fontsize' => 14,
f13b5582 618 'fillcolor' => 'white',
619 'style' => 'filled',
620 'shape' => 'ellipse'
621 );
622 my %edge_attrs = (
623 'arrowhead' => 'open',
624 'color' => '#000000',
625 'fontcolor' => '#000000',
626 );
627
67da8d6c 628 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 629 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
630 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 631
b22576c6 632 # Output substitute start/end readings if necessary
633 if( $startrank ) {
634 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
635 }
636 if( $endrank ) {
637 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
638 }
b365fbae 639 if( $STRAIGHTENHACK ) {
640 ## HACK part 1
641 $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";
642 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
643 }
b22576c6 644 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 645 # Sort the readings by rank if we have ranks; this speeds layout.
646 my @all_readings = $self->end->has_rank
647 ? sort { $a->rank <=> $b->rank } $self->readings
648 : $self->readings;
4633f9e4 649 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 650 foreach my $reading ( @all_readings ) {
b22576c6 651 # Only output readings within our rank range.
652 next if $startrank && $reading->rank < $startrank;
653 next if $endrank && $reading->rank > $endrank;
654 $used{$reading->id} = 1;
910a0a6d 655 # Need not output nodes without separate labels
3a2ebbf4 656 next if $reading->id eq $reading->text;
d4b75f44 657 my $rattrs;
30f0df34 658 my $label = $reading->text;
629e27b0 659 $label .= '-' if $reading->join_next;
660 $label = "-$label" if $reading->join_prior;
8f9cab7b 661 $label =~ s/\"/\\\"/g;
d4b75f44 662 $rattrs->{'label'} = $label;
0ecb975c 663 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 664 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 665 }
3a2ebbf4 666
30ddc24c 667 # Add the real edges. Need to weight one edge per rank jump, in a
668 # continuous line.
b365fbae 669 # my $weighted = $self->_add_edge_weights;
b22576c6 670 my @edges = $self->paths;
3bdec618 671 my( %substart, %subend );
b22576c6 672 foreach my $edge ( @edges ) {
673 # Do we need to output this edge?
508fd430 674 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 675 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 676 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 677
b22576c6 678 # Account for the rank gap if necessary
30ddc24c 679 my $rank0 = $self->reading( $edge->[0] )->rank
680 if $self->reading( $edge->[0] )->has_rank;
681 my $rank1 = $self->reading( $edge->[1] )->rank
682 if $self->reading( $edge->[1] )->has_rank;
683 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
684 $variables->{'minlen'} = $rank1 - $rank0;
685 }
686
687 # Add the calculated edge weights
b365fbae 688 # if( exists $weighted->{$edge->[0]}
e247aad1 689 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
690 # # $variables->{'color'} = 'red';
691 # $variables->{'weight'} = 3.0;
692 # }
30ddc24c 693
508fd430 694 # EXPERIMENTAL: make edge width reflect no. of witnesses
695 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
696 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
697
f13b5582 698 my $varopts = _dot_attr_string( $variables );
699 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
700 $edge->[0], $edge->[1], $varopts );
3bdec618 701 } elsif( $used{$edge->[0]} ) {
702 $subend{$edge->[0]} = 1;
703 } elsif( $used{$edge->[1]} ) {
704 $substart{$edge->[1]} = 1;
b22576c6 705 }
df6d9812 706 }
3bdec618 707 # Add substitute start and end edges if necessary
708 foreach my $node ( keys %substart ) {
027d819c 709 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 710 my $variables = { %edge_attrs, 'label' => $witstr };
711 my $varopts = _dot_attr_string( $variables );
712 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 713 }
714 foreach my $node ( keys %subend ) {
027d819c 715 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 716 my $variables = { %edge_attrs, 'label' => $witstr };
717 my $varopts = _dot_attr_string( $variables );
718 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 719 }
b365fbae 720 # HACK part 2
721 if( $STRAIGHTENHACK ) {
722 $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
723 }
30ddc24c 724
df6d9812 725 $dot .= "}\n";
726 return $dot;
727}
728
f13b5582 729sub _dot_attr_string {
730 my( $hash ) = @_;
731 my @attrs;
732 foreach my $k ( sort keys %$hash ) {
733 my $v = $hash->{$k};
734 push( @attrs, $k.'="'.$v.'"' );
735 }
736 return( '[ ' . join( ', ', @attrs ) . ' ]' );
737}
738
30ddc24c 739sub _add_edge_weights {
740 my $self = shift;
741 # Walk the graph from START to END, choosing the successor node with
742 # the largest number of witness paths each time.
743 my $weighted = {};
744 my $curr = $self->start->id;
008fc8a6 745 my $ranked = $self->end->has_rank;
30ddc24c 746 while( $curr ne $self->end->id ) {
008fc8a6 747 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 748 my @succ = sort { $self->path_witnesses( $curr, $a )
749 <=> $self->path_witnesses( $curr, $b ) }
750 $self->sequence->successors( $curr );
751 my $next = pop @succ;
008fc8a6 752 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 753 # Try to avoid lacunae in the weighted path.
008fc8a6 754 while( @succ &&
755 ( $self->reading( $next )->is_lacuna ||
756 $nextrank - $rank > 1 ) ){
30ddc24c 757 $next = pop @succ;
758 }
759 $weighted->{$curr} = $next;
760 $curr = $next;
761 }
762 return $weighted;
763}
764
027d819c 765=head2 path_witnesses( $edge )
766
767Returns the list of sigils whose witnesses are associated with the given edge.
768The edge can be passed as either an array or an arrayref of ( $source, $target ).
769
770=cut
771
3a2ebbf4 772sub path_witnesses {
773 my( $self, @edge ) = @_;
774 # If edge is an arrayref, cope.
775 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
776 my $e = shift @edge;
777 @edge = @$e;
778 }
779 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 780 return @wits;
3a2ebbf4 781}
782
027d819c 783sub _path_display_label {
508fd430 784 my $self = shift;
785 my @wits = sort @_;
8f9cab7b 786 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
787 if( scalar @wits > $maj ) {
f13b5582 788 # TODO break out a.c. wits
8f9cab7b 789 return 'majority';
790 } else {
791 return join( ', ', @wits );
792 }
793}
1dd07bda 794
795=head2 witnesses_at_rank
796
797Returns a list of witnesses that are not lacunose, for a given rank.
798
799=cut
800
801sub witnesses_at_rank {
802 my( $self, $rank ) = @_;
803}
8f9cab7b 804
4e5a7b2c 805=head2 as_graphml
8e1394aa 806
4e5a7b2c 807Returns a GraphML representation of the collation. The GraphML will contain
808two graphs. The first expresses the attributes of the readings and the witness
809paths that link them; the second expresses the relationships that link the
810readings. This is the native transfer format for a tradition.
8e1394aa 811
56eefa04 812=begin testing
813
814use Text::Tradition;
815
816my $READINGS = 311;
817my $PATHS = 361;
818
819my $datafile = 't/data/florilegium_tei_ps.xml';
820my $tradition = Text::Tradition->new( 'input' => 'TEI',
821 'name' => 'test0',
822 'file' => $datafile,
823 'linear' => 1 );
824
825ok( $tradition, "Got a tradition object" );
826is( scalar $tradition->witnesses, 13, "Found all witnesses" );
827ok( $tradition->collation, "Tradition has a collation" );
828
829my $c = $tradition->collation;
830is( scalar $c->readings, $READINGS, "Collation has all readings" );
831is( scalar $c->paths, $PATHS, "Collation has all paths" );
832is( scalar $c->relationships, 0, "Collation has all relationships" );
833
834# Add a few relationships
835$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
836$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
837$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
838
839# Now write it to GraphML and parse it again.
840
841my $graphml = $c->as_graphml;
842my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
843is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
844is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
845is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
846
847=end testing
848
8e1394aa 849=cut
850
851sub as_graphml {
3a2ebbf4 852 my( $self ) = @_;
3d14b48e 853 $self->calculate_ranks unless $self->_graphcalc_done;
854
8e1394aa 855 # Some namespaces
856 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
857 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
858 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 859 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 860
861 # Create the document and root node
862 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
863 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
864 $graphml->setDocumentElement( $root );
865 $root->setNamespace( $xsi_ns, 'xsi', 0 );
866 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
867
e309421a 868 # Add the data keys for the graph
869 my %graph_data_keys;
870 my $gdi = 0;
1d310495 871 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 872 foreach my $datum ( @graph_attributes ) {
873 $graph_data_keys{$datum} = 'dg'.$gdi++;
874 my $key = $root->addNewChild( $graphml_ns, 'key' );
875 $key->setAttribute( 'attr.name', $datum );
4e483aa5 876 $key->setAttribute( 'attr.type', $datum eq 'linear' ? 'boolean' : 'string' );
e309421a 877 $key->setAttribute( 'for', 'graph' );
878 $key->setAttribute( 'id', $graph_data_keys{$datum} );
879 }
f6066bac 880
8e1394aa 881 # Add the data keys for nodes
ef9d481f 882 my %node_data_keys;
883 my $ndi = 0;
3a2ebbf4 884 my %node_data = (
885 id => 'string',
255875b8 886 text => 'string',
3a2ebbf4 887 rank => 'string',
888 is_start => 'boolean',
889 is_end => 'boolean',
890 is_lacuna => 'boolean',
629e27b0 891 is_common => 'boolean',
892 join_prior => 'boolean',
893 join_next => 'boolean',
3a2ebbf4 894 );
895 foreach my $datum ( keys %node_data ) {
910a0a6d 896 $node_data_keys{$datum} = 'dn'.$ndi++;
897 my $key = $root->addNewChild( $graphml_ns, 'key' );
898 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 899 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 900 $key->setAttribute( 'for', 'node' );
901 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 902 }
903
df6d9812 904 # Add the data keys for edges, i.e. witnesses
ef9d481f 905 my $edi = 0;
906 my %edge_data_keys;
3a2ebbf4 907 my %edge_data = (
f523c7a8 908 class => 'string', # Class, deprecated soon
3a2ebbf4 909 witness => 'string', # ID/label for a path
910 relationship => 'string', # ID/label for a relationship
911 extra => 'boolean', # Path key
c84275ff 912 scope => 'string', # Relationship key
3d14b48e 913 annotation => 'string', # Relationship key
3a2ebbf4 914 non_correctable => 'boolean', # Relationship key
915 non_independent => 'boolean', # Relationship key
916 );
917 foreach my $datum ( keys %edge_data ) {
918 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 919 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 920 $key->setAttribute( 'attr.name', $datum );
921 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 922 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 923 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 924 }
3a2ebbf4 925
22222af9 926 # Add the collation graph itself
2c669bca 927 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
928 $sgraph->setAttribute( 'edgedefault', 'directed' );
929 $sgraph->setAttribute( 'id', $self->tradition->name );
930 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
931 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
932 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
933 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
934 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 935
e309421a 936 # Collation attribute data
937 foreach my $datum ( @graph_attributes ) {
2c669bca 938 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
939 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 940 }
8e1394aa 941
942 my $node_ctr = 0;
943 my %node_hash;
22222af9 944 # Add our readings to the graph
3a2ebbf4 945 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 946 # Add to the main graph
947 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 948 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 949 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 950 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 951 foreach my $d ( keys %node_data ) {
952 my $nval = $n->$d;
953 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
954 if defined $nval;
955 }
b15511bf 956 }
957
2c669bca 958 # Add the path edges to the sequence graph
df6d9812 959 my $edge_ctr = 0;
3a2ebbf4 960 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
961 # We add an edge in the graphml for every witness in $e.
508fd430 962 foreach my $wit ( sort $self->path_witnesses( $e ) ) {
3a2ebbf4 963 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
964 $node_hash{ $e->[0] },
965 $node_hash{ $e->[1] } );
2c669bca 966 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 967 $edge_el->setAttribute( 'source', $from );
968 $edge_el->setAttribute( 'target', $to );
969 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 970
971 # It's a witness path, so add the witness
972 my $base = $wit;
973 my $key = $edge_data_keys{'witness'};
974 # Is this an ante-corr witness?
975 my $aclabel = $self->ac_label;
976 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
977 # Keep the base witness
978 $base = $1;
979 # ...and record that this is an 'extra' reading path
980 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
981 }
982 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 983 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 984 }
985 }
986
22222af9 987 # Add the relationship graph to the XML
027d819c 988 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
2626f709 989 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 990
94c00c71 991 # Save and return the thing
992 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 993 return $result;
df6d9812 994}
995
b15511bf 996sub _add_graphml_data {
997 my( $el, $key, $value ) = @_;
b15511bf 998 return unless defined $value;
c9bf3dbf 999 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1000 $data_el->setAttribute( 'key', $key );
1001 $data_el->appendText( $value );
8e1394aa 1002}
1003
4e5a7b2c 1004=head2 as_csv
910a0a6d 1005
1006Returns a CSV alignment table representation of the collation graph, one
2c669bca 1007row per witness (or witness uncorrected.)
910a0a6d 1008
1009=cut
1010
1011sub as_csv {
3a2ebbf4 1012 my( $self ) = @_;
1dd07bda 1013 my $table = $self->alignment_table;
910a0a6d 1014 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
1015 my @result;
2c669bca 1016 # Make the header row
1017 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1018 push( @result, decode_utf8( $csv->string ) );
1019 # Make the rest of the rows
1020 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1021 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1022 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1023 $csv->combine( @row );
910a0a6d 1024 push( @result, decode_utf8( $csv->string ) );
1025 }
3a2ebbf4 1026 return join( "\n", @result );
910a0a6d 1027}
1028
1dd07bda 1029=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1030
566f4595 1031Return a reference to an alignment table, in a slightly enhanced CollateX
1032format which looks like this:
1033
1034 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1035 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1036 { witness => "SIG2",
4e5a7b2c 1037 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1038 ... ],
1039 length => TEXTLEN };
1040
1041If $use_refs is set to 1, the reading object is returned in the table
1042instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1043
1044If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1045keys have a true hash value will be included.
2c669bca 1046
1047=cut
9f3ba6f7 1048
1dd07bda 1049sub alignment_table {
1050 my( $self ) = @_;
c1915ab9 1051 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1052 return $self->cached_table if $self->has_cached_table;
1053
0ecb975c 1054 # Make sure we can do this
1055 throw( "Need a linear graph in order to make an alignment table" )
1056 unless $self->linear;
1057 $self->calculate_ranks unless $self->end->has_rank;
1058
2c669bca 1059 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1060 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1061 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1062 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1063 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1064 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1065 push( @{$table->{'alignment'}},
1066 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1067 if( $wit->is_layered ) {
1068 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1069 $wit->sigil.$self->ac_label );
1dd07bda 1070 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1071 push( @{$table->{'alignment'}},
1072 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1073 }
1074 }
1dd07bda 1075 $self->cached_table( $table );
1076 return $table;
910a0a6d 1077}
1078
1079sub _make_witness_row {
1dd07bda 1080 my( $path, $positions ) = @_;
910a0a6d 1081 my %char_hash;
1082 map { $char_hash{$_} = undef } @$positions;
2c669bca 1083 my $debug = 0;
910a0a6d 1084 foreach my $rdg ( @$path ) {
eca16057 1085 my $rtext = $rdg->text;
1086 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1087 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1088 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1089 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1090 }
1091 my @row = map { $char_hash{$_} } @$positions;
eca16057 1092 # Fill in lacuna markers for undef spots in the row
1093 my $last_el = shift @row;
1094 my @filled_row = ( $last_el );
1095 foreach my $el ( @row ) {
0e476982 1096 # If we are using node reference, make the lacuna node appear many times
1097 # in the table. If not, use the lacuna tag.
1dd07bda 1098 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1099 $el = $last_el;
eca16057 1100 }
1101 push( @filled_row, $el );
1102 $last_el = $el;
1103 }
1104 return @filled_row;
910a0a6d 1105}
1106
4e5a7b2c 1107=head1 NAVIGATION METHODS
910a0a6d 1108
4e5a7b2c 1109=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1110
1111Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1112with $last, for the witness given in $sigil. If a $backup sigil is
1113specified (e.g. when walking a layered witness), it will be used wherever
1114no $sigil path exists. If there is a base text reading, that will be
1115used wherever no path exists for $sigil or $backup.
e2902068 1116
1117=cut
1118
910a0a6d 1119# TODO Think about returning some lazy-eval iterator.
b0b4421a 1120# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1121
e2902068 1122sub reading_sequence {
861c3e27 1123 my( $self, $start, $end, $witness ) = @_;
e2902068 1124
930ff666 1125 $witness = $self->baselabel unless $witness;
e2902068 1126 my @readings = ( $start );
1127 my %seen;
1128 my $n = $start;
3a2ebbf4 1129 while( $n && $n->id ne $end->id ) {
1130 if( exists( $seen{$n->id} ) ) {
63778331 1131 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1132 }
3a2ebbf4 1133 $seen{$n->id} = 1;
910a0a6d 1134
861c3e27 1135 my $next = $self->next_reading( $n, $witness );
44771cf2 1136 unless( $next ) {
63778331 1137 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1138 }
910a0a6d 1139 push( @readings, $next );
1140 $n = $next;
e2902068 1141 }
1142 # Check that the last reading is our end reading.
1143 my $last = $readings[$#readings];
63778331 1144 throw( "Last reading found from " . $start->text .
1145 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1146 unless $last->id eq $end->id;
e2902068 1147
1148 return @readings;
1149}
1150
4e5a7b2c 1151=head2 next_reading( $reading, $sigil );
8e1394aa 1152
4a8828f0 1153Returns the reading that follows the given reading along the given witness
930ff666 1154path.
8e1394aa 1155
1156=cut
1157
4a8828f0 1158sub next_reading {
e2902068 1159 # Return the successor via the corresponding path.
8e1394aa 1160 my $self = shift;
3a2ebbf4 1161 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1162 return undef unless $answer;
3a2ebbf4 1163 return $self->reading( $answer );
8e1394aa 1164}
1165
4e5a7b2c 1166=head2 prior_reading( $reading, $sigil )
8e1394aa 1167
4a8828f0 1168Returns the reading that precedes the given reading along the given witness
930ff666 1169path.
8e1394aa 1170
1171=cut
1172
4a8828f0 1173sub prior_reading {
e2902068 1174 # Return the predecessor via the corresponding path.
8e1394aa 1175 my $self = shift;
3a2ebbf4 1176 my $answer = $self->_find_linked_reading( 'prior', @_ );
1177 return $self->reading( $answer );
8e1394aa 1178}
1179
4a8828f0 1180sub _find_linked_reading {
861c3e27 1181 my( $self, $direction, $node, $path ) = @_;
1182
1183 # Get a backup if we are dealing with a layered witness
1184 my $alt_path;
1185 my $aclabel = $self->ac_label;
1186 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1187 $alt_path = $1;
1188 }
1189
e2902068 1190 my @linked_paths = $direction eq 'next'
3a2ebbf4 1191 ? $self->sequence->edges_from( $node )
1192 : $self->sequence->edges_to( $node );
e2902068 1193 return undef unless scalar( @linked_paths );
8e1394aa 1194
e2902068 1195 # We have to find the linked path that contains all of the
1196 # witnesses supplied in $path.
1197 my( @path_wits, @alt_path_wits );
4e5a7b2c 1198 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1199 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1200 my $base_le;
1201 my $alt_le;
1202 foreach my $le ( @linked_paths ) {
3a2ebbf4 1203 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1204 $base_le = $le;
910a0a6d 1205 }
508fd430 1206 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1207 if( _is_within( \@path_wits, \@le_wits ) ) {
1208 # This is the right path.
1209 return $direction eq 'next' ? $le->[1] : $le->[0];
1210 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1211 $alt_le = $le;
1212 }
8e1394aa 1213 }
e2902068 1214 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1215 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1216 if $alt_le;
e2902068 1217
1218 # Got this far? Return the base path if it exists.
3a2ebbf4 1219 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1220 if $base_le;
e2902068 1221
1222 # Got this far? We have no appropriate path.
2c669bca 1223 warn "Could not find $direction node from " . $node->id
910a0a6d 1224 . " along path $path";
8e1394aa 1225 return undef;
1226}
1227
4a8828f0 1228# Some set logic.
1229sub _is_within {
1230 my( $set1, $set2 ) = @_;
7854e12e 1231 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1232 foreach my $el ( @$set1 ) {
910a0a6d 1233 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1234 }
1235 return $ret;
1236}
1237
4e5a7b2c 1238# Return the string that joins together a list of witnesses for
1239# display on a single path.
1240sub _witnesses_of_label {
1241 my( $self, $label ) = @_;
1242 my $regex = $self->wit_list_separator;
1243 my @answer = split( /\Q$regex\E/, $label );
1244 return @answer;
b0b4421a 1245}
1246
d4b75f44 1247=head2 common_readings
1248
1249Returns the list of common readings in the graph (i.e. those readings that are
1250shared by all non-lacunose witnesses.)
1251
1252=cut
1253
1254sub common_readings {
1255 my $self = shift;
1256 my @common = grep { $_->is_common } $self->readings;
1257 return @common;
1258}
1259
b0b4421a 1260=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1261
1262Returns the text of a witness (plus its backup, if we are using a layer)
1263as stored in the collation. The text is returned as a string, where the
1264individual readings are joined with spaces and the meta-readings (e.g.
1265lacunae) are omitted. Optional specification of $start and $end allows
1266the generation of a subset of the witness text.
4e5a7b2c 1267
b0b4421a 1268=cut
1269
1270sub path_text {
861c3e27 1271 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1272 $start = $self->start unless $start;
1273 $end = $self->end unless $end;
861c3e27 1274 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1275 my $pathtext = '';
1276 my $last;
1277 foreach my $r ( @path ) {
1278 if( $r->join_prior || !$last || $last->join_next ) {
1279 $pathtext .= $r->text;
1280 } else {
1281 $pathtext .= ' ' . $r->text;
1282 }
1283 $last = $r;
1284 }
1285 return $pathtext;
b0b4421a 1286}
4e5a7b2c 1287
1288=head1 INITIALIZATION METHODS
1289
1290These are mostly for use by parsers.
1291
1292=head2 make_witness_path( $witness )
1293
1294Link the array of readings contained in $witness->path (and in
1295$witness->uncorrected_path if it exists) into collation paths.
1296Clear out the arrays when finished.
de51424a 1297
4e5a7b2c 1298=head2 make_witness_paths
1299
1300Call make_witness_path for all witnesses in the tradition.
1301
1302=cut
930ff666 1303
7e450e44 1304# For use when a collation is constructed from a base text and an apparatus.
1305# We have the sequences of readings and just need to add path edges.
1f7aa795 1306# When we are done, clear out the witness path attributes, as they are no
1307# longer needed.
1308# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1309
6a222840 1310sub make_witness_paths {
1311 my( $self ) = @_;
910a0a6d 1312 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1313 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1314 $self->make_witness_path( $wit );
7854e12e 1315 }
7854e12e 1316}
1317
6a222840 1318sub make_witness_path {
7854e12e 1319 my( $self, $wit ) = @_;
1320 my @chain = @{$wit->path};
15d2d3df 1321 my $sig = $wit->sigil;
7854e12e 1322 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1323 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1324 }
1f7aa795 1325 if( $wit->is_layered ) {
d9e873d0 1326 @chain = @{$wit->uncorrected_path};
1327 foreach my $idx( 0 .. $#chain-1 ) {
1328 my $source = $chain[$idx];
1329 my $target = $chain[$idx+1];
1330 $self->add_path( $source, $target, $sig.$self->ac_label )
1331 unless $self->has_path( $source, $target, $sig );
1332 }
15d2d3df 1333 }
1f7aa795 1334 $wit->clear_path;
1335 $wit->clear_uncorrected_path;
e2902068 1336}
1337
4e5a7b2c 1338=head2 calculate_ranks
1339
1340Calculate the reading ranks (that is, their aligned positions relative
1341to each other) for the graph. This can only be called on linear collations.
1342
b365fbae 1343=begin testing
1344
1345use Text::Tradition;
1346
1347my $cxfile = 't/data/Collatex-16.xml';
1348my $t = Text::Tradition->new(
1349 'name' => 'inline',
1350 'input' => 'CollateX',
1351 'file' => $cxfile,
1352 );
1353my $c = $t->collation;
1354
1355# Make an svg
bfcbcecb 1356my $table = $c->alignment_table;
1357ok( $c->has_cached_table, "Alignment table was cached" );
1358is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1359$c->calculate_ranks;
bfcbcecb 1360is( $c->alignment_table, $table, "Cached table retained with no rank change" );
b365fbae 1361$c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1362isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1363
1364=end testing
1365
4e5a7b2c 1366=cut
1367
910a0a6d 1368sub calculate_ranks {
1369 my $self = shift;
b365fbae 1370 # Save the existing ranks, in case we need to invalidate the cached SVG.
1371 my %existing_ranks;
910a0a6d 1372 # Walk a version of the graph where every node linked by a relationship
1373 # edge is fundamentally the same node, and do a topological ranking on
1374 # the nodes in this graph.
c9bf3dbf 1375 my $topo_graph = Graph->new();
910a0a6d 1376 my %rel_containers;
1377 my $rel_ctr = 0;
1378 # Add the nodes
1379 foreach my $r ( $self->readings ) {
3a2ebbf4 1380 next if exists $rel_containers{$r->id};
910a0a6d 1381 my @rels = $r->related_readings( 'colocated' );
1382 if( @rels ) {
1383 # Make a relationship container.
1384 push( @rels, $r );
c9bf3dbf 1385 my $rn = 'rel_container_' . $rel_ctr++;
1386 $topo_graph->add_vertex( $rn );
910a0a6d 1387 foreach( @rels ) {
3a2ebbf4 1388 $rel_containers{$_->id} = $rn;
910a0a6d 1389 }
1390 } else {
1391 # Add a new node to mirror the old node.
3a2ebbf4 1392 $rel_containers{$r->id} = $r->id;
1393 $topo_graph->add_vertex( $r->id );
910a0a6d 1394 }
4a8828f0 1395 }
3a1f2523 1396
3a2ebbf4 1397 # Add the edges.
910a0a6d 1398 foreach my $r ( $self->readings ) {
b365fbae 1399 $existing_ranks{$r} = $r->rank;
3a2ebbf4 1400 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1401 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1402 $rel_containers{$n} );
4e5a7b2c 1403 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1404 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1405 }
1406 }
1407
1408 # Now do the rankings, starting with the start node.
3a2ebbf4 1409 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1410 my $node_ranks = { $topo_start => 0 };
910a0a6d 1411 my @curr_origin = ( $topo_start );
1412 # A little iterative function.
1413 while( @curr_origin ) {
c9bf3dbf 1414 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1415 }
1416 # Transfer our rankings from the topological graph to the real one.
1417 foreach my $r ( $self->readings ) {
3a2ebbf4 1418 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1419 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1420 } else {
63778331 1421 # Die. Find the last rank we calculated.
1422 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1423 <=> $node_ranks->{$rel_containers{$b->id}} }
1424 $self->readings;
1425 my $last = pop @all_defined;
1426 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1427 }
de51424a 1428 }
bfcbcecb 1429 # Do we need to invalidate the cached data?
1430 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1431 foreach my $r ( $self->readings ) {
7c293912 1432 next if defined( $existing_ranks{$r} )
1433 && $existing_ranks{$r} == $r->rank;
c1915ab9 1434 # Something has changed, so clear the cache
bfcbcecb 1435 $self->_clear_cache;
c1915ab9 1436 # ...and recalculate the common readings.
1437 $self->calculate_common_readings();
b365fbae 1438 last;
1439 }
1440 }
c1915ab9 1441 # The graph calculation information is now up to date.
1442 $self->_graphcalc_done(1);
8e1394aa 1443}
3a1f2523 1444
910a0a6d 1445sub _assign_rank {
c9bf3dbf 1446 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1447 # Look at each of the children of @current_nodes. If all the child's
1448 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1449 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1450 # parent gets a rank.
910a0a6d 1451 my @next_nodes;
1452 foreach my $c ( @current_nodes ) {
c9bf3dbf 1453 warn "Current reading $c has no rank!"
1454 unless exists $node_ranks->{$c};
1455 # print STDERR "Looking at child of node $c, rank "
1456 # . $node_ranks->{$c} . "\n";
1457 foreach my $child ( $graph->successors( $c ) ) {
1458 next if exists $node_ranks->{$child};
910a0a6d 1459 my $highest_rank = -1;
1460 my $skip = 0;
c9bf3dbf 1461 foreach my $parent ( $graph->predecessors( $child ) ) {
1462 if( exists $node_ranks->{$parent} ) {
1463 $highest_rank = $node_ranks->{$parent}
1464 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1465 } else {
1466 $skip = 1;
1467 last;
1468 }
1469 }
1470 next if $skip;
c9bf3dbf 1471 my $c_rank = $highest_rank + 1;
1472 # print STDERR "Assigning rank $c_rank to node $child \n";
1473 $node_ranks->{$child} = $c_rank;
910a0a6d 1474 push( @next_nodes, $child );
1475 }
1476 }
1477 return @next_nodes;
4cdd82f1 1478}
910a0a6d 1479
c1915ab9 1480sub _clear_cache {
1481 my $self = shift;
1482 $self->wipe_svg if $self->has_cached_svg;
1483 $self->wipe_table if $self->has_cached_table;
1484}
1485
1486
4e5a7b2c 1487=head2 flatten_ranks
1488
1489A convenience method for parsing collation data. Searches the graph for readings
1490with the same text at the same rank, and merges any that are found.
1491
1492=cut
1493
0e476982 1494sub flatten_ranks {
1495 my $self = shift;
1496 my %unique_rank_rdg;
1497 foreach my $rdg ( $self->readings ) {
1498 next unless $rdg->has_rank;
1499 my $key = $rdg->rank . "||" . $rdg->text;
1500 if( exists $unique_rank_rdg{$key} ) {
1501 # Combine!
56eefa04 1502 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1503 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1504 # TODO see if this now makes a common point.
0e476982 1505 } else {
1506 $unique_rank_rdg{$key} = $rdg;
1507 }
1508 }
1509}
4633f9e4 1510
1511
d4b75f44 1512=head2 calculate_common_readings
1513
1514Goes through the graph identifying the readings that appear in every witness
1515(apart from those with lacunae at that spot.) Marks them as common and returns
1516the list.
1517
1518=begin testing
1519
1520use Text::Tradition;
1521
1522my $cxfile = 't/data/Collatex-16.xml';
1523my $t = Text::Tradition->new(
1524 'name' => 'inline',
1525 'input' => 'CollateX',
1526 'file' => $cxfile,
1527 );
1528my $c = $t->collation;
1529
1530my @common = $c->calculate_common_readings();
1531is( scalar @common, 8, "Found correct number of common readings" );
1532my @marked = sort $c->common_readings();
1533is( scalar @common, 8, "All common readings got marked as such" );
1534my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1535is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1536
1537=end testing
1538
1539=cut
1540
1541sub calculate_common_readings {
1542 my $self = shift;
1543 my @common;
c1915ab9 1544 map { $_->is_common( 0 ) } $self->readings;
1545 # Implicitly calls calculate_ranks
1dd07bda 1546 my $table = $self->alignment_table;
d4b75f44 1547 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1548 my @row = map { $_->{'tokens'}->[$idx]
1549 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1550 @{$table->{'alignment'}};
d4b75f44 1551 my %hash;
1552 foreach my $r ( @row ) {
1553 if( $r ) {
1554 $hash{$r->id} = $r unless $r->is_meta;
1555 } else {
1556 $hash{'UNDEF'} = $r;
1557 }
1558 }
1559 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1560 my( $r ) = values %hash;
1561 $r->is_common( 1 );
1562 push( @common, $r );
1563 }
1564 }
1565 return @common;
1566}
1567
861c3e27 1568=head2 text_from_paths
1569
1570Calculate the text array for all witnesses from the path, for later consistency
1571checking. Only to be used if there is no non-graph-based way to know the
1572original texts.
1573
1574=cut
1575
1576sub text_from_paths {
1577 my $self = shift;
1578 foreach my $wit ( $self->tradition->witnesses ) {
1579 my @text = split( /\s+/,
1580 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1581 $wit->text( \@text );
1582 if( $wit->is_layered ) {
1583 my @uctext = split( /\s+/,
1584 $self->reading_sequence( $self->start, $self->end,
1585 $wit->sigil.$self->ac_label ) );
1586 $wit->text( \@uctext );
1587 }
1588 }
1589}
0e476982 1590
4e5a7b2c 1591=head1 UTILITY FUNCTIONS
1592
1593=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1594
4e5a7b2c 1595Find the last reading that occurs in sequence before both the given readings.
1596
1597=head2 common_successor( $reading_a, $reading_b )
1598
1599Find the first reading that occurs in sequence after both the given readings.
1600
22222af9 1601=begin testing
1602
1603use Text::Tradition;
1604
1605my $cxfile = 't/data/Collatex-16.xml';
1606my $t = Text::Tradition->new(
1607 'name' => 'inline',
1608 'input' => 'CollateX',
1609 'file' => $cxfile,
1610 );
1611my $c = $t->collation;
1612
4e5a7b2c 1613is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1614 'n20', "Found correct common predecessor" );
4e5a7b2c 1615is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1616 '#END#', "Found correct common successor" );
1617
4e5a7b2c 1618is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1619 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1620is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1621 '#END#', "Found correct common successor for readings on same path" );
1622
1623=end testing
1624
1625=cut
1626
1627## Return the closest reading that is a predecessor of both the given readings.
1628sub common_predecessor {
1629 my $self = shift;
4e5a7b2c 1630 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1631 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1632}
1633
1634sub common_successor {
1635 my $self = shift;
4e5a7b2c 1636 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1637 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1638}
1639
027d819c 1640sub _common_in_path {
22222af9 1641 my( $self, $r1, $r2, $dir ) = @_;
1642 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1643 $iter = $self->end->rank - $iter if $dir eq 'successors';
1644 my @candidates;
1645 my @last_checked = ( $r1, $r2 );
1646 my %all_seen;
1647 while( !@candidates ) {
1648 my @new_lc;
1649 foreach my $lc ( @last_checked ) {
1650 foreach my $p ( $lc->$dir ) {
1651 if( $all_seen{$p->id} ) {
1652 push( @candidates, $p );
1653 } else {
1654 $all_seen{$p->id} = 1;
1655 push( @new_lc, $p );
1656 }
1657 }
1658 }
1659 @last_checked = @new_lc;
1660 }
1661 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1662 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1663}
1664
63778331 1665sub throw {
1666 Text::Tradition::Error->throw(
1667 'ident' => 'Collation error',
1668 'message' => $_[0],
1669 );
1670}
1671
dd3b58b0 1672no Moose;
1673__PACKAGE__->meta->make_immutable;
e867486f 1674
027d819c 1675=head1 LICENSE
e867486f 1676
027d819c 1677This package is free software and is provided "as is" without express
1678or implied warranty. You can redistribute it and/or modify it under
1679the same terms as Perl itself.
e867486f 1680
027d819c 1681=head1 AUTHOR
e867486f 1682
027d819c 1683Tara L Andrews E<lt>aurum@cpan.orgE<gt>