fix some subgraph rendering issues
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
e4b73942 277 $self->_set_start( $self->add_reading(
278 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
279 $self->_set_end( $self->add_reading(
280 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
d047cd52 281}
784877d9 282
3a2ebbf4 283### Reading construct/destruct functions
284
285sub add_reading {
286 my( $self, $reading ) = @_;
287 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
288 my %args = %$reading;
e4b73942 289 if( $args{'init'} ) {
290 # If we are initializing an empty collation, don't assume that we
291 # have set a tradition.
292 delete $args{'init'};
293 } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
6ad2ce78 294 $args{'language'} = $self->tradition->language;
295 }
3a2ebbf4 296 $reading = Text::Tradition::Collation::Reading->new(
297 'collation' => $self,
298 %args );
299 }
300 # First check to see if a reading with this ID exists.
301 if( $self->reading( $reading->id ) ) {
63778331 302 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 303 }
c1915ab9 304 $self->_graphcalc_done(0);
3a2ebbf4 305 $self->_add_reading( $reading->id => $reading );
306 # Once the reading has been added, put it in both graphs.
307 $self->sequence->add_vertex( $reading->id );
cecbe56d 308 $self->relations->add_reading( $reading->id );
3a2ebbf4 309 return $reading;
eca16057 310};
311
3a2ebbf4 312around del_reading => sub {
313 my $orig = shift;
314 my $self = shift;
315 my $arg = shift;
316
cecbe56d 317 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
318 $arg = $arg->id;
3a2ebbf4 319 }
3a2ebbf4 320 # Remove the reading from the graphs.
c1915ab9 321 $self->_graphcalc_done(0);
4e483aa5 322 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 323 $self->sequence->delete_vertex( $arg );
324 $self->relations->delete_reading( $arg );
3a2ebbf4 325
326 # Carry on.
cecbe56d 327 $self->$orig( $arg );
3a2ebbf4 328};
7854e12e 329
4e483aa5 330=begin testing
331
332use Text::Tradition;
333
334my $cxfile = 't/data/Collatex-16.xml';
335my $t = Text::Tradition->new(
336 'name' => 'inline',
337 'input' => 'CollateX',
338 'file' => $cxfile,
339 );
340my $c = $t->collation;
341
342my $rno = scalar $c->readings;
343# Split n21 for testing purposes
344my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
345my $old_r = $c->reading( 'n21' );
346$old_r->alter_text( 'to' );
347$c->del_path( 'n20', 'n21', 'A' );
348$c->add_path( 'n20', 'n21p0', 'A' );
349$c->add_path( 'n21p0', 'n21', 'A' );
350$c->flatten_ranks();
351ok( $c->reading( 'n21p0' ), "New reading exists" );
352is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
353
679f17e1 354# Combine n3 and n4 ( with his )
4e483aa5 355$c->merge_readings( 'n3', 'n4', 1 );
356ok( !$c->reading('n4'), "Reading n4 is gone" );
357is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
358
679f17e1 359# Collapse n9 and n10 ( rood / root )
360$c->merge_readings( 'n9', 'n10' );
361ok( !$c->reading('n10'), "Reading n10 is gone" );
362is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 363
364# Combine n21 and n21p0
365my $remaining = $c->reading('n21');
366$remaining ||= $c->reading('n22'); # one of these should still exist
367$c->merge_readings( 'n21p0', $remaining, 1 );
368ok( !$c->reading('n21'), "Reading $remaining is gone" );
369is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
370
371=end testing
372
373=cut
7854e12e 374
3a2ebbf4 375sub merge_readings {
376 my $self = shift;
377
56772e8c 378 # Sanity check
379 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
380 my $mergemeta = $kept_obj->is_meta;
381 throw( "Cannot merge meta and non-meta reading" )
382 unless ( $mergemeta && $del_obj->is_meta )
383 || ( !$mergemeta && !$del_obj->is_meta );
384 if( $mergemeta ) {
385 throw( "Cannot merge with start or end node" )
386 if( $kept_obj eq $self->start || $kept_obj eq $self->end
387 || $del_obj eq $self->start || $del_obj eq $self->end );
388 }
3a2ebbf4 389 # We only need the IDs for adding paths to the graph, not the reading
390 # objects themselves.
56772e8c 391 my $kept = $kept_obj->id;
392 my $deleted = $del_obj->id;
c1915ab9 393 $self->_graphcalc_done(0);
10943ab0 394
3a2ebbf4 395 # The kept reading should inherit the paths and the relationships
396 # of the deleted reading.
397 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
398 my @vector = ( $kept );
399 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
400 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 401 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 402 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
403 $self->sequence->add_edge( @vector );
404 my $fwits = $self->sequence->get_edge_attributes( @vector );
405 @wits{keys %$fwits} = values %$fwits;
406 $self->sequence->set_edge_attributes( @vector, \%wits );
407 }
cecbe56d 408 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 409
410 # Do the deletion deed.
4e483aa5 411 if( $combine ) {
4e483aa5 412 my $joinstr = $combine_char;
413 unless( defined $joinstr ) {
414 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
415 $joinstr = $self->wordsep unless defined $joinstr;
416 }
417 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 418 }
3a2ebbf4 419 $self->del_reading( $deleted );
420}
7854e12e 421
3265b0ce 422
3a2ebbf4 423# Helper function for manipulating the graph.
424sub _stringify_args {
4e483aa5 425 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 426 $first = $first->id
427 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
428 $second = $second->id
429 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 430 return( $first, $second, @args );
3a2ebbf4 431}
df6d9812 432
4e5a7b2c 433# Helper function for manipulating the graph.
434sub _objectify_args {
435 my( $self, $first, $second, $arg ) = @_;
436 $first = $self->reading( $first )
437 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
438 $second = $self->reading( $second )
439 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
440 return( $first, $second, $arg );
441}
3a2ebbf4 442### Path logic
443
444sub add_path {
445 my $self = shift;
446
447 # We only need the IDs for adding paths to the graph, not the reading
448 # objects themselves.
cecbe56d 449 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 450
c1915ab9 451 $self->_graphcalc_done(0);
3a2ebbf4 452 # Connect the readings
359944f7 453 unless( $self->sequence->has_edge( $source, $target ) ) {
454 $self->sequence->add_edge( $source, $target );
455 $self->relations->add_equivalence_edge( $source, $target );
456 }
3a2ebbf4 457 # Note the witness in question
458 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 459}
b15511bf 460
3a2ebbf4 461sub del_path {
462 my $self = shift;
49d4f2ac 463 my @args;
464 if( ref( $_[0] ) eq 'ARRAY' ) {
465 my $e = shift @_;
466 @args = ( @$e, @_ );
467 } else {
468 @args = @_;
469 }
3a2ebbf4 470
471 # We only need the IDs for adding paths to the graph, not the reading
472 # objects themselves.
49d4f2ac 473 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 474
c1915ab9 475 $self->_graphcalc_done(0);
3a2ebbf4 476 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 477 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 478 }
479 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
480 $self->sequence->delete_edge( $source, $target );
359944f7 481 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 482 }
784877d9 483}
484
3a2ebbf4 485
15d2d3df 486# Extra graph-alike utility
487sub has_path {
3a2ebbf4 488 my $self = shift;
489 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
490 return undef unless $self->sequence->has_edge( $source, $target );
491 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 492}
493
4e5a7b2c 494=head2 clear_witness( @sigil_list )
3a2ebbf4 495
4e5a7b2c 496Clear the given witnesses out of the collation entirely, removing references
497to them in paths, and removing readings that belong only to them. Should only
498be called via $tradition->del_witness.
3a2ebbf4 499
500=cut
501
4e5a7b2c 502sub clear_witness {
503 my( $self, @sigils ) = @_;
504
c1915ab9 505 $self->_graphcalc_done(0);
4e5a7b2c 506 # Clear the witness(es) out of the paths
507 foreach my $e ( $self->paths ) {
508 foreach my $sig ( @sigils ) {
509 $self->del_path( $e, $sig );
510 }
511 }
512
513 # Clear out the newly unused readings
514 foreach my $r ( $self->readings ) {
515 unless( $self->reading_witnesses( $r ) ) {
516 $self->del_reading( $r );
517 }
518 }
519}
3a2ebbf4 520
521sub add_relationship {
522 my $self = shift;
22222af9 523 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 524 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 525 $self->_graphcalc_done(0);
63778331 526 return @vectors;
22222af9 527}
ef9d481f 528
ca6e6095 529around qw/ get_relationship del_relationship / => sub {
530 my $orig = shift;
531 my $self = shift;
532 my @args = @_;
533 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
534 @args = @{$_[0]};
535 }
536 my( $source, $target ) = $self->_stringify_args( @args );
537 $self->$orig( $source, $target );
538};
539
22222af9 540=head2 reading_witnesses( $reading )
910a0a6d 541
22222af9 542Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 543
22222af9 544=cut
1d310495 545
1d310495 546sub reading_witnesses {
547 my( $self, $reading ) = @_;
548 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 549 # arbitrarily chosen "incoming". Thus, special-case the start node.
550 if( $reading eq $self->start ) {
551 return map { $_->sigil } $self->tradition->witnesses;
552 }
1d310495 553 my %all_witnesses;
554 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
555 my $wits = $self->sequence->get_edge_attributes( @$e );
556 @all_witnesses{ keys %$wits } = 1;
557 }
c12bb878 558 my $acstr = $self->ac_label;
559 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
560 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
561 }
1d310495 562 return keys %all_witnesses;
910a0a6d 563}
564
4e5a7b2c 565=head1 OUTPUT METHODS
8e1394aa 566
0ecb975c 567=head2 as_svg( \%options )
8e1394aa 568
0068967c 569Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 570See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 571
572=cut
573
574sub as_svg {
0ecb975c 575 my( $self, $opts ) = @_;
bfcbcecb 576 throw( "Need GraphViz installed to output SVG" )
577 unless File::Which::which( 'dot' );
e247aad1 578 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 579 $self->calculate_ranks()
580 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
e247aad1 581 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 582 my @cmd = qw/dot -Tsvg/;
583 my( $svg, $err );
584 my $dotfile = File::Temp->new();
585 ## USE FOR DEBUGGING
586 # $dotfile->unlink_on_destroy(0);
587 binmode $dotfile, ':utf8';
588 print $dotfile $self->as_dot( $opts );
589 push( @cmd, $dotfile->filename );
590 run( \@cmd, ">", binary(), \$svg );
e247aad1 591 $svg = decode_utf8( $svg );
592 $self->cached_svg( $svg ) unless $want_subgraph;
593 return $svg;
594 } else {
595 return $self->cached_svg;
b365fbae 596 }
8e1394aa 597}
598
b22576c6 599
0ecb975c 600=head2 as_dot( \%options )
b22576c6 601
0ecb975c 602Returns a string that is the collation graph expressed in dot
603(i.e. GraphViz) format. Options include:
b22576c6 604
0ecb975c 605=over 4
b22576c6 606
0ecb975c 607=item * from
b22576c6 608
0ecb975c 609=item * to
df6d9812 610
0ecb975c 611=item * color_common
612
613=back
df6d9812 614
615=cut
616
617sub as_dot {
0ecb975c 618 my( $self, $opts ) = @_;
619 my $startrank = $opts->{'from'} if $opts;
620 my $endrank = $opts->{'to'} if $opts;
621 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 622 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
623 && $self->end->rank > 100;
6648ee3d 624 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 625
b22576c6 626 # Check the arguments
627 if( $startrank ) {
628 return if $endrank && $startrank > $endrank;
629 return if $startrank > $self->end->rank;
630 }
631 if( defined $endrank ) {
632 return if $endrank < 0;
f1b3b33a 633 $endrank = undef if $endrank == $self->end->rank;
b22576c6 634 }
635
67da8d6c 636 my $graph_name = $self->tradition->name;
637 $graph_name =~ s/[^\w\s]//g;
638 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 639
640 my %graph_attrs = (
641 'rankdir' => 'LR',
642 'bgcolor' => 'none',
643 );
644 my %node_attrs = (
b8990398 645 'fontsize' => 14,
f13b5582 646 'fillcolor' => 'white',
647 'style' => 'filled',
648 'shape' => 'ellipse'
649 );
650 my %edge_attrs = (
651 'arrowhead' => 'open',
652 'color' => '#000000',
653 'fontcolor' => '#000000',
654 );
655
67da8d6c 656 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 657 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
658 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 659
b22576c6 660 # Output substitute start/end readings if necessary
661 if( $startrank ) {
43c94341 662 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 663 }
664 if( $endrank ) {
43c94341 665 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 666 }
b365fbae 667 if( $STRAIGHTENHACK ) {
668 ## HACK part 1
43c94341 669 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
670 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 671 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
672 }
b22576c6 673 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 674 # Sort the readings by rank if we have ranks; this speeds layout.
675 my @all_readings = $self->end->has_rank
676 ? sort { $a->rank <=> $b->rank } $self->readings
677 : $self->readings;
4633f9e4 678 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 679 foreach my $reading ( @all_readings ) {
b22576c6 680 # Only output readings within our rank range.
681 next if $startrank && $reading->rank < $startrank;
682 next if $endrank && $reading->rank > $endrank;
683 $used{$reading->id} = 1;
910a0a6d 684 # Need not output nodes without separate labels
3a2ebbf4 685 next if $reading->id eq $reading->text;
d4b75f44 686 my $rattrs;
30f0df34 687 my $label = $reading->text;
629e27b0 688 $label .= '-' if $reading->join_next;
689 $label = "-$label" if $reading->join_prior;
8f9cab7b 690 $label =~ s/\"/\\\"/g;
d4b75f44 691 $rattrs->{'label'} = $label;
10e4b1ac 692 $rattrs->{'id'} = $reading->id;
0ecb975c 693 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 694 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 695 }
3a2ebbf4 696
30ddc24c 697 # Add the real edges. Need to weight one edge per rank jump, in a
698 # continuous line.
b365fbae 699 # my $weighted = $self->_add_edge_weights;
b22576c6 700 my @edges = $self->paths;
3bdec618 701 my( %substart, %subend );
b22576c6 702 foreach my $edge ( @edges ) {
703 # Do we need to output this edge?
508fd430 704 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 705 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 706 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 707
b22576c6 708 # Account for the rank gap if necessary
30ddc24c 709 my $rank0 = $self->reading( $edge->[0] )->rank
710 if $self->reading( $edge->[0] )->has_rank;
711 my $rank1 = $self->reading( $edge->[1] )->rank
712 if $self->reading( $edge->[1] )->has_rank;
713 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
714 $variables->{'minlen'} = $rank1 - $rank0;
715 }
716
717 # Add the calculated edge weights
b365fbae 718 # if( exists $weighted->{$edge->[0]}
e247aad1 719 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
720 # # $variables->{'color'} = 'red';
721 # $variables->{'weight'} = 3.0;
722 # }
30ddc24c 723
508fd430 724 # EXPERIMENTAL: make edge width reflect no. of witnesses
725 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
726 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
727
f13b5582 728 my $varopts = _dot_attr_string( $variables );
729 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
730 $edge->[0], $edge->[1], $varopts );
3bdec618 731 } elsif( $used{$edge->[0]} ) {
96ba0418 732 $subend{$edge->[0]} = $edge->[1];
3bdec618 733 } elsif( $used{$edge->[1]} ) {
96ba0418 734 $substart{$edge->[1]} = $edge->[0];
b22576c6 735 }
df6d9812 736 }
3bdec618 737 # Add substitute start and end edges if necessary
738 foreach my $node ( keys %substart ) {
96ba0418 739 my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
f13b5582 740 my $variables = { %edge_attrs, 'label' => $witstr };
96ba0418 741 my $nrdg = $self->reading( $node );
742 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
743 # Substart is actually one lower than $startrank
744 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
745 }
f13b5582 746 my $varopts = _dot_attr_string( $variables );
96ba0418 747 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
3bdec618 748 }
749 foreach my $node ( keys %subend ) {
96ba0418 750 my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
f13b5582 751 my $variables = { %edge_attrs, 'label' => $witstr };
752 my $varopts = _dot_attr_string( $variables );
96ba0418 753 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
3bdec618 754 }
b365fbae 755 # HACK part 2
756 if( $STRAIGHTENHACK ) {
43c94341 757 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
758 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 759 }
30ddc24c 760
df6d9812 761 $dot .= "}\n";
762 return $dot;
763}
764
f13b5582 765sub _dot_attr_string {
766 my( $hash ) = @_;
767 my @attrs;
768 foreach my $k ( sort keys %$hash ) {
769 my $v = $hash->{$k};
770 push( @attrs, $k.'="'.$v.'"' );
771 }
772 return( '[ ' . join( ', ', @attrs ) . ' ]' );
773}
774
30ddc24c 775sub _add_edge_weights {
776 my $self = shift;
777 # Walk the graph from START to END, choosing the successor node with
778 # the largest number of witness paths each time.
779 my $weighted = {};
780 my $curr = $self->start->id;
008fc8a6 781 my $ranked = $self->end->has_rank;
30ddc24c 782 while( $curr ne $self->end->id ) {
008fc8a6 783 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 784 my @succ = sort { $self->path_witnesses( $curr, $a )
785 <=> $self->path_witnesses( $curr, $b ) }
786 $self->sequence->successors( $curr );
787 my $next = pop @succ;
008fc8a6 788 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 789 # Try to avoid lacunae in the weighted path.
008fc8a6 790 while( @succ &&
791 ( $self->reading( $next )->is_lacuna ||
792 $nextrank - $rank > 1 ) ){
30ddc24c 793 $next = pop @succ;
794 }
795 $weighted->{$curr} = $next;
796 $curr = $next;
797 }
798 return $weighted;
799}
800
027d819c 801=head2 path_witnesses( $edge )
802
803Returns the list of sigils whose witnesses are associated with the given edge.
804The edge can be passed as either an array or an arrayref of ( $source, $target ).
805
806=cut
807
3a2ebbf4 808sub path_witnesses {
809 my( $self, @edge ) = @_;
810 # If edge is an arrayref, cope.
811 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
812 my $e = shift @edge;
813 @edge = @$e;
814 }
815 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 816 return @wits;
3a2ebbf4 817}
818
7f9f05e8 819# Helper function. Make a display label for the given witnesses, showing a.c.
820# witnesses only where the main witness is not also in the list.
027d819c 821sub _path_display_label {
508fd430 822 my $self = shift;
7f9f05e8 823 my %wits;
824 map { $wits{$_} = 1 } @_;
825
826 # If an a.c. wit is listed, remove it if the main wit is also listed.
827 # Otherwise keep it for explicit listing.
828 my $aclabel = $self->ac_label;
829 my @disp_ac;
830 foreach my $w ( sort keys %wits ) {
831 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
832 if( exists $wits{$1} ) {
833 delete $wits{$w};
834 } else {
835 push( @disp_ac, $w );
836 }
837 }
838 }
839
840 # See if we are in a majority situation.
8f9cab7b 841 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1ff82d4f 842 $maj = $maj > 5 ? $maj : 5;
7f9f05e8 843 if( scalar keys %wits > $maj ) {
844 unshift( @disp_ac, 'majority' );
845 return join( ', ', @disp_ac );
8f9cab7b 846 } else {
7f9f05e8 847 return join( ', ', sort keys %wits );
8f9cab7b 848 }
849}
1dd07bda 850
bf6e338d 851=head2 readings_at_rank( $rank )
1dd07bda 852
bf6e338d 853Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 854
855=cut
856
bf6e338d 857sub readings_at_rank {
1dd07bda 858 my( $self, $rank ) = @_;
bf6e338d 859 my $table = $self->alignment_table;
860 # Table rank is real rank - 1.
861 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
862 my %readings;
863 foreach my $e ( @elements ) {
864 next unless ref( $e ) eq 'HASH';
865 next unless exists $e->{'t'};
866 $readings{$e->{'t'}->id} = $e->{'t'};
867 }
868 return values %readings;
1dd07bda 869}
8f9cab7b 870
4e5a7b2c 871=head2 as_graphml
8e1394aa 872
4e5a7b2c 873Returns a GraphML representation of the collation. The GraphML will contain
874two graphs. The first expresses the attributes of the readings and the witness
875paths that link them; the second expresses the relationships that link the
876readings. This is the native transfer format for a tradition.
8e1394aa 877
56eefa04 878=begin testing
879
880use Text::Tradition;
881
882my $READINGS = 311;
883my $PATHS = 361;
884
885my $datafile = 't/data/florilegium_tei_ps.xml';
886my $tradition = Text::Tradition->new( 'input' => 'TEI',
887 'name' => 'test0',
888 'file' => $datafile,
889 'linear' => 1 );
890
891ok( $tradition, "Got a tradition object" );
892is( scalar $tradition->witnesses, 13, "Found all witnesses" );
893ok( $tradition->collation, "Tradition has a collation" );
894
895my $c = $tradition->collation;
896is( scalar $c->readings, $READINGS, "Collation has all readings" );
897is( scalar $c->paths, $PATHS, "Collation has all paths" );
898is( scalar $c->relationships, 0, "Collation has all relationships" );
899
900# Add a few relationships
901$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
902$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
903$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
904
905# Now write it to GraphML and parse it again.
906
907my $graphml = $c->as_graphml;
908my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
909is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
910is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
911is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
912
2a812726 913# Now add a stemma, write to GraphML, and parse again.
914my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
915is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
916is( $tradition->stemmata, 1, "Tradition now has the stemma" );
917$graphml = $c->as_graphml;
918like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
919
56eefa04 920=end testing
921
8e1394aa 922=cut
923
924sub as_graphml {
a30ca502 925 my( $self, $options ) = @_;
3d14b48e 926 $self->calculate_ranks unless $self->_graphcalc_done;
927
a30ca502 928 my $start = $options->{'from'}
929 ? $self->reading( $options->{'from'} ) : $self->start;
930 my $end = $options->{'to'}
931 ? $self->reading( $options->{'to'} ) : $self->end;
932 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
933 throw( 'Start node must be before end node' );
934 }
935 # The readings need to be ranked for this to work.
936 $start = $self->start unless $start->has_rank;
937 $end = $self->end unless $end->has_rank;
414cc046 938 my $rankoffset = 0;
939 unless( $start eq $self->start ) {
940 $rankoffset = $start->rank - 1;
941 }
a30ca502 942 my %use_readings;
943
8e1394aa 944 # Some namespaces
945 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
946 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
947 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 948 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 949
950 # Create the document and root node
951 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
952 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
953 $graphml->setDocumentElement( $root );
954 $root->setNamespace( $xsi_ns, 'xsi', 0 );
955 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 956
957 # List of attribute types to save on our objects and their corresponding
958 # GraphML types
959 my %save_types = (
960 'Str' => 'string',
961 'Int' => 'int',
962 'Bool' => 'boolean',
10e4b1ac 963 'ReadingID' => 'string',
bbd064a9 964 'RelationshipType' => 'string',
965 'RelationshipScope' => 'string',
966 );
967
968 # List of attribute names *not* to save on our objects.
969 # We will also not save any attribute beginning with _.
970 my %skipsave;
971 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 972
bbd064a9 973 # Add the data keys for the graph. Include an extra key 'version' for the
974 # GraphML output version.
e309421a 975 my %graph_data_keys;
976 my $gdi = 0;
bbd064a9 977 my %graph_attributes = ( 'version' => 'string' );
978 # Graph attributes include those of Tradition and those of Collation.
979 my %gattr_from;
980 my $tmeta = $self->tradition->meta;
981 my $cmeta = $self->meta;
982 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
983 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
984 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
985 next if $attr->name =~ /^_/;
986 next if $skipsave{$attr->name};
987 next unless $save_types{$attr->type_constraint->name};
988 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
989 }
2a812726 990 # Extra custom key for the tradition stemma(ta)
991 $graph_attributes{'stemmata'} = 'string';
bbd064a9 992
993 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 994 $graph_data_keys{$datum} = 'dg'.$gdi++;
995 my $key = $root->addNewChild( $graphml_ns, 'key' );
996 $key->setAttribute( 'attr.name', $datum );
bbd064a9 997 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 998 $key->setAttribute( 'for', 'graph' );
999 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1000 }
f6066bac 1001
bbd064a9 1002 # Add the data keys for reading nodes
1003 my %reading_attributes;
1004 my $rmeta = Text::Tradition::Collation::Reading->meta;
1005 foreach my $attr( $rmeta->get_all_attributes ) {
1006 next if $attr->name =~ /^_/;
1007 next if $skipsave{$attr->name};
1008 next unless $save_types{$attr->type_constraint->name};
1009 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1010 }
7cd9f181 1011 # Extra custom key for the reading morphology
1012 $reading_attributes{'lexemes'} = 'string';
1013
ef9d481f 1014 my %node_data_keys;
1015 my $ndi = 0;
bbd064a9 1016 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1017 $node_data_keys{$datum} = 'dn'.$ndi++;
1018 my $key = $root->addNewChild( $graphml_ns, 'key' );
1019 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1020 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1021 $key->setAttribute( 'for', 'node' );
1022 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1023 }
1024
bbd064a9 1025 # Add the data keys for edges, that is, paths and relationships. Path
1026 # data does not come from a Moose class so is here manually.
ef9d481f 1027 my $edi = 0;
1028 my %edge_data_keys;
bbd064a9 1029 my %edge_attributes = (
3a2ebbf4 1030 witness => 'string', # ID/label for a path
3a2ebbf4 1031 extra => 'boolean', # Path key
3a2ebbf4 1032 );
bbd064a9 1033 my @path_attributes = keys %edge_attributes; # track our manual additions
1034 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1035 foreach my $attr( $pmeta->get_all_attributes ) {
1036 next if $attr->name =~ /^_/;
1037 next if $skipsave{$attr->name};
1038 next unless $save_types{$attr->type_constraint->name};
1039 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1040 }
1041 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1042 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1043 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1044 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1045 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1046 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1047 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1048 }
3a2ebbf4 1049
cc31ebaa 1050 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1051 my $xmlidname = $self->tradition->name;
1052 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1053 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1054 $xmlidname = '_'.$xmlidname;
1055 }
2c669bca 1056 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1057 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1058 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1059 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1060 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1061 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1062 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1063 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1064
2a812726 1065 # Tradition/collation attribute data
bbd064a9 1066 foreach my $datum ( keys %graph_attributes ) {
1067 my $value;
1068 if( $datum eq 'version' ) {
2a812726 1069 $value = '3.2';
1070 } elsif( $datum eq 'stemmata' ) {
1071 my @stemstrs;
1072 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1073 $self->tradition->stemmata;
1074 $value = join( "\n", @stemstrs );
bbd064a9 1075 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1076 $value = $self->tradition->$datum;
1077 } else {
1078 $value = $self->$datum;
1079 }
2c669bca 1080 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1081 }
8e1394aa 1082
1083 my $node_ctr = 0;
1084 my %node_hash;
22222af9 1085 # Add our readings to the graph
3a2ebbf4 1086 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1087 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1088 ( $n->rank < $start->rank || $n->rank > $end->rank );
1089 $use_readings{$n->id} = 1;
2c669bca 1090 # Add to the main graph
1091 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1092 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1093 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1094 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1095 foreach my $d ( keys %reading_attributes ) {
255875b8 1096 my $nval = $n->$d;
7cd9f181 1097 # Custom serialization
1098 if( $d eq 'lexemes' ) {
1099 # If nval is a true value, we have lexemes so we need to
1100 # serialize them. Otherwise set nval to undef so that the
1101 # key is excluded from this reading.
1102 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1103 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1104 $nval = undef;
7cd9f181 1105 }
cc31ebaa 1106 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1107 # Adjust the ranks within the subgraph.
cc31ebaa 1108 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1109 : $nval - $rankoffset;
414cc046 1110 }
255875b8 1111 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1112 if defined $nval;
1113 }
b15511bf 1114 }
1115
2c669bca 1116 # Add the path edges to the sequence graph
df6d9812 1117 my $edge_ctr = 0;
3a2ebbf4 1118 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1119 # We add an edge in the graphml for every witness in $e.
a30ca502 1120 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1121 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1122 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1123 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1124 # Skip any path from start to end; that witness is not in the subgraph.
1125 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1126 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1127 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1128 $node_hash{ $e->[0] },
1129 $node_hash{ $e->[1] } );
2c669bca 1130 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1131 $edge_el->setAttribute( 'source', $from );
1132 $edge_el->setAttribute( 'target', $to );
1133 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1134
1135 # It's a witness path, so add the witness
1136 my $base = $wit;
1137 my $key = $edge_data_keys{'witness'};
1138 # Is this an ante-corr witness?
1139 my $aclabel = $self->ac_label;
1140 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1141 # Keep the base witness
1142 $base = $1;
1143 # ...and record that this is an 'extra' reading path
1144 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1145 }
1146 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1147 }
1148 }
1149
cc31ebaa 1150 # Report the actual number of nodes and edges that went in
1151 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1152 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1153
22222af9 1154 # Add the relationship graph to the XML
bbd064a9 1155 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1156 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1157 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1158
94c00c71 1159 # Save and return the thing
1160 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1161 return $result;
df6d9812 1162}
1163
b15511bf 1164sub _add_graphml_data {
1165 my( $el, $key, $value ) = @_;
b15511bf 1166 return unless defined $value;
c9bf3dbf 1167 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1168 $data_el->setAttribute( 'key', $key );
1169 $data_el->appendText( $value );
8e1394aa 1170}
1171
4e5a7b2c 1172=head2 as_csv
910a0a6d 1173
1174Returns a CSV alignment table representation of the collation graph, one
2c669bca 1175row per witness (or witness uncorrected.)
910a0a6d 1176
1177=cut
1178
1179sub as_csv {
3a2ebbf4 1180 my( $self ) = @_;
1dd07bda 1181 my $table = $self->alignment_table;
82fa4d57 1182 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1183 my @result;
2c669bca 1184 # Make the header row
1185 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1186 push( @result, decode_utf8( $csv->string ) );
1187 # Make the rest of the rows
1188 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1189 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1190 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1191 $csv->combine( @row );
910a0a6d 1192 push( @result, decode_utf8( $csv->string ) );
1193 }
3a2ebbf4 1194 return join( "\n", @result );
910a0a6d 1195}
1196
1dd07bda 1197=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1198
566f4595 1199Return a reference to an alignment table, in a slightly enhanced CollateX
1200format which looks like this:
1201
1202 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1203 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1204 { witness => "SIG2",
4e5a7b2c 1205 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1206 ... ],
1207 length => TEXTLEN };
1208
1209If $use_refs is set to 1, the reading object is returned in the table
1210instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1211
1212If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1213keys have a true hash value will be included.
2c669bca 1214
1215=cut
9f3ba6f7 1216
1dd07bda 1217sub alignment_table {
1218 my( $self ) = @_;
c1915ab9 1219 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1220 return $self->cached_table if $self->has_cached_table;
1221
0ecb975c 1222 # Make sure we can do this
1223 throw( "Need a linear graph in order to make an alignment table" )
1224 unless $self->linear;
1225 $self->calculate_ranks unless $self->end->has_rank;
1226
2c669bca 1227 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1228 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1229 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1230 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1231 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1232 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1233 push( @{$table->{'alignment'}},
1234 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1235 if( $wit->is_layered ) {
1236 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1237 $wit->sigil.$self->ac_label );
1dd07bda 1238 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1239 push( @{$table->{'alignment'}},
1240 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1241 }
1242 }
1dd07bda 1243 $self->cached_table( $table );
1244 return $table;
910a0a6d 1245}
1246
1247sub _make_witness_row {
1dd07bda 1248 my( $path, $positions ) = @_;
910a0a6d 1249 my %char_hash;
1250 map { $char_hash{$_} = undef } @$positions;
2c669bca 1251 my $debug = 0;
910a0a6d 1252 foreach my $rdg ( @$path ) {
eca16057 1253 my $rtext = $rdg->text;
1254 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1255 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1256 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1257 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1258 }
1259 my @row = map { $char_hash{$_} } @$positions;
eca16057 1260 # Fill in lacuna markers for undef spots in the row
1261 my $last_el = shift @row;
1262 my @filled_row = ( $last_el );
1263 foreach my $el ( @row ) {
0e476982 1264 # If we are using node reference, make the lacuna node appear many times
1265 # in the table. If not, use the lacuna tag.
1dd07bda 1266 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1267 $el = $last_el;
eca16057 1268 }
1269 push( @filled_row, $el );
1270 $last_el = $el;
1271 }
1272 return @filled_row;
910a0a6d 1273}
1274
4e5a7b2c 1275=head1 NAVIGATION METHODS
910a0a6d 1276
4e5a7b2c 1277=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1278
1279Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1280with $last, for the witness given in $sigil. If a $backup sigil is
1281specified (e.g. when walking a layered witness), it will be used wherever
1282no $sigil path exists. If there is a base text reading, that will be
1283used wherever no path exists for $sigil or $backup.
e2902068 1284
1285=cut
1286
910a0a6d 1287# TODO Think about returning some lazy-eval iterator.
b0b4421a 1288# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1289
e2902068 1290sub reading_sequence {
861c3e27 1291 my( $self, $start, $end, $witness ) = @_;
e2902068 1292
930ff666 1293 $witness = $self->baselabel unless $witness;
e2902068 1294 my @readings = ( $start );
1295 my %seen;
1296 my $n = $start;
3a2ebbf4 1297 while( $n && $n->id ne $end->id ) {
1298 if( exists( $seen{$n->id} ) ) {
63778331 1299 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1300 }
3a2ebbf4 1301 $seen{$n->id} = 1;
910a0a6d 1302
861c3e27 1303 my $next = $self->next_reading( $n, $witness );
44771cf2 1304 unless( $next ) {
63778331 1305 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1306 }
910a0a6d 1307 push( @readings, $next );
1308 $n = $next;
e2902068 1309 }
1310 # Check that the last reading is our end reading.
1311 my $last = $readings[$#readings];
63778331 1312 throw( "Last reading found from " . $start->text .
1313 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1314 unless $last->id eq $end->id;
e2902068 1315
1316 return @readings;
1317}
1318
4e5a7b2c 1319=head2 next_reading( $reading, $sigil );
8e1394aa 1320
4a8828f0 1321Returns the reading that follows the given reading along the given witness
930ff666 1322path.
8e1394aa 1323
1324=cut
1325
4a8828f0 1326sub next_reading {
e2902068 1327 # Return the successor via the corresponding path.
8e1394aa 1328 my $self = shift;
3a2ebbf4 1329 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1330 return undef unless $answer;
3a2ebbf4 1331 return $self->reading( $answer );
8e1394aa 1332}
1333
4e5a7b2c 1334=head2 prior_reading( $reading, $sigil )
8e1394aa 1335
4a8828f0 1336Returns the reading that precedes the given reading along the given witness
930ff666 1337path.
8e1394aa 1338
1339=cut
1340
4a8828f0 1341sub prior_reading {
e2902068 1342 # Return the predecessor via the corresponding path.
8e1394aa 1343 my $self = shift;
3a2ebbf4 1344 my $answer = $self->_find_linked_reading( 'prior', @_ );
1345 return $self->reading( $answer );
8e1394aa 1346}
1347
4a8828f0 1348sub _find_linked_reading {
861c3e27 1349 my( $self, $direction, $node, $path ) = @_;
1350
1351 # Get a backup if we are dealing with a layered witness
1352 my $alt_path;
1353 my $aclabel = $self->ac_label;
1354 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1355 $alt_path = $1;
1356 }
1357
e2902068 1358 my @linked_paths = $direction eq 'next'
3a2ebbf4 1359 ? $self->sequence->edges_from( $node )
1360 : $self->sequence->edges_to( $node );
e2902068 1361 return undef unless scalar( @linked_paths );
8e1394aa 1362
e2902068 1363 # We have to find the linked path that contains all of the
1364 # witnesses supplied in $path.
1365 my( @path_wits, @alt_path_wits );
4e5a7b2c 1366 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1367 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1368 my $base_le;
1369 my $alt_le;
1370 foreach my $le ( @linked_paths ) {
3a2ebbf4 1371 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1372 $base_le = $le;
910a0a6d 1373 }
508fd430 1374 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1375 if( _is_within( \@path_wits, \@le_wits ) ) {
1376 # This is the right path.
1377 return $direction eq 'next' ? $le->[1] : $le->[0];
1378 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1379 $alt_le = $le;
1380 }
8e1394aa 1381 }
e2902068 1382 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1383 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1384 if $alt_le;
e2902068 1385
1386 # Got this far? Return the base path if it exists.
3a2ebbf4 1387 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1388 if $base_le;
e2902068 1389
1390 # Got this far? We have no appropriate path.
2c669bca 1391 warn "Could not find $direction node from " . $node->id
910a0a6d 1392 . " along path $path";
8e1394aa 1393 return undef;
1394}
1395
4a8828f0 1396# Some set logic.
1397sub _is_within {
1398 my( $set1, $set2 ) = @_;
7854e12e 1399 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1400 foreach my $el ( @$set1 ) {
910a0a6d 1401 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1402 }
1403 return $ret;
1404}
1405
4e5a7b2c 1406# Return the string that joins together a list of witnesses for
1407# display on a single path.
1408sub _witnesses_of_label {
1409 my( $self, $label ) = @_;
1410 my $regex = $self->wit_list_separator;
1411 my @answer = split( /\Q$regex\E/, $label );
1412 return @answer;
b0b4421a 1413}
1414
d4b75f44 1415=head2 common_readings
1416
1417Returns the list of common readings in the graph (i.e. those readings that are
1418shared by all non-lacunose witnesses.)
1419
1420=cut
1421
1422sub common_readings {
1423 my $self = shift;
1424 my @common = grep { $_->is_common } $self->readings;
1425 return @common;
1426}
1427
fae52efd 1428=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1429
1430Returns the text of a witness (plus its backup, if we are using a layer)
1431as stored in the collation. The text is returned as a string, where the
1432individual readings are joined with spaces and the meta-readings (e.g.
1433lacunae) are omitted. Optional specification of $start and $end allows
1434the generation of a subset of the witness text.
4e5a7b2c 1435
b0b4421a 1436=cut
1437
1438sub path_text {
861c3e27 1439 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1440 $start = $self->start unless $start;
1441 $end = $self->end unless $end;
861c3e27 1442 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1443 my $pathtext = '';
1444 my $last;
1445 foreach my $r ( @path ) {
6ad2ce78 1446 unless ( $r->join_prior || !$last || $last->join_next ) {
1447 $pathtext .= ' ';
1448 }
1449 $pathtext .= $r->text;
629e27b0 1450 $last = $r;
1451 }
1452 return $pathtext;
b0b4421a 1453}
4e5a7b2c 1454
1455=head1 INITIALIZATION METHODS
1456
1457These are mostly for use by parsers.
1458
1459=head2 make_witness_path( $witness )
1460
1461Link the array of readings contained in $witness->path (and in
1462$witness->uncorrected_path if it exists) into collation paths.
1463Clear out the arrays when finished.
de51424a 1464
4e5a7b2c 1465=head2 make_witness_paths
1466
1467Call make_witness_path for all witnesses in the tradition.
1468
1469=cut
930ff666 1470
7e450e44 1471# For use when a collation is constructed from a base text and an apparatus.
1472# We have the sequences of readings and just need to add path edges.
1f7aa795 1473# When we are done, clear out the witness path attributes, as they are no
1474# longer needed.
1475# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1476
6a222840 1477sub make_witness_paths {
1478 my( $self ) = @_;
910a0a6d 1479 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1480 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1481 $self->make_witness_path( $wit );
7854e12e 1482 }
7854e12e 1483}
1484
6a222840 1485sub make_witness_path {
7854e12e 1486 my( $self, $wit ) = @_;
1487 my @chain = @{$wit->path};
15d2d3df 1488 my $sig = $wit->sigil;
fae52efd 1489 # Add start and end if necessary
1490 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1491 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1492 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1493 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1494 }
1f7aa795 1495 if( $wit->is_layered ) {
d9e873d0 1496 @chain = @{$wit->uncorrected_path};
fae52efd 1497 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1498 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1499 foreach my $idx( 0 .. $#chain-1 ) {
1500 my $source = $chain[$idx];
1501 my $target = $chain[$idx+1];
1502 $self->add_path( $source, $target, $sig.$self->ac_label )
1503 unless $self->has_path( $source, $target, $sig );
1504 }
15d2d3df 1505 }
1f7aa795 1506 $wit->clear_path;
1507 $wit->clear_uncorrected_path;
e2902068 1508}
1509
4e5a7b2c 1510=head2 calculate_ranks
1511
1512Calculate the reading ranks (that is, their aligned positions relative
1513to each other) for the graph. This can only be called on linear collations.
1514
b365fbae 1515=begin testing
1516
1517use Text::Tradition;
1518
1519my $cxfile = 't/data/Collatex-16.xml';
1520my $t = Text::Tradition->new(
1521 'name' => 'inline',
1522 'input' => 'CollateX',
1523 'file' => $cxfile,
1524 );
1525my $c = $t->collation;
1526
1527# Make an svg
bfcbcecb 1528my $table = $c->alignment_table;
1529ok( $c->has_cached_table, "Alignment table was cached" );
1530is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1531$c->calculate_ranks;
bfcbcecb 1532is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1533$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1534isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1535
1536=end testing
1537
4e5a7b2c 1538=cut
1539
910a0a6d 1540sub calculate_ranks {
1541 my $self = shift;
b365fbae 1542 # Save the existing ranks, in case we need to invalidate the cached SVG.
1543 my %existing_ranks;
ac4d7ac2 1544 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1545
1546 # Do the rankings based on the relationship equivalence graph, starting
1547 # with the start node.
56772e8c 1548 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1549
910a0a6d 1550 # Transfer our rankings from the topological graph to the real one.
1551 foreach my $r ( $self->readings ) {
cecbe56d 1552 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 1553 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1554 } else {
63778331 1555 # Die. Find the last rank we calculated.
359944f7 1556 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1557 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1558 $self->readings;
1559 my $last = pop @all_defined;
1560 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1561 }
de51424a 1562 }
bfcbcecb 1563 # Do we need to invalidate the cached data?
1564 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1565 foreach my $r ( $self->readings ) {
7c293912 1566 next if defined( $existing_ranks{$r} )
1567 && $existing_ranks{$r} == $r->rank;
c1915ab9 1568 # Something has changed, so clear the cache
bfcbcecb 1569 $self->_clear_cache;
c1915ab9 1570 # ...and recalculate the common readings.
1571 $self->calculate_common_readings();
b365fbae 1572 last;
1573 }
1574 }
c1915ab9 1575 # The graph calculation information is now up to date.
1576 $self->_graphcalc_done(1);
8e1394aa 1577}
3a1f2523 1578
c1915ab9 1579sub _clear_cache {
1580 my $self = shift;
1581 $self->wipe_svg if $self->has_cached_svg;
1582 $self->wipe_table if $self->has_cached_table;
1583}
1584
1585
4e5a7b2c 1586=head2 flatten_ranks
1587
1588A convenience method for parsing collation data. Searches the graph for readings
1589with the same text at the same rank, and merges any that are found.
1590
1591=cut
1592
0e476982 1593sub flatten_ranks {
1594 my $self = shift;
1595 my %unique_rank_rdg;
bf6e338d 1596 my $changed;
0e476982 1597 foreach my $rdg ( $self->readings ) {
1598 next unless $rdg->has_rank;
1599 my $key = $rdg->rank . "||" . $rdg->text;
1600 if( exists $unique_rank_rdg{$key} ) {
07e6765f 1601 # Make sure they don't have different grammatical forms
1602 my $ur = $unique_rank_rdg{$key};
1603 if( $rdg->disambiguated && $ur->disambiguated ) {
1604 my $rform = join( '//', map { $_->form->to_string } $rdg->lexemes );
1605 my $uform = join( '//', map { $_->form->to_string } $ur->lexemes );
1606 next unless $rform eq $uform;
1607 } elsif( $rdg->disambiguated xor $ur->disambiguated ) {
1608 next;
1609 }
0e476982 1610 # Combine!
cecbe56d 1611 #print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1612 $changed = 1;
0e476982 1613 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1614 # TODO see if this now makes a common point.
0e476982 1615 } else {
1616 $unique_rank_rdg{$key} = $rdg;
1617 }
1618 }
bf6e338d 1619 # If we merged readings, the ranks are still fine but the alignment
1620 # table is wrong. Wipe it.
1621 $self->wipe_table() if $changed;
0e476982 1622}
4633f9e4 1623
1624
d4b75f44 1625=head2 calculate_common_readings
1626
1627Goes through the graph identifying the readings that appear in every witness
1628(apart from those with lacunae at that spot.) Marks them as common and returns
1629the list.
1630
1631=begin testing
1632
1633use Text::Tradition;
1634
1635my $cxfile = 't/data/Collatex-16.xml';
1636my $t = Text::Tradition->new(
1637 'name' => 'inline',
1638 'input' => 'CollateX',
1639 'file' => $cxfile,
1640 );
1641my $c = $t->collation;
1642
1643my @common = $c->calculate_common_readings();
1644is( scalar @common, 8, "Found correct number of common readings" );
1645my @marked = sort $c->common_readings();
1646is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1647my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1648is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1649
1650=end testing
1651
1652=cut
1653
1654sub calculate_common_readings {
1655 my $self = shift;
1656 my @common;
c1915ab9 1657 map { $_->is_common( 0 ) } $self->readings;
1658 # Implicitly calls calculate_ranks
1dd07bda 1659 my $table = $self->alignment_table;
d4b75f44 1660 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1661 my @row = map { $_->{'tokens'}->[$idx]
1662 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1663 @{$table->{'alignment'}};
d4b75f44 1664 my %hash;
1665 foreach my $r ( @row ) {
1666 if( $r ) {
1667 $hash{$r->id} = $r unless $r->is_meta;
1668 } else {
1669 $hash{'UNDEF'} = $r;
1670 }
1671 }
1672 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1673 my( $r ) = values %hash;
1674 $r->is_common( 1 );
1675 push( @common, $r );
1676 }
1677 }
1678 return @common;
1679}
1680
861c3e27 1681=head2 text_from_paths
1682
1683Calculate the text array for all witnesses from the path, for later consistency
1684checking. Only to be used if there is no non-graph-based way to know the
1685original texts.
1686
1687=cut
1688
1689sub text_from_paths {
1690 my $self = shift;
1691 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1692 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1693 my @text;
1694 foreach my $r ( @readings ) {
1695 next if $r->is_meta;
1696 push( @text, $r->text );
1697 }
861c3e27 1698 $wit->text( \@text );
1699 if( $wit->is_layered ) {
5164a6f0 1700 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1701 $wit->sigil.$self->ac_label );
1702 my @uctext;
1703 foreach my $r ( @ucrdgs ) {
1704 next if $r->is_meta;
1705 push( @uctext, $r->text );
1706 }
1707 $wit->layertext( \@uctext );
861c3e27 1708 }
1709 }
1710}
0e476982 1711
4e5a7b2c 1712=head1 UTILITY FUNCTIONS
1713
1714=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1715
4e5a7b2c 1716Find the last reading that occurs in sequence before both the given readings.
414cc046 1717At the very least this should be $self->start.
4e5a7b2c 1718
1719=head2 common_successor( $reading_a, $reading_b )
1720
1721Find the first reading that occurs in sequence after both the given readings.
414cc046 1722At the very least this should be $self->end.
4e5a7b2c 1723
22222af9 1724=begin testing
1725
1726use Text::Tradition;
1727
1728my $cxfile = 't/data/Collatex-16.xml';
1729my $t = Text::Tradition->new(
1730 'name' => 'inline',
1731 'input' => 'CollateX',
1732 'file' => $cxfile,
1733 );
1734my $c = $t->collation;
1735
679f17e1 1736is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1737 'n20', "Found correct common predecessor" );
679f17e1 1738is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1739 '__END__', "Found correct common successor" );
22222af9 1740
4e5a7b2c 1741is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1742 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1743is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1744 '__END__', "Found correct common successor for readings on same path" );
22222af9 1745
1746=end testing
1747
1748=cut
1749
1750## Return the closest reading that is a predecessor of both the given readings.
1751sub common_predecessor {
1752 my $self = shift;
4e5a7b2c 1753 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1754 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1755}
1756
1757sub common_successor {
1758 my $self = shift;
4e5a7b2c 1759 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1760 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1761}
1762
414cc046 1763
1764# TODO think about how to do this without ranks...
027d819c 1765sub _common_in_path {
22222af9 1766 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1767 my $iter = $self->end->rank;
22222af9 1768 my @candidates;
414cc046 1769 my @last_r1 = ( $r1 );
1770 my @last_r2 = ( $r2 );
1771 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1772 my %all_seen;
414cc046 1773 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1774 while( !@candidates ) {
414cc046 1775 last unless $iter--; # Avoid looping infinitely
1776 # Iterate separately down the graph from r1 and r2
1777 my( @new_lc1, @new_lc2 );
1778 foreach my $lc ( @last_r1 ) {
1779 foreach my $p ( $lc->$dir ) {
1780 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1781 # print STDERR "Path candidate $p from $lc\n";
1782 push( @candidates, $p );
002e3600 1783 } elsif( !$all_seen{$p->id} ) {
414cc046 1784 $all_seen{$p->id} = 'r1';
1785 push( @new_lc1, $p );
1786 }
1787 }
1788 }
1789 foreach my $lc ( @last_r2 ) {
22222af9 1790 foreach my $p ( $lc->$dir ) {
414cc046 1791 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1792 # print STDERR "Path candidate $p from $lc\n";
22222af9 1793 push( @candidates, $p );
002e3600 1794 } elsif( !$all_seen{$p->id} ) {
414cc046 1795 $all_seen{$p->id} = 'r2';
1796 push( @new_lc2, $p );
22222af9 1797 }
1798 }
1799 }
414cc046 1800 @last_r1 = @new_lc1;
1801 @last_r2 = @new_lc2;
22222af9 1802 }
1803 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1804 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1805}
1806
63778331 1807sub throw {
1808 Text::Tradition::Error->throw(
1809 'ident' => 'Collation error',
1810 'message' => $_[0],
1811 );
1812}
1813
dd3b58b0 1814no Moose;
1815__PACKAGE__->meta->make_immutable;
e867486f 1816
027d819c 1817=head1 LICENSE
e867486f 1818
027d819c 1819This package is free software and is provided "as is" without express
1820or implied warranty. You can redistribute it and/or modify it under
1821the same terms as Perl itself.
e867486f 1822
027d819c 1823=head1 AUTHOR
e867486f 1824
027d819c 1825Tara L Andrews E<lt>aurum@cpan.orgE<gt>