invalidate alignment table cache on relationship add if appropriate
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
6771a1b1 3use feature 'say';
910a0a6d 4use Encode qw( decode_utf8 );
5use File::Temp;
bfcbcecb 6use File::Which;
c9bf3dbf 7use Graph;
8e1394aa 8use IPC::Run qw( run binary );
82fa4d57 9use Text::CSV;
5c0072ef 10use Text::Tradition::Collation::Data;
b15511bf 11use Text::Tradition::Collation::Reading;
22222af9 12use Text::Tradition::Collation::RelationshipStore;
63778331 13use Text::Tradition::Error;
cc31ebaa 14use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
5c0072ef 15use XML::LibXML;
16use XML::LibXML::XPathContext;
dd3b58b0 17use Moose;
18
5c0072ef 19has _data => (
20 isa => 'Text::Tradition::Collation::Data',
21 is => 'ro',
22 required => 1,
23 handles => [ qw(
24 sequence
25 paths
26 _set_relations
27 relations
28 _set_start
29 _set_end
30 ac_label
31 has_cached_table
32 relationships
33 related_readings
34 get_relationship
35 del_relationship
36 equivalence
37 equivalence_graph
38 readings
39 reading
40 _add_reading
41 del_reading
42 has_reading
43 wit_list_separator
44 baselabel
45 linear
46 wordsep
47 start
48 end
49 cached_table
50 _graphcalc_done
51 has_cached_svg
52 wipe_table
53 )]
54);
dd3b58b0 55
3a2ebbf4 56has 'tradition' => (
57 is => 'ro',
d047cd52 58 isa => 'Text::Tradition',
8cfd99c4 59 writer => '_set_tradition',
8d9a1cd8 60 weak_ref => 1,
d047cd52 61 );
dd3b58b0 62
4e5a7b2c 63=head1 NAME
64
65Text::Tradition::Collation - a software model for a text collation
66
67=head1 SYNOPSIS
68
69 use Text::Tradition;
70 my $t = Text::Tradition->new(
71 'name' => 'this is a text',
72 'input' => 'TEI',
73 'file' => '/path/to/tei_parallel_seg_file.xml' );
74
75 my $c = $t->collation;
76 my @readings = $c->readings;
77 my @paths = $c->paths;
78 my @relationships = $c->relationships;
79
80 my $svg_variant_graph = $t->collation->as_svg();
81
82=head1 DESCRIPTION
83
84Text::Tradition is a library for representation and analysis of collated
85texts, particularly medieval ones. The Collation is the central feature of
86a Tradition, where the text, its sequence of readings, and its relationships
87between readings are actually kept.
88
89=head1 CONSTRUCTOR
90
91=head2 new
92
93The constructor. Takes a hash or hashref of the following arguments:
94
95=over
96
97=item * tradition - The Text::Tradition object to which the collation
98belongs. Required.
99
100=item * linear - Whether the collation should be linear; that is, whether
101transposed readings should be treated as two linked readings rather than one,
102and therefore whether the collation graph is acyclic. Defaults to true.
103
4e5a7b2c 104=item * baselabel - The default label for the path taken by a base text
105(if any). Defaults to 'base text'.
106
107=item * wit_list_separator - The string to join a list of witnesses for
108purposes of making labels in display graphs. Defaults to ', '.
109
110=item * ac_label - The extra label to tack onto a witness sigil when
111representing another layer of path for the given witness - that is, when
112a text has more than one possible reading due to scribal corrections or
113the like. Defaults to ' (a.c.)'.
114
4e483aa5 115=item * wordsep - The string used to separate words in the original text.
116Defaults to ' '.
117
4e5a7b2c 118=back
119
120=head1 ACCESSORS
121
122=head2 tradition
123
124=head2 linear
125
4e5a7b2c 126=head2 wit_list_separator
127
128=head2 baselabel
129
130=head2 ac_label
131
4e483aa5 132=head2 wordsep
133
4e5a7b2c 134Simple accessors for collation attributes.
135
136=head2 start
137
138The meta-reading at the start of every witness path.
139
140=head2 end
141
142The meta-reading at the end of every witness path.
143
144=head2 readings
145
146Returns all Reading objects in the graph.
147
148=head2 reading( $id )
149
150Returns the Reading object corresponding to the given ID.
151
152=head2 add_reading( $reading_args )
153
154Adds a new reading object to the collation.
155See L<Text::Tradition::Collation::Reading> for the available arguments.
156
157=head2 del_reading( $object_or_id )
158
159Removes the given reading from the collation, implicitly removing its
160paths and relationships.
161
4e483aa5 162=head2 merge_readings( $main, $second, $concatenate, $with_str )
163
164Merges the $second reading into the $main one. If $concatenate is true, then
165the merged node will carry the text of both readings, concatenated with either
166$with_str (if specified) or a sensible default (the empty string if the
167appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 168
4e483aa5 169The first two arguments may be either readings or reading IDs.
4e5a7b2c 170
171=head2 has_reading( $id )
172
173Predicate to see whether a given reading ID is in the graph.
174
175=head2 reading_witnesses( $object_or_id )
176
177Returns a list of sigils whose witnesses contain the reading.
178
179=head2 paths
180
181Returns all reading paths within the document - that is, all edges in the
182collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
183
184=head2 add_path( $source, $target, $sigil )
185
186Links the given readings in the collation in sequence, under the given witness
187sigil. The readings may be specified by object or ID.
188
189=head2 del_path( $source, $target, $sigil )
190
191Links the given readings in the collation in sequence, under the given witness
192sigil. The readings may be specified by object or ID.
193
194=head2 has_path( $source, $target );
195
196Returns true if the two readings are linked in sequence in any witness.
197The readings may be specified by object or ID.
198
199=head2 relationships
200
201Returns all Relationship objects in the collation.
202
203=head2 add_relationship( $reading, $other_reading, $options )
204
205Adds a new relationship of the type given in $options between the two readings,
206which may be specified by object or ID. Returns a value of ( $status, @vectors)
207where $status is true on success, and @vectors is a list of relationship edges
208that were ultimately added.
209See L<Text::Tradition::Collation::Relationship> for the available options.
210
211=cut
dd3b58b0 212
5c0072ef 213sub BUILDARGS {
214 my ( $class, @args ) = @_;
215 my %args = @args == 1 ? %{ $args[0] } : @args;
216 # TODO determine these from the Moose::Meta object
217 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
218 linear wordsep start end cached_table _graphcalc_done);
219 my %data_args;
220 for my $attr (@delegate_attrs) {
221 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
222 }
223 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
224 return \%args;
225}
226
d047cd52 227sub BUILD {
3a2ebbf4 228 my $self = shift;
22222af9 229 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
e4b73942 230 $self->_set_start( $self->add_reading(
231 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
232 $self->_set_end( $self->add_reading(
233 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
d047cd52 234}
784877d9 235
24efa55d 236sub register_relationship_type {
237 my $self = shift;
238 my %args = @_ == 1 ? %{$_[0]} : @_;
239 if( $self->relations->has_type( $args{name} ) ) {
240 throw( 'Relationship type ' . $args{name} . ' already registered' );
241 }
242 $self->relations->add_type( %args );
243}
244
3a2ebbf4 245### Reading construct/destruct functions
246
247sub add_reading {
248 my( $self, $reading ) = @_;
249 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
250 my %args = %$reading;
e4b73942 251 if( $args{'init'} ) {
252 # If we are initializing an empty collation, don't assume that we
253 # have set a tradition.
254 delete $args{'init'};
8943ff68 255 } elsif( $self->tradition->can('language') && $self->tradition->has_language
256 && !exists $args{'language'} ) {
6ad2ce78 257 $args{'language'} = $self->tradition->language;
258 }
3a2ebbf4 259 $reading = Text::Tradition::Collation::Reading->new(
260 'collation' => $self,
261 %args );
262 }
263 # First check to see if a reading with this ID exists.
264 if( $self->reading( $reading->id ) ) {
63778331 265 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 266 }
c1915ab9 267 $self->_graphcalc_done(0);
3a2ebbf4 268 $self->_add_reading( $reading->id => $reading );
269 # Once the reading has been added, put it in both graphs.
270 $self->sequence->add_vertex( $reading->id );
cecbe56d 271 $self->relations->add_reading( $reading->id );
3a2ebbf4 272 return $reading;
eca16057 273};
274
3a2ebbf4 275around del_reading => sub {
276 my $orig = shift;
277 my $self = shift;
278 my $arg = shift;
279
cecbe56d 280 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
281 $arg = $arg->id;
3a2ebbf4 282 }
3a2ebbf4 283 # Remove the reading from the graphs.
c1915ab9 284 $self->_graphcalc_done(0);
4e483aa5 285 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 286 $self->sequence->delete_vertex( $arg );
287 $self->relations->delete_reading( $arg );
3a2ebbf4 288
289 # Carry on.
cecbe56d 290 $self->$orig( $arg );
3a2ebbf4 291};
7854e12e 292
4e483aa5 293=begin testing
294
295use Text::Tradition;
296
297my $cxfile = 't/data/Collatex-16.xml';
298my $t = Text::Tradition->new(
299 'name' => 'inline',
300 'input' => 'CollateX',
301 'file' => $cxfile,
302 );
303my $c = $t->collation;
304
305my $rno = scalar $c->readings;
306# Split n21 for testing purposes
307my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
308my $old_r = $c->reading( 'n21' );
309$old_r->alter_text( 'to' );
310$c->del_path( 'n20', 'n21', 'A' );
311$c->add_path( 'n20', 'n21p0', 'A' );
312$c->add_path( 'n21p0', 'n21', 'A' );
313$c->flatten_ranks();
314ok( $c->reading( 'n21p0' ), "New reading exists" );
315is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
316
679f17e1 317# Combine n3 and n4 ( with his )
4e483aa5 318$c->merge_readings( 'n3', 'n4', 1 );
319ok( !$c->reading('n4'), "Reading n4 is gone" );
320is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
321
679f17e1 322# Collapse n9 and n10 ( rood / root )
323$c->merge_readings( 'n9', 'n10' );
324ok( !$c->reading('n10'), "Reading n10 is gone" );
325is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 326
327# Combine n21 and n21p0
328my $remaining = $c->reading('n21');
329$remaining ||= $c->reading('n22'); # one of these should still exist
330$c->merge_readings( 'n21p0', $remaining, 1 );
331ok( !$c->reading('n21'), "Reading $remaining is gone" );
332is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
333
334=end testing
335
336=cut
7854e12e 337
3a2ebbf4 338sub merge_readings {
339 my $self = shift;
340
56772e8c 341 # Sanity check
342 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
343 my $mergemeta = $kept_obj->is_meta;
344 throw( "Cannot merge meta and non-meta reading" )
345 unless ( $mergemeta && $del_obj->is_meta )
346 || ( !$mergemeta && !$del_obj->is_meta );
347 if( $mergemeta ) {
348 throw( "Cannot merge with start or end node" )
349 if( $kept_obj eq $self->start || $kept_obj eq $self->end
350 || $del_obj eq $self->start || $del_obj eq $self->end );
a445ce40 351 throw( "Cannot combine text of meta readings" ) if $combine;
56772e8c 352 }
3a2ebbf4 353 # We only need the IDs for adding paths to the graph, not the reading
354 # objects themselves.
56772e8c 355 my $kept = $kept_obj->id;
356 my $deleted = $del_obj->id;
c1915ab9 357 $self->_graphcalc_done(0);
10943ab0 358
3a2ebbf4 359 # The kept reading should inherit the paths and the relationships
360 # of the deleted reading.
361 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
362 my @vector = ( $kept );
363 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
364 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 365 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 366 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
367 $self->sequence->add_edge( @vector );
368 my $fwits = $self->sequence->get_edge_attributes( @vector );
369 @wits{keys %$fwits} = values %$fwits;
370 $self->sequence->set_edge_attributes( @vector, \%wits );
371 }
cecbe56d 372 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 373
374 # Do the deletion deed.
4e483aa5 375 if( $combine ) {
869a1ada 376 # Combine the text of the readings
4e483aa5 377 my $joinstr = $combine_char;
378 unless( defined $joinstr ) {
379 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
380 $joinstr = $self->wordsep unless defined $joinstr;
381 }
a445ce40 382 $kept_obj->_combine( $del_obj, $joinstr );
49d4f2ac 383 }
3a2ebbf4 384 $self->del_reading( $deleted );
385}
7854e12e 386
6771a1b1 387=head2 compress_readings
388
389Where possible in the graph, compresses plain sequences of readings into a
390single reading. The sequences must consist of readings with no
391relationships to other readings, with only a single witness path between
392them and no other witness paths from either that would skip the other. The
393readings must also not be marked as nonsense or bad grammar.
394
395WARNING: This operation cannot be undone.
396
397=cut
398
399sub compress_readings {
400 my $self = shift;
401 # Anywhere in the graph that there is a reading that joins only to a single
402 # successor, and neither of these have any relationships, just join the two
403 # readings.
6771a1b1 404 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
428bcf0b 405 # Now look for readings that can be joined to their successors.
a445ce40 406 next unless $rdg->is_combinable;
6771a1b1 407 my %seen;
408 while( $self->sequence->successors( $rdg ) == 1 ) {
409 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
410 throw( "Infinite loop" ) if $seen{$next->id};
411 $seen{$next->id} = 1;
412 last if $self->sequence->predecessors( $next ) > 1;
a445ce40 413 last unless $next->is_combinable;
6771a1b1 414 say "Joining readings $rdg and $next";
415 $self->merge_readings( $rdg, $next, 1 );
416 }
417 }
418 # Make sure we haven't screwed anything up
419 foreach my $wit ( $self->tradition->witnesses ) {
420 my $pathtext = $self->path_text( $wit->sigil );
421 my $origtext = join( ' ', @{$wit->text} );
422 throw( "Text differs for witness " . $wit->sigil )
423 unless $pathtext eq $origtext;
424 if( $wit->is_layered ) {
425 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
426 $origtext = join( ' ', @{$wit->layertext} );
427 throw( "Ante-corr text differs for witness " . $wit->sigil )
428 unless $pathtext eq $origtext;
429 }
430 }
431
432 $self->relations->rebuild_equivalence();
433 $self->calculate_ranks();
434}
3265b0ce 435
3a2ebbf4 436# Helper function for manipulating the graph.
437sub _stringify_args {
4e483aa5 438 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 439 $first = $first->id
440 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
441 $second = $second->id
442 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 443 return( $first, $second, @args );
3a2ebbf4 444}
df6d9812 445
4e5a7b2c 446# Helper function for manipulating the graph.
447sub _objectify_args {
448 my( $self, $first, $second, $arg ) = @_;
449 $first = $self->reading( $first )
450 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
451 $second = $self->reading( $second )
452 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
453 return( $first, $second, $arg );
454}
3a2ebbf4 455### Path logic
456
457sub add_path {
458 my $self = shift;
459
460 # We only need the IDs for adding paths to the graph, not the reading
461 # objects themselves.
cecbe56d 462 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 463
c1915ab9 464 $self->_graphcalc_done(0);
3a2ebbf4 465 # Connect the readings
359944f7 466 unless( $self->sequence->has_edge( $source, $target ) ) {
467 $self->sequence->add_edge( $source, $target );
468 $self->relations->add_equivalence_edge( $source, $target );
469 }
3a2ebbf4 470 # Note the witness in question
471 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 472}
b15511bf 473
3a2ebbf4 474sub del_path {
475 my $self = shift;
49d4f2ac 476 my @args;
477 if( ref( $_[0] ) eq 'ARRAY' ) {
478 my $e = shift @_;
479 @args = ( @$e, @_ );
480 } else {
481 @args = @_;
482 }
3a2ebbf4 483
484 # We only need the IDs for adding paths to the graph, not the reading
485 # objects themselves.
49d4f2ac 486 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 487
c1915ab9 488 $self->_graphcalc_done(0);
3a2ebbf4 489 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 490 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 491 }
30897024 492 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
3a2ebbf4 493 $self->sequence->delete_edge( $source, $target );
359944f7 494 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 495 }
784877d9 496}
497
3a2ebbf4 498
15d2d3df 499# Extra graph-alike utility
500sub has_path {
3a2ebbf4 501 my $self = shift;
502 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
503 return undef unless $self->sequence->has_edge( $source, $target );
504 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 505}
506
4e5a7b2c 507=head2 clear_witness( @sigil_list )
3a2ebbf4 508
4e5a7b2c 509Clear the given witnesses out of the collation entirely, removing references
510to them in paths, and removing readings that belong only to them. Should only
511be called via $tradition->del_witness.
3a2ebbf4 512
513=cut
514
4e5a7b2c 515sub clear_witness {
516 my( $self, @sigils ) = @_;
517
c1915ab9 518 $self->_graphcalc_done(0);
4e5a7b2c 519 # Clear the witness(es) out of the paths
520 foreach my $e ( $self->paths ) {
521 foreach my $sig ( @sigils ) {
522 $self->del_path( $e, $sig );
523 }
524 }
525
526 # Clear out the newly unused readings
527 foreach my $r ( $self->readings ) {
528 unless( $self->reading_witnesses( $r ) ) {
529 $self->del_reading( $r );
530 }
531 }
532}
3a2ebbf4 533
534sub add_relationship {
535 my $self = shift;
22222af9 536 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 537 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
864ee4bf 538 foreach my $v ( @vectors ) {
539 next unless $self->get_relationship( $v )->colocated;
540 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
541 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
542 $self->_graphcalc_done(0);
543 $self->_clear_cache;
544 last;
545 }
546 }
63778331 547 return @vectors;
22222af9 548}
ef9d481f 549
ca6e6095 550around qw/ get_relationship del_relationship / => sub {
551 my $orig = shift;
552 my $self = shift;
553 my @args = @_;
554 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
555 @args = @{$_[0]};
556 }
557 my( $source, $target ) = $self->_stringify_args( @args );
558 $self->$orig( $source, $target );
559};
560
22222af9 561=head2 reading_witnesses( $reading )
910a0a6d 562
22222af9 563Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 564
22222af9 565=cut
1d310495 566
1d310495 567sub reading_witnesses {
568 my( $self, $reading ) = @_;
569 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 570 # arbitrarily chosen "incoming". Thus, special-case the start node.
571 if( $reading eq $self->start ) {
bed6ce83 572 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
96dc90ec 573 }
1d310495 574 my %all_witnesses;
575 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
576 my $wits = $self->sequence->get_edge_attributes( @$e );
577 @all_witnesses{ keys %$wits } = 1;
578 }
c12bb878 579 my $acstr = $self->ac_label;
580 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
581 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
582 }
1d310495 583 return keys %all_witnesses;
910a0a6d 584}
585
4e5a7b2c 586=head1 OUTPUT METHODS
8e1394aa 587
0ecb975c 588=head2 as_svg( \%options )
8e1394aa 589
0068967c 590Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 591See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 592
593=cut
594
595sub as_svg {
0ecb975c 596 my( $self, $opts ) = @_;
bfcbcecb 597 throw( "Need GraphViz installed to output SVG" )
598 unless File::Which::which( 'dot' );
e247aad1 599 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 600 $self->calculate_ranks()
601 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
be3af600 602 my @cmd = qw/dot -Tsvg/;
603 my( $svg, $err );
604 my $dotfile = File::Temp->new();
605 ## USE FOR DEBUGGING
606 # $dotfile->unlink_on_destroy(0);
607 binmode $dotfile, ':utf8';
608 print $dotfile $self->as_dot( $opts );
609 push( @cmd, $dotfile->filename );
610 run( \@cmd, ">", binary(), \$svg );
611 $svg = decode_utf8( $svg );
612 return $svg;
8e1394aa 613}
614
b22576c6 615
0ecb975c 616=head2 as_dot( \%options )
b22576c6 617
0ecb975c 618Returns a string that is the collation graph expressed in dot
619(i.e. GraphViz) format. Options include:
b22576c6 620
0ecb975c 621=over 4
b22576c6 622
0ecb975c 623=item * from
b22576c6 624
0ecb975c 625=item * to
df6d9812 626
0ecb975c 627=item * color_common
628
629=back
df6d9812 630
631=cut
632
633sub as_dot {
0ecb975c 634 my( $self, $opts ) = @_;
635 my $startrank = $opts->{'from'} if $opts;
636 my $endrank = $opts->{'to'} if $opts;
637 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 638 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
639 && $self->end->rank > 100;
6648ee3d 640 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 641
b22576c6 642 # Check the arguments
643 if( $startrank ) {
644 return if $endrank && $startrank > $endrank;
645 return if $startrank > $self->end->rank;
646 }
647 if( defined $endrank ) {
648 return if $endrank < 0;
f1b3b33a 649 $endrank = undef if $endrank == $self->end->rank;
b22576c6 650 }
651
67da8d6c 652 my $graph_name = $self->tradition->name;
653 $graph_name =~ s/[^\w\s]//g;
654 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 655
656 my %graph_attrs = (
657 'rankdir' => 'LR',
658 'bgcolor' => 'none',
659 );
660 my %node_attrs = (
b8990398 661 'fontsize' => 14,
f13b5582 662 'fillcolor' => 'white',
663 'style' => 'filled',
664 'shape' => 'ellipse'
665 );
666 my %edge_attrs = (
667 'arrowhead' => 'open',
668 'color' => '#000000',
669 'fontcolor' => '#000000',
670 );
671
67da8d6c 672 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 673 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
674 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 675
b22576c6 676 # Output substitute start/end readings if necessary
677 if( $startrank ) {
43c94341 678 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 679 }
680 if( $endrank ) {
43c94341 681 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 682 }
b365fbae 683 if( $STRAIGHTENHACK ) {
684 ## HACK part 1
43c94341 685 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
686 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 687 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
688 }
b22576c6 689 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 690 # Sort the readings by rank if we have ranks; this speeds layout.
691 my @all_readings = $self->end->has_rank
692 ? sort { $a->rank <=> $b->rank } $self->readings
693 : $self->readings;
4633f9e4 694 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 695 foreach my $reading ( @all_readings ) {
b22576c6 696 # Only output readings within our rank range.
697 next if $startrank && $reading->rank < $startrank;
698 next if $endrank && $reading->rank > $endrank;
699 $used{$reading->id} = 1;
910a0a6d 700 # Need not output nodes without separate labels
3a2ebbf4 701 next if $reading->id eq $reading->text;
d4b75f44 702 my $rattrs;
30f0df34 703 my $label = $reading->text;
629e27b0 704 $label .= '-' if $reading->join_next;
705 $label = "-$label" if $reading->join_prior;
8f9cab7b 706 $label =~ s/\"/\\\"/g;
d4b75f44 707 $rattrs->{'label'} = $label;
10e4b1ac 708 $rattrs->{'id'} = $reading->id;
0ecb975c 709 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 710 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 711 }
3a2ebbf4 712
30ddc24c 713 # Add the real edges. Need to weight one edge per rank jump, in a
714 # continuous line.
b365fbae 715 # my $weighted = $self->_add_edge_weights;
b22576c6 716 my @edges = $self->paths;
3bdec618 717 my( %substart, %subend );
b22576c6 718 foreach my $edge ( @edges ) {
719 # Do we need to output this edge?
508fd430 720 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 721 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 722 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 723
b22576c6 724 # Account for the rank gap if necessary
30ddc24c 725 my $rank0 = $self->reading( $edge->[0] )->rank
726 if $self->reading( $edge->[0] )->has_rank;
727 my $rank1 = $self->reading( $edge->[1] )->rank
728 if $self->reading( $edge->[1] )->has_rank;
729 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
730 $variables->{'minlen'} = $rank1 - $rank0;
731 }
732
733 # Add the calculated edge weights
b365fbae 734 # if( exists $weighted->{$edge->[0]}
e247aad1 735 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
736 # # $variables->{'color'} = 'red';
737 # $variables->{'weight'} = 3.0;
738 # }
30ddc24c 739
508fd430 740 # EXPERIMENTAL: make edge width reflect no. of witnesses
741 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
742 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
743
f13b5582 744 my $varopts = _dot_attr_string( $variables );
745 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
746 $edge->[0], $edge->[1], $varopts );
3bdec618 747 } elsif( $used{$edge->[0]} ) {
96ba0418 748 $subend{$edge->[0]} = $edge->[1];
3bdec618 749 } elsif( $used{$edge->[1]} ) {
96ba0418 750 $substart{$edge->[1]} = $edge->[0];
b22576c6 751 }
df6d9812 752 }
bed6ce83 753
754 # If we are asked to, add relationship links
755 if( exists $opts->{show_relations} ) {
756 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
757 if( $filter eq 'transposition' ) {
758 $filter =~ qr/^transposition$/;
759 }
760 foreach my $redge ( $self->relationships ) {
761 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
762 if( $filter ne 'all' ) {
763 my $rel = $self->get_relationship( $redge );
764 next unless $rel->type =~ /$filter/;
765 my $variables = {
766 arrowhead => 'none',
767 color => '#FFA14F',
768 constraint => 'false',
769 label => uc( substr( $rel->type, 0, 4 ) ),
770 penwidth => '3',
771 };
772 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
773 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
774 }
775 }
776 }
777 }
778
3bdec618 779 # Add substitute start and end edges if necessary
780 foreach my $node ( keys %substart ) {
96ba0418 781 my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
f13b5582 782 my $variables = { %edge_attrs, 'label' => $witstr };
96ba0418 783 my $nrdg = $self->reading( $node );
784 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
785 # Substart is actually one lower than $startrank
786 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
787 }
f13b5582 788 my $varopts = _dot_attr_string( $variables );
96ba0418 789 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
3bdec618 790 }
791 foreach my $node ( keys %subend ) {
96ba0418 792 my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
f13b5582 793 my $variables = { %edge_attrs, 'label' => $witstr };
794 my $varopts = _dot_attr_string( $variables );
96ba0418 795 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
3bdec618 796 }
b365fbae 797 # HACK part 2
798 if( $STRAIGHTENHACK ) {
43c94341 799 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
800 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 801 }
30ddc24c 802
df6d9812 803 $dot .= "}\n";
804 return $dot;
805}
806
f13b5582 807sub _dot_attr_string {
808 my( $hash ) = @_;
809 my @attrs;
810 foreach my $k ( sort keys %$hash ) {
811 my $v = $hash->{$k};
812 push( @attrs, $k.'="'.$v.'"' );
813 }
814 return( '[ ' . join( ', ', @attrs ) . ' ]' );
815}
816
30ddc24c 817sub _add_edge_weights {
818 my $self = shift;
819 # Walk the graph from START to END, choosing the successor node with
820 # the largest number of witness paths each time.
821 my $weighted = {};
822 my $curr = $self->start->id;
008fc8a6 823 my $ranked = $self->end->has_rank;
30ddc24c 824 while( $curr ne $self->end->id ) {
008fc8a6 825 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 826 my @succ = sort { $self->path_witnesses( $curr, $a )
827 <=> $self->path_witnesses( $curr, $b ) }
828 $self->sequence->successors( $curr );
829 my $next = pop @succ;
008fc8a6 830 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 831 # Try to avoid lacunae in the weighted path.
008fc8a6 832 while( @succ &&
833 ( $self->reading( $next )->is_lacuna ||
834 $nextrank - $rank > 1 ) ){
30ddc24c 835 $next = pop @succ;
836 }
837 $weighted->{$curr} = $next;
838 $curr = $next;
839 }
840 return $weighted;
841}
842
027d819c 843=head2 path_witnesses( $edge )
844
845Returns the list of sigils whose witnesses are associated with the given edge.
846The edge can be passed as either an array or an arrayref of ( $source, $target ).
847
848=cut
849
3a2ebbf4 850sub path_witnesses {
851 my( $self, @edge ) = @_;
852 # If edge is an arrayref, cope.
853 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
854 my $e = shift @edge;
855 @edge = @$e;
856 }
857 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 858 return @wits;
3a2ebbf4 859}
860
7f9f05e8 861# Helper function. Make a display label for the given witnesses, showing a.c.
862# witnesses only where the main witness is not also in the list.
027d819c 863sub _path_display_label {
508fd430 864 my $self = shift;
7f9f05e8 865 my %wits;
866 map { $wits{$_} = 1 } @_;
867
868 # If an a.c. wit is listed, remove it if the main wit is also listed.
869 # Otherwise keep it for explicit listing.
870 my $aclabel = $self->ac_label;
871 my @disp_ac;
872 foreach my $w ( sort keys %wits ) {
873 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
874 if( exists $wits{$1} ) {
875 delete $wits{$w};
876 } else {
877 push( @disp_ac, $w );
878 }
879 }
880 }
881
882 # See if we are in a majority situation.
8f9cab7b 883 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1ff82d4f 884 $maj = $maj > 5 ? $maj : 5;
7f9f05e8 885 if( scalar keys %wits > $maj ) {
886 unshift( @disp_ac, 'majority' );
887 return join( ', ', @disp_ac );
8f9cab7b 888 } else {
7f9f05e8 889 return join( ', ', sort keys %wits );
8f9cab7b 890 }
891}
1dd07bda 892
bf6e338d 893=head2 readings_at_rank( $rank )
1dd07bda 894
bf6e338d 895Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 896
897=cut
898
bf6e338d 899sub readings_at_rank {
1dd07bda 900 my( $self, $rank ) = @_;
bf6e338d 901 my $table = $self->alignment_table;
902 # Table rank is real rank - 1.
903 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
904 my %readings;
905 foreach my $e ( @elements ) {
906 next unless ref( $e ) eq 'HASH';
907 next unless exists $e->{'t'};
908 $readings{$e->{'t'}->id} = $e->{'t'};
909 }
910 return values %readings;
1dd07bda 911}
8f9cab7b 912
4e5a7b2c 913=head2 as_graphml
8e1394aa 914
4e5a7b2c 915Returns a GraphML representation of the collation. The GraphML will contain
916two graphs. The first expresses the attributes of the readings and the witness
917paths that link them; the second expresses the relationships that link the
918readings. This is the native transfer format for a tradition.
8e1394aa 919
56eefa04 920=begin testing
921
922use Text::Tradition;
951ddfe8 923use TryCatch;
56eefa04 924
925my $READINGS = 311;
926my $PATHS = 361;
927
928my $datafile = 't/data/florilegium_tei_ps.xml';
929my $tradition = Text::Tradition->new( 'input' => 'TEI',
930 'name' => 'test0',
931 'file' => $datafile,
932 'linear' => 1 );
933
934ok( $tradition, "Got a tradition object" );
935is( scalar $tradition->witnesses, 13, "Found all witnesses" );
936ok( $tradition->collation, "Tradition has a collation" );
937
938my $c = $tradition->collation;
939is( scalar $c->readings, $READINGS, "Collation has all readings" );
940is( scalar $c->paths, $PATHS, "Collation has all paths" );
941is( scalar $c->relationships, 0, "Collation has all relationships" );
942
943# Add a few relationships
944$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
945$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
946$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
947
948# Now write it to GraphML and parse it again.
949
950my $graphml = $c->as_graphml;
951my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
952is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
953is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
954is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
955
9fef629b 956# Now add a stemma, write to GraphML, and look at the output.
951ddfe8 957SKIP: {
37bf09f4 958 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
951ddfe8 959 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
960 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
961 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
962 $graphml = $c->as_graphml;
963 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
964}
2a812726 965
56eefa04 966=end testing
967
8e1394aa 968=cut
969
a445ce40 970## TODO MOVE this to Tradition.pm and modularize it better
8e1394aa 971sub as_graphml {
a30ca502 972 my( $self, $options ) = @_;
3d14b48e 973 $self->calculate_ranks unless $self->_graphcalc_done;
974
a30ca502 975 my $start = $options->{'from'}
976 ? $self->reading( $options->{'from'} ) : $self->start;
977 my $end = $options->{'to'}
978 ? $self->reading( $options->{'to'} ) : $self->end;
979 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
980 throw( 'Start node must be before end node' );
981 }
982 # The readings need to be ranked for this to work.
983 $start = $self->start unless $start->has_rank;
984 $end = $self->end unless $end->has_rank;
414cc046 985 my $rankoffset = 0;
986 unless( $start eq $self->start ) {
987 $rankoffset = $start->rank - 1;
988 }
a30ca502 989 my %use_readings;
990
8e1394aa 991 # Some namespaces
992 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
993 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
994 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 995 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 996
997 # Create the document and root node
428bcf0b 998 require XML::LibXML;
8e1394aa 999 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1000 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1001 $graphml->setDocumentElement( $root );
1002 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1003 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 1004
1005 # List of attribute types to save on our objects and their corresponding
1006 # GraphML types
1007 my %save_types = (
1008 'Str' => 'string',
1009 'Int' => 'int',
1010 'Bool' => 'boolean',
10e4b1ac 1011 'ReadingID' => 'string',
bbd064a9 1012 'RelationshipType' => 'string',
1013 'RelationshipScope' => 'string',
1014 );
1015
bbd064a9 1016 # Add the data keys for the graph. Include an extra key 'version' for the
1017 # GraphML output version.
e309421a 1018 my %graph_data_keys;
1019 my $gdi = 0;
bbd064a9 1020 my %graph_attributes = ( 'version' => 'string' );
1021 # Graph attributes include those of Tradition and those of Collation.
1022 my %gattr_from;
1023 my $tmeta = $self->tradition->meta;
1024 my $cmeta = $self->meta;
1025 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1026 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1027 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1028 next if $attr->name =~ /^_/;
bbd064a9 1029 next unless $save_types{$attr->type_constraint->name};
1030 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1031 }
9fef629b 1032 # Extra custom keys for complex objects that should be saved in some form.
1033 # The subroutine should return a string, or undef/empty.
951ddfe8 1034 if( $tmeta->has_method('stemmata') ) {
1035 $graph_attributes{'stemmata'} = sub {
1036 my @stemstrs;
1037 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1038 $self->tradition->stemmata;
1039 join( "\n", @stemstrs );
1040 };
1041 }
1042
8943ff68 1043 if( $tmeta->has_method('user') ) {
1044 $graph_attributes{'user'} = sub {
1045 $self->tradition->user ? $self->tradition->user->id : undef
1046 };
1047 }
bbd064a9 1048
1049 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 1050 $graph_data_keys{$datum} = 'dg'.$gdi++;
1051 my $key = $root->addNewChild( $graphml_ns, 'key' );
9fef629b 1052 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1053 : $graph_attributes{$datum};
e309421a 1054 $key->setAttribute( 'attr.name', $datum );
9fef629b 1055 $key->setAttribute( 'attr.type', $dtype );
e309421a 1056 $key->setAttribute( 'for', 'graph' );
1057 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1058 }
f6066bac 1059
bbd064a9 1060 # Add the data keys for reading nodes
1061 my %reading_attributes;
1062 my $rmeta = Text::Tradition::Collation::Reading->meta;
1063 foreach my $attr( $rmeta->get_all_attributes ) {
1064 next if $attr->name =~ /^_/;
bbd064a9 1065 next unless $save_types{$attr->type_constraint->name};
1066 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1067 }
a445ce40 1068 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1069 # Extra custom key for the reading morphology
1070 $reading_attributes{'lexemes'} = 'string';
1071 }
7cd9f181 1072
ef9d481f 1073 my %node_data_keys;
1074 my $ndi = 0;
bbd064a9 1075 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1076 $node_data_keys{$datum} = 'dn'.$ndi++;
1077 my $key = $root->addNewChild( $graphml_ns, 'key' );
1078 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1079 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1080 $key->setAttribute( 'for', 'node' );
1081 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1082 }
1083
bbd064a9 1084 # Add the data keys for edges, that is, paths and relationships. Path
1085 # data does not come from a Moose class so is here manually.
ef9d481f 1086 my $edi = 0;
1087 my %edge_data_keys;
bbd064a9 1088 my %edge_attributes = (
3a2ebbf4 1089 witness => 'string', # ID/label for a path
3a2ebbf4 1090 extra => 'boolean', # Path key
3a2ebbf4 1091 );
bbd064a9 1092 my @path_attributes = keys %edge_attributes; # track our manual additions
1093 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1094 foreach my $attr( $pmeta->get_all_attributes ) {
1095 next if $attr->name =~ /^_/;
bbd064a9 1096 next unless $save_types{$attr->type_constraint->name};
1097 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1098 }
1099 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1100 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1101 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1102 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1103 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1104 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1105 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1106 }
3a2ebbf4 1107
cc31ebaa 1108 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1109 my $xmlidname = $self->tradition->name;
1110 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1111 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1112 $xmlidname = '_'.$xmlidname;
1113 }
2c669bca 1114 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1115 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1116 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1117 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1118 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1119 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1120 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1121 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1122
2a812726 1123 # Tradition/collation attribute data
bbd064a9 1124 foreach my $datum ( keys %graph_attributes ) {
1125 my $value;
1126 if( $datum eq 'version' ) {
2a812726 1127 $value = '3.2';
9fef629b 1128 } elsif( ref( $graph_attributes{$datum} ) ) {
1129 my $sub = $graph_attributes{$datum};
1130 $value = &$sub();
bbd064a9 1131 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1132 $value = $self->tradition->$datum;
1133 } else {
1134 $value = $self->$datum;
1135 }
2c669bca 1136 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1137 }
8e1394aa 1138
1139 my $node_ctr = 0;
1140 my %node_hash;
22222af9 1141 # Add our readings to the graph
3a2ebbf4 1142 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1143 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1144 ( $n->rank < $start->rank || $n->rank > $end->rank );
1145 $use_readings{$n->id} = 1;
2c669bca 1146 # Add to the main graph
1147 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1148 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1149 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1150 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1151 foreach my $d ( keys %reading_attributes ) {
255875b8 1152 my $nval = $n->$d;
7cd9f181 1153 # Custom serialization
1154 if( $d eq 'lexemes' ) {
1155 # If nval is a true value, we have lexemes so we need to
1156 # serialize them. Otherwise set nval to undef so that the
1157 # key is excluded from this reading.
1158 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1159 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1160 $nval = undef;
7cd9f181 1161 }
cc31ebaa 1162 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1163 # Adjust the ranks within the subgraph.
cc31ebaa 1164 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1165 : $nval - $rankoffset;
414cc046 1166 }
255875b8 1167 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1168 if defined $nval;
1169 }
b15511bf 1170 }
1171
2c669bca 1172 # Add the path edges to the sequence graph
df6d9812 1173 my $edge_ctr = 0;
3a2ebbf4 1174 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1175 # We add an edge in the graphml for every witness in $e.
a30ca502 1176 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1177 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1178 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1179 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1180 # Skip any path from start to end; that witness is not in the subgraph.
1181 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1182 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1183 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1184 $node_hash{ $e->[0] },
1185 $node_hash{ $e->[1] } );
2c669bca 1186 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1187 $edge_el->setAttribute( 'source', $from );
1188 $edge_el->setAttribute( 'target', $to );
1189 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1190
1191 # It's a witness path, so add the witness
1192 my $base = $wit;
1193 my $key = $edge_data_keys{'witness'};
1194 # Is this an ante-corr witness?
1195 my $aclabel = $self->ac_label;
1196 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1197 # Keep the base witness
1198 $base = $1;
1199 # ...and record that this is an 'extra' reading path
1200 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1201 }
1202 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1203 }
1204 }
1205
cc31ebaa 1206 # Report the actual number of nodes and edges that went in
1207 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1208 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1209
22222af9 1210 # Add the relationship graph to the XML
bbd064a9 1211 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1212 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1213 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1214
94c00c71 1215 # Save and return the thing
1216 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1217 return $result;
df6d9812 1218}
1219
b15511bf 1220sub _add_graphml_data {
1221 my( $el, $key, $value ) = @_;
b15511bf 1222 return unless defined $value;
c9bf3dbf 1223 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1224 $data_el->setAttribute( 'key', $key );
1225 $data_el->appendText( $value );
8e1394aa 1226}
1227
4e5a7b2c 1228=head2 as_csv
910a0a6d 1229
1230Returns a CSV alignment table representation of the collation graph, one
2c669bca 1231row per witness (or witness uncorrected.)
910a0a6d 1232
1233=cut
1234
1235sub as_csv {
3a2ebbf4 1236 my( $self ) = @_;
1dd07bda 1237 my $table = $self->alignment_table;
82fa4d57 1238 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1239 my @result;
2c669bca 1240 # Make the header row
1241 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1242 push( @result, decode_utf8( $csv->string ) );
1243 # Make the rest of the rows
1244 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1245 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1246 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1247 $csv->combine( @row );
910a0a6d 1248 push( @result, decode_utf8( $csv->string ) );
1249 }
3a2ebbf4 1250 return join( "\n", @result );
910a0a6d 1251}
1252
248276a2 1253=head2 alignment_table
2c669bca 1254
566f4595 1255Return a reference to an alignment table, in a slightly enhanced CollateX
1256format which looks like this:
1257
1258 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1259 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1260 { witness => "SIG2",
4e5a7b2c 1261 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1262 ... ],
1263 length => TEXTLEN };
1264
2c669bca 1265=cut
9f3ba6f7 1266
1dd07bda 1267sub alignment_table {
1268 my( $self ) = @_;
1dd07bda 1269 return $self->cached_table if $self->has_cached_table;
1270
0ecb975c 1271 # Make sure we can do this
1272 throw( "Need a linear graph in order to make an alignment table" )
1273 unless $self->linear;
b39e7cb5 1274 $self->calculate_ranks()
1275 unless $self->_graphcalc_done && $self->end->has_rank;
1276
2c669bca 1277 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1278 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1279 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
6771a1b1 1280 # say STDERR "Making witness row(s) for " . $wit->sigil;
1f7aa795 1281 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1282 my @row = _make_witness_row( \@wit_path, \@all_pos );
bed6ce83 1283 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1284 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1285 push( @{$table->{'alignment'}}, $witobj );
1f7aa795 1286 if( $wit->is_layered ) {
1287 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1288 $wit->sigil.$self->ac_label );
1dd07bda 1289 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
bed6ce83 1290 my $witacobj = { 'witness' => $wit->sigil.$self->ac_label,
1291 'tokens' => \@ac_row };
1292 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1293 push( @{$table->{'alignment'}}, $witacobj );
910a0a6d 1294 }
1295 }
1dd07bda 1296 $self->cached_table( $table );
1297 return $table;
910a0a6d 1298}
1299
1300sub _make_witness_row {
1dd07bda 1301 my( $path, $positions ) = @_;
910a0a6d 1302 my %char_hash;
1303 map { $char_hash{$_} = undef } @$positions;
2c669bca 1304 my $debug = 0;
910a0a6d 1305 foreach my $rdg ( @$path ) {
6771a1b1 1306 say STDERR "rank " . $rdg->rank if $debug;
1307 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1dd07bda 1308 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1309 }
1310 my @row = map { $char_hash{$_} } @$positions;
eca16057 1311 # Fill in lacuna markers for undef spots in the row
1312 my $last_el = shift @row;
1313 my @filled_row = ( $last_el );
1314 foreach my $el ( @row ) {
0e476982 1315 # If we are using node reference, make the lacuna node appear many times
1316 # in the table. If not, use the lacuna tag.
1dd07bda 1317 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1318 $el = $last_el;
eca16057 1319 }
1320 push( @filled_row, $el );
1321 $last_el = $el;
1322 }
1323 return @filled_row;
910a0a6d 1324}
1325
248276a2 1326
4e5a7b2c 1327=head1 NAVIGATION METHODS
910a0a6d 1328
4e5a7b2c 1329=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1330
1331Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1332with $last, for the witness given in $sigil. If a $backup sigil is
1333specified (e.g. when walking a layered witness), it will be used wherever
1334no $sigil path exists. If there is a base text reading, that will be
1335used wherever no path exists for $sigil or $backup.
e2902068 1336
1337=cut
1338
910a0a6d 1339# TODO Think about returning some lazy-eval iterator.
b0b4421a 1340# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1341
e2902068 1342sub reading_sequence {
861c3e27 1343 my( $self, $start, $end, $witness ) = @_;
e2902068 1344
930ff666 1345 $witness = $self->baselabel unless $witness;
e2902068 1346 my @readings = ( $start );
1347 my %seen;
1348 my $n = $start;
3a2ebbf4 1349 while( $n && $n->id ne $end->id ) {
1350 if( exists( $seen{$n->id} ) ) {
63778331 1351 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1352 }
3a2ebbf4 1353 $seen{$n->id} = 1;
910a0a6d 1354
861c3e27 1355 my $next = $self->next_reading( $n, $witness );
44771cf2 1356 unless( $next ) {
63778331 1357 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1358 }
910a0a6d 1359 push( @readings, $next );
1360 $n = $next;
e2902068 1361 }
1362 # Check that the last reading is our end reading.
1363 my $last = $readings[$#readings];
63778331 1364 throw( "Last reading found from " . $start->text .
1365 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1366 unless $last->id eq $end->id;
e2902068 1367
1368 return @readings;
1369}
1370
4e5a7b2c 1371=head2 next_reading( $reading, $sigil );
8e1394aa 1372
4a8828f0 1373Returns the reading that follows the given reading along the given witness
930ff666 1374path.
8e1394aa 1375
1376=cut
1377
4a8828f0 1378sub next_reading {
e2902068 1379 # Return the successor via the corresponding path.
8e1394aa 1380 my $self = shift;
3a2ebbf4 1381 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1382 return undef unless $answer;
3a2ebbf4 1383 return $self->reading( $answer );
8e1394aa 1384}
1385
4e5a7b2c 1386=head2 prior_reading( $reading, $sigil )
8e1394aa 1387
4a8828f0 1388Returns the reading that precedes the given reading along the given witness
930ff666 1389path.
8e1394aa 1390
1391=cut
1392
4a8828f0 1393sub prior_reading {
e2902068 1394 # Return the predecessor via the corresponding path.
8e1394aa 1395 my $self = shift;
3a2ebbf4 1396 my $answer = $self->_find_linked_reading( 'prior', @_ );
1397 return $self->reading( $answer );
8e1394aa 1398}
1399
4a8828f0 1400sub _find_linked_reading {
861c3e27 1401 my( $self, $direction, $node, $path ) = @_;
1402
1403 # Get a backup if we are dealing with a layered witness
1404 my $alt_path;
1405 my $aclabel = $self->ac_label;
1406 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1407 $alt_path = $1;
1408 }
1409
e2902068 1410 my @linked_paths = $direction eq 'next'
3a2ebbf4 1411 ? $self->sequence->edges_from( $node )
1412 : $self->sequence->edges_to( $node );
e2902068 1413 return undef unless scalar( @linked_paths );
8e1394aa 1414
e2902068 1415 # We have to find the linked path that contains all of the
1416 # witnesses supplied in $path.
1417 my( @path_wits, @alt_path_wits );
4e5a7b2c 1418 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1419 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1420 my $base_le;
1421 my $alt_le;
1422 foreach my $le ( @linked_paths ) {
3a2ebbf4 1423 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1424 $base_le = $le;
910a0a6d 1425 }
508fd430 1426 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1427 if( _is_within( \@path_wits, \@le_wits ) ) {
1428 # This is the right path.
1429 return $direction eq 'next' ? $le->[1] : $le->[0];
1430 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1431 $alt_le = $le;
1432 }
8e1394aa 1433 }
e2902068 1434 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1435 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1436 if $alt_le;
e2902068 1437
1438 # Got this far? Return the base path if it exists.
3a2ebbf4 1439 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1440 if $base_le;
e2902068 1441
1442 # Got this far? We have no appropriate path.
2c669bca 1443 warn "Could not find $direction node from " . $node->id
910a0a6d 1444 . " along path $path";
8e1394aa 1445 return undef;
1446}
1447
4a8828f0 1448# Some set logic.
1449sub _is_within {
1450 my( $set1, $set2 ) = @_;
7854e12e 1451 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1452 foreach my $el ( @$set1 ) {
910a0a6d 1453 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1454 }
1455 return $ret;
1456}
1457
4e5a7b2c 1458# Return the string that joins together a list of witnesses for
1459# display on a single path.
1460sub _witnesses_of_label {
1461 my( $self, $label ) = @_;
1462 my $regex = $self->wit_list_separator;
1463 my @answer = split( /\Q$regex\E/, $label );
1464 return @answer;
b0b4421a 1465}
1466
d4b75f44 1467=head2 common_readings
1468
1469Returns the list of common readings in the graph (i.e. those readings that are
1470shared by all non-lacunose witnesses.)
1471
1472=cut
1473
1474sub common_readings {
1475 my $self = shift;
1476 my @common = grep { $_->is_common } $self->readings;
1477 return @common;
1478}
1479
fae52efd 1480=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1481
1482Returns the text of a witness (plus its backup, if we are using a layer)
1483as stored in the collation. The text is returned as a string, where the
1484individual readings are joined with spaces and the meta-readings (e.g.
1485lacunae) are omitted. Optional specification of $start and $end allows
1486the generation of a subset of the witness text.
4e5a7b2c 1487
b0b4421a 1488=cut
1489
1490sub path_text {
861c3e27 1491 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1492 $start = $self->start unless $start;
1493 $end = $self->end unless $end;
861c3e27 1494 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1495 my $pathtext = '';
1496 my $last;
1497 foreach my $r ( @path ) {
6ad2ce78 1498 unless ( $r->join_prior || !$last || $last->join_next ) {
1499 $pathtext .= ' ';
1500 }
1501 $pathtext .= $r->text;
629e27b0 1502 $last = $r;
1503 }
1504 return $pathtext;
b0b4421a 1505}
4e5a7b2c 1506
1507=head1 INITIALIZATION METHODS
1508
1509These are mostly for use by parsers.
1510
1511=head2 make_witness_path( $witness )
1512
1513Link the array of readings contained in $witness->path (and in
1514$witness->uncorrected_path if it exists) into collation paths.
1515Clear out the arrays when finished.
de51424a 1516
4e5a7b2c 1517=head2 make_witness_paths
1518
1519Call make_witness_path for all witnesses in the tradition.
1520
1521=cut
930ff666 1522
7e450e44 1523# For use when a collation is constructed from a base text and an apparatus.
1524# We have the sequences of readings and just need to add path edges.
1f7aa795 1525# When we are done, clear out the witness path attributes, as they are no
1526# longer needed.
1527# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1528
6a222840 1529sub make_witness_paths {
1530 my( $self ) = @_;
910a0a6d 1531 foreach my $wit ( $self->tradition->witnesses ) {
6771a1b1 1532 # say STDERR "Making path for " . $wit->sigil;
910a0a6d 1533 $self->make_witness_path( $wit );
7854e12e 1534 }
7854e12e 1535}
1536
6a222840 1537sub make_witness_path {
7854e12e 1538 my( $self, $wit ) = @_;
1539 my @chain = @{$wit->path};
15d2d3df 1540 my $sig = $wit->sigil;
fae52efd 1541 # Add start and end if necessary
1542 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1543 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1544 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1545 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1546 }
1f7aa795 1547 if( $wit->is_layered ) {
d9e873d0 1548 @chain = @{$wit->uncorrected_path};
fae52efd 1549 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1550 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1551 foreach my $idx( 0 .. $#chain-1 ) {
1552 my $source = $chain[$idx];
1553 my $target = $chain[$idx+1];
1554 $self->add_path( $source, $target, $sig.$self->ac_label )
1555 unless $self->has_path( $source, $target, $sig );
1556 }
15d2d3df 1557 }
1f7aa795 1558 $wit->clear_path;
1559 $wit->clear_uncorrected_path;
e2902068 1560}
1561
4e5a7b2c 1562=head2 calculate_ranks
1563
1564Calculate the reading ranks (that is, their aligned positions relative
1565to each other) for the graph. This can only be called on linear collations.
1566
b365fbae 1567=begin testing
1568
1569use Text::Tradition;
1570
1571my $cxfile = 't/data/Collatex-16.xml';
1572my $t = Text::Tradition->new(
1573 'name' => 'inline',
1574 'input' => 'CollateX',
1575 'file' => $cxfile,
1576 );
1577my $c = $t->collation;
1578
1579# Make an svg
bfcbcecb 1580my $table = $c->alignment_table;
1581ok( $c->has_cached_table, "Alignment table was cached" );
1582is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1583$c->calculate_ranks;
bfcbcecb 1584is( $c->alignment_table, $table, "Cached table retained with no rank change" );
864ee4bf 1585$c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
1586is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
1587$c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
1588isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
b365fbae 1589
1590=end testing
1591
4e5a7b2c 1592=cut
1593
910a0a6d 1594sub calculate_ranks {
1595 my $self = shift;
b365fbae 1596 # Save the existing ranks, in case we need to invalidate the cached SVG.
1597 my %existing_ranks;
ac4d7ac2 1598 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1599
1600 # Do the rankings based on the relationship equivalence graph, starting
1601 # with the start node.
56772e8c 1602 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1603
910a0a6d 1604 # Transfer our rankings from the topological graph to the real one.
1605 foreach my $r ( $self->readings ) {
cecbe56d 1606 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 1607 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1608 } else {
63778331 1609 # Die. Find the last rank we calculated.
359944f7 1610 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1611 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1612 $self->readings;
1613 my $last = pop @all_defined;
1614 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1615 }
de51424a 1616 }
bfcbcecb 1617 # Do we need to invalidate the cached data?
be3af600 1618 if( $self->has_cached_table ) {
b365fbae 1619 foreach my $r ( $self->readings ) {
7c293912 1620 next if defined( $existing_ranks{$r} )
1621 && $existing_ranks{$r} == $r->rank;
c1915ab9 1622 # Something has changed, so clear the cache
bfcbcecb 1623 $self->_clear_cache;
c1915ab9 1624 # ...and recalculate the common readings.
1625 $self->calculate_common_readings();
b365fbae 1626 last;
1627 }
1628 }
c1915ab9 1629 # The graph calculation information is now up to date.
1630 $self->_graphcalc_done(1);
8e1394aa 1631}
3a1f2523 1632
c1915ab9 1633sub _clear_cache {
1634 my $self = shift;
c1915ab9 1635 $self->wipe_table if $self->has_cached_table;
1636}
1637
1638
4e5a7b2c 1639=head2 flatten_ranks
1640
1641A convenience method for parsing collation data. Searches the graph for readings
1642with the same text at the same rank, and merges any that are found.
1643
1644=cut
1645
0e476982 1646sub flatten_ranks {
1647 my $self = shift;
1648 my %unique_rank_rdg;
bf6e338d 1649 my $changed;
0e476982 1650 foreach my $rdg ( $self->readings ) {
1651 next unless $rdg->has_rank;
1652 my $key = $rdg->rank . "||" . $rdg->text;
1653 if( exists $unique_rank_rdg{$key} ) {
07e6765f 1654 # Make sure they don't have different grammatical forms
1655 my $ur = $unique_rank_rdg{$key};
a445ce40 1656 if( $rdg->is_identical( $ur ) ) {
1657 # Combine!
1658 #say STDERR "Combining readings at same rank: $key";
1659 $changed = 1;
1660 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1661 # TODO see if this now makes a common point.
07e6765f 1662 }
0e476982 1663 } else {
1664 $unique_rank_rdg{$key} = $rdg;
1665 }
1666 }
bf6e338d 1667 # If we merged readings, the ranks are still fine but the alignment
1668 # table is wrong. Wipe it.
1669 $self->wipe_table() if $changed;
0e476982 1670}
4633f9e4 1671
1672
d4b75f44 1673=head2 calculate_common_readings
1674
1675Goes through the graph identifying the readings that appear in every witness
1676(apart from those with lacunae at that spot.) Marks them as common and returns
1677the list.
1678
1679=begin testing
1680
1681use Text::Tradition;
1682
1683my $cxfile = 't/data/Collatex-16.xml';
1684my $t = Text::Tradition->new(
1685 'name' => 'inline',
1686 'input' => 'CollateX',
1687 'file' => $cxfile,
1688 );
1689my $c = $t->collation;
1690
1691my @common = $c->calculate_common_readings();
1692is( scalar @common, 8, "Found correct number of common readings" );
1693my @marked = sort $c->common_readings();
1694is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1695my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1696is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1697
1698=end testing
1699
1700=cut
1701
1702sub calculate_common_readings {
1703 my $self = shift;
1704 my @common;
c1915ab9 1705 map { $_->is_common( 0 ) } $self->readings;
1706 # Implicitly calls calculate_ranks
1dd07bda 1707 my $table = $self->alignment_table;
d4b75f44 1708 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1709 my @row = map { $_->{'tokens'}->[$idx]
1710 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1711 @{$table->{'alignment'}};
d4b75f44 1712 my %hash;
1713 foreach my $r ( @row ) {
1714 if( $r ) {
1715 $hash{$r->id} = $r unless $r->is_meta;
1716 } else {
1717 $hash{'UNDEF'} = $r;
1718 }
1719 }
1720 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1721 my( $r ) = values %hash;
1722 $r->is_common( 1 );
1723 push( @common, $r );
1724 }
1725 }
1726 return @common;
1727}
1728
861c3e27 1729=head2 text_from_paths
1730
1731Calculate the text array for all witnesses from the path, for later consistency
1732checking. Only to be used if there is no non-graph-based way to know the
1733original texts.
1734
1735=cut
1736
1737sub text_from_paths {
1738 my $self = shift;
1739 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1740 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1741 my @text;
1742 foreach my $r ( @readings ) {
1743 next if $r->is_meta;
1744 push( @text, $r->text );
1745 }
861c3e27 1746 $wit->text( \@text );
1747 if( $wit->is_layered ) {
5164a6f0 1748 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1749 $wit->sigil.$self->ac_label );
1750 my @uctext;
1751 foreach my $r ( @ucrdgs ) {
1752 next if $r->is_meta;
1753 push( @uctext, $r->text );
1754 }
1755 $wit->layertext( \@uctext );
861c3e27 1756 }
1757 }
1758}
0e476982 1759
4e5a7b2c 1760=head1 UTILITY FUNCTIONS
1761
1762=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1763
4e5a7b2c 1764Find the last reading that occurs in sequence before both the given readings.
414cc046 1765At the very least this should be $self->start.
4e5a7b2c 1766
1767=head2 common_successor( $reading_a, $reading_b )
1768
1769Find the first reading that occurs in sequence after both the given readings.
414cc046 1770At the very least this should be $self->end.
4e5a7b2c 1771
22222af9 1772=begin testing
1773
1774use Text::Tradition;
1775
1776my $cxfile = 't/data/Collatex-16.xml';
1777my $t = Text::Tradition->new(
1778 'name' => 'inline',
1779 'input' => 'CollateX',
1780 'file' => $cxfile,
1781 );
1782my $c = $t->collation;
1783
679f17e1 1784is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1785 'n20', "Found correct common predecessor" );
679f17e1 1786is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1787 '__END__', "Found correct common successor" );
22222af9 1788
4e5a7b2c 1789is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1790 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1791is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1792 '__END__', "Found correct common successor for readings on same path" );
22222af9 1793
1794=end testing
1795
1796=cut
1797
1798## Return the closest reading that is a predecessor of both the given readings.
1799sub common_predecessor {
1800 my $self = shift;
4e5a7b2c 1801 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1802 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1803}
1804
1805sub common_successor {
1806 my $self = shift;
4e5a7b2c 1807 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1808 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1809}
1810
414cc046 1811
1812# TODO think about how to do this without ranks...
027d819c 1813sub _common_in_path {
22222af9 1814 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1815 my $iter = $self->end->rank;
22222af9 1816 my @candidates;
414cc046 1817 my @last_r1 = ( $r1 );
1818 my @last_r2 = ( $r2 );
1819 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1820 my %all_seen;
6771a1b1 1821 # say STDERR "Finding common $dir for $r1, $r2";
22222af9 1822 while( !@candidates ) {
414cc046 1823 last unless $iter--; # Avoid looping infinitely
1824 # Iterate separately down the graph from r1 and r2
1825 my( @new_lc1, @new_lc2 );
1826 foreach my $lc ( @last_r1 ) {
1827 foreach my $p ( $lc->$dir ) {
1828 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
6771a1b1 1829 # say STDERR "Path candidate $p from $lc";
414cc046 1830 push( @candidates, $p );
002e3600 1831 } elsif( !$all_seen{$p->id} ) {
414cc046 1832 $all_seen{$p->id} = 'r1';
1833 push( @new_lc1, $p );
1834 }
1835 }
1836 }
1837 foreach my $lc ( @last_r2 ) {
22222af9 1838 foreach my $p ( $lc->$dir ) {
414cc046 1839 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
6771a1b1 1840 # say STDERR "Path candidate $p from $lc";
22222af9 1841 push( @candidates, $p );
002e3600 1842 } elsif( !$all_seen{$p->id} ) {
414cc046 1843 $all_seen{$p->id} = 'r2';
1844 push( @new_lc2, $p );
22222af9 1845 }
1846 }
1847 }
414cc046 1848 @last_r1 = @new_lc1;
1849 @last_r2 = @new_lc2;
22222af9 1850 }
1851 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1852 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1853}
1854
63778331 1855sub throw {
1856 Text::Tradition::Error->throw(
1857 'ident' => 'Collation error',
1858 'message' => $_[0],
1859 );
1860}
1861
dd3b58b0 1862no Moose;
1863__PACKAGE__->meta->make_immutable;
e867486f 1864
a445ce40 1865=head1 BUGS/TODO
1866
1867=over
1868
1869=item * Rework XML serialization in a more modular way
1870
1871=back
1872
027d819c 1873=head1 LICENSE
e867486f 1874
027d819c 1875This package is free software and is provided "as is" without express
1876or implied warranty. You can redistribute it and/or modify it under
1877the same terms as Perl itself.
e867486f 1878
027d819c 1879=head1 AUTHOR
e867486f 1880
027d819c 1881Tara L Andrews E<lt>aurum@cpan.orgE<gt>