add in columns for excluding nothing
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
6771a1b1 3use feature 'say';
910a0a6d 4use Encode qw( decode_utf8 );
5use File::Temp;
bfcbcecb 6use File::Which;
c9bf3dbf 7use Graph;
8e1394aa 8use IPC::Run qw( run binary );
82fa4d57 9use Text::CSV;
5c0072ef 10use Text::Tradition::Collation::Data;
b15511bf 11use Text::Tradition::Collation::Reading;
22222af9 12use Text::Tradition::Collation::RelationshipStore;
63778331 13use Text::Tradition::Error;
cc31ebaa 14use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
5c0072ef 15use XML::LibXML;
16use XML::LibXML::XPathContext;
dd3b58b0 17use Moose;
18
5c0072ef 19has _data => (
20 isa => 'Text::Tradition::Collation::Data',
21 is => 'ro',
22 required => 1,
23 handles => [ qw(
24 sequence
25 paths
26 _set_relations
27 relations
28 _set_start
29 _set_end
30 ac_label
31 has_cached_table
32 relationships
33 related_readings
34 get_relationship
35 del_relationship
36 equivalence
37 equivalence_graph
38 readings
39 reading
40 _add_reading
41 del_reading
42 has_reading
43 wit_list_separator
44 baselabel
45 linear
46 wordsep
47 start
48 end
49 cached_table
50 _graphcalc_done
51 has_cached_svg
52 wipe_table
53 )]
54);
dd3b58b0 55
3a2ebbf4 56has 'tradition' => (
57 is => 'ro',
d047cd52 58 isa => 'Text::Tradition',
8cfd99c4 59 writer => '_set_tradition',
8d9a1cd8 60 weak_ref => 1,
d047cd52 61 );
dd3b58b0 62
4e5a7b2c 63=head1 NAME
64
65Text::Tradition::Collation - a software model for a text collation
66
67=head1 SYNOPSIS
68
69 use Text::Tradition;
70 my $t = Text::Tradition->new(
71 'name' => 'this is a text',
72 'input' => 'TEI',
73 'file' => '/path/to/tei_parallel_seg_file.xml' );
74
75 my $c = $t->collation;
76 my @readings = $c->readings;
77 my @paths = $c->paths;
78 my @relationships = $c->relationships;
79
80 my $svg_variant_graph = $t->collation->as_svg();
81
82=head1 DESCRIPTION
83
84Text::Tradition is a library for representation and analysis of collated
85texts, particularly medieval ones. The Collation is the central feature of
86a Tradition, where the text, its sequence of readings, and its relationships
87between readings are actually kept.
88
89=head1 CONSTRUCTOR
90
91=head2 new
92
93The constructor. Takes a hash or hashref of the following arguments:
94
95=over
96
97=item * tradition - The Text::Tradition object to which the collation
98belongs. Required.
99
100=item * linear - Whether the collation should be linear; that is, whether
101transposed readings should be treated as two linked readings rather than one,
102and therefore whether the collation graph is acyclic. Defaults to true.
103
4e5a7b2c 104=item * baselabel - The default label for the path taken by a base text
105(if any). Defaults to 'base text'.
106
107=item * wit_list_separator - The string to join a list of witnesses for
108purposes of making labels in display graphs. Defaults to ', '.
109
110=item * ac_label - The extra label to tack onto a witness sigil when
111representing another layer of path for the given witness - that is, when
112a text has more than one possible reading due to scribal corrections or
113the like. Defaults to ' (a.c.)'.
114
4e483aa5 115=item * wordsep - The string used to separate words in the original text.
116Defaults to ' '.
117
4e5a7b2c 118=back
119
120=head1 ACCESSORS
121
122=head2 tradition
123
124=head2 linear
125
4e5a7b2c 126=head2 wit_list_separator
127
128=head2 baselabel
129
130=head2 ac_label
131
4e483aa5 132=head2 wordsep
133
4e5a7b2c 134Simple accessors for collation attributes.
135
136=head2 start
137
138The meta-reading at the start of every witness path.
139
140=head2 end
141
142The meta-reading at the end of every witness path.
143
144=head2 readings
145
146Returns all Reading objects in the graph.
147
148=head2 reading( $id )
149
150Returns the Reading object corresponding to the given ID.
151
152=head2 add_reading( $reading_args )
153
154Adds a new reading object to the collation.
155See L<Text::Tradition::Collation::Reading> for the available arguments.
156
157=head2 del_reading( $object_or_id )
158
159Removes the given reading from the collation, implicitly removing its
160paths and relationships.
161
4e483aa5 162=head2 merge_readings( $main, $second, $concatenate, $with_str )
163
164Merges the $second reading into the $main one. If $concatenate is true, then
165the merged node will carry the text of both readings, concatenated with either
166$with_str (if specified) or a sensible default (the empty string if the
167appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 168
4e483aa5 169The first two arguments may be either readings or reading IDs.
4e5a7b2c 170
171=head2 has_reading( $id )
172
173Predicate to see whether a given reading ID is in the graph.
174
175=head2 reading_witnesses( $object_or_id )
176
177Returns a list of sigils whose witnesses contain the reading.
178
179=head2 paths
180
181Returns all reading paths within the document - that is, all edges in the
182collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
183
184=head2 add_path( $source, $target, $sigil )
185
186Links the given readings in the collation in sequence, under the given witness
187sigil. The readings may be specified by object or ID.
188
189=head2 del_path( $source, $target, $sigil )
190
191Links the given readings in the collation in sequence, under the given witness
192sigil. The readings may be specified by object or ID.
193
194=head2 has_path( $source, $target );
195
196Returns true if the two readings are linked in sequence in any witness.
197The readings may be specified by object or ID.
198
199=head2 relationships
200
201Returns all Relationship objects in the collation.
202
203=head2 add_relationship( $reading, $other_reading, $options )
204
205Adds a new relationship of the type given in $options between the two readings,
206which may be specified by object or ID. Returns a value of ( $status, @vectors)
207where $status is true on success, and @vectors is a list of relationship edges
208that were ultimately added.
209See L<Text::Tradition::Collation::Relationship> for the available options.
210
211=cut
dd3b58b0 212
5c0072ef 213sub BUILDARGS {
214 my ( $class, @args ) = @_;
215 my %args = @args == 1 ? %{ $args[0] } : @args;
216 # TODO determine these from the Moose::Meta object
217 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
218 linear wordsep start end cached_table _graphcalc_done);
219 my %data_args;
220 for my $attr (@delegate_attrs) {
221 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
222 }
223 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
224 return \%args;
225}
226
d047cd52 227sub BUILD {
3a2ebbf4 228 my $self = shift;
22222af9 229 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
e4b73942 230 $self->_set_start( $self->add_reading(
231 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
232 $self->_set_end( $self->add_reading(
233 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
d047cd52 234}
784877d9 235
24efa55d 236sub register_relationship_type {
237 my $self = shift;
238 my %args = @_ == 1 ? %{$_[0]} : @_;
239 if( $self->relations->has_type( $args{name} ) ) {
240 throw( 'Relationship type ' . $args{name} . ' already registered' );
241 }
242 $self->relations->add_type( %args );
243}
244
3a2ebbf4 245### Reading construct/destruct functions
246
247sub add_reading {
248 my( $self, $reading ) = @_;
249 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
250 my %args = %$reading;
e4b73942 251 if( $args{'init'} ) {
252 # If we are initializing an empty collation, don't assume that we
253 # have set a tradition.
254 delete $args{'init'};
8943ff68 255 } elsif( $self->tradition->can('language') && $self->tradition->has_language
256 && !exists $args{'language'} ) {
6ad2ce78 257 $args{'language'} = $self->tradition->language;
258 }
3a2ebbf4 259 $reading = Text::Tradition::Collation::Reading->new(
260 'collation' => $self,
261 %args );
262 }
263 # First check to see if a reading with this ID exists.
264 if( $self->reading( $reading->id ) ) {
63778331 265 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 266 }
c1915ab9 267 $self->_graphcalc_done(0);
3a2ebbf4 268 $self->_add_reading( $reading->id => $reading );
269 # Once the reading has been added, put it in both graphs.
270 $self->sequence->add_vertex( $reading->id );
cecbe56d 271 $self->relations->add_reading( $reading->id );
3a2ebbf4 272 return $reading;
eca16057 273};
274
3a2ebbf4 275around del_reading => sub {
276 my $orig = shift;
277 my $self = shift;
278 my $arg = shift;
279
cecbe56d 280 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
281 $arg = $arg->id;
3a2ebbf4 282 }
3a2ebbf4 283 # Remove the reading from the graphs.
c1915ab9 284 $self->_graphcalc_done(0);
4e483aa5 285 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 286 $self->sequence->delete_vertex( $arg );
287 $self->relations->delete_reading( $arg );
3a2ebbf4 288
289 # Carry on.
cecbe56d 290 $self->$orig( $arg );
3a2ebbf4 291};
7854e12e 292
4e483aa5 293=begin testing
294
295use Text::Tradition;
296
297my $cxfile = 't/data/Collatex-16.xml';
298my $t = Text::Tradition->new(
299 'name' => 'inline',
300 'input' => 'CollateX',
301 'file' => $cxfile,
302 );
303my $c = $t->collation;
304
305my $rno = scalar $c->readings;
306# Split n21 for testing purposes
307my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
308my $old_r = $c->reading( 'n21' );
309$old_r->alter_text( 'to' );
310$c->del_path( 'n20', 'n21', 'A' );
311$c->add_path( 'n20', 'n21p0', 'A' );
312$c->add_path( 'n21p0', 'n21', 'A' );
313$c->flatten_ranks();
314ok( $c->reading( 'n21p0' ), "New reading exists" );
315is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
316
679f17e1 317# Combine n3 and n4 ( with his )
4e483aa5 318$c->merge_readings( 'n3', 'n4', 1 );
319ok( !$c->reading('n4'), "Reading n4 is gone" );
320is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
321
679f17e1 322# Collapse n9 and n10 ( rood / root )
323$c->merge_readings( 'n9', 'n10' );
324ok( !$c->reading('n10'), "Reading n10 is gone" );
325is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 326
327# Combine n21 and n21p0
328my $remaining = $c->reading('n21');
329$remaining ||= $c->reading('n22'); # one of these should still exist
330$c->merge_readings( 'n21p0', $remaining, 1 );
331ok( !$c->reading('n21'), "Reading $remaining is gone" );
332is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
333
334=end testing
335
336=cut
7854e12e 337
3a2ebbf4 338sub merge_readings {
339 my $self = shift;
340
56772e8c 341 # Sanity check
342 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
343 my $mergemeta = $kept_obj->is_meta;
344 throw( "Cannot merge meta and non-meta reading" )
345 unless ( $mergemeta && $del_obj->is_meta )
346 || ( !$mergemeta && !$del_obj->is_meta );
347 if( $mergemeta ) {
348 throw( "Cannot merge with start or end node" )
349 if( $kept_obj eq $self->start || $kept_obj eq $self->end
350 || $del_obj eq $self->start || $del_obj eq $self->end );
a445ce40 351 throw( "Cannot combine text of meta readings" ) if $combine;
56772e8c 352 }
3a2ebbf4 353 # We only need the IDs for adding paths to the graph, not the reading
354 # objects themselves.
56772e8c 355 my $kept = $kept_obj->id;
356 my $deleted = $del_obj->id;
c1915ab9 357 $self->_graphcalc_done(0);
10943ab0 358
3a2ebbf4 359 # The kept reading should inherit the paths and the relationships
360 # of the deleted reading.
361 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
362 my @vector = ( $kept );
363 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
364 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 365 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 366 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
367 $self->sequence->add_edge( @vector );
368 my $fwits = $self->sequence->get_edge_attributes( @vector );
369 @wits{keys %$fwits} = values %$fwits;
370 $self->sequence->set_edge_attributes( @vector, \%wits );
371 }
cecbe56d 372 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 373
374 # Do the deletion deed.
4e483aa5 375 if( $combine ) {
869a1ada 376 # Combine the text of the readings
4e483aa5 377 my $joinstr = $combine_char;
378 unless( defined $joinstr ) {
379 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
380 $joinstr = $self->wordsep unless defined $joinstr;
381 }
a445ce40 382 $kept_obj->_combine( $del_obj, $joinstr );
49d4f2ac 383 }
3a2ebbf4 384 $self->del_reading( $deleted );
385}
7854e12e 386
6771a1b1 387=head2 compress_readings
388
389Where possible in the graph, compresses plain sequences of readings into a
390single reading. The sequences must consist of readings with no
391relationships to other readings, with only a single witness path between
392them and no other witness paths from either that would skip the other. The
393readings must also not be marked as nonsense or bad grammar.
394
395WARNING: This operation cannot be undone.
396
397=cut
398
399sub compress_readings {
400 my $self = shift;
401 # Anywhere in the graph that there is a reading that joins only to a single
402 # successor, and neither of these have any relationships, just join the two
403 # readings.
6771a1b1 404 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
428bcf0b 405 # Now look for readings that can be joined to their successors.
a445ce40 406 next unless $rdg->is_combinable;
6771a1b1 407 my %seen;
408 while( $self->sequence->successors( $rdg ) == 1 ) {
409 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
410 throw( "Infinite loop" ) if $seen{$next->id};
411 $seen{$next->id} = 1;
412 last if $self->sequence->predecessors( $next ) > 1;
a445ce40 413 last unless $next->is_combinable;
6771a1b1 414 say "Joining readings $rdg and $next";
415 $self->merge_readings( $rdg, $next, 1 );
416 }
417 }
418 # Make sure we haven't screwed anything up
419 foreach my $wit ( $self->tradition->witnesses ) {
420 my $pathtext = $self->path_text( $wit->sigil );
421 my $origtext = join( ' ', @{$wit->text} );
422 throw( "Text differs for witness " . $wit->sigil )
423 unless $pathtext eq $origtext;
424 if( $wit->is_layered ) {
425 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
426 $origtext = join( ' ', @{$wit->layertext} );
427 throw( "Ante-corr text differs for witness " . $wit->sigil )
428 unless $pathtext eq $origtext;
429 }
430 }
431
432 $self->relations->rebuild_equivalence();
433 $self->calculate_ranks();
434}
3265b0ce 435
3a2ebbf4 436# Helper function for manipulating the graph.
437sub _stringify_args {
4e483aa5 438 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 439 $first = $first->id
440 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
441 $second = $second->id
442 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 443 return( $first, $second, @args );
3a2ebbf4 444}
df6d9812 445
4e5a7b2c 446# Helper function for manipulating the graph.
447sub _objectify_args {
448 my( $self, $first, $second, $arg ) = @_;
449 $first = $self->reading( $first )
450 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
451 $second = $self->reading( $second )
452 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
453 return( $first, $second, $arg );
454}
3a2ebbf4 455### Path logic
456
457sub add_path {
458 my $self = shift;
459
460 # We only need the IDs for adding paths to the graph, not the reading
461 # objects themselves.
cecbe56d 462 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 463
c1915ab9 464 $self->_graphcalc_done(0);
3a2ebbf4 465 # Connect the readings
359944f7 466 unless( $self->sequence->has_edge( $source, $target ) ) {
467 $self->sequence->add_edge( $source, $target );
468 $self->relations->add_equivalence_edge( $source, $target );
469 }
3a2ebbf4 470 # Note the witness in question
471 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 472}
b15511bf 473
3a2ebbf4 474sub del_path {
475 my $self = shift;
49d4f2ac 476 my @args;
477 if( ref( $_[0] ) eq 'ARRAY' ) {
478 my $e = shift @_;
479 @args = ( @$e, @_ );
480 } else {
481 @args = @_;
482 }
3a2ebbf4 483
484 # We only need the IDs for adding paths to the graph, not the reading
485 # objects themselves.
49d4f2ac 486 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 487
c1915ab9 488 $self->_graphcalc_done(0);
3a2ebbf4 489 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 490 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 491 }
30897024 492 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
3a2ebbf4 493 $self->sequence->delete_edge( $source, $target );
359944f7 494 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 495 }
784877d9 496}
497
3a2ebbf4 498
15d2d3df 499# Extra graph-alike utility
500sub has_path {
3a2ebbf4 501 my $self = shift;
502 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
503 return undef unless $self->sequence->has_edge( $source, $target );
504 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 505}
506
4e5a7b2c 507=head2 clear_witness( @sigil_list )
3a2ebbf4 508
4e5a7b2c 509Clear the given witnesses out of the collation entirely, removing references
510to them in paths, and removing readings that belong only to them. Should only
511be called via $tradition->del_witness.
3a2ebbf4 512
513=cut
514
4e5a7b2c 515sub clear_witness {
516 my( $self, @sigils ) = @_;
517
c1915ab9 518 $self->_graphcalc_done(0);
4e5a7b2c 519 # Clear the witness(es) out of the paths
520 foreach my $e ( $self->paths ) {
521 foreach my $sig ( @sigils ) {
522 $self->del_path( $e, $sig );
523 }
524 }
525
526 # Clear out the newly unused readings
527 foreach my $r ( $self->readings ) {
528 unless( $self->reading_witnesses( $r ) ) {
529 $self->del_reading( $r );
530 }
531 }
532}
3a2ebbf4 533
534sub add_relationship {
535 my $self = shift;
22222af9 536 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 537 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 538 $self->_graphcalc_done(0);
63778331 539 return @vectors;
22222af9 540}
ef9d481f 541
ca6e6095 542around qw/ get_relationship del_relationship / => sub {
543 my $orig = shift;
544 my $self = shift;
545 my @args = @_;
546 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
547 @args = @{$_[0]};
548 }
549 my( $source, $target ) = $self->_stringify_args( @args );
550 $self->$orig( $source, $target );
551};
552
22222af9 553=head2 reading_witnesses( $reading )
910a0a6d 554
22222af9 555Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 556
22222af9 557=cut
1d310495 558
1d310495 559sub reading_witnesses {
560 my( $self, $reading ) = @_;
561 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 562 # arbitrarily chosen "incoming". Thus, special-case the start node.
563 if( $reading eq $self->start ) {
bed6ce83 564 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
96dc90ec 565 }
1d310495 566 my %all_witnesses;
567 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
568 my $wits = $self->sequence->get_edge_attributes( @$e );
569 @all_witnesses{ keys %$wits } = 1;
570 }
c12bb878 571 my $acstr = $self->ac_label;
572 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
573 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
574 }
1d310495 575 return keys %all_witnesses;
910a0a6d 576}
577
4e5a7b2c 578=head1 OUTPUT METHODS
8e1394aa 579
0ecb975c 580=head2 as_svg( \%options )
8e1394aa 581
0068967c 582Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 583See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 584
585=cut
586
587sub as_svg {
0ecb975c 588 my( $self, $opts ) = @_;
bfcbcecb 589 throw( "Need GraphViz installed to output SVG" )
590 unless File::Which::which( 'dot' );
e247aad1 591 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 592 $self->calculate_ranks()
593 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
be3af600 594 my @cmd = qw/dot -Tsvg/;
595 my( $svg, $err );
596 my $dotfile = File::Temp->new();
597 ## USE FOR DEBUGGING
598 # $dotfile->unlink_on_destroy(0);
599 binmode $dotfile, ':utf8';
600 print $dotfile $self->as_dot( $opts );
601 push( @cmd, $dotfile->filename );
602 run( \@cmd, ">", binary(), \$svg );
603 $svg = decode_utf8( $svg );
604 return $svg;
8e1394aa 605}
606
b22576c6 607
0ecb975c 608=head2 as_dot( \%options )
b22576c6 609
0ecb975c 610Returns a string that is the collation graph expressed in dot
611(i.e. GraphViz) format. Options include:
b22576c6 612
0ecb975c 613=over 4
b22576c6 614
0ecb975c 615=item * from
b22576c6 616
0ecb975c 617=item * to
df6d9812 618
0ecb975c 619=item * color_common
620
621=back
df6d9812 622
623=cut
624
625sub as_dot {
0ecb975c 626 my( $self, $opts ) = @_;
627 my $startrank = $opts->{'from'} if $opts;
628 my $endrank = $opts->{'to'} if $opts;
629 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 630 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
631 && $self->end->rank > 100;
6648ee3d 632 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 633
b22576c6 634 # Check the arguments
635 if( $startrank ) {
636 return if $endrank && $startrank > $endrank;
637 return if $startrank > $self->end->rank;
638 }
639 if( defined $endrank ) {
640 return if $endrank < 0;
f1b3b33a 641 $endrank = undef if $endrank == $self->end->rank;
b22576c6 642 }
643
67da8d6c 644 my $graph_name = $self->tradition->name;
645 $graph_name =~ s/[^\w\s]//g;
646 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 647
648 my %graph_attrs = (
649 'rankdir' => 'LR',
650 'bgcolor' => 'none',
651 );
652 my %node_attrs = (
b8990398 653 'fontsize' => 14,
f13b5582 654 'fillcolor' => 'white',
655 'style' => 'filled',
656 'shape' => 'ellipse'
657 );
658 my %edge_attrs = (
659 'arrowhead' => 'open',
660 'color' => '#000000',
661 'fontcolor' => '#000000',
662 );
663
67da8d6c 664 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 665 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
666 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 667
b22576c6 668 # Output substitute start/end readings if necessary
669 if( $startrank ) {
43c94341 670 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 671 }
672 if( $endrank ) {
43c94341 673 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 674 }
b365fbae 675 if( $STRAIGHTENHACK ) {
676 ## HACK part 1
43c94341 677 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
678 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 679 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
680 }
b22576c6 681 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 682 # Sort the readings by rank if we have ranks; this speeds layout.
683 my @all_readings = $self->end->has_rank
684 ? sort { $a->rank <=> $b->rank } $self->readings
685 : $self->readings;
4633f9e4 686 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 687 foreach my $reading ( @all_readings ) {
b22576c6 688 # Only output readings within our rank range.
689 next if $startrank && $reading->rank < $startrank;
690 next if $endrank && $reading->rank > $endrank;
691 $used{$reading->id} = 1;
910a0a6d 692 # Need not output nodes without separate labels
3a2ebbf4 693 next if $reading->id eq $reading->text;
d4b75f44 694 my $rattrs;
30f0df34 695 my $label = $reading->text;
629e27b0 696 $label .= '-' if $reading->join_next;
697 $label = "-$label" if $reading->join_prior;
8f9cab7b 698 $label =~ s/\"/\\\"/g;
d4b75f44 699 $rattrs->{'label'} = $label;
10e4b1ac 700 $rattrs->{'id'} = $reading->id;
0ecb975c 701 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 702 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 703 }
3a2ebbf4 704
30ddc24c 705 # Add the real edges. Need to weight one edge per rank jump, in a
706 # continuous line.
b365fbae 707 # my $weighted = $self->_add_edge_weights;
b22576c6 708 my @edges = $self->paths;
3bdec618 709 my( %substart, %subend );
b22576c6 710 foreach my $edge ( @edges ) {
711 # Do we need to output this edge?
508fd430 712 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 713 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 714 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 715
b22576c6 716 # Account for the rank gap if necessary
30ddc24c 717 my $rank0 = $self->reading( $edge->[0] )->rank
718 if $self->reading( $edge->[0] )->has_rank;
719 my $rank1 = $self->reading( $edge->[1] )->rank
720 if $self->reading( $edge->[1] )->has_rank;
721 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
722 $variables->{'minlen'} = $rank1 - $rank0;
723 }
724
725 # Add the calculated edge weights
b365fbae 726 # if( exists $weighted->{$edge->[0]}
e247aad1 727 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
728 # # $variables->{'color'} = 'red';
729 # $variables->{'weight'} = 3.0;
730 # }
30ddc24c 731
508fd430 732 # EXPERIMENTAL: make edge width reflect no. of witnesses
733 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
734 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
735
f13b5582 736 my $varopts = _dot_attr_string( $variables );
737 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
738 $edge->[0], $edge->[1], $varopts );
3bdec618 739 } elsif( $used{$edge->[0]} ) {
96ba0418 740 $subend{$edge->[0]} = $edge->[1];
3bdec618 741 } elsif( $used{$edge->[1]} ) {
96ba0418 742 $substart{$edge->[1]} = $edge->[0];
b22576c6 743 }
df6d9812 744 }
bed6ce83 745
746 # If we are asked to, add relationship links
747 if( exists $opts->{show_relations} ) {
748 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
749 if( $filter eq 'transposition' ) {
750 $filter =~ qr/^transposition$/;
751 }
752 foreach my $redge ( $self->relationships ) {
753 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
754 if( $filter ne 'all' ) {
755 my $rel = $self->get_relationship( $redge );
756 next unless $rel->type =~ /$filter/;
757 my $variables = {
758 arrowhead => 'none',
759 color => '#FFA14F',
760 constraint => 'false',
761 label => uc( substr( $rel->type, 0, 4 ) ),
762 penwidth => '3',
763 };
764 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
765 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
766 }
767 }
768 }
769 }
770
3bdec618 771 # Add substitute start and end edges if necessary
772 foreach my $node ( keys %substart ) {
96ba0418 773 my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
f13b5582 774 my $variables = { %edge_attrs, 'label' => $witstr };
96ba0418 775 my $nrdg = $self->reading( $node );
776 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
777 # Substart is actually one lower than $startrank
778 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
779 }
f13b5582 780 my $varopts = _dot_attr_string( $variables );
96ba0418 781 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
3bdec618 782 }
783 foreach my $node ( keys %subend ) {
96ba0418 784 my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
f13b5582 785 my $variables = { %edge_attrs, 'label' => $witstr };
786 my $varopts = _dot_attr_string( $variables );
96ba0418 787 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
3bdec618 788 }
b365fbae 789 # HACK part 2
790 if( $STRAIGHTENHACK ) {
43c94341 791 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
792 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 793 }
30ddc24c 794
df6d9812 795 $dot .= "}\n";
796 return $dot;
797}
798
f13b5582 799sub _dot_attr_string {
800 my( $hash ) = @_;
801 my @attrs;
802 foreach my $k ( sort keys %$hash ) {
803 my $v = $hash->{$k};
804 push( @attrs, $k.'="'.$v.'"' );
805 }
806 return( '[ ' . join( ', ', @attrs ) . ' ]' );
807}
808
30ddc24c 809sub _add_edge_weights {
810 my $self = shift;
811 # Walk the graph from START to END, choosing the successor node with
812 # the largest number of witness paths each time.
813 my $weighted = {};
814 my $curr = $self->start->id;
008fc8a6 815 my $ranked = $self->end->has_rank;
30ddc24c 816 while( $curr ne $self->end->id ) {
008fc8a6 817 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 818 my @succ = sort { $self->path_witnesses( $curr, $a )
819 <=> $self->path_witnesses( $curr, $b ) }
820 $self->sequence->successors( $curr );
821 my $next = pop @succ;
008fc8a6 822 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 823 # Try to avoid lacunae in the weighted path.
008fc8a6 824 while( @succ &&
825 ( $self->reading( $next )->is_lacuna ||
826 $nextrank - $rank > 1 ) ){
30ddc24c 827 $next = pop @succ;
828 }
829 $weighted->{$curr} = $next;
830 $curr = $next;
831 }
832 return $weighted;
833}
834
027d819c 835=head2 path_witnesses( $edge )
836
837Returns the list of sigils whose witnesses are associated with the given edge.
838The edge can be passed as either an array or an arrayref of ( $source, $target ).
839
840=cut
841
3a2ebbf4 842sub path_witnesses {
843 my( $self, @edge ) = @_;
844 # If edge is an arrayref, cope.
845 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
846 my $e = shift @edge;
847 @edge = @$e;
848 }
849 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 850 return @wits;
3a2ebbf4 851}
852
7f9f05e8 853# Helper function. Make a display label for the given witnesses, showing a.c.
854# witnesses only where the main witness is not also in the list.
027d819c 855sub _path_display_label {
508fd430 856 my $self = shift;
7f9f05e8 857 my %wits;
858 map { $wits{$_} = 1 } @_;
859
860 # If an a.c. wit is listed, remove it if the main wit is also listed.
861 # Otherwise keep it for explicit listing.
862 my $aclabel = $self->ac_label;
863 my @disp_ac;
864 foreach my $w ( sort keys %wits ) {
865 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
866 if( exists $wits{$1} ) {
867 delete $wits{$w};
868 } else {
869 push( @disp_ac, $w );
870 }
871 }
872 }
873
874 # See if we are in a majority situation.
8f9cab7b 875 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1ff82d4f 876 $maj = $maj > 5 ? $maj : 5;
7f9f05e8 877 if( scalar keys %wits > $maj ) {
878 unshift( @disp_ac, 'majority' );
879 return join( ', ', @disp_ac );
8f9cab7b 880 } else {
7f9f05e8 881 return join( ', ', sort keys %wits );
8f9cab7b 882 }
883}
1dd07bda 884
bf6e338d 885=head2 readings_at_rank( $rank )
1dd07bda 886
bf6e338d 887Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 888
889=cut
890
bf6e338d 891sub readings_at_rank {
1dd07bda 892 my( $self, $rank ) = @_;
bf6e338d 893 my $table = $self->alignment_table;
894 # Table rank is real rank - 1.
895 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
896 my %readings;
897 foreach my $e ( @elements ) {
898 next unless ref( $e ) eq 'HASH';
899 next unless exists $e->{'t'};
900 $readings{$e->{'t'}->id} = $e->{'t'};
901 }
902 return values %readings;
1dd07bda 903}
8f9cab7b 904
4e5a7b2c 905=head2 as_graphml
8e1394aa 906
4e5a7b2c 907Returns a GraphML representation of the collation. The GraphML will contain
908two graphs. The first expresses the attributes of the readings and the witness
909paths that link them; the second expresses the relationships that link the
910readings. This is the native transfer format for a tradition.
8e1394aa 911
56eefa04 912=begin testing
913
914use Text::Tradition;
951ddfe8 915use TryCatch;
56eefa04 916
917my $READINGS = 311;
918my $PATHS = 361;
919
920my $datafile = 't/data/florilegium_tei_ps.xml';
921my $tradition = Text::Tradition->new( 'input' => 'TEI',
922 'name' => 'test0',
923 'file' => $datafile,
924 'linear' => 1 );
925
926ok( $tradition, "Got a tradition object" );
927is( scalar $tradition->witnesses, 13, "Found all witnesses" );
928ok( $tradition->collation, "Tradition has a collation" );
929
930my $c = $tradition->collation;
931is( scalar $c->readings, $READINGS, "Collation has all readings" );
932is( scalar $c->paths, $PATHS, "Collation has all paths" );
933is( scalar $c->relationships, 0, "Collation has all relationships" );
934
935# Add a few relationships
936$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
937$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
938$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
939
940# Now write it to GraphML and parse it again.
941
942my $graphml = $c->as_graphml;
943my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
944is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
945is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
946is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
947
9fef629b 948# Now add a stemma, write to GraphML, and look at the output.
951ddfe8 949SKIP: {
37bf09f4 950 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
951ddfe8 951 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
952 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
953 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
954 $graphml = $c->as_graphml;
955 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
956}
2a812726 957
56eefa04 958=end testing
959
8e1394aa 960=cut
961
a445ce40 962## TODO MOVE this to Tradition.pm and modularize it better
8e1394aa 963sub as_graphml {
a30ca502 964 my( $self, $options ) = @_;
3d14b48e 965 $self->calculate_ranks unless $self->_graphcalc_done;
966
a30ca502 967 my $start = $options->{'from'}
968 ? $self->reading( $options->{'from'} ) : $self->start;
969 my $end = $options->{'to'}
970 ? $self->reading( $options->{'to'} ) : $self->end;
971 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
972 throw( 'Start node must be before end node' );
973 }
974 # The readings need to be ranked for this to work.
975 $start = $self->start unless $start->has_rank;
976 $end = $self->end unless $end->has_rank;
414cc046 977 my $rankoffset = 0;
978 unless( $start eq $self->start ) {
979 $rankoffset = $start->rank - 1;
980 }
a30ca502 981 my %use_readings;
982
8e1394aa 983 # Some namespaces
984 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
985 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
986 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 987 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 988
989 # Create the document and root node
428bcf0b 990 require XML::LibXML;
8e1394aa 991 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
992 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
993 $graphml->setDocumentElement( $root );
994 $root->setNamespace( $xsi_ns, 'xsi', 0 );
995 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 996
997 # List of attribute types to save on our objects and their corresponding
998 # GraphML types
999 my %save_types = (
1000 'Str' => 'string',
1001 'Int' => 'int',
1002 'Bool' => 'boolean',
10e4b1ac 1003 'ReadingID' => 'string',
bbd064a9 1004 'RelationshipType' => 'string',
1005 'RelationshipScope' => 'string',
1006 );
1007
bbd064a9 1008 # Add the data keys for the graph. Include an extra key 'version' for the
1009 # GraphML output version.
e309421a 1010 my %graph_data_keys;
1011 my $gdi = 0;
bbd064a9 1012 my %graph_attributes = ( 'version' => 'string' );
1013 # Graph attributes include those of Tradition and those of Collation.
1014 my %gattr_from;
1015 my $tmeta = $self->tradition->meta;
1016 my $cmeta = $self->meta;
1017 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1018 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1019 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1020 next if $attr->name =~ /^_/;
bbd064a9 1021 next unless $save_types{$attr->type_constraint->name};
1022 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1023 }
9fef629b 1024 # Extra custom keys for complex objects that should be saved in some form.
1025 # The subroutine should return a string, or undef/empty.
951ddfe8 1026 if( $tmeta->has_method('stemmata') ) {
1027 $graph_attributes{'stemmata'} = sub {
1028 my @stemstrs;
1029 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1030 $self->tradition->stemmata;
1031 join( "\n", @stemstrs );
1032 };
1033 }
1034
8943ff68 1035 if( $tmeta->has_method('user') ) {
1036 $graph_attributes{'user'} = sub {
1037 $self->tradition->user ? $self->tradition->user->id : undef
1038 };
1039 }
bbd064a9 1040
1041 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 1042 $graph_data_keys{$datum} = 'dg'.$gdi++;
1043 my $key = $root->addNewChild( $graphml_ns, 'key' );
9fef629b 1044 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1045 : $graph_attributes{$datum};
e309421a 1046 $key->setAttribute( 'attr.name', $datum );
9fef629b 1047 $key->setAttribute( 'attr.type', $dtype );
e309421a 1048 $key->setAttribute( 'for', 'graph' );
1049 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1050 }
f6066bac 1051
bbd064a9 1052 # Add the data keys for reading nodes
1053 my %reading_attributes;
1054 my $rmeta = Text::Tradition::Collation::Reading->meta;
1055 foreach my $attr( $rmeta->get_all_attributes ) {
1056 next if $attr->name =~ /^_/;
bbd064a9 1057 next unless $save_types{$attr->type_constraint->name};
1058 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1059 }
a445ce40 1060 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1061 # Extra custom key for the reading morphology
1062 $reading_attributes{'lexemes'} = 'string';
1063 }
7cd9f181 1064
ef9d481f 1065 my %node_data_keys;
1066 my $ndi = 0;
bbd064a9 1067 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1068 $node_data_keys{$datum} = 'dn'.$ndi++;
1069 my $key = $root->addNewChild( $graphml_ns, 'key' );
1070 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1071 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1072 $key->setAttribute( 'for', 'node' );
1073 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1074 }
1075
bbd064a9 1076 # Add the data keys for edges, that is, paths and relationships. Path
1077 # data does not come from a Moose class so is here manually.
ef9d481f 1078 my $edi = 0;
1079 my %edge_data_keys;
bbd064a9 1080 my %edge_attributes = (
3a2ebbf4 1081 witness => 'string', # ID/label for a path
3a2ebbf4 1082 extra => 'boolean', # Path key
3a2ebbf4 1083 );
bbd064a9 1084 my @path_attributes = keys %edge_attributes; # track our manual additions
1085 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1086 foreach my $attr( $pmeta->get_all_attributes ) {
1087 next if $attr->name =~ /^_/;
bbd064a9 1088 next unless $save_types{$attr->type_constraint->name};
1089 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1090 }
1091 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1092 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1093 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1094 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1095 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1096 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1097 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1098 }
3a2ebbf4 1099
cc31ebaa 1100 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1101 my $xmlidname = $self->tradition->name;
1102 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1103 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1104 $xmlidname = '_'.$xmlidname;
1105 }
2c669bca 1106 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1107 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1108 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1109 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1110 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1111 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1112 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1113 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1114
2a812726 1115 # Tradition/collation attribute data
bbd064a9 1116 foreach my $datum ( keys %graph_attributes ) {
1117 my $value;
1118 if( $datum eq 'version' ) {
2a812726 1119 $value = '3.2';
9fef629b 1120 } elsif( ref( $graph_attributes{$datum} ) ) {
1121 my $sub = $graph_attributes{$datum};
1122 $value = &$sub();
bbd064a9 1123 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1124 $value = $self->tradition->$datum;
1125 } else {
1126 $value = $self->$datum;
1127 }
2c669bca 1128 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1129 }
8e1394aa 1130
1131 my $node_ctr = 0;
1132 my %node_hash;
22222af9 1133 # Add our readings to the graph
3a2ebbf4 1134 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1135 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1136 ( $n->rank < $start->rank || $n->rank > $end->rank );
1137 $use_readings{$n->id} = 1;
2c669bca 1138 # Add to the main graph
1139 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1140 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1141 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1142 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1143 foreach my $d ( keys %reading_attributes ) {
255875b8 1144 my $nval = $n->$d;
7cd9f181 1145 # Custom serialization
1146 if( $d eq 'lexemes' ) {
1147 # If nval is a true value, we have lexemes so we need to
1148 # serialize them. Otherwise set nval to undef so that the
1149 # key is excluded from this reading.
1150 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1151 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1152 $nval = undef;
7cd9f181 1153 }
cc31ebaa 1154 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1155 # Adjust the ranks within the subgraph.
cc31ebaa 1156 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1157 : $nval - $rankoffset;
414cc046 1158 }
255875b8 1159 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1160 if defined $nval;
1161 }
b15511bf 1162 }
1163
2c669bca 1164 # Add the path edges to the sequence graph
df6d9812 1165 my $edge_ctr = 0;
3a2ebbf4 1166 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1167 # We add an edge in the graphml for every witness in $e.
a30ca502 1168 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1169 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1170 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1171 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1172 # Skip any path from start to end; that witness is not in the subgraph.
1173 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1174 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1175 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1176 $node_hash{ $e->[0] },
1177 $node_hash{ $e->[1] } );
2c669bca 1178 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1179 $edge_el->setAttribute( 'source', $from );
1180 $edge_el->setAttribute( 'target', $to );
1181 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1182
1183 # It's a witness path, so add the witness
1184 my $base = $wit;
1185 my $key = $edge_data_keys{'witness'};
1186 # Is this an ante-corr witness?
1187 my $aclabel = $self->ac_label;
1188 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1189 # Keep the base witness
1190 $base = $1;
1191 # ...and record that this is an 'extra' reading path
1192 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1193 }
1194 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1195 }
1196 }
1197
cc31ebaa 1198 # Report the actual number of nodes and edges that went in
1199 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1200 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1201
22222af9 1202 # Add the relationship graph to the XML
bbd064a9 1203 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1204 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1205 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1206
94c00c71 1207 # Save and return the thing
1208 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1209 return $result;
df6d9812 1210}
1211
b15511bf 1212sub _add_graphml_data {
1213 my( $el, $key, $value ) = @_;
b15511bf 1214 return unless defined $value;
c9bf3dbf 1215 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1216 $data_el->setAttribute( 'key', $key );
1217 $data_el->appendText( $value );
8e1394aa 1218}
1219
4e5a7b2c 1220=head2 as_csv
910a0a6d 1221
1222Returns a CSV alignment table representation of the collation graph, one
2c669bca 1223row per witness (or witness uncorrected.)
910a0a6d 1224
1225=cut
1226
1227sub as_csv {
3a2ebbf4 1228 my( $self ) = @_;
1dd07bda 1229 my $table = $self->alignment_table;
82fa4d57 1230 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1231 my @result;
2c669bca 1232 # Make the header row
1233 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1234 push( @result, decode_utf8( $csv->string ) );
1235 # Make the rest of the rows
1236 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1237 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1238 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1239 $csv->combine( @row );
910a0a6d 1240 push( @result, decode_utf8( $csv->string ) );
1241 }
3a2ebbf4 1242 return join( "\n", @result );
910a0a6d 1243}
1244
248276a2 1245=head2 alignment_table
2c669bca 1246
566f4595 1247Return a reference to an alignment table, in a slightly enhanced CollateX
1248format which looks like this:
1249
1250 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1251 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1252 { witness => "SIG2",
4e5a7b2c 1253 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1254 ... ],
1255 length => TEXTLEN };
1256
2c669bca 1257=cut
9f3ba6f7 1258
1dd07bda 1259sub alignment_table {
1260 my( $self ) = @_;
c1915ab9 1261 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1262 return $self->cached_table if $self->has_cached_table;
1263
0ecb975c 1264 # Make sure we can do this
1265 throw( "Need a linear graph in order to make an alignment table" )
1266 unless $self->linear;
1267 $self->calculate_ranks unless $self->end->has_rank;
1268
2c669bca 1269 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1270 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1271 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
6771a1b1 1272 # say STDERR "Making witness row(s) for " . $wit->sigil;
1f7aa795 1273 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1274 my @row = _make_witness_row( \@wit_path, \@all_pos );
bed6ce83 1275 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1276 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1277 push( @{$table->{'alignment'}}, $witobj );
1f7aa795 1278 if( $wit->is_layered ) {
1279 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1280 $wit->sigil.$self->ac_label );
1dd07bda 1281 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
bed6ce83 1282 my $witacobj = { 'witness' => $wit->sigil.$self->ac_label,
1283 'tokens' => \@ac_row };
1284 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1285 push( @{$table->{'alignment'}}, $witacobj );
910a0a6d 1286 }
1287 }
1dd07bda 1288 $self->cached_table( $table );
1289 return $table;
910a0a6d 1290}
1291
1292sub _make_witness_row {
1dd07bda 1293 my( $path, $positions ) = @_;
910a0a6d 1294 my %char_hash;
1295 map { $char_hash{$_} = undef } @$positions;
2c669bca 1296 my $debug = 0;
910a0a6d 1297 foreach my $rdg ( @$path ) {
6771a1b1 1298 say STDERR "rank " . $rdg->rank if $debug;
1299 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1dd07bda 1300 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1301 }
1302 my @row = map { $char_hash{$_} } @$positions;
eca16057 1303 # Fill in lacuna markers for undef spots in the row
1304 my $last_el = shift @row;
1305 my @filled_row = ( $last_el );
1306 foreach my $el ( @row ) {
0e476982 1307 # If we are using node reference, make the lacuna node appear many times
1308 # in the table. If not, use the lacuna tag.
1dd07bda 1309 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1310 $el = $last_el;
eca16057 1311 }
1312 push( @filled_row, $el );
1313 $last_el = $el;
1314 }
1315 return @filled_row;
910a0a6d 1316}
1317
248276a2 1318
4e5a7b2c 1319=head1 NAVIGATION METHODS
910a0a6d 1320
4e5a7b2c 1321=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1322
1323Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1324with $last, for the witness given in $sigil. If a $backup sigil is
1325specified (e.g. when walking a layered witness), it will be used wherever
1326no $sigil path exists. If there is a base text reading, that will be
1327used wherever no path exists for $sigil or $backup.
e2902068 1328
1329=cut
1330
910a0a6d 1331# TODO Think about returning some lazy-eval iterator.
b0b4421a 1332# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1333
e2902068 1334sub reading_sequence {
861c3e27 1335 my( $self, $start, $end, $witness ) = @_;
e2902068 1336
930ff666 1337 $witness = $self->baselabel unless $witness;
e2902068 1338 my @readings = ( $start );
1339 my %seen;
1340 my $n = $start;
3a2ebbf4 1341 while( $n && $n->id ne $end->id ) {
1342 if( exists( $seen{$n->id} ) ) {
63778331 1343 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1344 }
3a2ebbf4 1345 $seen{$n->id} = 1;
910a0a6d 1346
861c3e27 1347 my $next = $self->next_reading( $n, $witness );
44771cf2 1348 unless( $next ) {
63778331 1349 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1350 }
910a0a6d 1351 push( @readings, $next );
1352 $n = $next;
e2902068 1353 }
1354 # Check that the last reading is our end reading.
1355 my $last = $readings[$#readings];
63778331 1356 throw( "Last reading found from " . $start->text .
1357 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1358 unless $last->id eq $end->id;
e2902068 1359
1360 return @readings;
1361}
1362
4e5a7b2c 1363=head2 next_reading( $reading, $sigil );
8e1394aa 1364
4a8828f0 1365Returns the reading that follows the given reading along the given witness
930ff666 1366path.
8e1394aa 1367
1368=cut
1369
4a8828f0 1370sub next_reading {
e2902068 1371 # Return the successor via the corresponding path.
8e1394aa 1372 my $self = shift;
3a2ebbf4 1373 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1374 return undef unless $answer;
3a2ebbf4 1375 return $self->reading( $answer );
8e1394aa 1376}
1377
4e5a7b2c 1378=head2 prior_reading( $reading, $sigil )
8e1394aa 1379
4a8828f0 1380Returns the reading that precedes the given reading along the given witness
930ff666 1381path.
8e1394aa 1382
1383=cut
1384
4a8828f0 1385sub prior_reading {
e2902068 1386 # Return the predecessor via the corresponding path.
8e1394aa 1387 my $self = shift;
3a2ebbf4 1388 my $answer = $self->_find_linked_reading( 'prior', @_ );
1389 return $self->reading( $answer );
8e1394aa 1390}
1391
4a8828f0 1392sub _find_linked_reading {
861c3e27 1393 my( $self, $direction, $node, $path ) = @_;
1394
1395 # Get a backup if we are dealing with a layered witness
1396 my $alt_path;
1397 my $aclabel = $self->ac_label;
1398 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1399 $alt_path = $1;
1400 }
1401
e2902068 1402 my @linked_paths = $direction eq 'next'
3a2ebbf4 1403 ? $self->sequence->edges_from( $node )
1404 : $self->sequence->edges_to( $node );
e2902068 1405 return undef unless scalar( @linked_paths );
8e1394aa 1406
e2902068 1407 # We have to find the linked path that contains all of the
1408 # witnesses supplied in $path.
1409 my( @path_wits, @alt_path_wits );
4e5a7b2c 1410 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1411 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1412 my $base_le;
1413 my $alt_le;
1414 foreach my $le ( @linked_paths ) {
3a2ebbf4 1415 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1416 $base_le = $le;
910a0a6d 1417 }
508fd430 1418 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1419 if( _is_within( \@path_wits, \@le_wits ) ) {
1420 # This is the right path.
1421 return $direction eq 'next' ? $le->[1] : $le->[0];
1422 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1423 $alt_le = $le;
1424 }
8e1394aa 1425 }
e2902068 1426 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1427 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1428 if $alt_le;
e2902068 1429
1430 # Got this far? Return the base path if it exists.
3a2ebbf4 1431 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1432 if $base_le;
e2902068 1433
1434 # Got this far? We have no appropriate path.
2c669bca 1435 warn "Could not find $direction node from " . $node->id
910a0a6d 1436 . " along path $path";
8e1394aa 1437 return undef;
1438}
1439
4a8828f0 1440# Some set logic.
1441sub _is_within {
1442 my( $set1, $set2 ) = @_;
7854e12e 1443 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1444 foreach my $el ( @$set1 ) {
910a0a6d 1445 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1446 }
1447 return $ret;
1448}
1449
4e5a7b2c 1450# Return the string that joins together a list of witnesses for
1451# display on a single path.
1452sub _witnesses_of_label {
1453 my( $self, $label ) = @_;
1454 my $regex = $self->wit_list_separator;
1455 my @answer = split( /\Q$regex\E/, $label );
1456 return @answer;
b0b4421a 1457}
1458
d4b75f44 1459=head2 common_readings
1460
1461Returns the list of common readings in the graph (i.e. those readings that are
1462shared by all non-lacunose witnesses.)
1463
1464=cut
1465
1466sub common_readings {
1467 my $self = shift;
1468 my @common = grep { $_->is_common } $self->readings;
1469 return @common;
1470}
1471
fae52efd 1472=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1473
1474Returns the text of a witness (plus its backup, if we are using a layer)
1475as stored in the collation. The text is returned as a string, where the
1476individual readings are joined with spaces and the meta-readings (e.g.
1477lacunae) are omitted. Optional specification of $start and $end allows
1478the generation of a subset of the witness text.
4e5a7b2c 1479
b0b4421a 1480=cut
1481
1482sub path_text {
861c3e27 1483 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1484 $start = $self->start unless $start;
1485 $end = $self->end unless $end;
861c3e27 1486 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1487 my $pathtext = '';
1488 my $last;
1489 foreach my $r ( @path ) {
6ad2ce78 1490 unless ( $r->join_prior || !$last || $last->join_next ) {
1491 $pathtext .= ' ';
1492 }
1493 $pathtext .= $r->text;
629e27b0 1494 $last = $r;
1495 }
1496 return $pathtext;
b0b4421a 1497}
4e5a7b2c 1498
1499=head1 INITIALIZATION METHODS
1500
1501These are mostly for use by parsers.
1502
1503=head2 make_witness_path( $witness )
1504
1505Link the array of readings contained in $witness->path (and in
1506$witness->uncorrected_path if it exists) into collation paths.
1507Clear out the arrays when finished.
de51424a 1508
4e5a7b2c 1509=head2 make_witness_paths
1510
1511Call make_witness_path for all witnesses in the tradition.
1512
1513=cut
930ff666 1514
7e450e44 1515# For use when a collation is constructed from a base text and an apparatus.
1516# We have the sequences of readings and just need to add path edges.
1f7aa795 1517# When we are done, clear out the witness path attributes, as they are no
1518# longer needed.
1519# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1520
6a222840 1521sub make_witness_paths {
1522 my( $self ) = @_;
910a0a6d 1523 foreach my $wit ( $self->tradition->witnesses ) {
6771a1b1 1524 # say STDERR "Making path for " . $wit->sigil;
910a0a6d 1525 $self->make_witness_path( $wit );
7854e12e 1526 }
7854e12e 1527}
1528
6a222840 1529sub make_witness_path {
7854e12e 1530 my( $self, $wit ) = @_;
1531 my @chain = @{$wit->path};
15d2d3df 1532 my $sig = $wit->sigil;
fae52efd 1533 # Add start and end if necessary
1534 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1535 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1536 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1537 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1538 }
1f7aa795 1539 if( $wit->is_layered ) {
d9e873d0 1540 @chain = @{$wit->uncorrected_path};
fae52efd 1541 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1542 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1543 foreach my $idx( 0 .. $#chain-1 ) {
1544 my $source = $chain[$idx];
1545 my $target = $chain[$idx+1];
1546 $self->add_path( $source, $target, $sig.$self->ac_label )
1547 unless $self->has_path( $source, $target, $sig );
1548 }
15d2d3df 1549 }
1f7aa795 1550 $wit->clear_path;
1551 $wit->clear_uncorrected_path;
e2902068 1552}
1553
4e5a7b2c 1554=head2 calculate_ranks
1555
1556Calculate the reading ranks (that is, their aligned positions relative
1557to each other) for the graph. This can only be called on linear collations.
1558
b365fbae 1559=begin testing
1560
1561use Text::Tradition;
1562
1563my $cxfile = 't/data/Collatex-16.xml';
1564my $t = Text::Tradition->new(
1565 'name' => 'inline',
1566 'input' => 'CollateX',
1567 'file' => $cxfile,
1568 );
1569my $c = $t->collation;
1570
1571# Make an svg
bfcbcecb 1572my $table = $c->alignment_table;
1573ok( $c->has_cached_table, "Alignment table was cached" );
1574is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1575$c->calculate_ranks;
bfcbcecb 1576is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1577$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1578isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1579
1580=end testing
1581
4e5a7b2c 1582=cut
1583
910a0a6d 1584sub calculate_ranks {
1585 my $self = shift;
b365fbae 1586 # Save the existing ranks, in case we need to invalidate the cached SVG.
1587 my %existing_ranks;
ac4d7ac2 1588 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1589
1590 # Do the rankings based on the relationship equivalence graph, starting
1591 # with the start node.
56772e8c 1592 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1593
910a0a6d 1594 # Transfer our rankings from the topological graph to the real one.
1595 foreach my $r ( $self->readings ) {
cecbe56d 1596 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 1597 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1598 } else {
63778331 1599 # Die. Find the last rank we calculated.
359944f7 1600 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1601 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1602 $self->readings;
1603 my $last = pop @all_defined;
1604 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1605 }
de51424a 1606 }
bfcbcecb 1607 # Do we need to invalidate the cached data?
be3af600 1608 if( $self->has_cached_table ) {
b365fbae 1609 foreach my $r ( $self->readings ) {
7c293912 1610 next if defined( $existing_ranks{$r} )
1611 && $existing_ranks{$r} == $r->rank;
c1915ab9 1612 # Something has changed, so clear the cache
bfcbcecb 1613 $self->_clear_cache;
c1915ab9 1614 # ...and recalculate the common readings.
1615 $self->calculate_common_readings();
b365fbae 1616 last;
1617 }
1618 }
c1915ab9 1619 # The graph calculation information is now up to date.
1620 $self->_graphcalc_done(1);
8e1394aa 1621}
3a1f2523 1622
c1915ab9 1623sub _clear_cache {
1624 my $self = shift;
c1915ab9 1625 $self->wipe_table if $self->has_cached_table;
1626}
1627
1628
4e5a7b2c 1629=head2 flatten_ranks
1630
1631A convenience method for parsing collation data. Searches the graph for readings
1632with the same text at the same rank, and merges any that are found.
1633
1634=cut
1635
0e476982 1636sub flatten_ranks {
1637 my $self = shift;
1638 my %unique_rank_rdg;
bf6e338d 1639 my $changed;
0e476982 1640 foreach my $rdg ( $self->readings ) {
1641 next unless $rdg->has_rank;
1642 my $key = $rdg->rank . "||" . $rdg->text;
1643 if( exists $unique_rank_rdg{$key} ) {
07e6765f 1644 # Make sure they don't have different grammatical forms
1645 my $ur = $unique_rank_rdg{$key};
a445ce40 1646 if( $rdg->is_identical( $ur ) ) {
1647 # Combine!
1648 #say STDERR "Combining readings at same rank: $key";
1649 $changed = 1;
1650 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1651 # TODO see if this now makes a common point.
07e6765f 1652 }
0e476982 1653 } else {
1654 $unique_rank_rdg{$key} = $rdg;
1655 }
1656 }
bf6e338d 1657 # If we merged readings, the ranks are still fine but the alignment
1658 # table is wrong. Wipe it.
1659 $self->wipe_table() if $changed;
0e476982 1660}
4633f9e4 1661
1662
d4b75f44 1663=head2 calculate_common_readings
1664
1665Goes through the graph identifying the readings that appear in every witness
1666(apart from those with lacunae at that spot.) Marks them as common and returns
1667the list.
1668
1669=begin testing
1670
1671use Text::Tradition;
1672
1673my $cxfile = 't/data/Collatex-16.xml';
1674my $t = Text::Tradition->new(
1675 'name' => 'inline',
1676 'input' => 'CollateX',
1677 'file' => $cxfile,
1678 );
1679my $c = $t->collation;
1680
1681my @common = $c->calculate_common_readings();
1682is( scalar @common, 8, "Found correct number of common readings" );
1683my @marked = sort $c->common_readings();
1684is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1685my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1686is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1687
1688=end testing
1689
1690=cut
1691
1692sub calculate_common_readings {
1693 my $self = shift;
1694 my @common;
c1915ab9 1695 map { $_->is_common( 0 ) } $self->readings;
1696 # Implicitly calls calculate_ranks
1dd07bda 1697 my $table = $self->alignment_table;
d4b75f44 1698 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1699 my @row = map { $_->{'tokens'}->[$idx]
1700 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1701 @{$table->{'alignment'}};
d4b75f44 1702 my %hash;
1703 foreach my $r ( @row ) {
1704 if( $r ) {
1705 $hash{$r->id} = $r unless $r->is_meta;
1706 } else {
1707 $hash{'UNDEF'} = $r;
1708 }
1709 }
1710 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1711 my( $r ) = values %hash;
1712 $r->is_common( 1 );
1713 push( @common, $r );
1714 }
1715 }
1716 return @common;
1717}
1718
861c3e27 1719=head2 text_from_paths
1720
1721Calculate the text array for all witnesses from the path, for later consistency
1722checking. Only to be used if there is no non-graph-based way to know the
1723original texts.
1724
1725=cut
1726
1727sub text_from_paths {
1728 my $self = shift;
1729 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1730 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1731 my @text;
1732 foreach my $r ( @readings ) {
1733 next if $r->is_meta;
1734 push( @text, $r->text );
1735 }
861c3e27 1736 $wit->text( \@text );
1737 if( $wit->is_layered ) {
5164a6f0 1738 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1739 $wit->sigil.$self->ac_label );
1740 my @uctext;
1741 foreach my $r ( @ucrdgs ) {
1742 next if $r->is_meta;
1743 push( @uctext, $r->text );
1744 }
1745 $wit->layertext( \@uctext );
861c3e27 1746 }
1747 }
1748}
0e476982 1749
4e5a7b2c 1750=head1 UTILITY FUNCTIONS
1751
1752=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1753
4e5a7b2c 1754Find the last reading that occurs in sequence before both the given readings.
414cc046 1755At the very least this should be $self->start.
4e5a7b2c 1756
1757=head2 common_successor( $reading_a, $reading_b )
1758
1759Find the first reading that occurs in sequence after both the given readings.
414cc046 1760At the very least this should be $self->end.
4e5a7b2c 1761
22222af9 1762=begin testing
1763
1764use Text::Tradition;
1765
1766my $cxfile = 't/data/Collatex-16.xml';
1767my $t = Text::Tradition->new(
1768 'name' => 'inline',
1769 'input' => 'CollateX',
1770 'file' => $cxfile,
1771 );
1772my $c = $t->collation;
1773
679f17e1 1774is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1775 'n20', "Found correct common predecessor" );
679f17e1 1776is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1777 '__END__', "Found correct common successor" );
22222af9 1778
4e5a7b2c 1779is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1780 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1781is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1782 '__END__', "Found correct common successor for readings on same path" );
22222af9 1783
1784=end testing
1785
1786=cut
1787
1788## Return the closest reading that is a predecessor of both the given readings.
1789sub common_predecessor {
1790 my $self = shift;
4e5a7b2c 1791 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1792 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1793}
1794
1795sub common_successor {
1796 my $self = shift;
4e5a7b2c 1797 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1798 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1799}
1800
414cc046 1801
1802# TODO think about how to do this without ranks...
027d819c 1803sub _common_in_path {
22222af9 1804 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1805 my $iter = $self->end->rank;
22222af9 1806 my @candidates;
414cc046 1807 my @last_r1 = ( $r1 );
1808 my @last_r2 = ( $r2 );
1809 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1810 my %all_seen;
6771a1b1 1811 # say STDERR "Finding common $dir for $r1, $r2";
22222af9 1812 while( !@candidates ) {
414cc046 1813 last unless $iter--; # Avoid looping infinitely
1814 # Iterate separately down the graph from r1 and r2
1815 my( @new_lc1, @new_lc2 );
1816 foreach my $lc ( @last_r1 ) {
1817 foreach my $p ( $lc->$dir ) {
1818 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
6771a1b1 1819 # say STDERR "Path candidate $p from $lc";
414cc046 1820 push( @candidates, $p );
002e3600 1821 } elsif( !$all_seen{$p->id} ) {
414cc046 1822 $all_seen{$p->id} = 'r1';
1823 push( @new_lc1, $p );
1824 }
1825 }
1826 }
1827 foreach my $lc ( @last_r2 ) {
22222af9 1828 foreach my $p ( $lc->$dir ) {
414cc046 1829 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
6771a1b1 1830 # say STDERR "Path candidate $p from $lc";
22222af9 1831 push( @candidates, $p );
002e3600 1832 } elsif( !$all_seen{$p->id} ) {
414cc046 1833 $all_seen{$p->id} = 'r2';
1834 push( @new_lc2, $p );
22222af9 1835 }
1836 }
1837 }
414cc046 1838 @last_r1 = @new_lc1;
1839 @last_r2 = @new_lc2;
22222af9 1840 }
1841 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1842 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1843}
1844
63778331 1845sub throw {
1846 Text::Tradition::Error->throw(
1847 'ident' => 'Collation error',
1848 'message' => $_[0],
1849 );
1850}
1851
dd3b58b0 1852no Moose;
1853__PACKAGE__->meta->make_immutable;
e867486f 1854
a445ce40 1855=head1 BUGS/TODO
1856
1857=over
1858
1859=item * Rework XML serialization in a more modular way
1860
1861=back
1862
027d819c 1863=head1 LICENSE
e867486f 1864
027d819c 1865This package is free software and is provided "as is" without express
1866or implied warranty. You can redistribute it and/or modify it under
1867the same terms as Perl itself.
e867486f 1868
027d819c 1869=head1 AUTHOR
e867486f 1870
027d819c 1871Tara L Andrews E<lt>aurum@cpan.orgE<gt>