load XML::LibXML only when required; handle global relationships more correctly;...
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
6771a1b1 3use feature 'say';
910a0a6d 4use Encode qw( decode_utf8 );
5use File::Temp;
bfcbcecb 6use File::Which;
c9bf3dbf 7use Graph;
8e1394aa 8use IPC::Run qw( run binary );
82fa4d57 9use Text::CSV;
b15511bf 10use Text::Tradition::Collation::Reading;
22222af9 11use Text::Tradition::Collation::RelationshipStore;
63778331 12use Text::Tradition::Error;
cc31ebaa 13use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
dd3b58b0 14use Moose;
15
3a2ebbf4 16has 'sequence' => (
d047cd52 17 is => 'ro',
3a2ebbf4 18 isa => 'Graph',
19 default => sub { Graph->new() },
d047cd52 20 handles => {
3a2ebbf4 21 paths => 'edges',
d047cd52 22 },
d047cd52 23 );
3a2ebbf4 24
25has 'relations' => (
26 is => 'ro',
22222af9 27 isa => 'Text::Tradition::Collation::RelationshipStore',
28 handles => {
29 relationships => 'relationships',
30 related_readings => 'related_readings',
202ccb18 31 get_relationship => 'get_relationship',
ee801e17 32 del_relationship => 'del_relationship',
359944f7 33 equivalence => 'equivalence',
34 equivalence_graph => 'equivalence_graph',
22222af9 35 },
36 writer => '_set_relations',
3a2ebbf4 37 );
dd3b58b0 38
3a2ebbf4 39has 'tradition' => (
40 is => 'ro',
d047cd52 41 isa => 'Text::Tradition',
8cfd99c4 42 writer => '_set_tradition',
8d9a1cd8 43 weak_ref => 1,
d047cd52 44 );
dd3b58b0 45
3a2ebbf4 46has 'readings' => (
47 isa => 'HashRef[Text::Tradition::Collation::Reading]',
48 traits => ['Hash'],
49 handles => {
50 reading => 'get',
51 _add_reading => 'set',
52 del_reading => 'delete',
53 has_reading => 'exists',
54 readings => 'values',
55 },
56 default => sub { {} },
57 );
910a0a6d 58
4a8828f0 59has 'wit_list_separator' => (
7854e12e 60 is => 'rw',
61 isa => 'Str',
62 default => ', ',
63 );
64
65has 'baselabel' => (
66 is => 'rw',
67 isa => 'Str',
68 default => 'base text',
69 );
4a8828f0 70
15d2d3df 71has 'linear' => (
72 is => 'rw',
73 isa => 'Bool',
74 default => 1,
75 );
c84275ff 76
ef9d481f 77has 'ac_label' => (
78 is => 'rw',
79 isa => 'Str',
80 default => ' (a.c.)',
81 );
3a2ebbf4 82
4e483aa5 83has 'wordsep' => (
84 is => 'rw',
85 isa => 'Str',
86 default => ' ',
87 );
88
3a2ebbf4 89has 'start' => (
90 is => 'ro',
91 isa => 'Text::Tradition::Collation::Reading',
92 writer => '_set_start',
93 weak_ref => 1,
94 );
95
96has 'end' => (
97 is => 'ro',
98 isa => 'Text::Tradition::Collation::Reading',
99 writer => '_set_end',
100 weak_ref => 1,
101 );
b365fbae 102
103has 'cached_svg' => (
104 is => 'rw',
105 isa => 'Str',
106 predicate => 'has_cached_svg',
107 clearer => 'wipe_svg',
108 );
1dd07bda 109
110has 'cached_table' => (
111 is => 'rw',
112 isa => 'HashRef',
113 predicate => 'has_cached_table',
114 clearer => 'wipe_table',
115 );
c1915ab9 116
117has '_graphcalc_done' => (
118 is => 'rw',
119 isa => 'Bool',
120 default => undef,
121 );
1f563ac3 122
4e5a7b2c 123=head1 NAME
124
125Text::Tradition::Collation - a software model for a text collation
126
127=head1 SYNOPSIS
128
129 use Text::Tradition;
130 my $t = Text::Tradition->new(
131 'name' => 'this is a text',
132 'input' => 'TEI',
133 'file' => '/path/to/tei_parallel_seg_file.xml' );
134
135 my $c = $t->collation;
136 my @readings = $c->readings;
137 my @paths = $c->paths;
138 my @relationships = $c->relationships;
139
140 my $svg_variant_graph = $t->collation->as_svg();
141
142=head1 DESCRIPTION
143
144Text::Tradition is a library for representation and analysis of collated
145texts, particularly medieval ones. The Collation is the central feature of
146a Tradition, where the text, its sequence of readings, and its relationships
147between readings are actually kept.
148
149=head1 CONSTRUCTOR
150
151=head2 new
152
153The constructor. Takes a hash or hashref of the following arguments:
154
155=over
156
157=item * tradition - The Text::Tradition object to which the collation
158belongs. Required.
159
160=item * linear - Whether the collation should be linear; that is, whether
161transposed readings should be treated as two linked readings rather than one,
162and therefore whether the collation graph is acyclic. Defaults to true.
163
4e5a7b2c 164=item * baselabel - The default label for the path taken by a base text
165(if any). Defaults to 'base text'.
166
167=item * wit_list_separator - The string to join a list of witnesses for
168purposes of making labels in display graphs. Defaults to ', '.
169
170=item * ac_label - The extra label to tack onto a witness sigil when
171representing another layer of path for the given witness - that is, when
172a text has more than one possible reading due to scribal corrections or
173the like. Defaults to ' (a.c.)'.
174
4e483aa5 175=item * wordsep - The string used to separate words in the original text.
176Defaults to ' '.
177
4e5a7b2c 178=back
179
180=head1 ACCESSORS
181
182=head2 tradition
183
184=head2 linear
185
4e5a7b2c 186=head2 wit_list_separator
187
188=head2 baselabel
189
190=head2 ac_label
191
4e483aa5 192=head2 wordsep
193
4e5a7b2c 194Simple accessors for collation attributes.
195
196=head2 start
197
198The meta-reading at the start of every witness path.
199
200=head2 end
201
202The meta-reading at the end of every witness path.
203
204=head2 readings
205
206Returns all Reading objects in the graph.
207
208=head2 reading( $id )
209
210Returns the Reading object corresponding to the given ID.
211
212=head2 add_reading( $reading_args )
213
214Adds a new reading object to the collation.
215See L<Text::Tradition::Collation::Reading> for the available arguments.
216
217=head2 del_reading( $object_or_id )
218
219Removes the given reading from the collation, implicitly removing its
220paths and relationships.
221
4e483aa5 222=head2 merge_readings( $main, $second, $concatenate, $with_str )
223
224Merges the $second reading into the $main one. If $concatenate is true, then
225the merged node will carry the text of both readings, concatenated with either
226$with_str (if specified) or a sensible default (the empty string if the
227appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 228
4e483aa5 229The first two arguments may be either readings or reading IDs.
4e5a7b2c 230
231=head2 has_reading( $id )
232
233Predicate to see whether a given reading ID is in the graph.
234
235=head2 reading_witnesses( $object_or_id )
236
237Returns a list of sigils whose witnesses contain the reading.
238
239=head2 paths
240
241Returns all reading paths within the document - that is, all edges in the
242collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
243
244=head2 add_path( $source, $target, $sigil )
245
246Links the given readings in the collation in sequence, under the given witness
247sigil. The readings may be specified by object or ID.
248
249=head2 del_path( $source, $target, $sigil )
250
251Links the given readings in the collation in sequence, under the given witness
252sigil. The readings may be specified by object or ID.
253
254=head2 has_path( $source, $target );
255
256Returns true if the two readings are linked in sequence in any witness.
257The readings may be specified by object or ID.
258
259=head2 relationships
260
261Returns all Relationship objects in the collation.
262
263=head2 add_relationship( $reading, $other_reading, $options )
264
265Adds a new relationship of the type given in $options between the two readings,
266which may be specified by object or ID. Returns a value of ( $status, @vectors)
267where $status is true on success, and @vectors is a list of relationship edges
268that were ultimately added.
269See L<Text::Tradition::Collation::Relationship> for the available options.
270
271=cut
dd3b58b0 272
d047cd52 273sub BUILD {
3a2ebbf4 274 my $self = shift;
22222af9 275 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
e4b73942 276 $self->_set_start( $self->add_reading(
277 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
278 $self->_set_end( $self->add_reading(
279 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
d047cd52 280}
784877d9 281
3a2ebbf4 282### Reading construct/destruct functions
283
284sub add_reading {
285 my( $self, $reading ) = @_;
286 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
287 my %args = %$reading;
e4b73942 288 if( $args{'init'} ) {
289 # If we are initializing an empty collation, don't assume that we
290 # have set a tradition.
291 delete $args{'init'};
292 } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
6ad2ce78 293 $args{'language'} = $self->tradition->language;
294 }
3a2ebbf4 295 $reading = Text::Tradition::Collation::Reading->new(
296 'collation' => $self,
297 %args );
298 }
299 # First check to see if a reading with this ID exists.
300 if( $self->reading( $reading->id ) ) {
63778331 301 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 302 }
c1915ab9 303 $self->_graphcalc_done(0);
3a2ebbf4 304 $self->_add_reading( $reading->id => $reading );
305 # Once the reading has been added, put it in both graphs.
306 $self->sequence->add_vertex( $reading->id );
cecbe56d 307 $self->relations->add_reading( $reading->id );
3a2ebbf4 308 return $reading;
eca16057 309};
310
3a2ebbf4 311around del_reading => sub {
312 my $orig = shift;
313 my $self = shift;
314 my $arg = shift;
315
cecbe56d 316 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
317 $arg = $arg->id;
3a2ebbf4 318 }
3a2ebbf4 319 # Remove the reading from the graphs.
c1915ab9 320 $self->_graphcalc_done(0);
4e483aa5 321 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 322 $self->sequence->delete_vertex( $arg );
323 $self->relations->delete_reading( $arg );
3a2ebbf4 324
325 # Carry on.
cecbe56d 326 $self->$orig( $arg );
3a2ebbf4 327};
7854e12e 328
4e483aa5 329=begin testing
330
331use Text::Tradition;
332
333my $cxfile = 't/data/Collatex-16.xml';
334my $t = Text::Tradition->new(
335 'name' => 'inline',
336 'input' => 'CollateX',
337 'file' => $cxfile,
338 );
339my $c = $t->collation;
340
341my $rno = scalar $c->readings;
342# Split n21 for testing purposes
343my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
344my $old_r = $c->reading( 'n21' );
345$old_r->alter_text( 'to' );
346$c->del_path( 'n20', 'n21', 'A' );
347$c->add_path( 'n20', 'n21p0', 'A' );
348$c->add_path( 'n21p0', 'n21', 'A' );
349$c->flatten_ranks();
350ok( $c->reading( 'n21p0' ), "New reading exists" );
351is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
352
679f17e1 353# Combine n3 and n4 ( with his )
4e483aa5 354$c->merge_readings( 'n3', 'n4', 1 );
355ok( !$c->reading('n4'), "Reading n4 is gone" );
356is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
357
679f17e1 358# Collapse n9 and n10 ( rood / root )
359$c->merge_readings( 'n9', 'n10' );
360ok( !$c->reading('n10'), "Reading n10 is gone" );
361is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 362
363# Combine n21 and n21p0
364my $remaining = $c->reading('n21');
365$remaining ||= $c->reading('n22'); # one of these should still exist
366$c->merge_readings( 'n21p0', $remaining, 1 );
367ok( !$c->reading('n21'), "Reading $remaining is gone" );
368is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
369
370=end testing
371
372=cut
7854e12e 373
3a2ebbf4 374sub merge_readings {
375 my $self = shift;
376
56772e8c 377 # Sanity check
378 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
379 my $mergemeta = $kept_obj->is_meta;
380 throw( "Cannot merge meta and non-meta reading" )
381 unless ( $mergemeta && $del_obj->is_meta )
382 || ( !$mergemeta && !$del_obj->is_meta );
383 if( $mergemeta ) {
384 throw( "Cannot merge with start or end node" )
385 if( $kept_obj eq $self->start || $kept_obj eq $self->end
386 || $del_obj eq $self->start || $del_obj eq $self->end );
387 }
3a2ebbf4 388 # We only need the IDs for adding paths to the graph, not the reading
389 # objects themselves.
56772e8c 390 my $kept = $kept_obj->id;
391 my $deleted = $del_obj->id;
c1915ab9 392 $self->_graphcalc_done(0);
10943ab0 393
3a2ebbf4 394 # The kept reading should inherit the paths and the relationships
395 # of the deleted reading.
396 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
397 my @vector = ( $kept );
398 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
399 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 400 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 401 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
402 $self->sequence->add_edge( @vector );
403 my $fwits = $self->sequence->get_edge_attributes( @vector );
404 @wits{keys %$fwits} = values %$fwits;
405 $self->sequence->set_edge_attributes( @vector, \%wits );
406 }
cecbe56d 407 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 408
409 # Do the deletion deed.
4e483aa5 410 if( $combine ) {
869a1ada 411 # Combine the text of the readings
4e483aa5 412 my $joinstr = $combine_char;
413 unless( defined $joinstr ) {
414 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
415 $joinstr = $self->wordsep unless defined $joinstr;
416 }
417 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
339786dd 418 # Change this reading to a joining one if necessary
419 $kept_obj->_set_join_next( $del_obj->join_next );
869a1ada 420 $kept_obj->normal_form(
421 join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
422 # Combine the lexemes present in the readings
423 if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
424 $kept_obj->add_lexeme( $del_obj->lexemes );
425 }
49d4f2ac 426 }
3a2ebbf4 427 $self->del_reading( $deleted );
428}
7854e12e 429
6771a1b1 430=head2 compress_readings
431
432Where possible in the graph, compresses plain sequences of readings into a
433single reading. The sequences must consist of readings with no
434relationships to other readings, with only a single witness path between
435them and no other witness paths from either that would skip the other. The
436readings must also not be marked as nonsense or bad grammar.
437
438WARNING: This operation cannot be undone.
439
440=cut
441
442sub compress_readings {
443 my $self = shift;
444 # Anywhere in the graph that there is a reading that joins only to a single
445 # successor, and neither of these have any relationships, just join the two
446 # readings.
447 my %gobbled;
448 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
428bcf0b 449 # While we are here, get rid of any extra wordforms from a disambiguated
450 # reading.
451 if( $rdg->disambiguated ) {
452 foreach my $lex ( $rdg->lexemes ) {
453 $lex->clear_matching_forms();
454 $lex->add_matching_form( $lex->form );
455 }
456 }
457 # Now look for readings that can be joined to their successors.
6771a1b1 458 next if $rdg->is_meta;
459 next if $gobbled{$rdg->id};
460 next if $rdg->grammar_invalid || $rdg->is_nonsense;
461 next if $rdg->related_readings();
462 my %seen;
463 while( $self->sequence->successors( $rdg ) == 1 ) {
464 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
465 throw( "Infinite loop" ) if $seen{$next->id};
466 $seen{$next->id} = 1;
467 last if $self->sequence->predecessors( $next ) > 1;
468 last if $next->is_meta;
469 last if $next->grammar_invalid || $next->is_nonsense;
470 last if $next->related_readings();
471 say "Joining readings $rdg and $next";
472 $self->merge_readings( $rdg, $next, 1 );
473 }
474 }
475 # Make sure we haven't screwed anything up
476 foreach my $wit ( $self->tradition->witnesses ) {
477 my $pathtext = $self->path_text( $wit->sigil );
478 my $origtext = join( ' ', @{$wit->text} );
479 throw( "Text differs for witness " . $wit->sigil )
480 unless $pathtext eq $origtext;
481 if( $wit->is_layered ) {
482 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
483 $origtext = join( ' ', @{$wit->layertext} );
484 throw( "Ante-corr text differs for witness " . $wit->sigil )
485 unless $pathtext eq $origtext;
486 }
487 }
488
489 $self->relations->rebuild_equivalence();
490 $self->calculate_ranks();
491}
3265b0ce 492
3a2ebbf4 493# Helper function for manipulating the graph.
494sub _stringify_args {
4e483aa5 495 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 496 $first = $first->id
497 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
498 $second = $second->id
499 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 500 return( $first, $second, @args );
3a2ebbf4 501}
df6d9812 502
4e5a7b2c 503# Helper function for manipulating the graph.
504sub _objectify_args {
505 my( $self, $first, $second, $arg ) = @_;
506 $first = $self->reading( $first )
507 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
508 $second = $self->reading( $second )
509 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
510 return( $first, $second, $arg );
511}
3a2ebbf4 512### Path logic
513
514sub add_path {
515 my $self = shift;
516
517 # We only need the IDs for adding paths to the graph, not the reading
518 # objects themselves.
cecbe56d 519 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 520
c1915ab9 521 $self->_graphcalc_done(0);
3a2ebbf4 522 # Connect the readings
359944f7 523 unless( $self->sequence->has_edge( $source, $target ) ) {
524 $self->sequence->add_edge( $source, $target );
525 $self->relations->add_equivalence_edge( $source, $target );
526 }
3a2ebbf4 527 # Note the witness in question
528 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 529}
b15511bf 530
3a2ebbf4 531sub del_path {
532 my $self = shift;
49d4f2ac 533 my @args;
534 if( ref( $_[0] ) eq 'ARRAY' ) {
535 my $e = shift @_;
536 @args = ( @$e, @_ );
537 } else {
538 @args = @_;
539 }
3a2ebbf4 540
541 # We only need the IDs for adding paths to the graph, not the reading
542 # objects themselves.
49d4f2ac 543 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 544
c1915ab9 545 $self->_graphcalc_done(0);
3a2ebbf4 546 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 547 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 548 }
549 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
550 $self->sequence->delete_edge( $source, $target );
359944f7 551 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 552 }
784877d9 553}
554
3a2ebbf4 555
15d2d3df 556# Extra graph-alike utility
557sub has_path {
3a2ebbf4 558 my $self = shift;
559 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
560 return undef unless $self->sequence->has_edge( $source, $target );
561 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 562}
563
4e5a7b2c 564=head2 clear_witness( @sigil_list )
3a2ebbf4 565
4e5a7b2c 566Clear the given witnesses out of the collation entirely, removing references
567to them in paths, and removing readings that belong only to them. Should only
568be called via $tradition->del_witness.
3a2ebbf4 569
570=cut
571
4e5a7b2c 572sub clear_witness {
573 my( $self, @sigils ) = @_;
574
c1915ab9 575 $self->_graphcalc_done(0);
4e5a7b2c 576 # Clear the witness(es) out of the paths
577 foreach my $e ( $self->paths ) {
578 foreach my $sig ( @sigils ) {
579 $self->del_path( $e, $sig );
580 }
581 }
582
583 # Clear out the newly unused readings
584 foreach my $r ( $self->readings ) {
585 unless( $self->reading_witnesses( $r ) ) {
586 $self->del_reading( $r );
587 }
588 }
589}
3a2ebbf4 590
591sub add_relationship {
592 my $self = shift;
22222af9 593 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 594 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 595 $self->_graphcalc_done(0);
63778331 596 return @vectors;
22222af9 597}
ef9d481f 598
ca6e6095 599around qw/ get_relationship del_relationship / => sub {
600 my $orig = shift;
601 my $self = shift;
602 my @args = @_;
603 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
604 @args = @{$_[0]};
605 }
606 my( $source, $target ) = $self->_stringify_args( @args );
607 $self->$orig( $source, $target );
608};
609
22222af9 610=head2 reading_witnesses( $reading )
910a0a6d 611
22222af9 612Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 613
22222af9 614=cut
1d310495 615
1d310495 616sub reading_witnesses {
617 my( $self, $reading ) = @_;
618 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 619 # arbitrarily chosen "incoming". Thus, special-case the start node.
620 if( $reading eq $self->start ) {
621 return map { $_->sigil } $self->tradition->witnesses;
622 }
1d310495 623 my %all_witnesses;
624 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
625 my $wits = $self->sequence->get_edge_attributes( @$e );
626 @all_witnesses{ keys %$wits } = 1;
627 }
c12bb878 628 my $acstr = $self->ac_label;
629 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
630 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
631 }
1d310495 632 return keys %all_witnesses;
910a0a6d 633}
634
4e5a7b2c 635=head1 OUTPUT METHODS
8e1394aa 636
0ecb975c 637=head2 as_svg( \%options )
8e1394aa 638
0068967c 639Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 640See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 641
642=cut
643
644sub as_svg {
0ecb975c 645 my( $self, $opts ) = @_;
bfcbcecb 646 throw( "Need GraphViz installed to output SVG" )
647 unless File::Which::which( 'dot' );
e247aad1 648 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 649 $self->calculate_ranks()
650 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
e247aad1 651 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 652 my @cmd = qw/dot -Tsvg/;
653 my( $svg, $err );
654 my $dotfile = File::Temp->new();
655 ## USE FOR DEBUGGING
656 # $dotfile->unlink_on_destroy(0);
657 binmode $dotfile, ':utf8';
658 print $dotfile $self->as_dot( $opts );
659 push( @cmd, $dotfile->filename );
660 run( \@cmd, ">", binary(), \$svg );
e247aad1 661 $svg = decode_utf8( $svg );
662 $self->cached_svg( $svg ) unless $want_subgraph;
663 return $svg;
664 } else {
665 return $self->cached_svg;
b365fbae 666 }
8e1394aa 667}
668
b22576c6 669
0ecb975c 670=head2 as_dot( \%options )
b22576c6 671
0ecb975c 672Returns a string that is the collation graph expressed in dot
673(i.e. GraphViz) format. Options include:
b22576c6 674
0ecb975c 675=over 4
b22576c6 676
0ecb975c 677=item * from
b22576c6 678
0ecb975c 679=item * to
df6d9812 680
0ecb975c 681=item * color_common
682
683=back
df6d9812 684
685=cut
686
687sub as_dot {
0ecb975c 688 my( $self, $opts ) = @_;
689 my $startrank = $opts->{'from'} if $opts;
690 my $endrank = $opts->{'to'} if $opts;
691 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 692 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
693 && $self->end->rank > 100;
6648ee3d 694 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 695
b22576c6 696 # Check the arguments
697 if( $startrank ) {
698 return if $endrank && $startrank > $endrank;
699 return if $startrank > $self->end->rank;
700 }
701 if( defined $endrank ) {
702 return if $endrank < 0;
f1b3b33a 703 $endrank = undef if $endrank == $self->end->rank;
b22576c6 704 }
705
67da8d6c 706 my $graph_name = $self->tradition->name;
707 $graph_name =~ s/[^\w\s]//g;
708 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 709
710 my %graph_attrs = (
711 'rankdir' => 'LR',
712 'bgcolor' => 'none',
713 );
714 my %node_attrs = (
b8990398 715 'fontsize' => 14,
f13b5582 716 'fillcolor' => 'white',
717 'style' => 'filled',
718 'shape' => 'ellipse'
719 );
720 my %edge_attrs = (
721 'arrowhead' => 'open',
722 'color' => '#000000',
723 'fontcolor' => '#000000',
724 );
725
67da8d6c 726 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 727 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
728 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 729
b22576c6 730 # Output substitute start/end readings if necessary
731 if( $startrank ) {
43c94341 732 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 733 }
734 if( $endrank ) {
43c94341 735 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 736 }
b365fbae 737 if( $STRAIGHTENHACK ) {
738 ## HACK part 1
43c94341 739 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
740 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 741 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
742 }
b22576c6 743 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 744 # Sort the readings by rank if we have ranks; this speeds layout.
745 my @all_readings = $self->end->has_rank
746 ? sort { $a->rank <=> $b->rank } $self->readings
747 : $self->readings;
4633f9e4 748 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 749 foreach my $reading ( @all_readings ) {
b22576c6 750 # Only output readings within our rank range.
751 next if $startrank && $reading->rank < $startrank;
752 next if $endrank && $reading->rank > $endrank;
753 $used{$reading->id} = 1;
910a0a6d 754 # Need not output nodes without separate labels
3a2ebbf4 755 next if $reading->id eq $reading->text;
d4b75f44 756 my $rattrs;
30f0df34 757 my $label = $reading->text;
629e27b0 758 $label .= '-' if $reading->join_next;
759 $label = "-$label" if $reading->join_prior;
8f9cab7b 760 $label =~ s/\"/\\\"/g;
d4b75f44 761 $rattrs->{'label'} = $label;
10e4b1ac 762 $rattrs->{'id'} = $reading->id;
0ecb975c 763 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 764 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 765 }
3a2ebbf4 766
30ddc24c 767 # Add the real edges. Need to weight one edge per rank jump, in a
768 # continuous line.
b365fbae 769 # my $weighted = $self->_add_edge_weights;
b22576c6 770 my @edges = $self->paths;
3bdec618 771 my( %substart, %subend );
b22576c6 772 foreach my $edge ( @edges ) {
773 # Do we need to output this edge?
508fd430 774 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 775 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 776 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 777
b22576c6 778 # Account for the rank gap if necessary
30ddc24c 779 my $rank0 = $self->reading( $edge->[0] )->rank
780 if $self->reading( $edge->[0] )->has_rank;
781 my $rank1 = $self->reading( $edge->[1] )->rank
782 if $self->reading( $edge->[1] )->has_rank;
783 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
784 $variables->{'minlen'} = $rank1 - $rank0;
785 }
786
787 # Add the calculated edge weights
b365fbae 788 # if( exists $weighted->{$edge->[0]}
e247aad1 789 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
790 # # $variables->{'color'} = 'red';
791 # $variables->{'weight'} = 3.0;
792 # }
30ddc24c 793
508fd430 794 # EXPERIMENTAL: make edge width reflect no. of witnesses
795 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
796 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
797
f13b5582 798 my $varopts = _dot_attr_string( $variables );
799 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
800 $edge->[0], $edge->[1], $varopts );
3bdec618 801 } elsif( $used{$edge->[0]} ) {
96ba0418 802 $subend{$edge->[0]} = $edge->[1];
3bdec618 803 } elsif( $used{$edge->[1]} ) {
96ba0418 804 $substart{$edge->[1]} = $edge->[0];
b22576c6 805 }
df6d9812 806 }
3bdec618 807 # Add substitute start and end edges if necessary
808 foreach my $node ( keys %substart ) {
96ba0418 809 my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
f13b5582 810 my $variables = { %edge_attrs, 'label' => $witstr };
96ba0418 811 my $nrdg = $self->reading( $node );
812 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
813 # Substart is actually one lower than $startrank
814 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
815 }
f13b5582 816 my $varopts = _dot_attr_string( $variables );
96ba0418 817 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
3bdec618 818 }
819 foreach my $node ( keys %subend ) {
96ba0418 820 my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
f13b5582 821 my $variables = { %edge_attrs, 'label' => $witstr };
822 my $varopts = _dot_attr_string( $variables );
96ba0418 823 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
3bdec618 824 }
b365fbae 825 # HACK part 2
826 if( $STRAIGHTENHACK ) {
43c94341 827 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
828 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 829 }
30ddc24c 830
df6d9812 831 $dot .= "}\n";
832 return $dot;
833}
834
f13b5582 835sub _dot_attr_string {
836 my( $hash ) = @_;
837 my @attrs;
838 foreach my $k ( sort keys %$hash ) {
839 my $v = $hash->{$k};
840 push( @attrs, $k.'="'.$v.'"' );
841 }
842 return( '[ ' . join( ', ', @attrs ) . ' ]' );
843}
844
30ddc24c 845sub _add_edge_weights {
846 my $self = shift;
847 # Walk the graph from START to END, choosing the successor node with
848 # the largest number of witness paths each time.
849 my $weighted = {};
850 my $curr = $self->start->id;
008fc8a6 851 my $ranked = $self->end->has_rank;
30ddc24c 852 while( $curr ne $self->end->id ) {
008fc8a6 853 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 854 my @succ = sort { $self->path_witnesses( $curr, $a )
855 <=> $self->path_witnesses( $curr, $b ) }
856 $self->sequence->successors( $curr );
857 my $next = pop @succ;
008fc8a6 858 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 859 # Try to avoid lacunae in the weighted path.
008fc8a6 860 while( @succ &&
861 ( $self->reading( $next )->is_lacuna ||
862 $nextrank - $rank > 1 ) ){
30ddc24c 863 $next = pop @succ;
864 }
865 $weighted->{$curr} = $next;
866 $curr = $next;
867 }
868 return $weighted;
869}
870
027d819c 871=head2 path_witnesses( $edge )
872
873Returns the list of sigils whose witnesses are associated with the given edge.
874The edge can be passed as either an array or an arrayref of ( $source, $target ).
875
876=cut
877
3a2ebbf4 878sub path_witnesses {
879 my( $self, @edge ) = @_;
880 # If edge is an arrayref, cope.
881 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
882 my $e = shift @edge;
883 @edge = @$e;
884 }
885 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 886 return @wits;
3a2ebbf4 887}
888
7f9f05e8 889# Helper function. Make a display label for the given witnesses, showing a.c.
890# witnesses only where the main witness is not also in the list.
027d819c 891sub _path_display_label {
508fd430 892 my $self = shift;
7f9f05e8 893 my %wits;
894 map { $wits{$_} = 1 } @_;
895
896 # If an a.c. wit is listed, remove it if the main wit is also listed.
897 # Otherwise keep it for explicit listing.
898 my $aclabel = $self->ac_label;
899 my @disp_ac;
900 foreach my $w ( sort keys %wits ) {
901 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
902 if( exists $wits{$1} ) {
903 delete $wits{$w};
904 } else {
905 push( @disp_ac, $w );
906 }
907 }
908 }
909
910 # See if we are in a majority situation.
8f9cab7b 911 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1ff82d4f 912 $maj = $maj > 5 ? $maj : 5;
7f9f05e8 913 if( scalar keys %wits > $maj ) {
914 unshift( @disp_ac, 'majority' );
915 return join( ', ', @disp_ac );
8f9cab7b 916 } else {
7f9f05e8 917 return join( ', ', sort keys %wits );
8f9cab7b 918 }
919}
1dd07bda 920
bf6e338d 921=head2 readings_at_rank( $rank )
1dd07bda 922
bf6e338d 923Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 924
925=cut
926
bf6e338d 927sub readings_at_rank {
1dd07bda 928 my( $self, $rank ) = @_;
bf6e338d 929 my $table = $self->alignment_table;
930 # Table rank is real rank - 1.
931 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
932 my %readings;
933 foreach my $e ( @elements ) {
934 next unless ref( $e ) eq 'HASH';
935 next unless exists $e->{'t'};
936 $readings{$e->{'t'}->id} = $e->{'t'};
937 }
938 return values %readings;
1dd07bda 939}
8f9cab7b 940
4e5a7b2c 941=head2 as_graphml
8e1394aa 942
4e5a7b2c 943Returns a GraphML representation of the collation. The GraphML will contain
944two graphs. The first expresses the attributes of the readings and the witness
945paths that link them; the second expresses the relationships that link the
946readings. This is the native transfer format for a tradition.
8e1394aa 947
56eefa04 948=begin testing
949
950use Text::Tradition;
951
952my $READINGS = 311;
953my $PATHS = 361;
954
955my $datafile = 't/data/florilegium_tei_ps.xml';
956my $tradition = Text::Tradition->new( 'input' => 'TEI',
957 'name' => 'test0',
958 'file' => $datafile,
959 'linear' => 1 );
960
961ok( $tradition, "Got a tradition object" );
962is( scalar $tradition->witnesses, 13, "Found all witnesses" );
963ok( $tradition->collation, "Tradition has a collation" );
964
965my $c = $tradition->collation;
966is( scalar $c->readings, $READINGS, "Collation has all readings" );
967is( scalar $c->paths, $PATHS, "Collation has all paths" );
968is( scalar $c->relationships, 0, "Collation has all relationships" );
969
970# Add a few relationships
971$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
972$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
973$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
974
975# Now write it to GraphML and parse it again.
976
977my $graphml = $c->as_graphml;
978my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
979is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
980is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
981is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
982
9fef629b 983# Now add a stemma, write to GraphML, and look at the output.
2a812726 984my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
985is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
986is( $tradition->stemmata, 1, "Tradition now has the stemma" );
987$graphml = $c->as_graphml;
988like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
989
9fef629b 990# Now add a user, write to GraphML, and look at the output.
991unlike( $graphml, qr/testuser/, "Test user name does not exist in GraphML yet" );
992my $testuser = Text::Tradition::User->new(
993 id => 'testuser', password => 'testpass' );
994is( ref( $testuser ), 'Text::Tradition::User', "Created test user object" );
995$testuser->add_tradition( $tradition );
996is( $tradition->user->id, $testuser->id, "Tradition assigned to test user" );
997$graphml = $c->as_graphml;
998like( $graphml, qr/testuser/, "Test user name now exists in GraphML" );
999
56eefa04 1000=end testing
1001
8e1394aa 1002=cut
1003
1004sub as_graphml {
a30ca502 1005 my( $self, $options ) = @_;
3d14b48e 1006 $self->calculate_ranks unless $self->_graphcalc_done;
1007
a30ca502 1008 my $start = $options->{'from'}
1009 ? $self->reading( $options->{'from'} ) : $self->start;
1010 my $end = $options->{'to'}
1011 ? $self->reading( $options->{'to'} ) : $self->end;
1012 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1013 throw( 'Start node must be before end node' );
1014 }
1015 # The readings need to be ranked for this to work.
1016 $start = $self->start unless $start->has_rank;
1017 $end = $self->end unless $end->has_rank;
414cc046 1018 my $rankoffset = 0;
1019 unless( $start eq $self->start ) {
1020 $rankoffset = $start->rank - 1;
1021 }
a30ca502 1022 my %use_readings;
1023
8e1394aa 1024 # Some namespaces
1025 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1026 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1027 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 1028 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 1029
1030 # Create the document and root node
428bcf0b 1031 require XML::LibXML;
8e1394aa 1032 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1033 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1034 $graphml->setDocumentElement( $root );
1035 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1036 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 1037
1038 # List of attribute types to save on our objects and their corresponding
1039 # GraphML types
1040 my %save_types = (
1041 'Str' => 'string',
1042 'Int' => 'int',
1043 'Bool' => 'boolean',
10e4b1ac 1044 'ReadingID' => 'string',
bbd064a9 1045 'RelationshipType' => 'string',
1046 'RelationshipScope' => 'string',
1047 );
1048
1049 # List of attribute names *not* to save on our objects.
1050 # We will also not save any attribute beginning with _.
1051 my %skipsave;
1052 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 1053
bbd064a9 1054 # Add the data keys for the graph. Include an extra key 'version' for the
1055 # GraphML output version.
e309421a 1056 my %graph_data_keys;
1057 my $gdi = 0;
bbd064a9 1058 my %graph_attributes = ( 'version' => 'string' );
1059 # Graph attributes include those of Tradition and those of Collation.
1060 my %gattr_from;
1061 my $tmeta = $self->tradition->meta;
1062 my $cmeta = $self->meta;
1063 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1064 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1065 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1066 next if $attr->name =~ /^_/;
1067 next if $skipsave{$attr->name};
1068 next unless $save_types{$attr->type_constraint->name};
1069 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1070 }
9fef629b 1071 # Extra custom keys for complex objects that should be saved in some form.
1072 # The subroutine should return a string, or undef/empty.
1073 $graph_attributes{'stemmata'} = sub {
1074 my @stemstrs;
1075 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1076 $self->tradition->stemmata;
1077 join( "\n", @stemstrs );
1078 };
1079 $graph_attributes{'user'} = sub {
1080 $self->tradition->user ? $self->tradition->user->id : undef
1081 };
bbd064a9 1082
1083 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 1084 $graph_data_keys{$datum} = 'dg'.$gdi++;
1085 my $key = $root->addNewChild( $graphml_ns, 'key' );
9fef629b 1086 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1087 : $graph_attributes{$datum};
e309421a 1088 $key->setAttribute( 'attr.name', $datum );
9fef629b 1089 $key->setAttribute( 'attr.type', $dtype );
e309421a 1090 $key->setAttribute( 'for', 'graph' );
1091 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1092 }
f6066bac 1093
bbd064a9 1094 # Add the data keys for reading nodes
1095 my %reading_attributes;
1096 my $rmeta = Text::Tradition::Collation::Reading->meta;
1097 foreach my $attr( $rmeta->get_all_attributes ) {
1098 next if $attr->name =~ /^_/;
1099 next if $skipsave{$attr->name};
1100 next unless $save_types{$attr->type_constraint->name};
1101 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1102 }
7cd9f181 1103 # Extra custom key for the reading morphology
1104 $reading_attributes{'lexemes'} = 'string';
1105
ef9d481f 1106 my %node_data_keys;
1107 my $ndi = 0;
bbd064a9 1108 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1109 $node_data_keys{$datum} = 'dn'.$ndi++;
1110 my $key = $root->addNewChild( $graphml_ns, 'key' );
1111 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1112 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1113 $key->setAttribute( 'for', 'node' );
1114 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1115 }
1116
bbd064a9 1117 # Add the data keys for edges, that is, paths and relationships. Path
1118 # data does not come from a Moose class so is here manually.
ef9d481f 1119 my $edi = 0;
1120 my %edge_data_keys;
bbd064a9 1121 my %edge_attributes = (
3a2ebbf4 1122 witness => 'string', # ID/label for a path
3a2ebbf4 1123 extra => 'boolean', # Path key
3a2ebbf4 1124 );
bbd064a9 1125 my @path_attributes = keys %edge_attributes; # track our manual additions
1126 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1127 foreach my $attr( $pmeta->get_all_attributes ) {
1128 next if $attr->name =~ /^_/;
1129 next if $skipsave{$attr->name};
1130 next unless $save_types{$attr->type_constraint->name};
1131 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1132 }
1133 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1134 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1135 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1136 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1137 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1138 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1139 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1140 }
3a2ebbf4 1141
cc31ebaa 1142 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1143 my $xmlidname = $self->tradition->name;
1144 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1145 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1146 $xmlidname = '_'.$xmlidname;
1147 }
2c669bca 1148 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1149 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1150 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1151 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1152 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1153 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1154 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1155 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1156
2a812726 1157 # Tradition/collation attribute data
bbd064a9 1158 foreach my $datum ( keys %graph_attributes ) {
1159 my $value;
1160 if( $datum eq 'version' ) {
2a812726 1161 $value = '3.2';
9fef629b 1162 } elsif( ref( $graph_attributes{$datum} ) ) {
1163 my $sub = $graph_attributes{$datum};
1164 $value = &$sub();
bbd064a9 1165 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1166 $value = $self->tradition->$datum;
1167 } else {
1168 $value = $self->$datum;
1169 }
2c669bca 1170 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1171 }
8e1394aa 1172
1173 my $node_ctr = 0;
1174 my %node_hash;
22222af9 1175 # Add our readings to the graph
3a2ebbf4 1176 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1177 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1178 ( $n->rank < $start->rank || $n->rank > $end->rank );
1179 $use_readings{$n->id} = 1;
2c669bca 1180 # Add to the main graph
1181 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1182 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1183 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1184 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1185 foreach my $d ( keys %reading_attributes ) {
255875b8 1186 my $nval = $n->$d;
7cd9f181 1187 # Custom serialization
1188 if( $d eq 'lexemes' ) {
1189 # If nval is a true value, we have lexemes so we need to
1190 # serialize them. Otherwise set nval to undef so that the
1191 # key is excluded from this reading.
1192 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1193 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1194 $nval = undef;
7cd9f181 1195 }
cc31ebaa 1196 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1197 # Adjust the ranks within the subgraph.
cc31ebaa 1198 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1199 : $nval - $rankoffset;
414cc046 1200 }
255875b8 1201 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1202 if defined $nval;
1203 }
b15511bf 1204 }
1205
2c669bca 1206 # Add the path edges to the sequence graph
df6d9812 1207 my $edge_ctr = 0;
3a2ebbf4 1208 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1209 # We add an edge in the graphml for every witness in $e.
a30ca502 1210 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1211 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1212 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1213 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1214 # Skip any path from start to end; that witness is not in the subgraph.
1215 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1216 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1217 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1218 $node_hash{ $e->[0] },
1219 $node_hash{ $e->[1] } );
2c669bca 1220 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1221 $edge_el->setAttribute( 'source', $from );
1222 $edge_el->setAttribute( 'target', $to );
1223 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1224
1225 # It's a witness path, so add the witness
1226 my $base = $wit;
1227 my $key = $edge_data_keys{'witness'};
1228 # Is this an ante-corr witness?
1229 my $aclabel = $self->ac_label;
1230 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1231 # Keep the base witness
1232 $base = $1;
1233 # ...and record that this is an 'extra' reading path
1234 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1235 }
1236 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1237 }
1238 }
1239
cc31ebaa 1240 # Report the actual number of nodes and edges that went in
1241 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1242 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1243
22222af9 1244 # Add the relationship graph to the XML
bbd064a9 1245 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1246 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1247 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1248
94c00c71 1249 # Save and return the thing
1250 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1251 return $result;
df6d9812 1252}
1253
b15511bf 1254sub _add_graphml_data {
1255 my( $el, $key, $value ) = @_;
b15511bf 1256 return unless defined $value;
c9bf3dbf 1257 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1258 $data_el->setAttribute( 'key', $key );
1259 $data_el->appendText( $value );
8e1394aa 1260}
1261
4e5a7b2c 1262=head2 as_csv
910a0a6d 1263
1264Returns a CSV alignment table representation of the collation graph, one
2c669bca 1265row per witness (or witness uncorrected.)
910a0a6d 1266
1267=cut
1268
1269sub as_csv {
3a2ebbf4 1270 my( $self ) = @_;
1dd07bda 1271 my $table = $self->alignment_table;
82fa4d57 1272 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1273 my @result;
2c669bca 1274 # Make the header row
1275 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1276 push( @result, decode_utf8( $csv->string ) );
1277 # Make the rest of the rows
1278 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1279 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1280 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1281 $csv->combine( @row );
910a0a6d 1282 push( @result, decode_utf8( $csv->string ) );
1283 }
3a2ebbf4 1284 return join( "\n", @result );
910a0a6d 1285}
1286
1dd07bda 1287=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1288
566f4595 1289Return a reference to an alignment table, in a slightly enhanced CollateX
1290format which looks like this:
1291
1292 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1293 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1294 { witness => "SIG2",
4e5a7b2c 1295 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1296 ... ],
1297 length => TEXTLEN };
1298
1299If $use_refs is set to 1, the reading object is returned in the table
1300instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1301
1302If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1303keys have a true hash value will be included.
2c669bca 1304
1305=cut
9f3ba6f7 1306
1dd07bda 1307sub alignment_table {
1308 my( $self ) = @_;
c1915ab9 1309 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1310 return $self->cached_table if $self->has_cached_table;
1311
0ecb975c 1312 # Make sure we can do this
1313 throw( "Need a linear graph in order to make an alignment table" )
1314 unless $self->linear;
1315 $self->calculate_ranks unless $self->end->has_rank;
1316
2c669bca 1317 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1318 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1319 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
6771a1b1 1320 # say STDERR "Making witness row(s) for " . $wit->sigil;
1f7aa795 1321 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1322 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1323 push( @{$table->{'alignment'}},
1324 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1325 if( $wit->is_layered ) {
1326 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1327 $wit->sigil.$self->ac_label );
1dd07bda 1328 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1329 push( @{$table->{'alignment'}},
1330 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1331 }
1332 }
1dd07bda 1333 $self->cached_table( $table );
1334 return $table;
910a0a6d 1335}
1336
1337sub _make_witness_row {
1dd07bda 1338 my( $path, $positions ) = @_;
910a0a6d 1339 my %char_hash;
1340 map { $char_hash{$_} = undef } @$positions;
2c669bca 1341 my $debug = 0;
910a0a6d 1342 foreach my $rdg ( @$path ) {
eca16057 1343 my $rtext = $rdg->text;
1344 $rtext = '#LACUNA#' if $rdg->is_lacuna;
6771a1b1 1345 say STDERR "rank " . $rdg->rank if $debug;
1346 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1dd07bda 1347 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1348 }
1349 my @row = map { $char_hash{$_} } @$positions;
eca16057 1350 # Fill in lacuna markers for undef spots in the row
1351 my $last_el = shift @row;
1352 my @filled_row = ( $last_el );
1353 foreach my $el ( @row ) {
0e476982 1354 # If we are using node reference, make the lacuna node appear many times
1355 # in the table. If not, use the lacuna tag.
1dd07bda 1356 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1357 $el = $last_el;
eca16057 1358 }
1359 push( @filled_row, $el );
1360 $last_el = $el;
1361 }
1362 return @filled_row;
910a0a6d 1363}
1364
4e5a7b2c 1365=head1 NAVIGATION METHODS
910a0a6d 1366
4e5a7b2c 1367=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1368
1369Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1370with $last, for the witness given in $sigil. If a $backup sigil is
1371specified (e.g. when walking a layered witness), it will be used wherever
1372no $sigil path exists. If there is a base text reading, that will be
1373used wherever no path exists for $sigil or $backup.
e2902068 1374
1375=cut
1376
910a0a6d 1377# TODO Think about returning some lazy-eval iterator.
b0b4421a 1378# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1379
e2902068 1380sub reading_sequence {
861c3e27 1381 my( $self, $start, $end, $witness ) = @_;
e2902068 1382
930ff666 1383 $witness = $self->baselabel unless $witness;
e2902068 1384 my @readings = ( $start );
1385 my %seen;
1386 my $n = $start;
3a2ebbf4 1387 while( $n && $n->id ne $end->id ) {
1388 if( exists( $seen{$n->id} ) ) {
63778331 1389 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1390 }
3a2ebbf4 1391 $seen{$n->id} = 1;
910a0a6d 1392
861c3e27 1393 my $next = $self->next_reading( $n, $witness );
44771cf2 1394 unless( $next ) {
63778331 1395 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1396 }
910a0a6d 1397 push( @readings, $next );
1398 $n = $next;
e2902068 1399 }
1400 # Check that the last reading is our end reading.
1401 my $last = $readings[$#readings];
63778331 1402 throw( "Last reading found from " . $start->text .
1403 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1404 unless $last->id eq $end->id;
e2902068 1405
1406 return @readings;
1407}
1408
4e5a7b2c 1409=head2 next_reading( $reading, $sigil );
8e1394aa 1410
4a8828f0 1411Returns the reading that follows the given reading along the given witness
930ff666 1412path.
8e1394aa 1413
1414=cut
1415
4a8828f0 1416sub next_reading {
e2902068 1417 # Return the successor via the corresponding path.
8e1394aa 1418 my $self = shift;
3a2ebbf4 1419 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1420 return undef unless $answer;
3a2ebbf4 1421 return $self->reading( $answer );
8e1394aa 1422}
1423
4e5a7b2c 1424=head2 prior_reading( $reading, $sigil )
8e1394aa 1425
4a8828f0 1426Returns the reading that precedes the given reading along the given witness
930ff666 1427path.
8e1394aa 1428
1429=cut
1430
4a8828f0 1431sub prior_reading {
e2902068 1432 # Return the predecessor via the corresponding path.
8e1394aa 1433 my $self = shift;
3a2ebbf4 1434 my $answer = $self->_find_linked_reading( 'prior', @_ );
1435 return $self->reading( $answer );
8e1394aa 1436}
1437
4a8828f0 1438sub _find_linked_reading {
861c3e27 1439 my( $self, $direction, $node, $path ) = @_;
1440
1441 # Get a backup if we are dealing with a layered witness
1442 my $alt_path;
1443 my $aclabel = $self->ac_label;
1444 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1445 $alt_path = $1;
1446 }
1447
e2902068 1448 my @linked_paths = $direction eq 'next'
3a2ebbf4 1449 ? $self->sequence->edges_from( $node )
1450 : $self->sequence->edges_to( $node );
e2902068 1451 return undef unless scalar( @linked_paths );
8e1394aa 1452
e2902068 1453 # We have to find the linked path that contains all of the
1454 # witnesses supplied in $path.
1455 my( @path_wits, @alt_path_wits );
4e5a7b2c 1456 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1457 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1458 my $base_le;
1459 my $alt_le;
1460 foreach my $le ( @linked_paths ) {
3a2ebbf4 1461 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1462 $base_le = $le;
910a0a6d 1463 }
508fd430 1464 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1465 if( _is_within( \@path_wits, \@le_wits ) ) {
1466 # This is the right path.
1467 return $direction eq 'next' ? $le->[1] : $le->[0];
1468 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1469 $alt_le = $le;
1470 }
8e1394aa 1471 }
e2902068 1472 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1473 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1474 if $alt_le;
e2902068 1475
1476 # Got this far? Return the base path if it exists.
3a2ebbf4 1477 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1478 if $base_le;
e2902068 1479
1480 # Got this far? We have no appropriate path.
2c669bca 1481 warn "Could not find $direction node from " . $node->id
910a0a6d 1482 . " along path $path";
8e1394aa 1483 return undef;
1484}
1485
4a8828f0 1486# Some set logic.
1487sub _is_within {
1488 my( $set1, $set2 ) = @_;
7854e12e 1489 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1490 foreach my $el ( @$set1 ) {
910a0a6d 1491 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1492 }
1493 return $ret;
1494}
1495
4e5a7b2c 1496# Return the string that joins together a list of witnesses for
1497# display on a single path.
1498sub _witnesses_of_label {
1499 my( $self, $label ) = @_;
1500 my $regex = $self->wit_list_separator;
1501 my @answer = split( /\Q$regex\E/, $label );
1502 return @answer;
b0b4421a 1503}
1504
d4b75f44 1505=head2 common_readings
1506
1507Returns the list of common readings in the graph (i.e. those readings that are
1508shared by all non-lacunose witnesses.)
1509
1510=cut
1511
1512sub common_readings {
1513 my $self = shift;
1514 my @common = grep { $_->is_common } $self->readings;
1515 return @common;
1516}
1517
fae52efd 1518=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1519
1520Returns the text of a witness (plus its backup, if we are using a layer)
1521as stored in the collation. The text is returned as a string, where the
1522individual readings are joined with spaces and the meta-readings (e.g.
1523lacunae) are omitted. Optional specification of $start and $end allows
1524the generation of a subset of the witness text.
4e5a7b2c 1525
b0b4421a 1526=cut
1527
1528sub path_text {
861c3e27 1529 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1530 $start = $self->start unless $start;
1531 $end = $self->end unless $end;
861c3e27 1532 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1533 my $pathtext = '';
1534 my $last;
1535 foreach my $r ( @path ) {
6ad2ce78 1536 unless ( $r->join_prior || !$last || $last->join_next ) {
1537 $pathtext .= ' ';
1538 }
1539 $pathtext .= $r->text;
629e27b0 1540 $last = $r;
1541 }
1542 return $pathtext;
b0b4421a 1543}
4e5a7b2c 1544
1545=head1 INITIALIZATION METHODS
1546
1547These are mostly for use by parsers.
1548
1549=head2 make_witness_path( $witness )
1550
1551Link the array of readings contained in $witness->path (and in
1552$witness->uncorrected_path if it exists) into collation paths.
1553Clear out the arrays when finished.
de51424a 1554
4e5a7b2c 1555=head2 make_witness_paths
1556
1557Call make_witness_path for all witnesses in the tradition.
1558
1559=cut
930ff666 1560
7e450e44 1561# For use when a collation is constructed from a base text and an apparatus.
1562# We have the sequences of readings and just need to add path edges.
1f7aa795 1563# When we are done, clear out the witness path attributes, as they are no
1564# longer needed.
1565# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1566
6a222840 1567sub make_witness_paths {
1568 my( $self ) = @_;
910a0a6d 1569 foreach my $wit ( $self->tradition->witnesses ) {
6771a1b1 1570 # say STDERR "Making path for " . $wit->sigil;
910a0a6d 1571 $self->make_witness_path( $wit );
7854e12e 1572 }
7854e12e 1573}
1574
6a222840 1575sub make_witness_path {
7854e12e 1576 my( $self, $wit ) = @_;
1577 my @chain = @{$wit->path};
15d2d3df 1578 my $sig = $wit->sigil;
fae52efd 1579 # Add start and end if necessary
1580 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1581 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1582 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1583 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1584 }
1f7aa795 1585 if( $wit->is_layered ) {
d9e873d0 1586 @chain = @{$wit->uncorrected_path};
fae52efd 1587 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1588 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1589 foreach my $idx( 0 .. $#chain-1 ) {
1590 my $source = $chain[$idx];
1591 my $target = $chain[$idx+1];
1592 $self->add_path( $source, $target, $sig.$self->ac_label )
1593 unless $self->has_path( $source, $target, $sig );
1594 }
15d2d3df 1595 }
1f7aa795 1596 $wit->clear_path;
1597 $wit->clear_uncorrected_path;
e2902068 1598}
1599
4e5a7b2c 1600=head2 calculate_ranks
1601
1602Calculate the reading ranks (that is, their aligned positions relative
1603to each other) for the graph. This can only be called on linear collations.
1604
b365fbae 1605=begin testing
1606
1607use Text::Tradition;
1608
1609my $cxfile = 't/data/Collatex-16.xml';
1610my $t = Text::Tradition->new(
1611 'name' => 'inline',
1612 'input' => 'CollateX',
1613 'file' => $cxfile,
1614 );
1615my $c = $t->collation;
1616
1617# Make an svg
bfcbcecb 1618my $table = $c->alignment_table;
1619ok( $c->has_cached_table, "Alignment table was cached" );
1620is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1621$c->calculate_ranks;
bfcbcecb 1622is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1623$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1624isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1625
1626=end testing
1627
4e5a7b2c 1628=cut
1629
910a0a6d 1630sub calculate_ranks {
1631 my $self = shift;
b365fbae 1632 # Save the existing ranks, in case we need to invalidate the cached SVG.
1633 my %existing_ranks;
ac4d7ac2 1634 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1635
1636 # Do the rankings based on the relationship equivalence graph, starting
1637 # with the start node.
56772e8c 1638 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1639
910a0a6d 1640 # Transfer our rankings from the topological graph to the real one.
1641 foreach my $r ( $self->readings ) {
cecbe56d 1642 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 1643 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1644 } else {
63778331 1645 # Die. Find the last rank we calculated.
359944f7 1646 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1647 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1648 $self->readings;
1649 my $last = pop @all_defined;
1650 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1651 }
de51424a 1652 }
bfcbcecb 1653 # Do we need to invalidate the cached data?
1654 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1655 foreach my $r ( $self->readings ) {
7c293912 1656 next if defined( $existing_ranks{$r} )
1657 && $existing_ranks{$r} == $r->rank;
c1915ab9 1658 # Something has changed, so clear the cache
bfcbcecb 1659 $self->_clear_cache;
c1915ab9 1660 # ...and recalculate the common readings.
1661 $self->calculate_common_readings();
b365fbae 1662 last;
1663 }
1664 }
c1915ab9 1665 # The graph calculation information is now up to date.
1666 $self->_graphcalc_done(1);
8e1394aa 1667}
3a1f2523 1668
c1915ab9 1669sub _clear_cache {
1670 my $self = shift;
1671 $self->wipe_svg if $self->has_cached_svg;
1672 $self->wipe_table if $self->has_cached_table;
1673}
1674
1675
4e5a7b2c 1676=head2 flatten_ranks
1677
1678A convenience method for parsing collation data. Searches the graph for readings
1679with the same text at the same rank, and merges any that are found.
1680
1681=cut
1682
0e476982 1683sub flatten_ranks {
1684 my $self = shift;
1685 my %unique_rank_rdg;
bf6e338d 1686 my $changed;
0e476982 1687 foreach my $rdg ( $self->readings ) {
1688 next unless $rdg->has_rank;
1689 my $key = $rdg->rank . "||" . $rdg->text;
1690 if( exists $unique_rank_rdg{$key} ) {
07e6765f 1691 # Make sure they don't have different grammatical forms
1692 my $ur = $unique_rank_rdg{$key};
1693 if( $rdg->disambiguated && $ur->disambiguated ) {
1694 my $rform = join( '//', map { $_->form->to_string } $rdg->lexemes );
1695 my $uform = join( '//', map { $_->form->to_string } $ur->lexemes );
1696 next unless $rform eq $uform;
1697 } elsif( $rdg->disambiguated xor $ur->disambiguated ) {
1698 next;
1699 }
0e476982 1700 # Combine!
6771a1b1 1701 #say STDERR "Combining readings at same rank: $key";
bf6e338d 1702 $changed = 1;
0e476982 1703 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1704 # TODO see if this now makes a common point.
0e476982 1705 } else {
1706 $unique_rank_rdg{$key} = $rdg;
1707 }
1708 }
bf6e338d 1709 # If we merged readings, the ranks are still fine but the alignment
1710 # table is wrong. Wipe it.
1711 $self->wipe_table() if $changed;
0e476982 1712}
4633f9e4 1713
1714
d4b75f44 1715=head2 calculate_common_readings
1716
1717Goes through the graph identifying the readings that appear in every witness
1718(apart from those with lacunae at that spot.) Marks them as common and returns
1719the list.
1720
1721=begin testing
1722
1723use Text::Tradition;
1724
1725my $cxfile = 't/data/Collatex-16.xml';
1726my $t = Text::Tradition->new(
1727 'name' => 'inline',
1728 'input' => 'CollateX',
1729 'file' => $cxfile,
1730 );
1731my $c = $t->collation;
1732
1733my @common = $c->calculate_common_readings();
1734is( scalar @common, 8, "Found correct number of common readings" );
1735my @marked = sort $c->common_readings();
1736is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1737my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1738is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1739
1740=end testing
1741
1742=cut
1743
1744sub calculate_common_readings {
1745 my $self = shift;
1746 my @common;
c1915ab9 1747 map { $_->is_common( 0 ) } $self->readings;
1748 # Implicitly calls calculate_ranks
1dd07bda 1749 my $table = $self->alignment_table;
d4b75f44 1750 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1751 my @row = map { $_->{'tokens'}->[$idx]
1752 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1753 @{$table->{'alignment'}};
d4b75f44 1754 my %hash;
1755 foreach my $r ( @row ) {
1756 if( $r ) {
1757 $hash{$r->id} = $r unless $r->is_meta;
1758 } else {
1759 $hash{'UNDEF'} = $r;
1760 }
1761 }
1762 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1763 my( $r ) = values %hash;
1764 $r->is_common( 1 );
1765 push( @common, $r );
1766 }
1767 }
1768 return @common;
1769}
1770
861c3e27 1771=head2 text_from_paths
1772
1773Calculate the text array for all witnesses from the path, for later consistency
1774checking. Only to be used if there is no non-graph-based way to know the
1775original texts.
1776
1777=cut
1778
1779sub text_from_paths {
1780 my $self = shift;
1781 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1782 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1783 my @text;
1784 foreach my $r ( @readings ) {
1785 next if $r->is_meta;
1786 push( @text, $r->text );
1787 }
861c3e27 1788 $wit->text( \@text );
1789 if( $wit->is_layered ) {
5164a6f0 1790 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1791 $wit->sigil.$self->ac_label );
1792 my @uctext;
1793 foreach my $r ( @ucrdgs ) {
1794 next if $r->is_meta;
1795 push( @uctext, $r->text );
1796 }
1797 $wit->layertext( \@uctext );
861c3e27 1798 }
1799 }
1800}
0e476982 1801
4e5a7b2c 1802=head1 UTILITY FUNCTIONS
1803
1804=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1805
4e5a7b2c 1806Find the last reading that occurs in sequence before both the given readings.
414cc046 1807At the very least this should be $self->start.
4e5a7b2c 1808
1809=head2 common_successor( $reading_a, $reading_b )
1810
1811Find the first reading that occurs in sequence after both the given readings.
414cc046 1812At the very least this should be $self->end.
4e5a7b2c 1813
22222af9 1814=begin testing
1815
1816use Text::Tradition;
1817
1818my $cxfile = 't/data/Collatex-16.xml';
1819my $t = Text::Tradition->new(
1820 'name' => 'inline',
1821 'input' => 'CollateX',
1822 'file' => $cxfile,
1823 );
1824my $c = $t->collation;
1825
679f17e1 1826is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1827 'n20', "Found correct common predecessor" );
679f17e1 1828is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1829 '__END__', "Found correct common successor" );
22222af9 1830
4e5a7b2c 1831is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1832 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1833is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1834 '__END__', "Found correct common successor for readings on same path" );
22222af9 1835
1836=end testing
1837
1838=cut
1839
1840## Return the closest reading that is a predecessor of both the given readings.
1841sub common_predecessor {
1842 my $self = shift;
4e5a7b2c 1843 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1844 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1845}
1846
1847sub common_successor {
1848 my $self = shift;
4e5a7b2c 1849 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1850 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1851}
1852
414cc046 1853
1854# TODO think about how to do this without ranks...
027d819c 1855sub _common_in_path {
22222af9 1856 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1857 my $iter = $self->end->rank;
22222af9 1858 my @candidates;
414cc046 1859 my @last_r1 = ( $r1 );
1860 my @last_r2 = ( $r2 );
1861 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1862 my %all_seen;
6771a1b1 1863 # say STDERR "Finding common $dir for $r1, $r2";
22222af9 1864 while( !@candidates ) {
414cc046 1865 last unless $iter--; # Avoid looping infinitely
1866 # Iterate separately down the graph from r1 and r2
1867 my( @new_lc1, @new_lc2 );
1868 foreach my $lc ( @last_r1 ) {
1869 foreach my $p ( $lc->$dir ) {
1870 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
6771a1b1 1871 # say STDERR "Path candidate $p from $lc";
414cc046 1872 push( @candidates, $p );
002e3600 1873 } elsif( !$all_seen{$p->id} ) {
414cc046 1874 $all_seen{$p->id} = 'r1';
1875 push( @new_lc1, $p );
1876 }
1877 }
1878 }
1879 foreach my $lc ( @last_r2 ) {
22222af9 1880 foreach my $p ( $lc->$dir ) {
414cc046 1881 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
6771a1b1 1882 # say STDERR "Path candidate $p from $lc";
22222af9 1883 push( @candidates, $p );
002e3600 1884 } elsif( !$all_seen{$p->id} ) {
414cc046 1885 $all_seen{$p->id} = 'r2';
1886 push( @new_lc2, $p );
22222af9 1887 }
1888 }
1889 }
414cc046 1890 @last_r1 = @new_lc1;
1891 @last_r2 = @new_lc2;
22222af9 1892 }
1893 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1894 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1895}
1896
63778331 1897sub throw {
1898 Text::Tradition::Error->throw(
1899 'ident' => 'Collation error',
1900 'message' => $_[0],
1901 );
1902}
1903
dd3b58b0 1904no Moose;
1905__PACKAGE__->meta->make_immutable;
e867486f 1906
027d819c 1907=head1 LICENSE
e867486f 1908
027d819c 1909This package is free software and is provided "as is" without express
1910or implied warranty. You can redistribute it and/or modify it under
1911the same terms as Perl itself.
e867486f 1912
027d819c 1913=head1 AUTHOR
e867486f 1914
027d819c 1915Tara L Andrews E<lt>aurum@cpan.orgE<gt>