migrate lexemes and normal forms when readings are combined; script to merge readings...
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
e4b73942 277 $self->_set_start( $self->add_reading(
278 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
279 $self->_set_end( $self->add_reading(
280 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
d047cd52 281}
784877d9 282
3a2ebbf4 283### Reading construct/destruct functions
284
285sub add_reading {
286 my( $self, $reading ) = @_;
287 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
288 my %args = %$reading;
e4b73942 289 if( $args{'init'} ) {
290 # If we are initializing an empty collation, don't assume that we
291 # have set a tradition.
292 delete $args{'init'};
293 } elsif( $self->tradition->has_language && !exists $args{'language'} ) {
6ad2ce78 294 $args{'language'} = $self->tradition->language;
295 }
3a2ebbf4 296 $reading = Text::Tradition::Collation::Reading->new(
297 'collation' => $self,
298 %args );
299 }
300 # First check to see if a reading with this ID exists.
301 if( $self->reading( $reading->id ) ) {
63778331 302 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 303 }
c1915ab9 304 $self->_graphcalc_done(0);
3a2ebbf4 305 $self->_add_reading( $reading->id => $reading );
306 # Once the reading has been added, put it in both graphs.
307 $self->sequence->add_vertex( $reading->id );
cecbe56d 308 $self->relations->add_reading( $reading->id );
3a2ebbf4 309 return $reading;
eca16057 310};
311
3a2ebbf4 312around del_reading => sub {
313 my $orig = shift;
314 my $self = shift;
315 my $arg = shift;
316
cecbe56d 317 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
318 $arg = $arg->id;
3a2ebbf4 319 }
3a2ebbf4 320 # Remove the reading from the graphs.
c1915ab9 321 $self->_graphcalc_done(0);
4e483aa5 322 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 323 $self->sequence->delete_vertex( $arg );
324 $self->relations->delete_reading( $arg );
3a2ebbf4 325
326 # Carry on.
cecbe56d 327 $self->$orig( $arg );
3a2ebbf4 328};
7854e12e 329
4e483aa5 330=begin testing
331
332use Text::Tradition;
333
334my $cxfile = 't/data/Collatex-16.xml';
335my $t = Text::Tradition->new(
336 'name' => 'inline',
337 'input' => 'CollateX',
338 'file' => $cxfile,
339 );
340my $c = $t->collation;
341
342my $rno = scalar $c->readings;
343# Split n21 for testing purposes
344my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
345my $old_r = $c->reading( 'n21' );
346$old_r->alter_text( 'to' );
347$c->del_path( 'n20', 'n21', 'A' );
348$c->add_path( 'n20', 'n21p0', 'A' );
349$c->add_path( 'n21p0', 'n21', 'A' );
350$c->flatten_ranks();
351ok( $c->reading( 'n21p0' ), "New reading exists" );
352is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
353
679f17e1 354# Combine n3 and n4 ( with his )
4e483aa5 355$c->merge_readings( 'n3', 'n4', 1 );
356ok( !$c->reading('n4'), "Reading n4 is gone" );
357is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
358
679f17e1 359# Collapse n9 and n10 ( rood / root )
360$c->merge_readings( 'n9', 'n10' );
361ok( !$c->reading('n10'), "Reading n10 is gone" );
362is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 363
364# Combine n21 and n21p0
365my $remaining = $c->reading('n21');
366$remaining ||= $c->reading('n22'); # one of these should still exist
367$c->merge_readings( 'n21p0', $remaining, 1 );
368ok( !$c->reading('n21'), "Reading $remaining is gone" );
369is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
370
371=end testing
372
373=cut
7854e12e 374
3a2ebbf4 375sub merge_readings {
376 my $self = shift;
377
56772e8c 378 # Sanity check
379 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
380 my $mergemeta = $kept_obj->is_meta;
381 throw( "Cannot merge meta and non-meta reading" )
382 unless ( $mergemeta && $del_obj->is_meta )
383 || ( !$mergemeta && !$del_obj->is_meta );
384 if( $mergemeta ) {
385 throw( "Cannot merge with start or end node" )
386 if( $kept_obj eq $self->start || $kept_obj eq $self->end
387 || $del_obj eq $self->start || $del_obj eq $self->end );
388 }
3a2ebbf4 389 # We only need the IDs for adding paths to the graph, not the reading
390 # objects themselves.
56772e8c 391 my $kept = $kept_obj->id;
392 my $deleted = $del_obj->id;
c1915ab9 393 $self->_graphcalc_done(0);
10943ab0 394
3a2ebbf4 395 # The kept reading should inherit the paths and the relationships
396 # of the deleted reading.
397 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
398 my @vector = ( $kept );
399 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
400 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 401 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 402 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
403 $self->sequence->add_edge( @vector );
404 my $fwits = $self->sequence->get_edge_attributes( @vector );
405 @wits{keys %$fwits} = values %$fwits;
406 $self->sequence->set_edge_attributes( @vector, \%wits );
407 }
cecbe56d 408 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 409
410 # Do the deletion deed.
4e483aa5 411 if( $combine ) {
869a1ada 412 # Combine the text of the readings
4e483aa5 413 my $joinstr = $combine_char;
414 unless( defined $joinstr ) {
415 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
416 $joinstr = $self->wordsep unless defined $joinstr;
417 }
418 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
869a1ada 419 $kept_obj->normal_form(
420 join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
421 # Combine the lexemes present in the readings
422 if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
423 $kept_obj->add_lexeme( $del_obj->lexemes );
424 }
49d4f2ac 425 }
3a2ebbf4 426 $self->del_reading( $deleted );
427}
7854e12e 428
3265b0ce 429
3a2ebbf4 430# Helper function for manipulating the graph.
431sub _stringify_args {
4e483aa5 432 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 433 $first = $first->id
434 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
435 $second = $second->id
436 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 437 return( $first, $second, @args );
3a2ebbf4 438}
df6d9812 439
4e5a7b2c 440# Helper function for manipulating the graph.
441sub _objectify_args {
442 my( $self, $first, $second, $arg ) = @_;
443 $first = $self->reading( $first )
444 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
445 $second = $self->reading( $second )
446 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
447 return( $first, $second, $arg );
448}
3a2ebbf4 449### Path logic
450
451sub add_path {
452 my $self = shift;
453
454 # We only need the IDs for adding paths to the graph, not the reading
455 # objects themselves.
cecbe56d 456 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 457
c1915ab9 458 $self->_graphcalc_done(0);
3a2ebbf4 459 # Connect the readings
359944f7 460 unless( $self->sequence->has_edge( $source, $target ) ) {
461 $self->sequence->add_edge( $source, $target );
462 $self->relations->add_equivalence_edge( $source, $target );
463 }
3a2ebbf4 464 # Note the witness in question
465 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 466}
b15511bf 467
3a2ebbf4 468sub del_path {
469 my $self = shift;
49d4f2ac 470 my @args;
471 if( ref( $_[0] ) eq 'ARRAY' ) {
472 my $e = shift @_;
473 @args = ( @$e, @_ );
474 } else {
475 @args = @_;
476 }
3a2ebbf4 477
478 # We only need the IDs for adding paths to the graph, not the reading
479 # objects themselves.
49d4f2ac 480 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 481
c1915ab9 482 $self->_graphcalc_done(0);
3a2ebbf4 483 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 484 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 485 }
486 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
487 $self->sequence->delete_edge( $source, $target );
359944f7 488 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 489 }
784877d9 490}
491
3a2ebbf4 492
15d2d3df 493# Extra graph-alike utility
494sub has_path {
3a2ebbf4 495 my $self = shift;
496 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
497 return undef unless $self->sequence->has_edge( $source, $target );
498 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 499}
500
4e5a7b2c 501=head2 clear_witness( @sigil_list )
3a2ebbf4 502
4e5a7b2c 503Clear the given witnesses out of the collation entirely, removing references
504to them in paths, and removing readings that belong only to them. Should only
505be called via $tradition->del_witness.
3a2ebbf4 506
507=cut
508
4e5a7b2c 509sub clear_witness {
510 my( $self, @sigils ) = @_;
511
c1915ab9 512 $self->_graphcalc_done(0);
4e5a7b2c 513 # Clear the witness(es) out of the paths
514 foreach my $e ( $self->paths ) {
515 foreach my $sig ( @sigils ) {
516 $self->del_path( $e, $sig );
517 }
518 }
519
520 # Clear out the newly unused readings
521 foreach my $r ( $self->readings ) {
522 unless( $self->reading_witnesses( $r ) ) {
523 $self->del_reading( $r );
524 }
525 }
526}
3a2ebbf4 527
528sub add_relationship {
529 my $self = shift;
22222af9 530 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 531 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 532 $self->_graphcalc_done(0);
63778331 533 return @vectors;
22222af9 534}
ef9d481f 535
ca6e6095 536around qw/ get_relationship del_relationship / => sub {
537 my $orig = shift;
538 my $self = shift;
539 my @args = @_;
540 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
541 @args = @{$_[0]};
542 }
543 my( $source, $target ) = $self->_stringify_args( @args );
544 $self->$orig( $source, $target );
545};
546
22222af9 547=head2 reading_witnesses( $reading )
910a0a6d 548
22222af9 549Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 550
22222af9 551=cut
1d310495 552
1d310495 553sub reading_witnesses {
554 my( $self, $reading ) = @_;
555 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 556 # arbitrarily chosen "incoming". Thus, special-case the start node.
557 if( $reading eq $self->start ) {
558 return map { $_->sigil } $self->tradition->witnesses;
559 }
1d310495 560 my %all_witnesses;
561 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
562 my $wits = $self->sequence->get_edge_attributes( @$e );
563 @all_witnesses{ keys %$wits } = 1;
564 }
c12bb878 565 my $acstr = $self->ac_label;
566 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
567 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
568 }
1d310495 569 return keys %all_witnesses;
910a0a6d 570}
571
4e5a7b2c 572=head1 OUTPUT METHODS
8e1394aa 573
0ecb975c 574=head2 as_svg( \%options )
8e1394aa 575
0068967c 576Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 577See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 578
579=cut
580
581sub as_svg {
0ecb975c 582 my( $self, $opts ) = @_;
bfcbcecb 583 throw( "Need GraphViz installed to output SVG" )
584 unless File::Which::which( 'dot' );
e247aad1 585 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 586 $self->calculate_ranks()
587 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
e247aad1 588 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 589 my @cmd = qw/dot -Tsvg/;
590 my( $svg, $err );
591 my $dotfile = File::Temp->new();
592 ## USE FOR DEBUGGING
593 # $dotfile->unlink_on_destroy(0);
594 binmode $dotfile, ':utf8';
595 print $dotfile $self->as_dot( $opts );
596 push( @cmd, $dotfile->filename );
597 run( \@cmd, ">", binary(), \$svg );
e247aad1 598 $svg = decode_utf8( $svg );
599 $self->cached_svg( $svg ) unless $want_subgraph;
600 return $svg;
601 } else {
602 return $self->cached_svg;
b365fbae 603 }
8e1394aa 604}
605
b22576c6 606
0ecb975c 607=head2 as_dot( \%options )
b22576c6 608
0ecb975c 609Returns a string that is the collation graph expressed in dot
610(i.e. GraphViz) format. Options include:
b22576c6 611
0ecb975c 612=over 4
b22576c6 613
0ecb975c 614=item * from
b22576c6 615
0ecb975c 616=item * to
df6d9812 617
0ecb975c 618=item * color_common
619
620=back
df6d9812 621
622=cut
623
624sub as_dot {
0ecb975c 625 my( $self, $opts ) = @_;
626 my $startrank = $opts->{'from'} if $opts;
627 my $endrank = $opts->{'to'} if $opts;
628 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 629 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
630 && $self->end->rank > 100;
6648ee3d 631 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 632
b22576c6 633 # Check the arguments
634 if( $startrank ) {
635 return if $endrank && $startrank > $endrank;
636 return if $startrank > $self->end->rank;
637 }
638 if( defined $endrank ) {
639 return if $endrank < 0;
f1b3b33a 640 $endrank = undef if $endrank == $self->end->rank;
b22576c6 641 }
642
67da8d6c 643 my $graph_name = $self->tradition->name;
644 $graph_name =~ s/[^\w\s]//g;
645 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 646
647 my %graph_attrs = (
648 'rankdir' => 'LR',
649 'bgcolor' => 'none',
650 );
651 my %node_attrs = (
b8990398 652 'fontsize' => 14,
f13b5582 653 'fillcolor' => 'white',
654 'style' => 'filled',
655 'shape' => 'ellipse'
656 );
657 my %edge_attrs = (
658 'arrowhead' => 'open',
659 'color' => '#000000',
660 'fontcolor' => '#000000',
661 );
662
67da8d6c 663 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 664 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
665 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 666
b22576c6 667 # Output substitute start/end readings if necessary
668 if( $startrank ) {
43c94341 669 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 670 }
671 if( $endrank ) {
43c94341 672 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 673 }
b365fbae 674 if( $STRAIGHTENHACK ) {
675 ## HACK part 1
43c94341 676 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
677 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 678 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
679 }
b22576c6 680 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 681 # Sort the readings by rank if we have ranks; this speeds layout.
682 my @all_readings = $self->end->has_rank
683 ? sort { $a->rank <=> $b->rank } $self->readings
684 : $self->readings;
4633f9e4 685 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 686 foreach my $reading ( @all_readings ) {
b22576c6 687 # Only output readings within our rank range.
688 next if $startrank && $reading->rank < $startrank;
689 next if $endrank && $reading->rank > $endrank;
690 $used{$reading->id} = 1;
910a0a6d 691 # Need not output nodes without separate labels
3a2ebbf4 692 next if $reading->id eq $reading->text;
d4b75f44 693 my $rattrs;
30f0df34 694 my $label = $reading->text;
629e27b0 695 $label .= '-' if $reading->join_next;
696 $label = "-$label" if $reading->join_prior;
8f9cab7b 697 $label =~ s/\"/\\\"/g;
d4b75f44 698 $rattrs->{'label'} = $label;
10e4b1ac 699 $rattrs->{'id'} = $reading->id;
0ecb975c 700 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 701 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 702 }
3a2ebbf4 703
30ddc24c 704 # Add the real edges. Need to weight one edge per rank jump, in a
705 # continuous line.
b365fbae 706 # my $weighted = $self->_add_edge_weights;
b22576c6 707 my @edges = $self->paths;
3bdec618 708 my( %substart, %subend );
b22576c6 709 foreach my $edge ( @edges ) {
710 # Do we need to output this edge?
508fd430 711 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 712 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 713 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 714
b22576c6 715 # Account for the rank gap if necessary
30ddc24c 716 my $rank0 = $self->reading( $edge->[0] )->rank
717 if $self->reading( $edge->[0] )->has_rank;
718 my $rank1 = $self->reading( $edge->[1] )->rank
719 if $self->reading( $edge->[1] )->has_rank;
720 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
721 $variables->{'minlen'} = $rank1 - $rank0;
722 }
723
724 # Add the calculated edge weights
b365fbae 725 # if( exists $weighted->{$edge->[0]}
e247aad1 726 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
727 # # $variables->{'color'} = 'red';
728 # $variables->{'weight'} = 3.0;
729 # }
30ddc24c 730
508fd430 731 # EXPERIMENTAL: make edge width reflect no. of witnesses
732 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
733 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
734
f13b5582 735 my $varopts = _dot_attr_string( $variables );
736 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
737 $edge->[0], $edge->[1], $varopts );
3bdec618 738 } elsif( $used{$edge->[0]} ) {
96ba0418 739 $subend{$edge->[0]} = $edge->[1];
3bdec618 740 } elsif( $used{$edge->[1]} ) {
96ba0418 741 $substart{$edge->[1]} = $edge->[0];
b22576c6 742 }
df6d9812 743 }
3bdec618 744 # Add substitute start and end edges if necessary
745 foreach my $node ( keys %substart ) {
96ba0418 746 my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
f13b5582 747 my $variables = { %edge_attrs, 'label' => $witstr };
96ba0418 748 my $nrdg = $self->reading( $node );
749 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
750 # Substart is actually one lower than $startrank
751 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
752 }
f13b5582 753 my $varopts = _dot_attr_string( $variables );
96ba0418 754 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
3bdec618 755 }
756 foreach my $node ( keys %subend ) {
96ba0418 757 my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
f13b5582 758 my $variables = { %edge_attrs, 'label' => $witstr };
759 my $varopts = _dot_attr_string( $variables );
96ba0418 760 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
3bdec618 761 }
b365fbae 762 # HACK part 2
763 if( $STRAIGHTENHACK ) {
43c94341 764 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
765 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 766 }
30ddc24c 767
df6d9812 768 $dot .= "}\n";
769 return $dot;
770}
771
f13b5582 772sub _dot_attr_string {
773 my( $hash ) = @_;
774 my @attrs;
775 foreach my $k ( sort keys %$hash ) {
776 my $v = $hash->{$k};
777 push( @attrs, $k.'="'.$v.'"' );
778 }
779 return( '[ ' . join( ', ', @attrs ) . ' ]' );
780}
781
30ddc24c 782sub _add_edge_weights {
783 my $self = shift;
784 # Walk the graph from START to END, choosing the successor node with
785 # the largest number of witness paths each time.
786 my $weighted = {};
787 my $curr = $self->start->id;
008fc8a6 788 my $ranked = $self->end->has_rank;
30ddc24c 789 while( $curr ne $self->end->id ) {
008fc8a6 790 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 791 my @succ = sort { $self->path_witnesses( $curr, $a )
792 <=> $self->path_witnesses( $curr, $b ) }
793 $self->sequence->successors( $curr );
794 my $next = pop @succ;
008fc8a6 795 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 796 # Try to avoid lacunae in the weighted path.
008fc8a6 797 while( @succ &&
798 ( $self->reading( $next )->is_lacuna ||
799 $nextrank - $rank > 1 ) ){
30ddc24c 800 $next = pop @succ;
801 }
802 $weighted->{$curr} = $next;
803 $curr = $next;
804 }
805 return $weighted;
806}
807
027d819c 808=head2 path_witnesses( $edge )
809
810Returns the list of sigils whose witnesses are associated with the given edge.
811The edge can be passed as either an array or an arrayref of ( $source, $target ).
812
813=cut
814
3a2ebbf4 815sub path_witnesses {
816 my( $self, @edge ) = @_;
817 # If edge is an arrayref, cope.
818 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
819 my $e = shift @edge;
820 @edge = @$e;
821 }
822 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 823 return @wits;
3a2ebbf4 824}
825
7f9f05e8 826# Helper function. Make a display label for the given witnesses, showing a.c.
827# witnesses only where the main witness is not also in the list.
027d819c 828sub _path_display_label {
508fd430 829 my $self = shift;
7f9f05e8 830 my %wits;
831 map { $wits{$_} = 1 } @_;
832
833 # If an a.c. wit is listed, remove it if the main wit is also listed.
834 # Otherwise keep it for explicit listing.
835 my $aclabel = $self->ac_label;
836 my @disp_ac;
837 foreach my $w ( sort keys %wits ) {
838 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
839 if( exists $wits{$1} ) {
840 delete $wits{$w};
841 } else {
842 push( @disp_ac, $w );
843 }
844 }
845 }
846
847 # See if we are in a majority situation.
8f9cab7b 848 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1ff82d4f 849 $maj = $maj > 5 ? $maj : 5;
7f9f05e8 850 if( scalar keys %wits > $maj ) {
851 unshift( @disp_ac, 'majority' );
852 return join( ', ', @disp_ac );
8f9cab7b 853 } else {
7f9f05e8 854 return join( ', ', sort keys %wits );
8f9cab7b 855 }
856}
1dd07bda 857
bf6e338d 858=head2 readings_at_rank( $rank )
1dd07bda 859
bf6e338d 860Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 861
862=cut
863
bf6e338d 864sub readings_at_rank {
1dd07bda 865 my( $self, $rank ) = @_;
bf6e338d 866 my $table = $self->alignment_table;
867 # Table rank is real rank - 1.
868 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
869 my %readings;
870 foreach my $e ( @elements ) {
871 next unless ref( $e ) eq 'HASH';
872 next unless exists $e->{'t'};
873 $readings{$e->{'t'}->id} = $e->{'t'};
874 }
875 return values %readings;
1dd07bda 876}
8f9cab7b 877
4e5a7b2c 878=head2 as_graphml
8e1394aa 879
4e5a7b2c 880Returns a GraphML representation of the collation. The GraphML will contain
881two graphs. The first expresses the attributes of the readings and the witness
882paths that link them; the second expresses the relationships that link the
883readings. This is the native transfer format for a tradition.
8e1394aa 884
56eefa04 885=begin testing
886
887use Text::Tradition;
888
889my $READINGS = 311;
890my $PATHS = 361;
891
892my $datafile = 't/data/florilegium_tei_ps.xml';
893my $tradition = Text::Tradition->new( 'input' => 'TEI',
894 'name' => 'test0',
895 'file' => $datafile,
896 'linear' => 1 );
897
898ok( $tradition, "Got a tradition object" );
899is( scalar $tradition->witnesses, 13, "Found all witnesses" );
900ok( $tradition->collation, "Tradition has a collation" );
901
902my $c = $tradition->collation;
903is( scalar $c->readings, $READINGS, "Collation has all readings" );
904is( scalar $c->paths, $PATHS, "Collation has all paths" );
905is( scalar $c->relationships, 0, "Collation has all relationships" );
906
907# Add a few relationships
908$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
909$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
910$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
911
912# Now write it to GraphML and parse it again.
913
914my $graphml = $c->as_graphml;
915my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
916is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
917is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
918is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
919
2a812726 920# Now add a stemma, write to GraphML, and parse again.
921my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
922is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
923is( $tradition->stemmata, 1, "Tradition now has the stemma" );
924$graphml = $c->as_graphml;
925like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
926
56eefa04 927=end testing
928
8e1394aa 929=cut
930
931sub as_graphml {
a30ca502 932 my( $self, $options ) = @_;
3d14b48e 933 $self->calculate_ranks unless $self->_graphcalc_done;
934
a30ca502 935 my $start = $options->{'from'}
936 ? $self->reading( $options->{'from'} ) : $self->start;
937 my $end = $options->{'to'}
938 ? $self->reading( $options->{'to'} ) : $self->end;
939 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
940 throw( 'Start node must be before end node' );
941 }
942 # The readings need to be ranked for this to work.
943 $start = $self->start unless $start->has_rank;
944 $end = $self->end unless $end->has_rank;
414cc046 945 my $rankoffset = 0;
946 unless( $start eq $self->start ) {
947 $rankoffset = $start->rank - 1;
948 }
a30ca502 949 my %use_readings;
950
8e1394aa 951 # Some namespaces
952 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
953 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
954 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 955 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 956
957 # Create the document and root node
958 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
959 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
960 $graphml->setDocumentElement( $root );
961 $root->setNamespace( $xsi_ns, 'xsi', 0 );
962 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 963
964 # List of attribute types to save on our objects and their corresponding
965 # GraphML types
966 my %save_types = (
967 'Str' => 'string',
968 'Int' => 'int',
969 'Bool' => 'boolean',
10e4b1ac 970 'ReadingID' => 'string',
bbd064a9 971 'RelationshipType' => 'string',
972 'RelationshipScope' => 'string',
973 );
974
975 # List of attribute names *not* to save on our objects.
976 # We will also not save any attribute beginning with _.
977 my %skipsave;
978 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 979
bbd064a9 980 # Add the data keys for the graph. Include an extra key 'version' for the
981 # GraphML output version.
e309421a 982 my %graph_data_keys;
983 my $gdi = 0;
bbd064a9 984 my %graph_attributes = ( 'version' => 'string' );
985 # Graph attributes include those of Tradition and those of Collation.
986 my %gattr_from;
987 my $tmeta = $self->tradition->meta;
988 my $cmeta = $self->meta;
989 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
990 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
991 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
992 next if $attr->name =~ /^_/;
993 next if $skipsave{$attr->name};
994 next unless $save_types{$attr->type_constraint->name};
995 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
996 }
2a812726 997 # Extra custom key for the tradition stemma(ta)
998 $graph_attributes{'stemmata'} = 'string';
bbd064a9 999
1000 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 1001 $graph_data_keys{$datum} = 'dg'.$gdi++;
1002 my $key = $root->addNewChild( $graphml_ns, 'key' );
1003 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1004 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 1005 $key->setAttribute( 'for', 'graph' );
1006 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1007 }
f6066bac 1008
bbd064a9 1009 # Add the data keys for reading nodes
1010 my %reading_attributes;
1011 my $rmeta = Text::Tradition::Collation::Reading->meta;
1012 foreach my $attr( $rmeta->get_all_attributes ) {
1013 next if $attr->name =~ /^_/;
1014 next if $skipsave{$attr->name};
1015 next unless $save_types{$attr->type_constraint->name};
1016 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1017 }
7cd9f181 1018 # Extra custom key for the reading morphology
1019 $reading_attributes{'lexemes'} = 'string';
1020
ef9d481f 1021 my %node_data_keys;
1022 my $ndi = 0;
bbd064a9 1023 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1024 $node_data_keys{$datum} = 'dn'.$ndi++;
1025 my $key = $root->addNewChild( $graphml_ns, 'key' );
1026 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1027 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1028 $key->setAttribute( 'for', 'node' );
1029 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1030 }
1031
bbd064a9 1032 # Add the data keys for edges, that is, paths and relationships. Path
1033 # data does not come from a Moose class so is here manually.
ef9d481f 1034 my $edi = 0;
1035 my %edge_data_keys;
bbd064a9 1036 my %edge_attributes = (
3a2ebbf4 1037 witness => 'string', # ID/label for a path
3a2ebbf4 1038 extra => 'boolean', # Path key
3a2ebbf4 1039 );
bbd064a9 1040 my @path_attributes = keys %edge_attributes; # track our manual additions
1041 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1042 foreach my $attr( $pmeta->get_all_attributes ) {
1043 next if $attr->name =~ /^_/;
1044 next if $skipsave{$attr->name};
1045 next unless $save_types{$attr->type_constraint->name};
1046 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1047 }
1048 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1049 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1050 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1051 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1052 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1053 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1054 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1055 }
3a2ebbf4 1056
cc31ebaa 1057 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1058 my $xmlidname = $self->tradition->name;
1059 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1060 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1061 $xmlidname = '_'.$xmlidname;
1062 }
2c669bca 1063 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1064 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1065 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1066 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1067 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1068 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1069 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1070 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1071
2a812726 1072 # Tradition/collation attribute data
bbd064a9 1073 foreach my $datum ( keys %graph_attributes ) {
1074 my $value;
1075 if( $datum eq 'version' ) {
2a812726 1076 $value = '3.2';
1077 } elsif( $datum eq 'stemmata' ) {
1078 my @stemstrs;
1079 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1080 $self->tradition->stemmata;
1081 $value = join( "\n", @stemstrs );
bbd064a9 1082 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1083 $value = $self->tradition->$datum;
1084 } else {
1085 $value = $self->$datum;
1086 }
2c669bca 1087 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1088 }
8e1394aa 1089
1090 my $node_ctr = 0;
1091 my %node_hash;
22222af9 1092 # Add our readings to the graph
3a2ebbf4 1093 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1094 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1095 ( $n->rank < $start->rank || $n->rank > $end->rank );
1096 $use_readings{$n->id} = 1;
2c669bca 1097 # Add to the main graph
1098 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1099 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1100 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1101 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1102 foreach my $d ( keys %reading_attributes ) {
255875b8 1103 my $nval = $n->$d;
7cd9f181 1104 # Custom serialization
1105 if( $d eq 'lexemes' ) {
1106 # If nval is a true value, we have lexemes so we need to
1107 # serialize them. Otherwise set nval to undef so that the
1108 # key is excluded from this reading.
1109 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1110 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1111 $nval = undef;
7cd9f181 1112 }
cc31ebaa 1113 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1114 # Adjust the ranks within the subgraph.
cc31ebaa 1115 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1116 : $nval - $rankoffset;
414cc046 1117 }
255875b8 1118 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1119 if defined $nval;
1120 }
b15511bf 1121 }
1122
2c669bca 1123 # Add the path edges to the sequence graph
df6d9812 1124 my $edge_ctr = 0;
3a2ebbf4 1125 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1126 # We add an edge in the graphml for every witness in $e.
a30ca502 1127 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1128 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1129 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1130 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1131 # Skip any path from start to end; that witness is not in the subgraph.
1132 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1133 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1134 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1135 $node_hash{ $e->[0] },
1136 $node_hash{ $e->[1] } );
2c669bca 1137 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1138 $edge_el->setAttribute( 'source', $from );
1139 $edge_el->setAttribute( 'target', $to );
1140 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1141
1142 # It's a witness path, so add the witness
1143 my $base = $wit;
1144 my $key = $edge_data_keys{'witness'};
1145 # Is this an ante-corr witness?
1146 my $aclabel = $self->ac_label;
1147 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1148 # Keep the base witness
1149 $base = $1;
1150 # ...and record that this is an 'extra' reading path
1151 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1152 }
1153 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1154 }
1155 }
1156
cc31ebaa 1157 # Report the actual number of nodes and edges that went in
1158 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1159 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1160
22222af9 1161 # Add the relationship graph to the XML
bbd064a9 1162 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1163 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1164 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1165
94c00c71 1166 # Save and return the thing
1167 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1168 return $result;
df6d9812 1169}
1170
b15511bf 1171sub _add_graphml_data {
1172 my( $el, $key, $value ) = @_;
b15511bf 1173 return unless defined $value;
c9bf3dbf 1174 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1175 $data_el->setAttribute( 'key', $key );
1176 $data_el->appendText( $value );
8e1394aa 1177}
1178
4e5a7b2c 1179=head2 as_csv
910a0a6d 1180
1181Returns a CSV alignment table representation of the collation graph, one
2c669bca 1182row per witness (or witness uncorrected.)
910a0a6d 1183
1184=cut
1185
1186sub as_csv {
3a2ebbf4 1187 my( $self ) = @_;
1dd07bda 1188 my $table = $self->alignment_table;
82fa4d57 1189 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1190 my @result;
2c669bca 1191 # Make the header row
1192 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1193 push( @result, decode_utf8( $csv->string ) );
1194 # Make the rest of the rows
1195 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1196 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1197 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1198 $csv->combine( @row );
910a0a6d 1199 push( @result, decode_utf8( $csv->string ) );
1200 }
3a2ebbf4 1201 return join( "\n", @result );
910a0a6d 1202}
1203
1dd07bda 1204=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1205
566f4595 1206Return a reference to an alignment table, in a slightly enhanced CollateX
1207format which looks like this:
1208
1209 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1210 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1211 { witness => "SIG2",
4e5a7b2c 1212 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1213 ... ],
1214 length => TEXTLEN };
1215
1216If $use_refs is set to 1, the reading object is returned in the table
1217instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1218
1219If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1220keys have a true hash value will be included.
2c669bca 1221
1222=cut
9f3ba6f7 1223
1dd07bda 1224sub alignment_table {
1225 my( $self ) = @_;
c1915ab9 1226 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1227 return $self->cached_table if $self->has_cached_table;
1228
0ecb975c 1229 # Make sure we can do this
1230 throw( "Need a linear graph in order to make an alignment table" )
1231 unless $self->linear;
1232 $self->calculate_ranks unless $self->end->has_rank;
1233
2c669bca 1234 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1235 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1236 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1237 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1238 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1239 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1240 push( @{$table->{'alignment'}},
1241 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1242 if( $wit->is_layered ) {
1243 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1244 $wit->sigil.$self->ac_label );
1dd07bda 1245 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1246 push( @{$table->{'alignment'}},
1247 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1248 }
1249 }
1dd07bda 1250 $self->cached_table( $table );
1251 return $table;
910a0a6d 1252}
1253
1254sub _make_witness_row {
1dd07bda 1255 my( $path, $positions ) = @_;
910a0a6d 1256 my %char_hash;
1257 map { $char_hash{$_} = undef } @$positions;
2c669bca 1258 my $debug = 0;
910a0a6d 1259 foreach my $rdg ( @$path ) {
eca16057 1260 my $rtext = $rdg->text;
1261 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1262 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1263 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1264 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1265 }
1266 my @row = map { $char_hash{$_} } @$positions;
eca16057 1267 # Fill in lacuna markers for undef spots in the row
1268 my $last_el = shift @row;
1269 my @filled_row = ( $last_el );
1270 foreach my $el ( @row ) {
0e476982 1271 # If we are using node reference, make the lacuna node appear many times
1272 # in the table. If not, use the lacuna tag.
1dd07bda 1273 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1274 $el = $last_el;
eca16057 1275 }
1276 push( @filled_row, $el );
1277 $last_el = $el;
1278 }
1279 return @filled_row;
910a0a6d 1280}
1281
4e5a7b2c 1282=head1 NAVIGATION METHODS
910a0a6d 1283
4e5a7b2c 1284=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1285
1286Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1287with $last, for the witness given in $sigil. If a $backup sigil is
1288specified (e.g. when walking a layered witness), it will be used wherever
1289no $sigil path exists. If there is a base text reading, that will be
1290used wherever no path exists for $sigil or $backup.
e2902068 1291
1292=cut
1293
910a0a6d 1294# TODO Think about returning some lazy-eval iterator.
b0b4421a 1295# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1296
e2902068 1297sub reading_sequence {
861c3e27 1298 my( $self, $start, $end, $witness ) = @_;
e2902068 1299
930ff666 1300 $witness = $self->baselabel unless $witness;
e2902068 1301 my @readings = ( $start );
1302 my %seen;
1303 my $n = $start;
3a2ebbf4 1304 while( $n && $n->id ne $end->id ) {
1305 if( exists( $seen{$n->id} ) ) {
63778331 1306 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1307 }
3a2ebbf4 1308 $seen{$n->id} = 1;
910a0a6d 1309
861c3e27 1310 my $next = $self->next_reading( $n, $witness );
44771cf2 1311 unless( $next ) {
63778331 1312 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1313 }
910a0a6d 1314 push( @readings, $next );
1315 $n = $next;
e2902068 1316 }
1317 # Check that the last reading is our end reading.
1318 my $last = $readings[$#readings];
63778331 1319 throw( "Last reading found from " . $start->text .
1320 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1321 unless $last->id eq $end->id;
e2902068 1322
1323 return @readings;
1324}
1325
4e5a7b2c 1326=head2 next_reading( $reading, $sigil );
8e1394aa 1327
4a8828f0 1328Returns the reading that follows the given reading along the given witness
930ff666 1329path.
8e1394aa 1330
1331=cut
1332
4a8828f0 1333sub next_reading {
e2902068 1334 # Return the successor via the corresponding path.
8e1394aa 1335 my $self = shift;
3a2ebbf4 1336 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1337 return undef unless $answer;
3a2ebbf4 1338 return $self->reading( $answer );
8e1394aa 1339}
1340
4e5a7b2c 1341=head2 prior_reading( $reading, $sigil )
8e1394aa 1342
4a8828f0 1343Returns the reading that precedes the given reading along the given witness
930ff666 1344path.
8e1394aa 1345
1346=cut
1347
4a8828f0 1348sub prior_reading {
e2902068 1349 # Return the predecessor via the corresponding path.
8e1394aa 1350 my $self = shift;
3a2ebbf4 1351 my $answer = $self->_find_linked_reading( 'prior', @_ );
1352 return $self->reading( $answer );
8e1394aa 1353}
1354
4a8828f0 1355sub _find_linked_reading {
861c3e27 1356 my( $self, $direction, $node, $path ) = @_;
1357
1358 # Get a backup if we are dealing with a layered witness
1359 my $alt_path;
1360 my $aclabel = $self->ac_label;
1361 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1362 $alt_path = $1;
1363 }
1364
e2902068 1365 my @linked_paths = $direction eq 'next'
3a2ebbf4 1366 ? $self->sequence->edges_from( $node )
1367 : $self->sequence->edges_to( $node );
e2902068 1368 return undef unless scalar( @linked_paths );
8e1394aa 1369
e2902068 1370 # We have to find the linked path that contains all of the
1371 # witnesses supplied in $path.
1372 my( @path_wits, @alt_path_wits );
4e5a7b2c 1373 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1374 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1375 my $base_le;
1376 my $alt_le;
1377 foreach my $le ( @linked_paths ) {
3a2ebbf4 1378 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1379 $base_le = $le;
910a0a6d 1380 }
508fd430 1381 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1382 if( _is_within( \@path_wits, \@le_wits ) ) {
1383 # This is the right path.
1384 return $direction eq 'next' ? $le->[1] : $le->[0];
1385 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1386 $alt_le = $le;
1387 }
8e1394aa 1388 }
e2902068 1389 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1390 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1391 if $alt_le;
e2902068 1392
1393 # Got this far? Return the base path if it exists.
3a2ebbf4 1394 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1395 if $base_le;
e2902068 1396
1397 # Got this far? We have no appropriate path.
2c669bca 1398 warn "Could not find $direction node from " . $node->id
910a0a6d 1399 . " along path $path";
8e1394aa 1400 return undef;
1401}
1402
4a8828f0 1403# Some set logic.
1404sub _is_within {
1405 my( $set1, $set2 ) = @_;
7854e12e 1406 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1407 foreach my $el ( @$set1 ) {
910a0a6d 1408 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1409 }
1410 return $ret;
1411}
1412
4e5a7b2c 1413# Return the string that joins together a list of witnesses for
1414# display on a single path.
1415sub _witnesses_of_label {
1416 my( $self, $label ) = @_;
1417 my $regex = $self->wit_list_separator;
1418 my @answer = split( /\Q$regex\E/, $label );
1419 return @answer;
b0b4421a 1420}
1421
d4b75f44 1422=head2 common_readings
1423
1424Returns the list of common readings in the graph (i.e. those readings that are
1425shared by all non-lacunose witnesses.)
1426
1427=cut
1428
1429sub common_readings {
1430 my $self = shift;
1431 my @common = grep { $_->is_common } $self->readings;
1432 return @common;
1433}
1434
fae52efd 1435=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1436
1437Returns the text of a witness (plus its backup, if we are using a layer)
1438as stored in the collation. The text is returned as a string, where the
1439individual readings are joined with spaces and the meta-readings (e.g.
1440lacunae) are omitted. Optional specification of $start and $end allows
1441the generation of a subset of the witness text.
4e5a7b2c 1442
b0b4421a 1443=cut
1444
1445sub path_text {
861c3e27 1446 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1447 $start = $self->start unless $start;
1448 $end = $self->end unless $end;
861c3e27 1449 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1450 my $pathtext = '';
1451 my $last;
1452 foreach my $r ( @path ) {
6ad2ce78 1453 unless ( $r->join_prior || !$last || $last->join_next ) {
1454 $pathtext .= ' ';
1455 }
1456 $pathtext .= $r->text;
629e27b0 1457 $last = $r;
1458 }
1459 return $pathtext;
b0b4421a 1460}
4e5a7b2c 1461
1462=head1 INITIALIZATION METHODS
1463
1464These are mostly for use by parsers.
1465
1466=head2 make_witness_path( $witness )
1467
1468Link the array of readings contained in $witness->path (and in
1469$witness->uncorrected_path if it exists) into collation paths.
1470Clear out the arrays when finished.
de51424a 1471
4e5a7b2c 1472=head2 make_witness_paths
1473
1474Call make_witness_path for all witnesses in the tradition.
1475
1476=cut
930ff666 1477
7e450e44 1478# For use when a collation is constructed from a base text and an apparatus.
1479# We have the sequences of readings and just need to add path edges.
1f7aa795 1480# When we are done, clear out the witness path attributes, as they are no
1481# longer needed.
1482# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1483
6a222840 1484sub make_witness_paths {
1485 my( $self ) = @_;
910a0a6d 1486 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1487 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1488 $self->make_witness_path( $wit );
7854e12e 1489 }
7854e12e 1490}
1491
6a222840 1492sub make_witness_path {
7854e12e 1493 my( $self, $wit ) = @_;
1494 my @chain = @{$wit->path};
15d2d3df 1495 my $sig = $wit->sigil;
fae52efd 1496 # Add start and end if necessary
1497 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1498 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1499 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1500 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1501 }
1f7aa795 1502 if( $wit->is_layered ) {
d9e873d0 1503 @chain = @{$wit->uncorrected_path};
fae52efd 1504 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1505 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1506 foreach my $idx( 0 .. $#chain-1 ) {
1507 my $source = $chain[$idx];
1508 my $target = $chain[$idx+1];
1509 $self->add_path( $source, $target, $sig.$self->ac_label )
1510 unless $self->has_path( $source, $target, $sig );
1511 }
15d2d3df 1512 }
1f7aa795 1513 $wit->clear_path;
1514 $wit->clear_uncorrected_path;
e2902068 1515}
1516
4e5a7b2c 1517=head2 calculate_ranks
1518
1519Calculate the reading ranks (that is, their aligned positions relative
1520to each other) for the graph. This can only be called on linear collations.
1521
b365fbae 1522=begin testing
1523
1524use Text::Tradition;
1525
1526my $cxfile = 't/data/Collatex-16.xml';
1527my $t = Text::Tradition->new(
1528 'name' => 'inline',
1529 'input' => 'CollateX',
1530 'file' => $cxfile,
1531 );
1532my $c = $t->collation;
1533
1534# Make an svg
bfcbcecb 1535my $table = $c->alignment_table;
1536ok( $c->has_cached_table, "Alignment table was cached" );
1537is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1538$c->calculate_ranks;
bfcbcecb 1539is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1540$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1541isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1542
1543=end testing
1544
4e5a7b2c 1545=cut
1546
910a0a6d 1547sub calculate_ranks {
1548 my $self = shift;
b365fbae 1549 # Save the existing ranks, in case we need to invalidate the cached SVG.
1550 my %existing_ranks;
ac4d7ac2 1551 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1552
1553 # Do the rankings based on the relationship equivalence graph, starting
1554 # with the start node.
56772e8c 1555 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1556
910a0a6d 1557 # Transfer our rankings from the topological graph to the real one.
1558 foreach my $r ( $self->readings ) {
cecbe56d 1559 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 1560 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1561 } else {
63778331 1562 # Die. Find the last rank we calculated.
359944f7 1563 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1564 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1565 $self->readings;
1566 my $last = pop @all_defined;
1567 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1568 }
de51424a 1569 }
bfcbcecb 1570 # Do we need to invalidate the cached data?
1571 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1572 foreach my $r ( $self->readings ) {
7c293912 1573 next if defined( $existing_ranks{$r} )
1574 && $existing_ranks{$r} == $r->rank;
c1915ab9 1575 # Something has changed, so clear the cache
bfcbcecb 1576 $self->_clear_cache;
c1915ab9 1577 # ...and recalculate the common readings.
1578 $self->calculate_common_readings();
b365fbae 1579 last;
1580 }
1581 }
c1915ab9 1582 # The graph calculation information is now up to date.
1583 $self->_graphcalc_done(1);
8e1394aa 1584}
3a1f2523 1585
c1915ab9 1586sub _clear_cache {
1587 my $self = shift;
1588 $self->wipe_svg if $self->has_cached_svg;
1589 $self->wipe_table if $self->has_cached_table;
1590}
1591
1592
4e5a7b2c 1593=head2 flatten_ranks
1594
1595A convenience method for parsing collation data. Searches the graph for readings
1596with the same text at the same rank, and merges any that are found.
1597
1598=cut
1599
0e476982 1600sub flatten_ranks {
1601 my $self = shift;
1602 my %unique_rank_rdg;
bf6e338d 1603 my $changed;
0e476982 1604 foreach my $rdg ( $self->readings ) {
1605 next unless $rdg->has_rank;
1606 my $key = $rdg->rank . "||" . $rdg->text;
1607 if( exists $unique_rank_rdg{$key} ) {
07e6765f 1608 # Make sure they don't have different grammatical forms
1609 my $ur = $unique_rank_rdg{$key};
1610 if( $rdg->disambiguated && $ur->disambiguated ) {
1611 my $rform = join( '//', map { $_->form->to_string } $rdg->lexemes );
1612 my $uform = join( '//', map { $_->form->to_string } $ur->lexemes );
1613 next unless $rform eq $uform;
1614 } elsif( $rdg->disambiguated xor $ur->disambiguated ) {
1615 next;
1616 }
0e476982 1617 # Combine!
cecbe56d 1618 #print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1619 $changed = 1;
0e476982 1620 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1621 # TODO see if this now makes a common point.
0e476982 1622 } else {
1623 $unique_rank_rdg{$key} = $rdg;
1624 }
1625 }
bf6e338d 1626 # If we merged readings, the ranks are still fine but the alignment
1627 # table is wrong. Wipe it.
1628 $self->wipe_table() if $changed;
0e476982 1629}
4633f9e4 1630
1631
d4b75f44 1632=head2 calculate_common_readings
1633
1634Goes through the graph identifying the readings that appear in every witness
1635(apart from those with lacunae at that spot.) Marks them as common and returns
1636the list.
1637
1638=begin testing
1639
1640use Text::Tradition;
1641
1642my $cxfile = 't/data/Collatex-16.xml';
1643my $t = Text::Tradition->new(
1644 'name' => 'inline',
1645 'input' => 'CollateX',
1646 'file' => $cxfile,
1647 );
1648my $c = $t->collation;
1649
1650my @common = $c->calculate_common_readings();
1651is( scalar @common, 8, "Found correct number of common readings" );
1652my @marked = sort $c->common_readings();
1653is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1654my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1655is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1656
1657=end testing
1658
1659=cut
1660
1661sub calculate_common_readings {
1662 my $self = shift;
1663 my @common;
c1915ab9 1664 map { $_->is_common( 0 ) } $self->readings;
1665 # Implicitly calls calculate_ranks
1dd07bda 1666 my $table = $self->alignment_table;
d4b75f44 1667 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1668 my @row = map { $_->{'tokens'}->[$idx]
1669 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1670 @{$table->{'alignment'}};
d4b75f44 1671 my %hash;
1672 foreach my $r ( @row ) {
1673 if( $r ) {
1674 $hash{$r->id} = $r unless $r->is_meta;
1675 } else {
1676 $hash{'UNDEF'} = $r;
1677 }
1678 }
1679 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1680 my( $r ) = values %hash;
1681 $r->is_common( 1 );
1682 push( @common, $r );
1683 }
1684 }
1685 return @common;
1686}
1687
861c3e27 1688=head2 text_from_paths
1689
1690Calculate the text array for all witnesses from the path, for later consistency
1691checking. Only to be used if there is no non-graph-based way to know the
1692original texts.
1693
1694=cut
1695
1696sub text_from_paths {
1697 my $self = shift;
1698 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1699 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1700 my @text;
1701 foreach my $r ( @readings ) {
1702 next if $r->is_meta;
1703 push( @text, $r->text );
1704 }
861c3e27 1705 $wit->text( \@text );
1706 if( $wit->is_layered ) {
5164a6f0 1707 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1708 $wit->sigil.$self->ac_label );
1709 my @uctext;
1710 foreach my $r ( @ucrdgs ) {
1711 next if $r->is_meta;
1712 push( @uctext, $r->text );
1713 }
1714 $wit->layertext( \@uctext );
861c3e27 1715 }
1716 }
1717}
0e476982 1718
4e5a7b2c 1719=head1 UTILITY FUNCTIONS
1720
1721=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1722
4e5a7b2c 1723Find the last reading that occurs in sequence before both the given readings.
414cc046 1724At the very least this should be $self->start.
4e5a7b2c 1725
1726=head2 common_successor( $reading_a, $reading_b )
1727
1728Find the first reading that occurs in sequence after both the given readings.
414cc046 1729At the very least this should be $self->end.
4e5a7b2c 1730
22222af9 1731=begin testing
1732
1733use Text::Tradition;
1734
1735my $cxfile = 't/data/Collatex-16.xml';
1736my $t = Text::Tradition->new(
1737 'name' => 'inline',
1738 'input' => 'CollateX',
1739 'file' => $cxfile,
1740 );
1741my $c = $t->collation;
1742
679f17e1 1743is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1744 'n20', "Found correct common predecessor" );
679f17e1 1745is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1746 '__END__', "Found correct common successor" );
22222af9 1747
4e5a7b2c 1748is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1749 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1750is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1751 '__END__', "Found correct common successor for readings on same path" );
22222af9 1752
1753=end testing
1754
1755=cut
1756
1757## Return the closest reading that is a predecessor of both the given readings.
1758sub common_predecessor {
1759 my $self = shift;
4e5a7b2c 1760 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1761 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1762}
1763
1764sub common_successor {
1765 my $self = shift;
4e5a7b2c 1766 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1767 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1768}
1769
414cc046 1770
1771# TODO think about how to do this without ranks...
027d819c 1772sub _common_in_path {
22222af9 1773 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1774 my $iter = $self->end->rank;
22222af9 1775 my @candidates;
414cc046 1776 my @last_r1 = ( $r1 );
1777 my @last_r2 = ( $r2 );
1778 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1779 my %all_seen;
414cc046 1780 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1781 while( !@candidates ) {
414cc046 1782 last unless $iter--; # Avoid looping infinitely
1783 # Iterate separately down the graph from r1 and r2
1784 my( @new_lc1, @new_lc2 );
1785 foreach my $lc ( @last_r1 ) {
1786 foreach my $p ( $lc->$dir ) {
1787 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1788 # print STDERR "Path candidate $p from $lc\n";
1789 push( @candidates, $p );
002e3600 1790 } elsif( !$all_seen{$p->id} ) {
414cc046 1791 $all_seen{$p->id} = 'r1';
1792 push( @new_lc1, $p );
1793 }
1794 }
1795 }
1796 foreach my $lc ( @last_r2 ) {
22222af9 1797 foreach my $p ( $lc->$dir ) {
414cc046 1798 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1799 # print STDERR "Path candidate $p from $lc\n";
22222af9 1800 push( @candidates, $p );
002e3600 1801 } elsif( !$all_seen{$p->id} ) {
414cc046 1802 $all_seen{$p->id} = 'r2';
1803 push( @new_lc2, $p );
22222af9 1804 }
1805 }
1806 }
414cc046 1807 @last_r1 = @new_lc1;
1808 @last_r2 = @new_lc2;
22222af9 1809 }
1810 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1811 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1812}
1813
63778331 1814sub throw {
1815 Text::Tradition::Error->throw(
1816 'ident' => 'Collation error',
1817 'message' => $_[0],
1818 );
1819}
1820
dd3b58b0 1821no Moose;
1822__PACKAGE__->meta->make_immutable;
e867486f 1823
027d819c 1824=head1 LICENSE
e867486f 1825
027d819c 1826This package is free software and is provided "as is" without express
1827or implied warranty. You can redistribute it and/or modify it under
1828the same terms as Perl itself.
e867486f 1829
027d819c 1830=head1 AUTHOR
e867486f 1831
027d819c 1832Tara L Andrews E<lt>aurum@cpan.orgE<gt>