fix dot/SVG output for (sub)start/end nodes and their IDs
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 277 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
278 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 279}
784877d9 280
3a2ebbf4 281### Reading construct/destruct functions
282
283sub add_reading {
284 my( $self, $reading ) = @_;
285 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286 my %args = %$reading;
6ad2ce78 287 if( $self->tradition->has_language && !exists $args{'language'} ) {
288 $args{'language'} = $self->tradition->language;
289 }
3a2ebbf4 290 $reading = Text::Tradition::Collation::Reading->new(
291 'collation' => $self,
292 %args );
293 }
294 # First check to see if a reading with this ID exists.
295 if( $self->reading( $reading->id ) ) {
63778331 296 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 297 }
c1915ab9 298 $self->_graphcalc_done(0);
3a2ebbf4 299 $self->_add_reading( $reading->id => $reading );
300 # Once the reading has been added, put it in both graphs.
301 $self->sequence->add_vertex( $reading->id );
22222af9 302 $self->relations->add_reading( $reading->id );
3a2ebbf4 303 return $reading;
eca16057 304};
305
3a2ebbf4 306around del_reading => sub {
307 my $orig = shift;
308 my $self = shift;
309 my $arg = shift;
310
311 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
312 $arg = $arg->id;
313 }
3a2ebbf4 314 # Remove the reading from the graphs.
c1915ab9 315 $self->_graphcalc_done(0);
4e483aa5 316 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 317 $self->sequence->delete_vertex( $arg );
22222af9 318 $self->relations->delete_reading( $arg );
3a2ebbf4 319
320 # Carry on.
321 $self->$orig( $arg );
322};
7854e12e 323
4e483aa5 324=begin testing
325
326use Text::Tradition;
327
328my $cxfile = 't/data/Collatex-16.xml';
329my $t = Text::Tradition->new(
330 'name' => 'inline',
331 'input' => 'CollateX',
332 'file' => $cxfile,
333 );
334my $c = $t->collation;
335
336my $rno = scalar $c->readings;
337# Split n21 for testing purposes
338my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
339my $old_r = $c->reading( 'n21' );
340$old_r->alter_text( 'to' );
341$c->del_path( 'n20', 'n21', 'A' );
342$c->add_path( 'n20', 'n21p0', 'A' );
343$c->add_path( 'n21p0', 'n21', 'A' );
344$c->flatten_ranks();
345ok( $c->reading( 'n21p0' ), "New reading exists" );
346is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
347
679f17e1 348# Combine n3 and n4 ( with his )
4e483aa5 349$c->merge_readings( 'n3', 'n4', 1 );
350ok( !$c->reading('n4'), "Reading n4 is gone" );
351is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
352
679f17e1 353# Collapse n9 and n10 ( rood / root )
354$c->merge_readings( 'n9', 'n10' );
355ok( !$c->reading('n10'), "Reading n10 is gone" );
356is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 357
358# Combine n21 and n21p0
359my $remaining = $c->reading('n21');
360$remaining ||= $c->reading('n22'); # one of these should still exist
361$c->merge_readings( 'n21p0', $remaining, 1 );
362ok( !$c->reading('n21'), "Reading $remaining is gone" );
363is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
364
365=end testing
366
367=cut
7854e12e 368
3a2ebbf4 369sub merge_readings {
370 my $self = shift;
371
372 # We only need the IDs for adding paths to the graph, not the reading
373 # objects themselves.
4e483aa5 374 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 375 $self->_graphcalc_done(0);
3a2ebbf4 376
377 # The kept reading should inherit the paths and the relationships
378 # of the deleted reading.
379 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
380 my @vector = ( $kept );
381 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
382 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 383 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 384 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
385 $self->sequence->add_edge( @vector );
386 my $fwits = $self->sequence->get_edge_attributes( @vector );
387 @wits{keys %$fwits} = values %$fwits;
388 $self->sequence->set_edge_attributes( @vector, \%wits );
389 }
22222af9 390 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 391
392 # Do the deletion deed.
4e483aa5 393 if( $combine ) {
49d4f2ac 394 my $kept_obj = $self->reading( $kept );
4e483aa5 395 my $del_obj = $self->reading( $deleted );
396 my $joinstr = $combine_char;
397 unless( defined $joinstr ) {
398 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
399 $joinstr = $self->wordsep unless defined $joinstr;
400 }
401 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 402 }
3a2ebbf4 403 $self->del_reading( $deleted );
404}
7854e12e 405
3265b0ce 406
3a2ebbf4 407# Helper function for manipulating the graph.
408sub _stringify_args {
4e483aa5 409 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 410 $first = $first->id
411 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
412 $second = $second->id
413 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 414 return( $first, $second, @args );
3a2ebbf4 415}
df6d9812 416
4e5a7b2c 417# Helper function for manipulating the graph.
418sub _objectify_args {
419 my( $self, $first, $second, $arg ) = @_;
420 $first = $self->reading( $first )
421 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
422 $second = $self->reading( $second )
423 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
424 return( $first, $second, $arg );
425}
3a2ebbf4 426### Path logic
427
428sub add_path {
429 my $self = shift;
430
431 # We only need the IDs for adding paths to the graph, not the reading
432 # objects themselves.
433 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
434
c1915ab9 435 $self->_graphcalc_done(0);
3a2ebbf4 436 # Connect the readings
359944f7 437 unless( $self->sequence->has_edge( $source, $target ) ) {
438 $self->sequence->add_edge( $source, $target );
439 $self->relations->add_equivalence_edge( $source, $target );
440 }
3a2ebbf4 441 # Note the witness in question
442 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 443}
b15511bf 444
3a2ebbf4 445sub del_path {
446 my $self = shift;
49d4f2ac 447 my @args;
448 if( ref( $_[0] ) eq 'ARRAY' ) {
449 my $e = shift @_;
450 @args = ( @$e, @_ );
451 } else {
452 @args = @_;
453 }
3a2ebbf4 454
455 # We only need the IDs for adding paths to the graph, not the reading
456 # objects themselves.
49d4f2ac 457 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 458
c1915ab9 459 $self->_graphcalc_done(0);
3a2ebbf4 460 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 461 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 462 }
463 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
464 $self->sequence->delete_edge( $source, $target );
359944f7 465 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 466 }
784877d9 467}
468
3a2ebbf4 469
15d2d3df 470# Extra graph-alike utility
471sub has_path {
3a2ebbf4 472 my $self = shift;
473 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
474 return undef unless $self->sequence->has_edge( $source, $target );
475 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 476}
477
4e5a7b2c 478=head2 clear_witness( @sigil_list )
3a2ebbf4 479
4e5a7b2c 480Clear the given witnesses out of the collation entirely, removing references
481to them in paths, and removing readings that belong only to them. Should only
482be called via $tradition->del_witness.
3a2ebbf4 483
484=cut
485
4e5a7b2c 486sub clear_witness {
487 my( $self, @sigils ) = @_;
488
c1915ab9 489 $self->_graphcalc_done(0);
4e5a7b2c 490 # Clear the witness(es) out of the paths
491 foreach my $e ( $self->paths ) {
492 foreach my $sig ( @sigils ) {
493 $self->del_path( $e, $sig );
494 }
495 }
496
497 # Clear out the newly unused readings
498 foreach my $r ( $self->readings ) {
499 unless( $self->reading_witnesses( $r ) ) {
500 $self->del_reading( $r );
501 }
502 }
503}
3a2ebbf4 504
505sub add_relationship {
506 my $self = shift;
22222af9 507 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 508 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 509 $self->_graphcalc_done(0);
63778331 510 return @vectors;
22222af9 511}
ef9d481f 512
ca6e6095 513around qw/ get_relationship del_relationship / => sub {
514 my $orig = shift;
515 my $self = shift;
516 my @args = @_;
517 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
518 @args = @{$_[0]};
519 }
520 my( $source, $target ) = $self->_stringify_args( @args );
521 $self->$orig( $source, $target );
522};
523
22222af9 524=head2 reading_witnesses( $reading )
910a0a6d 525
22222af9 526Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 527
22222af9 528=cut
1d310495 529
1d310495 530sub reading_witnesses {
531 my( $self, $reading ) = @_;
532 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 533 # arbitrarily chosen "incoming". Thus, special-case the start node.
534 if( $reading eq $self->start ) {
535 return map { $_->sigil } $self->tradition->witnesses;
536 }
1d310495 537 my %all_witnesses;
538 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
539 my $wits = $self->sequence->get_edge_attributes( @$e );
540 @all_witnesses{ keys %$wits } = 1;
541 }
c12bb878 542 my $acstr = $self->ac_label;
543 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
544 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
545 }
1d310495 546 return keys %all_witnesses;
910a0a6d 547}
548
4e5a7b2c 549=head1 OUTPUT METHODS
8e1394aa 550
0ecb975c 551=head2 as_svg( \%options )
8e1394aa 552
0068967c 553Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 554See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 555
556=cut
557
558sub as_svg {
0ecb975c 559 my( $self, $opts ) = @_;
bfcbcecb 560 throw( "Need GraphViz installed to output SVG" )
561 unless File::Which::which( 'dot' );
e247aad1 562 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
5f082716 563 $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
e247aad1 564 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 565 my @cmd = qw/dot -Tsvg/;
566 my( $svg, $err );
567 my $dotfile = File::Temp->new();
568 ## USE FOR DEBUGGING
569 # $dotfile->unlink_on_destroy(0);
570 binmode $dotfile, ':utf8';
571 print $dotfile $self->as_dot( $opts );
572 push( @cmd, $dotfile->filename );
573 run( \@cmd, ">", binary(), \$svg );
e247aad1 574 $svg = decode_utf8( $svg );
575 $self->cached_svg( $svg ) unless $want_subgraph;
576 return $svg;
577 } else {
578 return $self->cached_svg;
b365fbae 579 }
8e1394aa 580}
581
b22576c6 582
0ecb975c 583=head2 as_dot( \%options )
b22576c6 584
0ecb975c 585Returns a string that is the collation graph expressed in dot
586(i.e. GraphViz) format. Options include:
b22576c6 587
0ecb975c 588=over 4
b22576c6 589
0ecb975c 590=item * from
b22576c6 591
0ecb975c 592=item * to
df6d9812 593
0ecb975c 594=item * color_common
595
596=back
df6d9812 597
598=cut
599
600sub as_dot {
0ecb975c 601 my( $self, $opts ) = @_;
602 my $startrank = $opts->{'from'} if $opts;
603 my $endrank = $opts->{'to'} if $opts;
604 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 605 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
606 && $self->end->rank > 100;
6648ee3d 607 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 608
b22576c6 609 # Check the arguments
610 if( $startrank ) {
611 return if $endrank && $startrank > $endrank;
612 return if $startrank > $self->end->rank;
613 }
614 if( defined $endrank ) {
615 return if $endrank < 0;
f1b3b33a 616 $endrank = undef if $endrank == $self->end->rank;
b22576c6 617 }
618
67da8d6c 619 my $graph_name = $self->tradition->name;
620 $graph_name =~ s/[^\w\s]//g;
621 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 622
623 my %graph_attrs = (
624 'rankdir' => 'LR',
625 'bgcolor' => 'none',
626 );
627 my %node_attrs = (
b8990398 628 'fontsize' => 14,
f13b5582 629 'fillcolor' => 'white',
630 'style' => 'filled',
631 'shape' => 'ellipse'
632 );
633 my %edge_attrs = (
634 'arrowhead' => 'open',
635 'color' => '#000000',
636 'fontcolor' => '#000000',
637 );
638
67da8d6c 639 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 640 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
641 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 642
b22576c6 643 # Output substitute start/end readings if necessary
644 if( $startrank ) {
43c94341 645 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 646 }
647 if( $endrank ) {
43c94341 648 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 649 }
b365fbae 650 if( $STRAIGHTENHACK ) {
651 ## HACK part 1
43c94341 652 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
653 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 654 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
655 }
b22576c6 656 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 657 # Sort the readings by rank if we have ranks; this speeds layout.
658 my @all_readings = $self->end->has_rank
659 ? sort { $a->rank <=> $b->rank } $self->readings
660 : $self->readings;
4633f9e4 661 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 662 foreach my $reading ( @all_readings ) {
b22576c6 663 # Only output readings within our rank range.
664 next if $startrank && $reading->rank < $startrank;
665 next if $endrank && $reading->rank > $endrank;
666 $used{$reading->id} = 1;
910a0a6d 667 # Need not output nodes without separate labels
3a2ebbf4 668 next if $reading->id eq $reading->text;
d4b75f44 669 my $rattrs;
30f0df34 670 my $label = $reading->text;
629e27b0 671 $label .= '-' if $reading->join_next;
672 $label = "-$label" if $reading->join_prior;
8f9cab7b 673 $label =~ s/\"/\\\"/g;
d4b75f44 674 $rattrs->{'label'} = $label;
10e4b1ac 675 $rattrs->{'id'} = $reading->id;
0ecb975c 676 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 677 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 678 }
3a2ebbf4 679
30ddc24c 680 # Add the real edges. Need to weight one edge per rank jump, in a
681 # continuous line.
b365fbae 682 # my $weighted = $self->_add_edge_weights;
b22576c6 683 my @edges = $self->paths;
3bdec618 684 my( %substart, %subend );
b22576c6 685 foreach my $edge ( @edges ) {
686 # Do we need to output this edge?
508fd430 687 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 688 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 689 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 690
b22576c6 691 # Account for the rank gap if necessary
30ddc24c 692 my $rank0 = $self->reading( $edge->[0] )->rank
693 if $self->reading( $edge->[0] )->has_rank;
694 my $rank1 = $self->reading( $edge->[1] )->rank
695 if $self->reading( $edge->[1] )->has_rank;
696 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
697 $variables->{'minlen'} = $rank1 - $rank0;
698 }
699
700 # Add the calculated edge weights
b365fbae 701 # if( exists $weighted->{$edge->[0]}
e247aad1 702 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
703 # # $variables->{'color'} = 'red';
704 # $variables->{'weight'} = 3.0;
705 # }
30ddc24c 706
508fd430 707 # EXPERIMENTAL: make edge width reflect no. of witnesses
708 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
709 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
710
f13b5582 711 my $varopts = _dot_attr_string( $variables );
712 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
713 $edge->[0], $edge->[1], $varopts );
3bdec618 714 } elsif( $used{$edge->[0]} ) {
715 $subend{$edge->[0]} = 1;
716 } elsif( $used{$edge->[1]} ) {
717 $substart{$edge->[1]} = 1;
b22576c6 718 }
df6d9812 719 }
3bdec618 720 # Add substitute start and end edges if necessary
721 foreach my $node ( keys %substart ) {
027d819c 722 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 723 my $variables = { %edge_attrs, 'label' => $witstr };
724 my $varopts = _dot_attr_string( $variables );
43c94341 725 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;";
3bdec618 726 }
727 foreach my $node ( keys %subend ) {
027d819c 728 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 729 my $variables = { %edge_attrs, 'label' => $witstr };
730 my $varopts = _dot_attr_string( $variables );
43c94341 731 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;";
3bdec618 732 }
b365fbae 733 # HACK part 2
734 if( $STRAIGHTENHACK ) {
43c94341 735 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
736 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 737 }
30ddc24c 738
df6d9812 739 $dot .= "}\n";
740 return $dot;
741}
742
f13b5582 743sub _dot_attr_string {
744 my( $hash ) = @_;
745 my @attrs;
746 foreach my $k ( sort keys %$hash ) {
747 my $v = $hash->{$k};
748 push( @attrs, $k.'="'.$v.'"' );
749 }
750 return( '[ ' . join( ', ', @attrs ) . ' ]' );
751}
752
30ddc24c 753sub _add_edge_weights {
754 my $self = shift;
755 # Walk the graph from START to END, choosing the successor node with
756 # the largest number of witness paths each time.
757 my $weighted = {};
758 my $curr = $self->start->id;
008fc8a6 759 my $ranked = $self->end->has_rank;
30ddc24c 760 while( $curr ne $self->end->id ) {
008fc8a6 761 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 762 my @succ = sort { $self->path_witnesses( $curr, $a )
763 <=> $self->path_witnesses( $curr, $b ) }
764 $self->sequence->successors( $curr );
765 my $next = pop @succ;
008fc8a6 766 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 767 # Try to avoid lacunae in the weighted path.
008fc8a6 768 while( @succ &&
769 ( $self->reading( $next )->is_lacuna ||
770 $nextrank - $rank > 1 ) ){
30ddc24c 771 $next = pop @succ;
772 }
773 $weighted->{$curr} = $next;
774 $curr = $next;
775 }
776 return $weighted;
777}
778
027d819c 779=head2 path_witnesses( $edge )
780
781Returns the list of sigils whose witnesses are associated with the given edge.
782The edge can be passed as either an array or an arrayref of ( $source, $target ).
783
784=cut
785
3a2ebbf4 786sub path_witnesses {
787 my( $self, @edge ) = @_;
788 # If edge is an arrayref, cope.
789 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
790 my $e = shift @edge;
791 @edge = @$e;
792 }
793 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 794 return @wits;
3a2ebbf4 795}
796
7f9f05e8 797# Helper function. Make a display label for the given witnesses, showing a.c.
798# witnesses only where the main witness is not also in the list.
027d819c 799sub _path_display_label {
508fd430 800 my $self = shift;
7f9f05e8 801 my %wits;
802 map { $wits{$_} = 1 } @_;
803
804 # If an a.c. wit is listed, remove it if the main wit is also listed.
805 # Otherwise keep it for explicit listing.
806 my $aclabel = $self->ac_label;
807 my @disp_ac;
808 foreach my $w ( sort keys %wits ) {
809 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
810 if( exists $wits{$1} ) {
811 delete $wits{$w};
812 } else {
813 push( @disp_ac, $w );
814 }
815 }
816 }
817
818 # See if we are in a majority situation.
8f9cab7b 819 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 820 if( scalar keys %wits > $maj ) {
821 unshift( @disp_ac, 'majority' );
822 return join( ', ', @disp_ac );
8f9cab7b 823 } else {
7f9f05e8 824 return join( ', ', sort keys %wits );
8f9cab7b 825 }
826}
1dd07bda 827
bf6e338d 828=head2 readings_at_rank( $rank )
1dd07bda 829
bf6e338d 830Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 831
832=cut
833
bf6e338d 834sub readings_at_rank {
1dd07bda 835 my( $self, $rank ) = @_;
bf6e338d 836 my $table = $self->alignment_table;
837 # Table rank is real rank - 1.
838 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
839 my %readings;
840 foreach my $e ( @elements ) {
841 next unless ref( $e ) eq 'HASH';
842 next unless exists $e->{'t'};
843 $readings{$e->{'t'}->id} = $e->{'t'};
844 }
845 return values %readings;
1dd07bda 846}
8f9cab7b 847
4e5a7b2c 848=head2 as_graphml
8e1394aa 849
4e5a7b2c 850Returns a GraphML representation of the collation. The GraphML will contain
851two graphs. The first expresses the attributes of the readings and the witness
852paths that link them; the second expresses the relationships that link the
853readings. This is the native transfer format for a tradition.
8e1394aa 854
56eefa04 855=begin testing
856
857use Text::Tradition;
858
859my $READINGS = 311;
860my $PATHS = 361;
861
862my $datafile = 't/data/florilegium_tei_ps.xml';
863my $tradition = Text::Tradition->new( 'input' => 'TEI',
864 'name' => 'test0',
865 'file' => $datafile,
866 'linear' => 1 );
867
868ok( $tradition, "Got a tradition object" );
869is( scalar $tradition->witnesses, 13, "Found all witnesses" );
870ok( $tradition->collation, "Tradition has a collation" );
871
872my $c = $tradition->collation;
873is( scalar $c->readings, $READINGS, "Collation has all readings" );
874is( scalar $c->paths, $PATHS, "Collation has all paths" );
875is( scalar $c->relationships, 0, "Collation has all relationships" );
876
877# Add a few relationships
878$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
879$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
880$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
881
882# Now write it to GraphML and parse it again.
883
884my $graphml = $c->as_graphml;
885my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
886is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
887is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
888is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
889
890=end testing
891
8e1394aa 892=cut
893
894sub as_graphml {
a30ca502 895 my( $self, $options ) = @_;
3d14b48e 896 $self->calculate_ranks unless $self->_graphcalc_done;
897
a30ca502 898 my $start = $options->{'from'}
899 ? $self->reading( $options->{'from'} ) : $self->start;
900 my $end = $options->{'to'}
901 ? $self->reading( $options->{'to'} ) : $self->end;
902 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
903 throw( 'Start node must be before end node' );
904 }
905 # The readings need to be ranked for this to work.
906 $start = $self->start unless $start->has_rank;
907 $end = $self->end unless $end->has_rank;
414cc046 908 my $rankoffset = 0;
909 unless( $start eq $self->start ) {
910 $rankoffset = $start->rank - 1;
911 }
a30ca502 912 my %use_readings;
913
8e1394aa 914 # Some namespaces
915 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
916 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
917 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 918 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 919
920 # Create the document and root node
921 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
922 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
923 $graphml->setDocumentElement( $root );
924 $root->setNamespace( $xsi_ns, 'xsi', 0 );
925 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 926
927 # List of attribute types to save on our objects and their corresponding
928 # GraphML types
929 my %save_types = (
930 'Str' => 'string',
931 'Int' => 'int',
932 'Bool' => 'boolean',
10e4b1ac 933 'ReadingID' => 'string',
bbd064a9 934 'RelationshipType' => 'string',
935 'RelationshipScope' => 'string',
936 );
937
938 # List of attribute names *not* to save on our objects.
939 # We will also not save any attribute beginning with _.
940 my %skipsave;
941 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 942
bbd064a9 943 # Add the data keys for the graph. Include an extra key 'version' for the
944 # GraphML output version.
e309421a 945 my %graph_data_keys;
946 my $gdi = 0;
bbd064a9 947 my %graph_attributes = ( 'version' => 'string' );
948 # Graph attributes include those of Tradition and those of Collation.
949 my %gattr_from;
950 my $tmeta = $self->tradition->meta;
951 my $cmeta = $self->meta;
952 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
953 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
954 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
955 next if $attr->name =~ /^_/;
956 next if $skipsave{$attr->name};
957 next unless $save_types{$attr->type_constraint->name};
958 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
959 }
960
961 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 962 $graph_data_keys{$datum} = 'dg'.$gdi++;
963 my $key = $root->addNewChild( $graphml_ns, 'key' );
964 $key->setAttribute( 'attr.name', $datum );
bbd064a9 965 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 966 $key->setAttribute( 'for', 'graph' );
967 $key->setAttribute( 'id', $graph_data_keys{$datum} );
968 }
f6066bac 969
bbd064a9 970 # Add the data keys for reading nodes
971 my %reading_attributes;
972 my $rmeta = Text::Tradition::Collation::Reading->meta;
973 foreach my $attr( $rmeta->get_all_attributes ) {
974 next if $attr->name =~ /^_/;
975 next if $skipsave{$attr->name};
976 next unless $save_types{$attr->type_constraint->name};
977 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
978 }
7cd9f181 979 # Extra custom key for the reading morphology
980 $reading_attributes{'lexemes'} = 'string';
981
ef9d481f 982 my %node_data_keys;
983 my $ndi = 0;
bbd064a9 984 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 985 $node_data_keys{$datum} = 'dn'.$ndi++;
986 my $key = $root->addNewChild( $graphml_ns, 'key' );
987 $key->setAttribute( 'attr.name', $datum );
bbd064a9 988 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 989 $key->setAttribute( 'for', 'node' );
990 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 991 }
992
bbd064a9 993 # Add the data keys for edges, that is, paths and relationships. Path
994 # data does not come from a Moose class so is here manually.
ef9d481f 995 my $edi = 0;
996 my %edge_data_keys;
bbd064a9 997 my %edge_attributes = (
3a2ebbf4 998 witness => 'string', # ID/label for a path
3a2ebbf4 999 extra => 'boolean', # Path key
3a2ebbf4 1000 );
bbd064a9 1001 my @path_attributes = keys %edge_attributes; # track our manual additions
1002 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1003 foreach my $attr( $pmeta->get_all_attributes ) {
1004 next if $attr->name =~ /^_/;
1005 next if $skipsave{$attr->name};
1006 next unless $save_types{$attr->type_constraint->name};
1007 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1008 }
1009 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1010 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1011 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1012 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1013 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1014 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1015 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1016 }
3a2ebbf4 1017
cc31ebaa 1018 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1019 my $xmlidname = $self->tradition->name;
1020 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1021 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1022 $xmlidname = '_'.$xmlidname;
1023 }
2c669bca 1024 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1025 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1026 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1027 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1028 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1029 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1030 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1031 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1032
e309421a 1033 # Collation attribute data
bbd064a9 1034 foreach my $datum ( keys %graph_attributes ) {
1035 my $value;
1036 if( $datum eq 'version' ) {
1037 $value = '3.1';
1038 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1039 $value = $self->tradition->$datum;
1040 } else {
1041 $value = $self->$datum;
1042 }
2c669bca 1043 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1044 }
8e1394aa 1045
1046 my $node_ctr = 0;
1047 my %node_hash;
22222af9 1048 # Add our readings to the graph
3a2ebbf4 1049 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1050 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1051 ( $n->rank < $start->rank || $n->rank > $end->rank );
1052 $use_readings{$n->id} = 1;
2c669bca 1053 # Add to the main graph
1054 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1055 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1056 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1057 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1058 foreach my $d ( keys %reading_attributes ) {
255875b8 1059 my $nval = $n->$d;
7cd9f181 1060 # Custom serialization
1061 if( $d eq 'lexemes' ) {
1062 # If nval is a true value, we have lexemes so we need to
1063 # serialize them. Otherwise set nval to undef so that the
1064 # key is excluded from this reading.
1065 $nval = $nval ? $n->_serialize_lexemes : undef;
1066 }
cc31ebaa 1067 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1068 # Adjust the ranks within the subgraph.
cc31ebaa 1069 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1070 : $nval - $rankoffset;
414cc046 1071 }
255875b8 1072 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1073 if defined $nval;
1074 }
b15511bf 1075 }
1076
2c669bca 1077 # Add the path edges to the sequence graph
df6d9812 1078 my $edge_ctr = 0;
3a2ebbf4 1079 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1080 # We add an edge in the graphml for every witness in $e.
a30ca502 1081 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1082 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1083 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1084 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1085 # Skip any path from start to end; that witness is not in the subgraph.
1086 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1087 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1088 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1089 $node_hash{ $e->[0] },
1090 $node_hash{ $e->[1] } );
2c669bca 1091 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1092 $edge_el->setAttribute( 'source', $from );
1093 $edge_el->setAttribute( 'target', $to );
1094 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1095
1096 # It's a witness path, so add the witness
1097 my $base = $wit;
1098 my $key = $edge_data_keys{'witness'};
1099 # Is this an ante-corr witness?
1100 my $aclabel = $self->ac_label;
1101 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1102 # Keep the base witness
1103 $base = $1;
1104 # ...and record that this is an 'extra' reading path
1105 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1106 }
1107 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1108 }
1109 }
1110
cc31ebaa 1111 # Report the actual number of nodes and edges that went in
1112 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1113 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1114
22222af9 1115 # Add the relationship graph to the XML
bbd064a9 1116 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1117 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1118 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1119
94c00c71 1120 # Save and return the thing
1121 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1122 return $result;
df6d9812 1123}
1124
b15511bf 1125sub _add_graphml_data {
1126 my( $el, $key, $value ) = @_;
b15511bf 1127 return unless defined $value;
c9bf3dbf 1128 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1129 $data_el->setAttribute( 'key', $key );
1130 $data_el->appendText( $value );
8e1394aa 1131}
1132
4e5a7b2c 1133=head2 as_csv
910a0a6d 1134
1135Returns a CSV alignment table representation of the collation graph, one
2c669bca 1136row per witness (or witness uncorrected.)
910a0a6d 1137
1138=cut
1139
1140sub as_csv {
3a2ebbf4 1141 my( $self ) = @_;
1dd07bda 1142 my $table = $self->alignment_table;
82fa4d57 1143 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1144 my @result;
2c669bca 1145 # Make the header row
1146 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1147 push( @result, decode_utf8( $csv->string ) );
1148 # Make the rest of the rows
1149 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1150 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1151 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1152 $csv->combine( @row );
910a0a6d 1153 push( @result, decode_utf8( $csv->string ) );
1154 }
3a2ebbf4 1155 return join( "\n", @result );
910a0a6d 1156}
1157
1dd07bda 1158=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1159
566f4595 1160Return a reference to an alignment table, in a slightly enhanced CollateX
1161format which looks like this:
1162
1163 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1164 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1165 { witness => "SIG2",
4e5a7b2c 1166 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1167 ... ],
1168 length => TEXTLEN };
1169
1170If $use_refs is set to 1, the reading object is returned in the table
1171instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1172
1173If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1174keys have a true hash value will be included.
2c669bca 1175
1176=cut
9f3ba6f7 1177
1dd07bda 1178sub alignment_table {
1179 my( $self ) = @_;
c1915ab9 1180 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1181 return $self->cached_table if $self->has_cached_table;
1182
0ecb975c 1183 # Make sure we can do this
1184 throw( "Need a linear graph in order to make an alignment table" )
1185 unless $self->linear;
1186 $self->calculate_ranks unless $self->end->has_rank;
1187
2c669bca 1188 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1189 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1190 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1191 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1192 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1193 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1194 push( @{$table->{'alignment'}},
1195 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1196 if( $wit->is_layered ) {
1197 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1198 $wit->sigil.$self->ac_label );
1dd07bda 1199 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1200 push( @{$table->{'alignment'}},
1201 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1202 }
1203 }
1dd07bda 1204 $self->cached_table( $table );
1205 return $table;
910a0a6d 1206}
1207
1208sub _make_witness_row {
1dd07bda 1209 my( $path, $positions ) = @_;
910a0a6d 1210 my %char_hash;
1211 map { $char_hash{$_} = undef } @$positions;
2c669bca 1212 my $debug = 0;
910a0a6d 1213 foreach my $rdg ( @$path ) {
eca16057 1214 my $rtext = $rdg->text;
1215 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1216 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1217 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1218 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1219 }
1220 my @row = map { $char_hash{$_} } @$positions;
eca16057 1221 # Fill in lacuna markers for undef spots in the row
1222 my $last_el = shift @row;
1223 my @filled_row = ( $last_el );
1224 foreach my $el ( @row ) {
0e476982 1225 # If we are using node reference, make the lacuna node appear many times
1226 # in the table. If not, use the lacuna tag.
1dd07bda 1227 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1228 $el = $last_el;
eca16057 1229 }
1230 push( @filled_row, $el );
1231 $last_el = $el;
1232 }
1233 return @filled_row;
910a0a6d 1234}
1235
4e5a7b2c 1236=head1 NAVIGATION METHODS
910a0a6d 1237
4e5a7b2c 1238=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1239
1240Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1241with $last, for the witness given in $sigil. If a $backup sigil is
1242specified (e.g. when walking a layered witness), it will be used wherever
1243no $sigil path exists. If there is a base text reading, that will be
1244used wherever no path exists for $sigil or $backup.
e2902068 1245
1246=cut
1247
910a0a6d 1248# TODO Think about returning some lazy-eval iterator.
b0b4421a 1249# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1250
e2902068 1251sub reading_sequence {
861c3e27 1252 my( $self, $start, $end, $witness ) = @_;
e2902068 1253
930ff666 1254 $witness = $self->baselabel unless $witness;
e2902068 1255 my @readings = ( $start );
1256 my %seen;
1257 my $n = $start;
3a2ebbf4 1258 while( $n && $n->id ne $end->id ) {
1259 if( exists( $seen{$n->id} ) ) {
63778331 1260 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1261 }
3a2ebbf4 1262 $seen{$n->id} = 1;
910a0a6d 1263
861c3e27 1264 my $next = $self->next_reading( $n, $witness );
44771cf2 1265 unless( $next ) {
63778331 1266 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1267 }
910a0a6d 1268 push( @readings, $next );
1269 $n = $next;
e2902068 1270 }
1271 # Check that the last reading is our end reading.
1272 my $last = $readings[$#readings];
63778331 1273 throw( "Last reading found from " . $start->text .
1274 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1275 unless $last->id eq $end->id;
e2902068 1276
1277 return @readings;
1278}
1279
4e5a7b2c 1280=head2 next_reading( $reading, $sigil );
8e1394aa 1281
4a8828f0 1282Returns the reading that follows the given reading along the given witness
930ff666 1283path.
8e1394aa 1284
1285=cut
1286
4a8828f0 1287sub next_reading {
e2902068 1288 # Return the successor via the corresponding path.
8e1394aa 1289 my $self = shift;
3a2ebbf4 1290 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1291 return undef unless $answer;
3a2ebbf4 1292 return $self->reading( $answer );
8e1394aa 1293}
1294
4e5a7b2c 1295=head2 prior_reading( $reading, $sigil )
8e1394aa 1296
4a8828f0 1297Returns the reading that precedes the given reading along the given witness
930ff666 1298path.
8e1394aa 1299
1300=cut
1301
4a8828f0 1302sub prior_reading {
e2902068 1303 # Return the predecessor via the corresponding path.
8e1394aa 1304 my $self = shift;
3a2ebbf4 1305 my $answer = $self->_find_linked_reading( 'prior', @_ );
1306 return $self->reading( $answer );
8e1394aa 1307}
1308
4a8828f0 1309sub _find_linked_reading {
861c3e27 1310 my( $self, $direction, $node, $path ) = @_;
1311
1312 # Get a backup if we are dealing with a layered witness
1313 my $alt_path;
1314 my $aclabel = $self->ac_label;
1315 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1316 $alt_path = $1;
1317 }
1318
e2902068 1319 my @linked_paths = $direction eq 'next'
3a2ebbf4 1320 ? $self->sequence->edges_from( $node )
1321 : $self->sequence->edges_to( $node );
e2902068 1322 return undef unless scalar( @linked_paths );
8e1394aa 1323
e2902068 1324 # We have to find the linked path that contains all of the
1325 # witnesses supplied in $path.
1326 my( @path_wits, @alt_path_wits );
4e5a7b2c 1327 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1328 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1329 my $base_le;
1330 my $alt_le;
1331 foreach my $le ( @linked_paths ) {
3a2ebbf4 1332 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1333 $base_le = $le;
910a0a6d 1334 }
508fd430 1335 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1336 if( _is_within( \@path_wits, \@le_wits ) ) {
1337 # This is the right path.
1338 return $direction eq 'next' ? $le->[1] : $le->[0];
1339 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1340 $alt_le = $le;
1341 }
8e1394aa 1342 }
e2902068 1343 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1344 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1345 if $alt_le;
e2902068 1346
1347 # Got this far? Return the base path if it exists.
3a2ebbf4 1348 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1349 if $base_le;
e2902068 1350
1351 # Got this far? We have no appropriate path.
2c669bca 1352 warn "Could not find $direction node from " . $node->id
910a0a6d 1353 . " along path $path";
8e1394aa 1354 return undef;
1355}
1356
4a8828f0 1357# Some set logic.
1358sub _is_within {
1359 my( $set1, $set2 ) = @_;
7854e12e 1360 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1361 foreach my $el ( @$set1 ) {
910a0a6d 1362 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1363 }
1364 return $ret;
1365}
1366
4e5a7b2c 1367# Return the string that joins together a list of witnesses for
1368# display on a single path.
1369sub _witnesses_of_label {
1370 my( $self, $label ) = @_;
1371 my $regex = $self->wit_list_separator;
1372 my @answer = split( /\Q$regex\E/, $label );
1373 return @answer;
b0b4421a 1374}
1375
d4b75f44 1376=head2 common_readings
1377
1378Returns the list of common readings in the graph (i.e. those readings that are
1379shared by all non-lacunose witnesses.)
1380
1381=cut
1382
1383sub common_readings {
1384 my $self = shift;
1385 my @common = grep { $_->is_common } $self->readings;
1386 return @common;
1387}
1388
fae52efd 1389=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1390
1391Returns the text of a witness (plus its backup, if we are using a layer)
1392as stored in the collation. The text is returned as a string, where the
1393individual readings are joined with spaces and the meta-readings (e.g.
1394lacunae) are omitted. Optional specification of $start and $end allows
1395the generation of a subset of the witness text.
4e5a7b2c 1396
b0b4421a 1397=cut
1398
1399sub path_text {
861c3e27 1400 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1401 $start = $self->start unless $start;
1402 $end = $self->end unless $end;
861c3e27 1403 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1404 my $pathtext = '';
1405 my $last;
1406 foreach my $r ( @path ) {
6ad2ce78 1407 unless ( $r->join_prior || !$last || $last->join_next ) {
1408 $pathtext .= ' ';
1409 }
1410 $pathtext .= $r->text;
629e27b0 1411 $last = $r;
1412 }
1413 return $pathtext;
b0b4421a 1414}
4e5a7b2c 1415
1416=head1 INITIALIZATION METHODS
1417
1418These are mostly for use by parsers.
1419
1420=head2 make_witness_path( $witness )
1421
1422Link the array of readings contained in $witness->path (and in
1423$witness->uncorrected_path if it exists) into collation paths.
1424Clear out the arrays when finished.
de51424a 1425
4e5a7b2c 1426=head2 make_witness_paths
1427
1428Call make_witness_path for all witnesses in the tradition.
1429
1430=cut
930ff666 1431
7e450e44 1432# For use when a collation is constructed from a base text and an apparatus.
1433# We have the sequences of readings and just need to add path edges.
1f7aa795 1434# When we are done, clear out the witness path attributes, as they are no
1435# longer needed.
1436# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1437
6a222840 1438sub make_witness_paths {
1439 my( $self ) = @_;
910a0a6d 1440 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1441 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1442 $self->make_witness_path( $wit );
7854e12e 1443 }
7854e12e 1444}
1445
6a222840 1446sub make_witness_path {
7854e12e 1447 my( $self, $wit ) = @_;
1448 my @chain = @{$wit->path};
15d2d3df 1449 my $sig = $wit->sigil;
fae52efd 1450 # Add start and end if necessary
1451 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1452 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1453 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1454 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1455 }
1f7aa795 1456 if( $wit->is_layered ) {
d9e873d0 1457 @chain = @{$wit->uncorrected_path};
fae52efd 1458 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1459 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1460 foreach my $idx( 0 .. $#chain-1 ) {
1461 my $source = $chain[$idx];
1462 my $target = $chain[$idx+1];
1463 $self->add_path( $source, $target, $sig.$self->ac_label )
1464 unless $self->has_path( $source, $target, $sig );
1465 }
15d2d3df 1466 }
1f7aa795 1467 $wit->clear_path;
1468 $wit->clear_uncorrected_path;
e2902068 1469}
1470
4e5a7b2c 1471=head2 calculate_ranks
1472
1473Calculate the reading ranks (that is, their aligned positions relative
1474to each other) for the graph. This can only be called on linear collations.
1475
b365fbae 1476=begin testing
1477
1478use Text::Tradition;
1479
1480my $cxfile = 't/data/Collatex-16.xml';
1481my $t = Text::Tradition->new(
1482 'name' => 'inline',
1483 'input' => 'CollateX',
1484 'file' => $cxfile,
1485 );
1486my $c = $t->collation;
1487
1488# Make an svg
bfcbcecb 1489my $table = $c->alignment_table;
1490ok( $c->has_cached_table, "Alignment table was cached" );
1491is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1492$c->calculate_ranks;
bfcbcecb 1493is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1494$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1495isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1496
1497=end testing
1498
4e5a7b2c 1499=cut
1500
910a0a6d 1501sub calculate_ranks {
1502 my $self = shift;
b365fbae 1503 # Save the existing ranks, in case we need to invalidate the cached SVG.
1504 my %existing_ranks;
ac4d7ac2 1505 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1506
1507 # Do the rankings based on the relationship equivalence graph, starting
1508 # with the start node.
1509 my $topo_start = $self->equivalence( $self->start->id );
c9bf3dbf 1510 my $node_ranks = { $topo_start => 0 };
910a0a6d 1511 my @curr_origin = ( $topo_start );
1512 # A little iterative function.
1513 while( @curr_origin ) {
359944f7 1514 @curr_origin = _assign_rank( $self->equivalence_graph,
1515 $node_ranks, @curr_origin );
910a0a6d 1516 }
1517 # Transfer our rankings from the topological graph to the real one.
1518 foreach my $r ( $self->readings ) {
359944f7 1519 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1520 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1521 } else {
63778331 1522 # Die. Find the last rank we calculated.
359944f7 1523 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1524 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1525 $self->readings;
1526 my $last = pop @all_defined;
1527 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1528 }
de51424a 1529 }
bfcbcecb 1530 # Do we need to invalidate the cached data?
1531 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1532 foreach my $r ( $self->readings ) {
7c293912 1533 next if defined( $existing_ranks{$r} )
1534 && $existing_ranks{$r} == $r->rank;
c1915ab9 1535 # Something has changed, so clear the cache
bfcbcecb 1536 $self->_clear_cache;
c1915ab9 1537 # ...and recalculate the common readings.
1538 $self->calculate_common_readings();
b365fbae 1539 last;
1540 }
1541 }
c1915ab9 1542 # The graph calculation information is now up to date.
1543 $self->_graphcalc_done(1);
8e1394aa 1544}
3a1f2523 1545
910a0a6d 1546sub _assign_rank {
c9bf3dbf 1547 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1548 # Look at each of the children of @current_nodes. If all the child's
1549 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1550 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1551 # parent gets a rank.
910a0a6d 1552 my @next_nodes;
1553 foreach my $c ( @current_nodes ) {
c9bf3dbf 1554 warn "Current reading $c has no rank!"
1555 unless exists $node_ranks->{$c};
1556 # print STDERR "Looking at child of node $c, rank "
1557 # . $node_ranks->{$c} . "\n";
1558 foreach my $child ( $graph->successors( $c ) ) {
1559 next if exists $node_ranks->{$child};
910a0a6d 1560 my $highest_rank = -1;
1561 my $skip = 0;
c9bf3dbf 1562 foreach my $parent ( $graph->predecessors( $child ) ) {
1563 if( exists $node_ranks->{$parent} ) {
1564 $highest_rank = $node_ranks->{$parent}
1565 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1566 } else {
1567 $skip = 1;
1568 last;
1569 }
1570 }
1571 next if $skip;
c9bf3dbf 1572 my $c_rank = $highest_rank + 1;
1573 # print STDERR "Assigning rank $c_rank to node $child \n";
1574 $node_ranks->{$child} = $c_rank;
910a0a6d 1575 push( @next_nodes, $child );
1576 }
1577 }
1578 return @next_nodes;
4cdd82f1 1579}
910a0a6d 1580
c1915ab9 1581sub _clear_cache {
1582 my $self = shift;
1583 $self->wipe_svg if $self->has_cached_svg;
1584 $self->wipe_table if $self->has_cached_table;
1585}
1586
1587
4e5a7b2c 1588=head2 flatten_ranks
1589
1590A convenience method for parsing collation data. Searches the graph for readings
1591with the same text at the same rank, and merges any that are found.
1592
1593=cut
1594
0e476982 1595sub flatten_ranks {
1596 my $self = shift;
1597 my %unique_rank_rdg;
bf6e338d 1598 my $changed;
0e476982 1599 foreach my $rdg ( $self->readings ) {
1600 next unless $rdg->has_rank;
1601 my $key = $rdg->rank . "||" . $rdg->text;
1602 if( exists $unique_rank_rdg{$key} ) {
1603 # Combine!
56eefa04 1604 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1605 $changed = 1;
0e476982 1606 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1607 # TODO see if this now makes a common point.
0e476982 1608 } else {
1609 $unique_rank_rdg{$key} = $rdg;
1610 }
1611 }
bf6e338d 1612 # If we merged readings, the ranks are still fine but the alignment
1613 # table is wrong. Wipe it.
1614 $self->wipe_table() if $changed;
0e476982 1615}
4633f9e4 1616
1617
d4b75f44 1618=head2 calculate_common_readings
1619
1620Goes through the graph identifying the readings that appear in every witness
1621(apart from those with lacunae at that spot.) Marks them as common and returns
1622the list.
1623
1624=begin testing
1625
1626use Text::Tradition;
1627
1628my $cxfile = 't/data/Collatex-16.xml';
1629my $t = Text::Tradition->new(
1630 'name' => 'inline',
1631 'input' => 'CollateX',
1632 'file' => $cxfile,
1633 );
1634my $c = $t->collation;
1635
1636my @common = $c->calculate_common_readings();
1637is( scalar @common, 8, "Found correct number of common readings" );
1638my @marked = sort $c->common_readings();
1639is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1640my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1641is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1642
1643=end testing
1644
1645=cut
1646
1647sub calculate_common_readings {
1648 my $self = shift;
1649 my @common;
c1915ab9 1650 map { $_->is_common( 0 ) } $self->readings;
1651 # Implicitly calls calculate_ranks
1dd07bda 1652 my $table = $self->alignment_table;
d4b75f44 1653 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1654 my @row = map { $_->{'tokens'}->[$idx]
1655 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1656 @{$table->{'alignment'}};
d4b75f44 1657 my %hash;
1658 foreach my $r ( @row ) {
1659 if( $r ) {
1660 $hash{$r->id} = $r unless $r->is_meta;
1661 } else {
1662 $hash{'UNDEF'} = $r;
1663 }
1664 }
1665 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1666 my( $r ) = values %hash;
1667 $r->is_common( 1 );
1668 push( @common, $r );
1669 }
1670 }
1671 return @common;
1672}
1673
861c3e27 1674=head2 text_from_paths
1675
1676Calculate the text array for all witnesses from the path, for later consistency
1677checking. Only to be used if there is no non-graph-based way to know the
1678original texts.
1679
1680=cut
1681
1682sub text_from_paths {
1683 my $self = shift;
1684 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1685 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1686 my @text;
1687 foreach my $r ( @readings ) {
1688 next if $r->is_meta;
1689 push( @text, $r->text );
1690 }
861c3e27 1691 $wit->text( \@text );
1692 if( $wit->is_layered ) {
5164a6f0 1693 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1694 $wit->sigil.$self->ac_label );
1695 my @uctext;
1696 foreach my $r ( @ucrdgs ) {
1697 next if $r->is_meta;
1698 push( @uctext, $r->text );
1699 }
1700 $wit->layertext( \@uctext );
861c3e27 1701 }
1702 }
1703}
0e476982 1704
4e5a7b2c 1705=head1 UTILITY FUNCTIONS
1706
1707=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1708
4e5a7b2c 1709Find the last reading that occurs in sequence before both the given readings.
414cc046 1710At the very least this should be $self->start.
4e5a7b2c 1711
1712=head2 common_successor( $reading_a, $reading_b )
1713
1714Find the first reading that occurs in sequence after both the given readings.
414cc046 1715At the very least this should be $self->end.
4e5a7b2c 1716
22222af9 1717=begin testing
1718
1719use Text::Tradition;
1720
1721my $cxfile = 't/data/Collatex-16.xml';
1722my $t = Text::Tradition->new(
1723 'name' => 'inline',
1724 'input' => 'CollateX',
1725 'file' => $cxfile,
1726 );
1727my $c = $t->collation;
1728
679f17e1 1729is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1730 'n20', "Found correct common predecessor" );
679f17e1 1731is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1732 '__END__', "Found correct common successor" );
22222af9 1733
4e5a7b2c 1734is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1735 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1736is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1737 '__END__', "Found correct common successor for readings on same path" );
22222af9 1738
1739=end testing
1740
1741=cut
1742
1743## Return the closest reading that is a predecessor of both the given readings.
1744sub common_predecessor {
1745 my $self = shift;
4e5a7b2c 1746 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1747 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1748}
1749
1750sub common_successor {
1751 my $self = shift;
4e5a7b2c 1752 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1753 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1754}
1755
414cc046 1756
1757# TODO think about how to do this without ranks...
027d819c 1758sub _common_in_path {
22222af9 1759 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1760 my $iter = $self->end->rank;
22222af9 1761 my @candidates;
414cc046 1762 my @last_r1 = ( $r1 );
1763 my @last_r2 = ( $r2 );
1764 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1765 my %all_seen;
414cc046 1766 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1767 while( !@candidates ) {
414cc046 1768 last unless $iter--; # Avoid looping infinitely
1769 # Iterate separately down the graph from r1 and r2
1770 my( @new_lc1, @new_lc2 );
1771 foreach my $lc ( @last_r1 ) {
1772 foreach my $p ( $lc->$dir ) {
1773 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1774 # print STDERR "Path candidate $p from $lc\n";
1775 push( @candidates, $p );
002e3600 1776 } elsif( !$all_seen{$p->id} ) {
414cc046 1777 $all_seen{$p->id} = 'r1';
1778 push( @new_lc1, $p );
1779 }
1780 }
1781 }
1782 foreach my $lc ( @last_r2 ) {
22222af9 1783 foreach my $p ( $lc->$dir ) {
414cc046 1784 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1785 # print STDERR "Path candidate $p from $lc\n";
22222af9 1786 push( @candidates, $p );
002e3600 1787 } elsif( !$all_seen{$p->id} ) {
414cc046 1788 $all_seen{$p->id} = 'r2';
1789 push( @new_lc2, $p );
22222af9 1790 }
1791 }
1792 }
414cc046 1793 @last_r1 = @new_lc1;
1794 @last_r2 = @new_lc2;
22222af9 1795 }
1796 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1797 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1798}
1799
63778331 1800sub throw {
1801 Text::Tradition::Error->throw(
1802 'ident' => 'Collation error',
1803 'message' => $_[0],
1804 );
1805}
1806
dd3b58b0 1807no Moose;
1808__PACKAGE__->meta->make_immutable;
e867486f 1809
027d819c 1810=head1 LICENSE
e867486f 1811
027d819c 1812This package is free software and is provided "as is" without express
1813or implied warranty. You can redistribute it and/or modify it under
1814the same terms as Perl itself.
e867486f 1815
027d819c 1816=head1 AUTHOR
e867486f 1817
027d819c 1818Tara L Andrews E<lt>aurum@cpan.orgE<gt>