naive serialization of lexems in GraphML
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 277 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
278 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 279}
784877d9 280
3a2ebbf4 281### Reading construct/destruct functions
282
283sub add_reading {
284 my( $self, $reading ) = @_;
285 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286 my %args = %$reading;
6ad2ce78 287 if( $self->tradition->has_language && !exists $args{'language'} ) {
288 $args{'language'} = $self->tradition->language;
289 }
3a2ebbf4 290 $reading = Text::Tradition::Collation::Reading->new(
291 'collation' => $self,
292 %args );
293 }
294 # First check to see if a reading with this ID exists.
295 if( $self->reading( $reading->id ) ) {
63778331 296 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 297 }
c1915ab9 298 $self->_graphcalc_done(0);
3a2ebbf4 299 $self->_add_reading( $reading->id => $reading );
300 # Once the reading has been added, put it in both graphs.
301 $self->sequence->add_vertex( $reading->id );
22222af9 302 $self->relations->add_reading( $reading->id );
3a2ebbf4 303 return $reading;
eca16057 304};
305
3a2ebbf4 306around del_reading => sub {
307 my $orig = shift;
308 my $self = shift;
309 my $arg = shift;
310
311 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
312 $arg = $arg->id;
313 }
3a2ebbf4 314 # Remove the reading from the graphs.
c1915ab9 315 $self->_graphcalc_done(0);
4e483aa5 316 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 317 $self->sequence->delete_vertex( $arg );
22222af9 318 $self->relations->delete_reading( $arg );
3a2ebbf4 319
320 # Carry on.
321 $self->$orig( $arg );
322};
7854e12e 323
4e483aa5 324=begin testing
325
326use Text::Tradition;
327
328my $cxfile = 't/data/Collatex-16.xml';
329my $t = Text::Tradition->new(
330 'name' => 'inline',
331 'input' => 'CollateX',
332 'file' => $cxfile,
333 );
334my $c = $t->collation;
335
336my $rno = scalar $c->readings;
337# Split n21 for testing purposes
338my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
339my $old_r = $c->reading( 'n21' );
340$old_r->alter_text( 'to' );
341$c->del_path( 'n20', 'n21', 'A' );
342$c->add_path( 'n20', 'n21p0', 'A' );
343$c->add_path( 'n21p0', 'n21', 'A' );
344$c->flatten_ranks();
345ok( $c->reading( 'n21p0' ), "New reading exists" );
346is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
347
679f17e1 348# Combine n3 and n4 ( with his )
4e483aa5 349$c->merge_readings( 'n3', 'n4', 1 );
350ok( !$c->reading('n4'), "Reading n4 is gone" );
351is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
352
679f17e1 353# Collapse n9 and n10 ( rood / root )
354$c->merge_readings( 'n9', 'n10' );
355ok( !$c->reading('n10'), "Reading n10 is gone" );
356is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 357
358# Combine n21 and n21p0
359my $remaining = $c->reading('n21');
360$remaining ||= $c->reading('n22'); # one of these should still exist
361$c->merge_readings( 'n21p0', $remaining, 1 );
362ok( !$c->reading('n21'), "Reading $remaining is gone" );
363is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
364
365=end testing
366
367=cut
7854e12e 368
3a2ebbf4 369sub merge_readings {
370 my $self = shift;
371
372 # We only need the IDs for adding paths to the graph, not the reading
373 # objects themselves.
4e483aa5 374 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 375 $self->_graphcalc_done(0);
3a2ebbf4 376
377 # The kept reading should inherit the paths and the relationships
378 # of the deleted reading.
379 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
380 my @vector = ( $kept );
381 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
382 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 383 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 384 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
385 $self->sequence->add_edge( @vector );
386 my $fwits = $self->sequence->get_edge_attributes( @vector );
387 @wits{keys %$fwits} = values %$fwits;
388 $self->sequence->set_edge_attributes( @vector, \%wits );
389 }
22222af9 390 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 391
392 # Do the deletion deed.
4e483aa5 393 if( $combine ) {
49d4f2ac 394 my $kept_obj = $self->reading( $kept );
4e483aa5 395 my $del_obj = $self->reading( $deleted );
396 my $joinstr = $combine_char;
397 unless( defined $joinstr ) {
398 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
399 $joinstr = $self->wordsep unless defined $joinstr;
400 }
401 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 402 }
3a2ebbf4 403 $self->del_reading( $deleted );
404}
7854e12e 405
3265b0ce 406
3a2ebbf4 407# Helper function for manipulating the graph.
408sub _stringify_args {
4e483aa5 409 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 410 $first = $first->id
411 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
412 $second = $second->id
413 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 414 return( $first, $second, @args );
3a2ebbf4 415}
df6d9812 416
4e5a7b2c 417# Helper function for manipulating the graph.
418sub _objectify_args {
419 my( $self, $first, $second, $arg ) = @_;
420 $first = $self->reading( $first )
421 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
422 $second = $self->reading( $second )
423 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
424 return( $first, $second, $arg );
425}
3a2ebbf4 426### Path logic
427
428sub add_path {
429 my $self = shift;
430
431 # We only need the IDs for adding paths to the graph, not the reading
432 # objects themselves.
433 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
434
c1915ab9 435 $self->_graphcalc_done(0);
3a2ebbf4 436 # Connect the readings
359944f7 437 unless( $self->sequence->has_edge( $source, $target ) ) {
438 $self->sequence->add_edge( $source, $target );
439 $self->relations->add_equivalence_edge( $source, $target );
440 }
3a2ebbf4 441 # Note the witness in question
442 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 443}
b15511bf 444
3a2ebbf4 445sub del_path {
446 my $self = shift;
49d4f2ac 447 my @args;
448 if( ref( $_[0] ) eq 'ARRAY' ) {
449 my $e = shift @_;
450 @args = ( @$e, @_ );
451 } else {
452 @args = @_;
453 }
3a2ebbf4 454
455 # We only need the IDs for adding paths to the graph, not the reading
456 # objects themselves.
49d4f2ac 457 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 458
c1915ab9 459 $self->_graphcalc_done(0);
3a2ebbf4 460 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 461 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 462 }
463 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
464 $self->sequence->delete_edge( $source, $target );
359944f7 465 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 466 }
784877d9 467}
468
3a2ebbf4 469
15d2d3df 470# Extra graph-alike utility
471sub has_path {
3a2ebbf4 472 my $self = shift;
473 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
474 return undef unless $self->sequence->has_edge( $source, $target );
475 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 476}
477
4e5a7b2c 478=head2 clear_witness( @sigil_list )
3a2ebbf4 479
4e5a7b2c 480Clear the given witnesses out of the collation entirely, removing references
481to them in paths, and removing readings that belong only to them. Should only
482be called via $tradition->del_witness.
3a2ebbf4 483
484=cut
485
4e5a7b2c 486sub clear_witness {
487 my( $self, @sigils ) = @_;
488
c1915ab9 489 $self->_graphcalc_done(0);
4e5a7b2c 490 # Clear the witness(es) out of the paths
491 foreach my $e ( $self->paths ) {
492 foreach my $sig ( @sigils ) {
493 $self->del_path( $e, $sig );
494 }
495 }
496
497 # Clear out the newly unused readings
498 foreach my $r ( $self->readings ) {
499 unless( $self->reading_witnesses( $r ) ) {
500 $self->del_reading( $r );
501 }
502 }
503}
3a2ebbf4 504
505sub add_relationship {
506 my $self = shift;
22222af9 507 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 508 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 509 $self->_graphcalc_done(0);
63778331 510 return @vectors;
22222af9 511}
ef9d481f 512
ca6e6095 513around qw/ get_relationship del_relationship / => sub {
514 my $orig = shift;
515 my $self = shift;
516 my @args = @_;
517 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
518 @args = @{$_[0]};
519 }
520 my( $source, $target ) = $self->_stringify_args( @args );
521 $self->$orig( $source, $target );
522};
523
22222af9 524=head2 reading_witnesses( $reading )
910a0a6d 525
22222af9 526Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 527
22222af9 528=cut
1d310495 529
1d310495 530sub reading_witnesses {
531 my( $self, $reading ) = @_;
532 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 533 # arbitrarily chosen "incoming". Thus, special-case the start node.
534 if( $reading eq $self->start ) {
535 return map { $_->sigil } $self->tradition->witnesses;
536 }
1d310495 537 my %all_witnesses;
538 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
539 my $wits = $self->sequence->get_edge_attributes( @$e );
540 @all_witnesses{ keys %$wits } = 1;
541 }
c12bb878 542 my $acstr = $self->ac_label;
543 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
544 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
545 }
1d310495 546 return keys %all_witnesses;
910a0a6d 547}
548
4e5a7b2c 549=head1 OUTPUT METHODS
8e1394aa 550
0ecb975c 551=head2 as_svg( \%options )
8e1394aa 552
0068967c 553Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 554See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 555
556=cut
557
558sub as_svg {
0ecb975c 559 my( $self, $opts ) = @_;
bfcbcecb 560 throw( "Need GraphViz installed to output SVG" )
561 unless File::Which::which( 'dot' );
e247aad1 562 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
5f082716 563 $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
e247aad1 564 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 565 my @cmd = qw/dot -Tsvg/;
566 my( $svg, $err );
567 my $dotfile = File::Temp->new();
568 ## USE FOR DEBUGGING
569 # $dotfile->unlink_on_destroy(0);
570 binmode $dotfile, ':utf8';
571 print $dotfile $self->as_dot( $opts );
572 push( @cmd, $dotfile->filename );
573 run( \@cmd, ">", binary(), \$svg );
e247aad1 574 $svg = decode_utf8( $svg );
575 $self->cached_svg( $svg ) unless $want_subgraph;
576 return $svg;
577 } else {
578 return $self->cached_svg;
b365fbae 579 }
8e1394aa 580}
581
b22576c6 582
0ecb975c 583=head2 as_dot( \%options )
b22576c6 584
0ecb975c 585Returns a string that is the collation graph expressed in dot
586(i.e. GraphViz) format. Options include:
b22576c6 587
0ecb975c 588=over 4
b22576c6 589
0ecb975c 590=item * from
b22576c6 591
0ecb975c 592=item * to
df6d9812 593
0ecb975c 594=item * color_common
595
596=back
df6d9812 597
598=cut
599
600sub as_dot {
0ecb975c 601 my( $self, $opts ) = @_;
602 my $startrank = $opts->{'from'} if $opts;
603 my $endrank = $opts->{'to'} if $opts;
604 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 605 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
606 && $self->end->rank > 100;
6648ee3d 607 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 608
b22576c6 609 # Check the arguments
610 if( $startrank ) {
611 return if $endrank && $startrank > $endrank;
612 return if $startrank > $self->end->rank;
613 }
614 if( defined $endrank ) {
615 return if $endrank < 0;
f1b3b33a 616 $endrank = undef if $endrank == $self->end->rank;
b22576c6 617 }
618
67da8d6c 619 my $graph_name = $self->tradition->name;
620 $graph_name =~ s/[^\w\s]//g;
621 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 622
623 my %graph_attrs = (
624 'rankdir' => 'LR',
625 'bgcolor' => 'none',
626 );
627 my %node_attrs = (
b8990398 628 'fontsize' => 14,
f13b5582 629 'fillcolor' => 'white',
630 'style' => 'filled',
631 'shape' => 'ellipse'
632 );
633 my %edge_attrs = (
634 'arrowhead' => 'open',
635 'color' => '#000000',
636 'fontcolor' => '#000000',
637 );
638
67da8d6c 639 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 640 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
641 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 642
b22576c6 643 # Output substitute start/end readings if necessary
644 if( $startrank ) {
645 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
646 }
647 if( $endrank ) {
648 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
649 }
b365fbae 650 if( $STRAIGHTENHACK ) {
651 ## HACK part 1
6648ee3d 652 my $startlabel = $startrank ? 'SUBSTART' : 'START';
653 $dot .= "\tsubgraph { rank=same \"#$startlabel#\" \"#SILENT#\" }\n";
b365fbae 654 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
655 }
b22576c6 656 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 657 # Sort the readings by rank if we have ranks; this speeds layout.
658 my @all_readings = $self->end->has_rank
659 ? sort { $a->rank <=> $b->rank } $self->readings
660 : $self->readings;
4633f9e4 661 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 662 foreach my $reading ( @all_readings ) {
b22576c6 663 # Only output readings within our rank range.
664 next if $startrank && $reading->rank < $startrank;
665 next if $endrank && $reading->rank > $endrank;
666 $used{$reading->id} = 1;
910a0a6d 667 # Need not output nodes without separate labels
3a2ebbf4 668 next if $reading->id eq $reading->text;
d4b75f44 669 my $rattrs;
30f0df34 670 my $label = $reading->text;
629e27b0 671 $label .= '-' if $reading->join_next;
672 $label = "-$label" if $reading->join_prior;
8f9cab7b 673 $label =~ s/\"/\\\"/g;
d4b75f44 674 $rattrs->{'label'} = $label;
0ecb975c 675 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 676 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 677 }
3a2ebbf4 678
30ddc24c 679 # Add the real edges. Need to weight one edge per rank jump, in a
680 # continuous line.
b365fbae 681 # my $weighted = $self->_add_edge_weights;
b22576c6 682 my @edges = $self->paths;
3bdec618 683 my( %substart, %subend );
b22576c6 684 foreach my $edge ( @edges ) {
685 # Do we need to output this edge?
508fd430 686 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 687 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 688 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 689
b22576c6 690 # Account for the rank gap if necessary
30ddc24c 691 my $rank0 = $self->reading( $edge->[0] )->rank
692 if $self->reading( $edge->[0] )->has_rank;
693 my $rank1 = $self->reading( $edge->[1] )->rank
694 if $self->reading( $edge->[1] )->has_rank;
695 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
696 $variables->{'minlen'} = $rank1 - $rank0;
697 }
698
699 # Add the calculated edge weights
b365fbae 700 # if( exists $weighted->{$edge->[0]}
e247aad1 701 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
702 # # $variables->{'color'} = 'red';
703 # $variables->{'weight'} = 3.0;
704 # }
30ddc24c 705
508fd430 706 # EXPERIMENTAL: make edge width reflect no. of witnesses
707 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
708 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
709
f13b5582 710 my $varopts = _dot_attr_string( $variables );
711 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
712 $edge->[0], $edge->[1], $varopts );
3bdec618 713 } elsif( $used{$edge->[0]} ) {
714 $subend{$edge->[0]} = 1;
715 } elsif( $used{$edge->[1]} ) {
716 $substart{$edge->[1]} = 1;
b22576c6 717 }
df6d9812 718 }
3bdec618 719 # Add substitute start and end edges if necessary
720 foreach my $node ( keys %substart ) {
027d819c 721 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 722 my $variables = { %edge_attrs, 'label' => $witstr };
723 my $varopts = _dot_attr_string( $variables );
724 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 725 }
726 foreach my $node ( keys %subend ) {
027d819c 727 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 728 my $variables = { %edge_attrs, 'label' => $witstr };
729 my $varopts = _dot_attr_string( $variables );
730 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 731 }
b365fbae 732 # HACK part 2
733 if( $STRAIGHTENHACK ) {
6648ee3d 734 my $endlabel = $endrank ? 'SUBEND' : 'END';
735 $dot .= "\t\"#$endlabel#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 736 }
30ddc24c 737
df6d9812 738 $dot .= "}\n";
739 return $dot;
740}
741
f13b5582 742sub _dot_attr_string {
743 my( $hash ) = @_;
744 my @attrs;
745 foreach my $k ( sort keys %$hash ) {
746 my $v = $hash->{$k};
747 push( @attrs, $k.'="'.$v.'"' );
748 }
749 return( '[ ' . join( ', ', @attrs ) . ' ]' );
750}
751
30ddc24c 752sub _add_edge_weights {
753 my $self = shift;
754 # Walk the graph from START to END, choosing the successor node with
755 # the largest number of witness paths each time.
756 my $weighted = {};
757 my $curr = $self->start->id;
008fc8a6 758 my $ranked = $self->end->has_rank;
30ddc24c 759 while( $curr ne $self->end->id ) {
008fc8a6 760 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 761 my @succ = sort { $self->path_witnesses( $curr, $a )
762 <=> $self->path_witnesses( $curr, $b ) }
763 $self->sequence->successors( $curr );
764 my $next = pop @succ;
008fc8a6 765 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 766 # Try to avoid lacunae in the weighted path.
008fc8a6 767 while( @succ &&
768 ( $self->reading( $next )->is_lacuna ||
769 $nextrank - $rank > 1 ) ){
30ddc24c 770 $next = pop @succ;
771 }
772 $weighted->{$curr} = $next;
773 $curr = $next;
774 }
775 return $weighted;
776}
777
027d819c 778=head2 path_witnesses( $edge )
779
780Returns the list of sigils whose witnesses are associated with the given edge.
781The edge can be passed as either an array or an arrayref of ( $source, $target ).
782
783=cut
784
3a2ebbf4 785sub path_witnesses {
786 my( $self, @edge ) = @_;
787 # If edge is an arrayref, cope.
788 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
789 my $e = shift @edge;
790 @edge = @$e;
791 }
792 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 793 return @wits;
3a2ebbf4 794}
795
7f9f05e8 796# Helper function. Make a display label for the given witnesses, showing a.c.
797# witnesses only where the main witness is not also in the list.
027d819c 798sub _path_display_label {
508fd430 799 my $self = shift;
7f9f05e8 800 my %wits;
801 map { $wits{$_} = 1 } @_;
802
803 # If an a.c. wit is listed, remove it if the main wit is also listed.
804 # Otherwise keep it for explicit listing.
805 my $aclabel = $self->ac_label;
806 my @disp_ac;
807 foreach my $w ( sort keys %wits ) {
808 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
809 if( exists $wits{$1} ) {
810 delete $wits{$w};
811 } else {
812 push( @disp_ac, $w );
813 }
814 }
815 }
816
817 # See if we are in a majority situation.
8f9cab7b 818 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 819 if( scalar keys %wits > $maj ) {
820 unshift( @disp_ac, 'majority' );
821 return join( ', ', @disp_ac );
8f9cab7b 822 } else {
7f9f05e8 823 return join( ', ', sort keys %wits );
8f9cab7b 824 }
825}
1dd07bda 826
bf6e338d 827=head2 readings_at_rank( $rank )
1dd07bda 828
bf6e338d 829Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 830
831=cut
832
bf6e338d 833sub readings_at_rank {
1dd07bda 834 my( $self, $rank ) = @_;
bf6e338d 835 my $table = $self->alignment_table;
836 # Table rank is real rank - 1.
837 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
838 my %readings;
839 foreach my $e ( @elements ) {
840 next unless ref( $e ) eq 'HASH';
841 next unless exists $e->{'t'};
842 $readings{$e->{'t'}->id} = $e->{'t'};
843 }
844 return values %readings;
1dd07bda 845}
8f9cab7b 846
4e5a7b2c 847=head2 as_graphml
8e1394aa 848
4e5a7b2c 849Returns a GraphML representation of the collation. The GraphML will contain
850two graphs. The first expresses the attributes of the readings and the witness
851paths that link them; the second expresses the relationships that link the
852readings. This is the native transfer format for a tradition.
8e1394aa 853
56eefa04 854=begin testing
855
856use Text::Tradition;
857
858my $READINGS = 311;
859my $PATHS = 361;
860
861my $datafile = 't/data/florilegium_tei_ps.xml';
862my $tradition = Text::Tradition->new( 'input' => 'TEI',
863 'name' => 'test0',
864 'file' => $datafile,
865 'linear' => 1 );
866
867ok( $tradition, "Got a tradition object" );
868is( scalar $tradition->witnesses, 13, "Found all witnesses" );
869ok( $tradition->collation, "Tradition has a collation" );
870
871my $c = $tradition->collation;
872is( scalar $c->readings, $READINGS, "Collation has all readings" );
873is( scalar $c->paths, $PATHS, "Collation has all paths" );
874is( scalar $c->relationships, 0, "Collation has all relationships" );
875
876# Add a few relationships
877$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
878$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
879$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
880
881# Now write it to GraphML and parse it again.
882
883my $graphml = $c->as_graphml;
884my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
885is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
886is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
887is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
888
889=end testing
890
8e1394aa 891=cut
892
893sub as_graphml {
a30ca502 894 my( $self, $options ) = @_;
3d14b48e 895 $self->calculate_ranks unless $self->_graphcalc_done;
896
a30ca502 897 my $start = $options->{'from'}
898 ? $self->reading( $options->{'from'} ) : $self->start;
899 my $end = $options->{'to'}
900 ? $self->reading( $options->{'to'} ) : $self->end;
901 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
902 throw( 'Start node must be before end node' );
903 }
904 # The readings need to be ranked for this to work.
905 $start = $self->start unless $start->has_rank;
906 $end = $self->end unless $end->has_rank;
414cc046 907 my $rankoffset = 0;
908 unless( $start eq $self->start ) {
909 $rankoffset = $start->rank - 1;
910 }
a30ca502 911 my %use_readings;
912
8e1394aa 913 # Some namespaces
914 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
915 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
916 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 917 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 918
919 # Create the document and root node
920 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
921 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
922 $graphml->setDocumentElement( $root );
923 $root->setNamespace( $xsi_ns, 'xsi', 0 );
924 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 925
926 # List of attribute types to save on our objects and their corresponding
927 # GraphML types
928 my %save_types = (
929 'Str' => 'string',
930 'Int' => 'int',
931 'Bool' => 'boolean',
932 'RelationshipType' => 'string',
933 'RelationshipScope' => 'string',
934 );
935
936 # List of attribute names *not* to save on our objects.
937 # We will also not save any attribute beginning with _.
938 my %skipsave;
939 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 940
bbd064a9 941 # Add the data keys for the graph. Include an extra key 'version' for the
942 # GraphML output version.
e309421a 943 my %graph_data_keys;
944 my $gdi = 0;
bbd064a9 945 my %graph_attributes = ( 'version' => 'string' );
946 # Graph attributes include those of Tradition and those of Collation.
947 my %gattr_from;
948 my $tmeta = $self->tradition->meta;
949 my $cmeta = $self->meta;
950 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
951 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
952 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
953 next if $attr->name =~ /^_/;
954 next if $skipsave{$attr->name};
955 next unless $save_types{$attr->type_constraint->name};
956 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
957 }
958
959 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 960 $graph_data_keys{$datum} = 'dg'.$gdi++;
961 my $key = $root->addNewChild( $graphml_ns, 'key' );
962 $key->setAttribute( 'attr.name', $datum );
bbd064a9 963 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 964 $key->setAttribute( 'for', 'graph' );
965 $key->setAttribute( 'id', $graph_data_keys{$datum} );
966 }
f6066bac 967
bbd064a9 968 # Add the data keys for reading nodes
969 my %reading_attributes;
970 my $rmeta = Text::Tradition::Collation::Reading->meta;
971 foreach my $attr( $rmeta->get_all_attributes ) {
972 next if $attr->name =~ /^_/;
973 next if $skipsave{$attr->name};
974 next unless $save_types{$attr->type_constraint->name};
975 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
976 }
7cd9f181 977 # Extra custom key for the reading morphology
978 $reading_attributes{'lexemes'} = 'string';
979
ef9d481f 980 my %node_data_keys;
981 my $ndi = 0;
bbd064a9 982 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 983 $node_data_keys{$datum} = 'dn'.$ndi++;
984 my $key = $root->addNewChild( $graphml_ns, 'key' );
985 $key->setAttribute( 'attr.name', $datum );
bbd064a9 986 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 987 $key->setAttribute( 'for', 'node' );
988 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 989 }
990
bbd064a9 991 # Add the data keys for edges, that is, paths and relationships. Path
992 # data does not come from a Moose class so is here manually.
ef9d481f 993 my $edi = 0;
994 my %edge_data_keys;
bbd064a9 995 my %edge_attributes = (
3a2ebbf4 996 witness => 'string', # ID/label for a path
3a2ebbf4 997 extra => 'boolean', # Path key
3a2ebbf4 998 );
bbd064a9 999 my @path_attributes = keys %edge_attributes; # track our manual additions
1000 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1001 foreach my $attr( $pmeta->get_all_attributes ) {
1002 next if $attr->name =~ /^_/;
1003 next if $skipsave{$attr->name};
1004 next unless $save_types{$attr->type_constraint->name};
1005 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1006 }
1007 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1008 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1009 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1010 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1011 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1012 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1013 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1014 }
3a2ebbf4 1015
cc31ebaa 1016 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1017 my $xmlidname = $self->tradition->name;
1018 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1019 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1020 $xmlidname = '_'.$xmlidname;
1021 }
2c669bca 1022 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1023 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1024 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1025 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1026 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1027 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1028 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1029 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1030
e309421a 1031 # Collation attribute data
bbd064a9 1032 foreach my $datum ( keys %graph_attributes ) {
1033 my $value;
1034 if( $datum eq 'version' ) {
1035 $value = '3.1';
1036 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1037 $value = $self->tradition->$datum;
1038 } else {
1039 $value = $self->$datum;
1040 }
2c669bca 1041 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1042 }
8e1394aa 1043
1044 my $node_ctr = 0;
1045 my %node_hash;
22222af9 1046 # Add our readings to the graph
3a2ebbf4 1047 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1048 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1049 ( $n->rank < $start->rank || $n->rank > $end->rank );
1050 $use_readings{$n->id} = 1;
2c669bca 1051 # Add to the main graph
1052 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1053 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1054 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1055 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1056 foreach my $d ( keys %reading_attributes ) {
255875b8 1057 my $nval = $n->$d;
7cd9f181 1058 # Custom serialization
1059 if( $d eq 'lexemes' ) {
1060 # If nval is a true value, we have lexemes so we need to
1061 # serialize them. Otherwise set nval to undef so that the
1062 # key is excluded from this reading.
1063 $nval = $nval ? $n->_serialize_lexemes : undef;
1064 }
cc31ebaa 1065 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1066 # Adjust the ranks within the subgraph.
cc31ebaa 1067 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1068 : $nval - $rankoffset;
414cc046 1069 }
255875b8 1070 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1071 if defined $nval;
1072 }
b15511bf 1073 }
1074
2c669bca 1075 # Add the path edges to the sequence graph
df6d9812 1076 my $edge_ctr = 0;
3a2ebbf4 1077 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1078 # We add an edge in the graphml for every witness in $e.
a30ca502 1079 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1080 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1081 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1082 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1083 # Skip any path from start to end; that witness is not in the subgraph.
1084 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1085 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1086 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1087 $node_hash{ $e->[0] },
1088 $node_hash{ $e->[1] } );
2c669bca 1089 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1090 $edge_el->setAttribute( 'source', $from );
1091 $edge_el->setAttribute( 'target', $to );
1092 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1093
1094 # It's a witness path, so add the witness
1095 my $base = $wit;
1096 my $key = $edge_data_keys{'witness'};
1097 # Is this an ante-corr witness?
1098 my $aclabel = $self->ac_label;
1099 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1100 # Keep the base witness
1101 $base = $1;
1102 # ...and record that this is an 'extra' reading path
1103 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1104 }
1105 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1106 }
1107 }
1108
cc31ebaa 1109 # Report the actual number of nodes and edges that went in
1110 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1111 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1112
22222af9 1113 # Add the relationship graph to the XML
bbd064a9 1114 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1115 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1116 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1117
94c00c71 1118 # Save and return the thing
1119 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1120 return $result;
df6d9812 1121}
1122
b15511bf 1123sub _add_graphml_data {
1124 my( $el, $key, $value ) = @_;
b15511bf 1125 return unless defined $value;
c9bf3dbf 1126 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1127 $data_el->setAttribute( 'key', $key );
1128 $data_el->appendText( $value );
8e1394aa 1129}
1130
4e5a7b2c 1131=head2 as_csv
910a0a6d 1132
1133Returns a CSV alignment table representation of the collation graph, one
2c669bca 1134row per witness (or witness uncorrected.)
910a0a6d 1135
1136=cut
1137
1138sub as_csv {
3a2ebbf4 1139 my( $self ) = @_;
1dd07bda 1140 my $table = $self->alignment_table;
82fa4d57 1141 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1142 my @result;
2c669bca 1143 # Make the header row
1144 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1145 push( @result, decode_utf8( $csv->string ) );
1146 # Make the rest of the rows
1147 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1148 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1149 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1150 $csv->combine( @row );
910a0a6d 1151 push( @result, decode_utf8( $csv->string ) );
1152 }
3a2ebbf4 1153 return join( "\n", @result );
910a0a6d 1154}
1155
1dd07bda 1156=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1157
566f4595 1158Return a reference to an alignment table, in a slightly enhanced CollateX
1159format which looks like this:
1160
1161 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1162 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1163 { witness => "SIG2",
4e5a7b2c 1164 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1165 ... ],
1166 length => TEXTLEN };
1167
1168If $use_refs is set to 1, the reading object is returned in the table
1169instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1170
1171If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1172keys have a true hash value will be included.
2c669bca 1173
1174=cut
9f3ba6f7 1175
1dd07bda 1176sub alignment_table {
1177 my( $self ) = @_;
c1915ab9 1178 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1179 return $self->cached_table if $self->has_cached_table;
1180
0ecb975c 1181 # Make sure we can do this
1182 throw( "Need a linear graph in order to make an alignment table" )
1183 unless $self->linear;
1184 $self->calculate_ranks unless $self->end->has_rank;
1185
2c669bca 1186 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1187 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1188 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1189 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1190 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1191 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1192 push( @{$table->{'alignment'}},
1193 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1194 if( $wit->is_layered ) {
1195 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1196 $wit->sigil.$self->ac_label );
1dd07bda 1197 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1198 push( @{$table->{'alignment'}},
1199 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1200 }
1201 }
1dd07bda 1202 $self->cached_table( $table );
1203 return $table;
910a0a6d 1204}
1205
1206sub _make_witness_row {
1dd07bda 1207 my( $path, $positions ) = @_;
910a0a6d 1208 my %char_hash;
1209 map { $char_hash{$_} = undef } @$positions;
2c669bca 1210 my $debug = 0;
910a0a6d 1211 foreach my $rdg ( @$path ) {
eca16057 1212 my $rtext = $rdg->text;
1213 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1214 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1215 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1216 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1217 }
1218 my @row = map { $char_hash{$_} } @$positions;
eca16057 1219 # Fill in lacuna markers for undef spots in the row
1220 my $last_el = shift @row;
1221 my @filled_row = ( $last_el );
1222 foreach my $el ( @row ) {
0e476982 1223 # If we are using node reference, make the lacuna node appear many times
1224 # in the table. If not, use the lacuna tag.
1dd07bda 1225 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1226 $el = $last_el;
eca16057 1227 }
1228 push( @filled_row, $el );
1229 $last_el = $el;
1230 }
1231 return @filled_row;
910a0a6d 1232}
1233
4e5a7b2c 1234=head1 NAVIGATION METHODS
910a0a6d 1235
4e5a7b2c 1236=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1237
1238Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1239with $last, for the witness given in $sigil. If a $backup sigil is
1240specified (e.g. when walking a layered witness), it will be used wherever
1241no $sigil path exists. If there is a base text reading, that will be
1242used wherever no path exists for $sigil or $backup.
e2902068 1243
1244=cut
1245
910a0a6d 1246# TODO Think about returning some lazy-eval iterator.
b0b4421a 1247# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1248
e2902068 1249sub reading_sequence {
861c3e27 1250 my( $self, $start, $end, $witness ) = @_;
e2902068 1251
930ff666 1252 $witness = $self->baselabel unless $witness;
e2902068 1253 my @readings = ( $start );
1254 my %seen;
1255 my $n = $start;
3a2ebbf4 1256 while( $n && $n->id ne $end->id ) {
1257 if( exists( $seen{$n->id} ) ) {
63778331 1258 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1259 }
3a2ebbf4 1260 $seen{$n->id} = 1;
910a0a6d 1261
861c3e27 1262 my $next = $self->next_reading( $n, $witness );
44771cf2 1263 unless( $next ) {
63778331 1264 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1265 }
910a0a6d 1266 push( @readings, $next );
1267 $n = $next;
e2902068 1268 }
1269 # Check that the last reading is our end reading.
1270 my $last = $readings[$#readings];
63778331 1271 throw( "Last reading found from " . $start->text .
1272 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1273 unless $last->id eq $end->id;
e2902068 1274
1275 return @readings;
1276}
1277
4e5a7b2c 1278=head2 next_reading( $reading, $sigil );
8e1394aa 1279
4a8828f0 1280Returns the reading that follows the given reading along the given witness
930ff666 1281path.
8e1394aa 1282
1283=cut
1284
4a8828f0 1285sub next_reading {
e2902068 1286 # Return the successor via the corresponding path.
8e1394aa 1287 my $self = shift;
3a2ebbf4 1288 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1289 return undef unless $answer;
3a2ebbf4 1290 return $self->reading( $answer );
8e1394aa 1291}
1292
4e5a7b2c 1293=head2 prior_reading( $reading, $sigil )
8e1394aa 1294
4a8828f0 1295Returns the reading that precedes the given reading along the given witness
930ff666 1296path.
8e1394aa 1297
1298=cut
1299
4a8828f0 1300sub prior_reading {
e2902068 1301 # Return the predecessor via the corresponding path.
8e1394aa 1302 my $self = shift;
3a2ebbf4 1303 my $answer = $self->_find_linked_reading( 'prior', @_ );
1304 return $self->reading( $answer );
8e1394aa 1305}
1306
4a8828f0 1307sub _find_linked_reading {
861c3e27 1308 my( $self, $direction, $node, $path ) = @_;
1309
1310 # Get a backup if we are dealing with a layered witness
1311 my $alt_path;
1312 my $aclabel = $self->ac_label;
1313 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1314 $alt_path = $1;
1315 }
1316
e2902068 1317 my @linked_paths = $direction eq 'next'
3a2ebbf4 1318 ? $self->sequence->edges_from( $node )
1319 : $self->sequence->edges_to( $node );
e2902068 1320 return undef unless scalar( @linked_paths );
8e1394aa 1321
e2902068 1322 # We have to find the linked path that contains all of the
1323 # witnesses supplied in $path.
1324 my( @path_wits, @alt_path_wits );
4e5a7b2c 1325 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1326 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1327 my $base_le;
1328 my $alt_le;
1329 foreach my $le ( @linked_paths ) {
3a2ebbf4 1330 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1331 $base_le = $le;
910a0a6d 1332 }
508fd430 1333 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1334 if( _is_within( \@path_wits, \@le_wits ) ) {
1335 # This is the right path.
1336 return $direction eq 'next' ? $le->[1] : $le->[0];
1337 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1338 $alt_le = $le;
1339 }
8e1394aa 1340 }
e2902068 1341 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1342 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1343 if $alt_le;
e2902068 1344
1345 # Got this far? Return the base path if it exists.
3a2ebbf4 1346 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1347 if $base_le;
e2902068 1348
1349 # Got this far? We have no appropriate path.
2c669bca 1350 warn "Could not find $direction node from " . $node->id
910a0a6d 1351 . " along path $path";
8e1394aa 1352 return undef;
1353}
1354
4a8828f0 1355# Some set logic.
1356sub _is_within {
1357 my( $set1, $set2 ) = @_;
7854e12e 1358 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1359 foreach my $el ( @$set1 ) {
910a0a6d 1360 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1361 }
1362 return $ret;
1363}
1364
4e5a7b2c 1365# Return the string that joins together a list of witnesses for
1366# display on a single path.
1367sub _witnesses_of_label {
1368 my( $self, $label ) = @_;
1369 my $regex = $self->wit_list_separator;
1370 my @answer = split( /\Q$regex\E/, $label );
1371 return @answer;
b0b4421a 1372}
1373
d4b75f44 1374=head2 common_readings
1375
1376Returns the list of common readings in the graph (i.e. those readings that are
1377shared by all non-lacunose witnesses.)
1378
1379=cut
1380
1381sub common_readings {
1382 my $self = shift;
1383 my @common = grep { $_->is_common } $self->readings;
1384 return @common;
1385}
1386
fae52efd 1387=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1388
1389Returns the text of a witness (plus its backup, if we are using a layer)
1390as stored in the collation. The text is returned as a string, where the
1391individual readings are joined with spaces and the meta-readings (e.g.
1392lacunae) are omitted. Optional specification of $start and $end allows
1393the generation of a subset of the witness text.
4e5a7b2c 1394
b0b4421a 1395=cut
1396
1397sub path_text {
861c3e27 1398 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1399 $start = $self->start unless $start;
1400 $end = $self->end unless $end;
861c3e27 1401 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1402 my $pathtext = '';
1403 my $last;
1404 foreach my $r ( @path ) {
6ad2ce78 1405 unless ( $r->join_prior || !$last || $last->join_next ) {
1406 $pathtext .= ' ';
1407 }
1408 $pathtext .= $r->text;
629e27b0 1409 $last = $r;
1410 }
1411 return $pathtext;
b0b4421a 1412}
4e5a7b2c 1413
1414=head1 INITIALIZATION METHODS
1415
1416These are mostly for use by parsers.
1417
1418=head2 make_witness_path( $witness )
1419
1420Link the array of readings contained in $witness->path (and in
1421$witness->uncorrected_path if it exists) into collation paths.
1422Clear out the arrays when finished.
de51424a 1423
4e5a7b2c 1424=head2 make_witness_paths
1425
1426Call make_witness_path for all witnesses in the tradition.
1427
1428=cut
930ff666 1429
7e450e44 1430# For use when a collation is constructed from a base text and an apparatus.
1431# We have the sequences of readings and just need to add path edges.
1f7aa795 1432# When we are done, clear out the witness path attributes, as they are no
1433# longer needed.
1434# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1435
6a222840 1436sub make_witness_paths {
1437 my( $self ) = @_;
910a0a6d 1438 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1439 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1440 $self->make_witness_path( $wit );
7854e12e 1441 }
7854e12e 1442}
1443
6a222840 1444sub make_witness_path {
7854e12e 1445 my( $self, $wit ) = @_;
1446 my @chain = @{$wit->path};
15d2d3df 1447 my $sig = $wit->sigil;
fae52efd 1448 # Add start and end if necessary
1449 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1450 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1451 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1452 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1453 }
1f7aa795 1454 if( $wit->is_layered ) {
d9e873d0 1455 @chain = @{$wit->uncorrected_path};
fae52efd 1456 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1457 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1458 foreach my $idx( 0 .. $#chain-1 ) {
1459 my $source = $chain[$idx];
1460 my $target = $chain[$idx+1];
1461 $self->add_path( $source, $target, $sig.$self->ac_label )
1462 unless $self->has_path( $source, $target, $sig );
1463 }
15d2d3df 1464 }
1f7aa795 1465 $wit->clear_path;
1466 $wit->clear_uncorrected_path;
e2902068 1467}
1468
4e5a7b2c 1469=head2 calculate_ranks
1470
1471Calculate the reading ranks (that is, their aligned positions relative
1472to each other) for the graph. This can only be called on linear collations.
1473
b365fbae 1474=begin testing
1475
1476use Text::Tradition;
1477
1478my $cxfile = 't/data/Collatex-16.xml';
1479my $t = Text::Tradition->new(
1480 'name' => 'inline',
1481 'input' => 'CollateX',
1482 'file' => $cxfile,
1483 );
1484my $c = $t->collation;
1485
1486# Make an svg
bfcbcecb 1487my $table = $c->alignment_table;
1488ok( $c->has_cached_table, "Alignment table was cached" );
1489is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1490$c->calculate_ranks;
bfcbcecb 1491is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1492$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1493isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1494
1495=end testing
1496
4e5a7b2c 1497=cut
1498
910a0a6d 1499sub calculate_ranks {
1500 my $self = shift;
b365fbae 1501 # Save the existing ranks, in case we need to invalidate the cached SVG.
1502 my %existing_ranks;
ac4d7ac2 1503 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1504
1505 # Do the rankings based on the relationship equivalence graph, starting
1506 # with the start node.
1507 my $topo_start = $self->equivalence( $self->start->id );
c9bf3dbf 1508 my $node_ranks = { $topo_start => 0 };
910a0a6d 1509 my @curr_origin = ( $topo_start );
1510 # A little iterative function.
1511 while( @curr_origin ) {
359944f7 1512 @curr_origin = _assign_rank( $self->equivalence_graph,
1513 $node_ranks, @curr_origin );
910a0a6d 1514 }
1515 # Transfer our rankings from the topological graph to the real one.
1516 foreach my $r ( $self->readings ) {
359944f7 1517 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1518 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1519 } else {
63778331 1520 # Die. Find the last rank we calculated.
359944f7 1521 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1522 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1523 $self->readings;
1524 my $last = pop @all_defined;
1525 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1526 }
de51424a 1527 }
bfcbcecb 1528 # Do we need to invalidate the cached data?
1529 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1530 foreach my $r ( $self->readings ) {
7c293912 1531 next if defined( $existing_ranks{$r} )
1532 && $existing_ranks{$r} == $r->rank;
c1915ab9 1533 # Something has changed, so clear the cache
bfcbcecb 1534 $self->_clear_cache;
c1915ab9 1535 # ...and recalculate the common readings.
1536 $self->calculate_common_readings();
b365fbae 1537 last;
1538 }
1539 }
c1915ab9 1540 # The graph calculation information is now up to date.
1541 $self->_graphcalc_done(1);
8e1394aa 1542}
3a1f2523 1543
910a0a6d 1544sub _assign_rank {
c9bf3dbf 1545 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1546 # Look at each of the children of @current_nodes. If all the child's
1547 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1548 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1549 # parent gets a rank.
910a0a6d 1550 my @next_nodes;
1551 foreach my $c ( @current_nodes ) {
c9bf3dbf 1552 warn "Current reading $c has no rank!"
1553 unless exists $node_ranks->{$c};
1554 # print STDERR "Looking at child of node $c, rank "
1555 # . $node_ranks->{$c} . "\n";
1556 foreach my $child ( $graph->successors( $c ) ) {
1557 next if exists $node_ranks->{$child};
910a0a6d 1558 my $highest_rank = -1;
1559 my $skip = 0;
c9bf3dbf 1560 foreach my $parent ( $graph->predecessors( $child ) ) {
1561 if( exists $node_ranks->{$parent} ) {
1562 $highest_rank = $node_ranks->{$parent}
1563 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1564 } else {
1565 $skip = 1;
1566 last;
1567 }
1568 }
1569 next if $skip;
c9bf3dbf 1570 my $c_rank = $highest_rank + 1;
1571 # print STDERR "Assigning rank $c_rank to node $child \n";
1572 $node_ranks->{$child} = $c_rank;
910a0a6d 1573 push( @next_nodes, $child );
1574 }
1575 }
1576 return @next_nodes;
4cdd82f1 1577}
910a0a6d 1578
c1915ab9 1579sub _clear_cache {
1580 my $self = shift;
1581 $self->wipe_svg if $self->has_cached_svg;
1582 $self->wipe_table if $self->has_cached_table;
1583}
1584
1585
4e5a7b2c 1586=head2 flatten_ranks
1587
1588A convenience method for parsing collation data. Searches the graph for readings
1589with the same text at the same rank, and merges any that are found.
1590
1591=cut
1592
0e476982 1593sub flatten_ranks {
1594 my $self = shift;
1595 my %unique_rank_rdg;
bf6e338d 1596 my $changed;
0e476982 1597 foreach my $rdg ( $self->readings ) {
1598 next unless $rdg->has_rank;
1599 my $key = $rdg->rank . "||" . $rdg->text;
1600 if( exists $unique_rank_rdg{$key} ) {
1601 # Combine!
56eefa04 1602 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1603 $changed = 1;
0e476982 1604 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1605 # TODO see if this now makes a common point.
0e476982 1606 } else {
1607 $unique_rank_rdg{$key} = $rdg;
1608 }
1609 }
bf6e338d 1610 # If we merged readings, the ranks are still fine but the alignment
1611 # table is wrong. Wipe it.
1612 $self->wipe_table() if $changed;
0e476982 1613}
4633f9e4 1614
1615
d4b75f44 1616=head2 calculate_common_readings
1617
1618Goes through the graph identifying the readings that appear in every witness
1619(apart from those with lacunae at that spot.) Marks them as common and returns
1620the list.
1621
1622=begin testing
1623
1624use Text::Tradition;
1625
1626my $cxfile = 't/data/Collatex-16.xml';
1627my $t = Text::Tradition->new(
1628 'name' => 'inline',
1629 'input' => 'CollateX',
1630 'file' => $cxfile,
1631 );
1632my $c = $t->collation;
1633
1634my @common = $c->calculate_common_readings();
1635is( scalar @common, 8, "Found correct number of common readings" );
1636my @marked = sort $c->common_readings();
1637is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1638my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1639is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1640
1641=end testing
1642
1643=cut
1644
1645sub calculate_common_readings {
1646 my $self = shift;
1647 my @common;
c1915ab9 1648 map { $_->is_common( 0 ) } $self->readings;
1649 # Implicitly calls calculate_ranks
1dd07bda 1650 my $table = $self->alignment_table;
d4b75f44 1651 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1652 my @row = map { $_->{'tokens'}->[$idx]
1653 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1654 @{$table->{'alignment'}};
d4b75f44 1655 my %hash;
1656 foreach my $r ( @row ) {
1657 if( $r ) {
1658 $hash{$r->id} = $r unless $r->is_meta;
1659 } else {
1660 $hash{'UNDEF'} = $r;
1661 }
1662 }
1663 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1664 my( $r ) = values %hash;
1665 $r->is_common( 1 );
1666 push( @common, $r );
1667 }
1668 }
1669 return @common;
1670}
1671
861c3e27 1672=head2 text_from_paths
1673
1674Calculate the text array for all witnesses from the path, for later consistency
1675checking. Only to be used if there is no non-graph-based way to know the
1676original texts.
1677
1678=cut
1679
1680sub text_from_paths {
1681 my $self = shift;
1682 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1683 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1684 my @text;
1685 foreach my $r ( @readings ) {
1686 next if $r->is_meta;
1687 push( @text, $r->text );
1688 }
861c3e27 1689 $wit->text( \@text );
1690 if( $wit->is_layered ) {
5164a6f0 1691 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1692 $wit->sigil.$self->ac_label );
1693 my @uctext;
1694 foreach my $r ( @ucrdgs ) {
1695 next if $r->is_meta;
1696 push( @uctext, $r->text );
1697 }
1698 $wit->layertext( \@uctext );
861c3e27 1699 }
1700 }
1701}
0e476982 1702
4e5a7b2c 1703=head1 UTILITY FUNCTIONS
1704
1705=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1706
4e5a7b2c 1707Find the last reading that occurs in sequence before both the given readings.
414cc046 1708At the very least this should be $self->start.
4e5a7b2c 1709
1710=head2 common_successor( $reading_a, $reading_b )
1711
1712Find the first reading that occurs in sequence after both the given readings.
414cc046 1713At the very least this should be $self->end.
4e5a7b2c 1714
22222af9 1715=begin testing
1716
1717use Text::Tradition;
1718
1719my $cxfile = 't/data/Collatex-16.xml';
1720my $t = Text::Tradition->new(
1721 'name' => 'inline',
1722 'input' => 'CollateX',
1723 'file' => $cxfile,
1724 );
1725my $c = $t->collation;
1726
679f17e1 1727is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1728 'n20', "Found correct common predecessor" );
679f17e1 1729is( $c->common_successor( 'n24', 'n23' )->id,
22222af9 1730 '#END#', "Found correct common successor" );
1731
4e5a7b2c 1732is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1733 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1734is( $c->common_successor( 'n21', 'n10' )->id,
22222af9 1735 '#END#', "Found correct common successor for readings on same path" );
1736
1737=end testing
1738
1739=cut
1740
1741## Return the closest reading that is a predecessor of both the given readings.
1742sub common_predecessor {
1743 my $self = shift;
4e5a7b2c 1744 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1745 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1746}
1747
1748sub common_successor {
1749 my $self = shift;
4e5a7b2c 1750 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1751 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1752}
1753
414cc046 1754
1755# TODO think about how to do this without ranks...
027d819c 1756sub _common_in_path {
22222af9 1757 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1758 my $iter = $self->end->rank;
22222af9 1759 my @candidates;
414cc046 1760 my @last_r1 = ( $r1 );
1761 my @last_r2 = ( $r2 );
1762 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1763 my %all_seen;
414cc046 1764 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1765 while( !@candidates ) {
414cc046 1766 last unless $iter--; # Avoid looping infinitely
1767 # Iterate separately down the graph from r1 and r2
1768 my( @new_lc1, @new_lc2 );
1769 foreach my $lc ( @last_r1 ) {
1770 foreach my $p ( $lc->$dir ) {
1771 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1772 # print STDERR "Path candidate $p from $lc\n";
1773 push( @candidates, $p );
002e3600 1774 } elsif( !$all_seen{$p->id} ) {
414cc046 1775 $all_seen{$p->id} = 'r1';
1776 push( @new_lc1, $p );
1777 }
1778 }
1779 }
1780 foreach my $lc ( @last_r2 ) {
22222af9 1781 foreach my $p ( $lc->$dir ) {
414cc046 1782 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1783 # print STDERR "Path candidate $p from $lc\n";
22222af9 1784 push( @candidates, $p );
002e3600 1785 } elsif( !$all_seen{$p->id} ) {
414cc046 1786 $all_seen{$p->id} = 'r2';
1787 push( @new_lc2, $p );
22222af9 1788 }
1789 }
1790 }
414cc046 1791 @last_r1 = @new_lc1;
1792 @last_r2 = @new_lc2;
22222af9 1793 }
1794 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1795 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1796}
1797
63778331 1798sub throw {
1799 Text::Tradition::Error->throw(
1800 'ident' => 'Collation error',
1801 'message' => $_[0],
1802 );
1803}
1804
dd3b58b0 1805no Moose;
1806__PACKAGE__->meta->make_immutable;
e867486f 1807
027d819c 1808=head1 LICENSE
e867486f 1809
027d819c 1810This package is free software and is provided "as is" without express
1811or implied warranty. You can redistribute it and/or modify it under
1812the same terms as Perl itself.
e867486f 1813
027d819c 1814=head1 AUTHOR
e867486f 1815
027d819c 1816Tara L Andrews E<lt>aurum@cpan.orgE<gt>