save any defined stemmata in GraphML
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 277 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
278 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 279}
784877d9 280
3a2ebbf4 281### Reading construct/destruct functions
282
283sub add_reading {
284 my( $self, $reading ) = @_;
285 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286 my %args = %$reading;
6ad2ce78 287 if( $self->tradition->has_language && !exists $args{'language'} ) {
288 $args{'language'} = $self->tradition->language;
289 }
3a2ebbf4 290 $reading = Text::Tradition::Collation::Reading->new(
291 'collation' => $self,
292 %args );
293 }
294 # First check to see if a reading with this ID exists.
295 if( $self->reading( $reading->id ) ) {
63778331 296 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 297 }
c1915ab9 298 $self->_graphcalc_done(0);
3a2ebbf4 299 $self->_add_reading( $reading->id => $reading );
300 # Once the reading has been added, put it in both graphs.
301 $self->sequence->add_vertex( $reading->id );
22222af9 302 $self->relations->add_reading( $reading->id );
3a2ebbf4 303 return $reading;
eca16057 304};
305
3a2ebbf4 306around del_reading => sub {
307 my $orig = shift;
308 my $self = shift;
309 my $arg = shift;
310
311 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
312 $arg = $arg->id;
313 }
3a2ebbf4 314 # Remove the reading from the graphs.
c1915ab9 315 $self->_graphcalc_done(0);
4e483aa5 316 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 317 $self->sequence->delete_vertex( $arg );
22222af9 318 $self->relations->delete_reading( $arg );
3a2ebbf4 319
320 # Carry on.
321 $self->$orig( $arg );
322};
7854e12e 323
4e483aa5 324=begin testing
325
326use Text::Tradition;
327
328my $cxfile = 't/data/Collatex-16.xml';
329my $t = Text::Tradition->new(
330 'name' => 'inline',
331 'input' => 'CollateX',
332 'file' => $cxfile,
333 );
334my $c = $t->collation;
335
336my $rno = scalar $c->readings;
337# Split n21 for testing purposes
338my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
339my $old_r = $c->reading( 'n21' );
340$old_r->alter_text( 'to' );
341$c->del_path( 'n20', 'n21', 'A' );
342$c->add_path( 'n20', 'n21p0', 'A' );
343$c->add_path( 'n21p0', 'n21', 'A' );
344$c->flatten_ranks();
345ok( $c->reading( 'n21p0' ), "New reading exists" );
346is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
347
679f17e1 348# Combine n3 and n4 ( with his )
4e483aa5 349$c->merge_readings( 'n3', 'n4', 1 );
350ok( !$c->reading('n4'), "Reading n4 is gone" );
351is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
352
679f17e1 353# Collapse n9 and n10 ( rood / root )
354$c->merge_readings( 'n9', 'n10' );
355ok( !$c->reading('n10'), "Reading n10 is gone" );
356is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 357
358# Combine n21 and n21p0
359my $remaining = $c->reading('n21');
360$remaining ||= $c->reading('n22'); # one of these should still exist
361$c->merge_readings( 'n21p0', $remaining, 1 );
362ok( !$c->reading('n21'), "Reading $remaining is gone" );
363is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
364
365=end testing
366
367=cut
7854e12e 368
3a2ebbf4 369sub merge_readings {
370 my $self = shift;
371
372 # We only need the IDs for adding paths to the graph, not the reading
373 # objects themselves.
4e483aa5 374 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 375 $self->_graphcalc_done(0);
3a2ebbf4 376
377 # The kept reading should inherit the paths and the relationships
378 # of the deleted reading.
379 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
380 my @vector = ( $kept );
381 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
382 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 383 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 384 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
385 $self->sequence->add_edge( @vector );
386 my $fwits = $self->sequence->get_edge_attributes( @vector );
387 @wits{keys %$fwits} = values %$fwits;
388 $self->sequence->set_edge_attributes( @vector, \%wits );
389 }
22222af9 390 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 391
392 # Do the deletion deed.
4e483aa5 393 if( $combine ) {
49d4f2ac 394 my $kept_obj = $self->reading( $kept );
4e483aa5 395 my $del_obj = $self->reading( $deleted );
396 my $joinstr = $combine_char;
397 unless( defined $joinstr ) {
398 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
399 $joinstr = $self->wordsep unless defined $joinstr;
400 }
401 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 402 }
3a2ebbf4 403 $self->del_reading( $deleted );
404}
7854e12e 405
3265b0ce 406
3a2ebbf4 407# Helper function for manipulating the graph.
408sub _stringify_args {
4e483aa5 409 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 410 $first = $first->id
411 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
412 $second = $second->id
413 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 414 return( $first, $second, @args );
3a2ebbf4 415}
df6d9812 416
4e5a7b2c 417# Helper function for manipulating the graph.
418sub _objectify_args {
419 my( $self, $first, $second, $arg ) = @_;
420 $first = $self->reading( $first )
421 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
422 $second = $self->reading( $second )
423 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
424 return( $first, $second, $arg );
425}
3a2ebbf4 426### Path logic
427
428sub add_path {
429 my $self = shift;
430
431 # We only need the IDs for adding paths to the graph, not the reading
432 # objects themselves.
433 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
434
c1915ab9 435 $self->_graphcalc_done(0);
3a2ebbf4 436 # Connect the readings
359944f7 437 unless( $self->sequence->has_edge( $source, $target ) ) {
438 $self->sequence->add_edge( $source, $target );
439 $self->relations->add_equivalence_edge( $source, $target );
440 }
3a2ebbf4 441 # Note the witness in question
442 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 443}
b15511bf 444
3a2ebbf4 445sub del_path {
446 my $self = shift;
49d4f2ac 447 my @args;
448 if( ref( $_[0] ) eq 'ARRAY' ) {
449 my $e = shift @_;
450 @args = ( @$e, @_ );
451 } else {
452 @args = @_;
453 }
3a2ebbf4 454
455 # We only need the IDs for adding paths to the graph, not the reading
456 # objects themselves.
49d4f2ac 457 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 458
c1915ab9 459 $self->_graphcalc_done(0);
3a2ebbf4 460 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 461 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 462 }
463 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
464 $self->sequence->delete_edge( $source, $target );
359944f7 465 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 466 }
784877d9 467}
468
3a2ebbf4 469
15d2d3df 470# Extra graph-alike utility
471sub has_path {
3a2ebbf4 472 my $self = shift;
473 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
474 return undef unless $self->sequence->has_edge( $source, $target );
475 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 476}
477
4e5a7b2c 478=head2 clear_witness( @sigil_list )
3a2ebbf4 479
4e5a7b2c 480Clear the given witnesses out of the collation entirely, removing references
481to them in paths, and removing readings that belong only to them. Should only
482be called via $tradition->del_witness.
3a2ebbf4 483
484=cut
485
4e5a7b2c 486sub clear_witness {
487 my( $self, @sigils ) = @_;
488
c1915ab9 489 $self->_graphcalc_done(0);
4e5a7b2c 490 # Clear the witness(es) out of the paths
491 foreach my $e ( $self->paths ) {
492 foreach my $sig ( @sigils ) {
493 $self->del_path( $e, $sig );
494 }
495 }
496
497 # Clear out the newly unused readings
498 foreach my $r ( $self->readings ) {
499 unless( $self->reading_witnesses( $r ) ) {
500 $self->del_reading( $r );
501 }
502 }
503}
3a2ebbf4 504
505sub add_relationship {
506 my $self = shift;
22222af9 507 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 508 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 509 $self->_graphcalc_done(0);
63778331 510 return @vectors;
22222af9 511}
ef9d481f 512
ca6e6095 513around qw/ get_relationship del_relationship / => sub {
514 my $orig = shift;
515 my $self = shift;
516 my @args = @_;
517 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
518 @args = @{$_[0]};
519 }
520 my( $source, $target ) = $self->_stringify_args( @args );
521 $self->$orig( $source, $target );
522};
523
22222af9 524=head2 reading_witnesses( $reading )
910a0a6d 525
22222af9 526Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 527
22222af9 528=cut
1d310495 529
1d310495 530sub reading_witnesses {
531 my( $self, $reading ) = @_;
532 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 533 # arbitrarily chosen "incoming". Thus, special-case the start node.
534 if( $reading eq $self->start ) {
535 return map { $_->sigil } $self->tradition->witnesses;
536 }
1d310495 537 my %all_witnesses;
538 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
539 my $wits = $self->sequence->get_edge_attributes( @$e );
540 @all_witnesses{ keys %$wits } = 1;
541 }
c12bb878 542 my $acstr = $self->ac_label;
543 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
544 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
545 }
1d310495 546 return keys %all_witnesses;
910a0a6d 547}
548
4e5a7b2c 549=head1 OUTPUT METHODS
8e1394aa 550
0ecb975c 551=head2 as_svg( \%options )
8e1394aa 552
0068967c 553Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 554See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 555
556=cut
557
558sub as_svg {
0ecb975c 559 my( $self, $opts ) = @_;
bfcbcecb 560 throw( "Need GraphViz installed to output SVG" )
561 unless File::Which::which( 'dot' );
e247aad1 562 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
5f082716 563 $self->calculate_ranks() unless( $self->_graphcalc_done || $opts->{'nocalc'} );
e247aad1 564 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 565 my @cmd = qw/dot -Tsvg/;
566 my( $svg, $err );
567 my $dotfile = File::Temp->new();
568 ## USE FOR DEBUGGING
569 # $dotfile->unlink_on_destroy(0);
570 binmode $dotfile, ':utf8';
571 print $dotfile $self->as_dot( $opts );
572 push( @cmd, $dotfile->filename );
573 run( \@cmd, ">", binary(), \$svg );
e247aad1 574 $svg = decode_utf8( $svg );
575 $self->cached_svg( $svg ) unless $want_subgraph;
576 return $svg;
577 } else {
578 return $self->cached_svg;
b365fbae 579 }
8e1394aa 580}
581
b22576c6 582
0ecb975c 583=head2 as_dot( \%options )
b22576c6 584
0ecb975c 585Returns a string that is the collation graph expressed in dot
586(i.e. GraphViz) format. Options include:
b22576c6 587
0ecb975c 588=over 4
b22576c6 589
0ecb975c 590=item * from
b22576c6 591
0ecb975c 592=item * to
df6d9812 593
0ecb975c 594=item * color_common
595
596=back
df6d9812 597
598=cut
599
600sub as_dot {
0ecb975c 601 my( $self, $opts ) = @_;
602 my $startrank = $opts->{'from'} if $opts;
603 my $endrank = $opts->{'to'} if $opts;
604 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 605 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
606 && $self->end->rank > 100;
6648ee3d 607 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 608
b22576c6 609 # Check the arguments
610 if( $startrank ) {
611 return if $endrank && $startrank > $endrank;
612 return if $startrank > $self->end->rank;
613 }
614 if( defined $endrank ) {
615 return if $endrank < 0;
f1b3b33a 616 $endrank = undef if $endrank == $self->end->rank;
b22576c6 617 }
618
67da8d6c 619 my $graph_name = $self->tradition->name;
620 $graph_name =~ s/[^\w\s]//g;
621 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 622
623 my %graph_attrs = (
624 'rankdir' => 'LR',
625 'bgcolor' => 'none',
626 );
627 my %node_attrs = (
b8990398 628 'fontsize' => 14,
f13b5582 629 'fillcolor' => 'white',
630 'style' => 'filled',
631 'shape' => 'ellipse'
632 );
633 my %edge_attrs = (
634 'arrowhead' => 'open',
635 'color' => '#000000',
636 'fontcolor' => '#000000',
637 );
638
67da8d6c 639 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 640 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
641 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 642
b22576c6 643 # Output substitute start/end readings if necessary
644 if( $startrank ) {
43c94341 645 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 646 }
647 if( $endrank ) {
43c94341 648 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 649 }
b365fbae 650 if( $STRAIGHTENHACK ) {
651 ## HACK part 1
43c94341 652 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
653 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 654 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
655 }
b22576c6 656 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 657 # Sort the readings by rank if we have ranks; this speeds layout.
658 my @all_readings = $self->end->has_rank
659 ? sort { $a->rank <=> $b->rank } $self->readings
660 : $self->readings;
4633f9e4 661 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 662 foreach my $reading ( @all_readings ) {
b22576c6 663 # Only output readings within our rank range.
664 next if $startrank && $reading->rank < $startrank;
665 next if $endrank && $reading->rank > $endrank;
666 $used{$reading->id} = 1;
910a0a6d 667 # Need not output nodes without separate labels
3a2ebbf4 668 next if $reading->id eq $reading->text;
d4b75f44 669 my $rattrs;
30f0df34 670 my $label = $reading->text;
629e27b0 671 $label .= '-' if $reading->join_next;
672 $label = "-$label" if $reading->join_prior;
8f9cab7b 673 $label =~ s/\"/\\\"/g;
d4b75f44 674 $rattrs->{'label'} = $label;
10e4b1ac 675 $rattrs->{'id'} = $reading->id;
0ecb975c 676 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 677 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 678 }
3a2ebbf4 679
30ddc24c 680 # Add the real edges. Need to weight one edge per rank jump, in a
681 # continuous line.
b365fbae 682 # my $weighted = $self->_add_edge_weights;
b22576c6 683 my @edges = $self->paths;
3bdec618 684 my( %substart, %subend );
b22576c6 685 foreach my $edge ( @edges ) {
686 # Do we need to output this edge?
508fd430 687 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 688 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 689 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 690
b22576c6 691 # Account for the rank gap if necessary
30ddc24c 692 my $rank0 = $self->reading( $edge->[0] )->rank
693 if $self->reading( $edge->[0] )->has_rank;
694 my $rank1 = $self->reading( $edge->[1] )->rank
695 if $self->reading( $edge->[1] )->has_rank;
696 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
697 $variables->{'minlen'} = $rank1 - $rank0;
698 }
699
700 # Add the calculated edge weights
b365fbae 701 # if( exists $weighted->{$edge->[0]}
e247aad1 702 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
703 # # $variables->{'color'} = 'red';
704 # $variables->{'weight'} = 3.0;
705 # }
30ddc24c 706
508fd430 707 # EXPERIMENTAL: make edge width reflect no. of witnesses
708 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
709 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
710
f13b5582 711 my $varopts = _dot_attr_string( $variables );
712 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
713 $edge->[0], $edge->[1], $varopts );
3bdec618 714 } elsif( $used{$edge->[0]} ) {
715 $subend{$edge->[0]} = 1;
716 } elsif( $used{$edge->[1]} ) {
717 $substart{$edge->[1]} = 1;
b22576c6 718 }
df6d9812 719 }
3bdec618 720 # Add substitute start and end edges if necessary
721 foreach my $node ( keys %substart ) {
027d819c 722 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 723 my $variables = { %edge_attrs, 'label' => $witstr };
724 my $varopts = _dot_attr_string( $variables );
43c94341 725 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;";
3bdec618 726 }
727 foreach my $node ( keys %subend ) {
027d819c 728 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 729 my $variables = { %edge_attrs, 'label' => $witstr };
730 my $varopts = _dot_attr_string( $variables );
43c94341 731 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;";
3bdec618 732 }
b365fbae 733 # HACK part 2
734 if( $STRAIGHTENHACK ) {
43c94341 735 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
736 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 737 }
30ddc24c 738
df6d9812 739 $dot .= "}\n";
740 return $dot;
741}
742
f13b5582 743sub _dot_attr_string {
744 my( $hash ) = @_;
745 my @attrs;
746 foreach my $k ( sort keys %$hash ) {
747 my $v = $hash->{$k};
748 push( @attrs, $k.'="'.$v.'"' );
749 }
750 return( '[ ' . join( ', ', @attrs ) . ' ]' );
751}
752
30ddc24c 753sub _add_edge_weights {
754 my $self = shift;
755 # Walk the graph from START to END, choosing the successor node with
756 # the largest number of witness paths each time.
757 my $weighted = {};
758 my $curr = $self->start->id;
008fc8a6 759 my $ranked = $self->end->has_rank;
30ddc24c 760 while( $curr ne $self->end->id ) {
008fc8a6 761 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 762 my @succ = sort { $self->path_witnesses( $curr, $a )
763 <=> $self->path_witnesses( $curr, $b ) }
764 $self->sequence->successors( $curr );
765 my $next = pop @succ;
008fc8a6 766 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 767 # Try to avoid lacunae in the weighted path.
008fc8a6 768 while( @succ &&
769 ( $self->reading( $next )->is_lacuna ||
770 $nextrank - $rank > 1 ) ){
30ddc24c 771 $next = pop @succ;
772 }
773 $weighted->{$curr} = $next;
774 $curr = $next;
775 }
776 return $weighted;
777}
778
027d819c 779=head2 path_witnesses( $edge )
780
781Returns the list of sigils whose witnesses are associated with the given edge.
782The edge can be passed as either an array or an arrayref of ( $source, $target ).
783
784=cut
785
3a2ebbf4 786sub path_witnesses {
787 my( $self, @edge ) = @_;
788 # If edge is an arrayref, cope.
789 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
790 my $e = shift @edge;
791 @edge = @$e;
792 }
793 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 794 return @wits;
3a2ebbf4 795}
796
7f9f05e8 797# Helper function. Make a display label for the given witnesses, showing a.c.
798# witnesses only where the main witness is not also in the list.
027d819c 799sub _path_display_label {
508fd430 800 my $self = shift;
7f9f05e8 801 my %wits;
802 map { $wits{$_} = 1 } @_;
803
804 # If an a.c. wit is listed, remove it if the main wit is also listed.
805 # Otherwise keep it for explicit listing.
806 my $aclabel = $self->ac_label;
807 my @disp_ac;
808 foreach my $w ( sort keys %wits ) {
809 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
810 if( exists $wits{$1} ) {
811 delete $wits{$w};
812 } else {
813 push( @disp_ac, $w );
814 }
815 }
816 }
817
818 # See if we are in a majority situation.
8f9cab7b 819 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
7f9f05e8 820 if( scalar keys %wits > $maj ) {
821 unshift( @disp_ac, 'majority' );
822 return join( ', ', @disp_ac );
8f9cab7b 823 } else {
7f9f05e8 824 return join( ', ', sort keys %wits );
8f9cab7b 825 }
826}
1dd07bda 827
bf6e338d 828=head2 readings_at_rank( $rank )
1dd07bda 829
bf6e338d 830Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 831
832=cut
833
bf6e338d 834sub readings_at_rank {
1dd07bda 835 my( $self, $rank ) = @_;
bf6e338d 836 my $table = $self->alignment_table;
837 # Table rank is real rank - 1.
838 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
839 my %readings;
840 foreach my $e ( @elements ) {
841 next unless ref( $e ) eq 'HASH';
842 next unless exists $e->{'t'};
843 $readings{$e->{'t'}->id} = $e->{'t'};
844 }
845 return values %readings;
1dd07bda 846}
8f9cab7b 847
4e5a7b2c 848=head2 as_graphml
8e1394aa 849
4e5a7b2c 850Returns a GraphML representation of the collation. The GraphML will contain
851two graphs. The first expresses the attributes of the readings and the witness
852paths that link them; the second expresses the relationships that link the
853readings. This is the native transfer format for a tradition.
8e1394aa 854
56eefa04 855=begin testing
856
857use Text::Tradition;
858
859my $READINGS = 311;
860my $PATHS = 361;
861
862my $datafile = 't/data/florilegium_tei_ps.xml';
863my $tradition = Text::Tradition->new( 'input' => 'TEI',
864 'name' => 'test0',
865 'file' => $datafile,
866 'linear' => 1 );
867
868ok( $tradition, "Got a tradition object" );
869is( scalar $tradition->witnesses, 13, "Found all witnesses" );
870ok( $tradition->collation, "Tradition has a collation" );
871
872my $c = $tradition->collation;
873is( scalar $c->readings, $READINGS, "Collation has all readings" );
874is( scalar $c->paths, $PATHS, "Collation has all paths" );
875is( scalar $c->relationships, 0, "Collation has all relationships" );
876
877# Add a few relationships
878$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
879$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
880$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
881
882# Now write it to GraphML and parse it again.
883
884my $graphml = $c->as_graphml;
885my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
886is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
887is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
888is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
889
2a812726 890# Now add a stemma, write to GraphML, and parse again.
891my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
892is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
893is( $tradition->stemmata, 1, "Tradition now has the stemma" );
894$graphml = $c->as_graphml;
895like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
896
56eefa04 897=end testing
898
8e1394aa 899=cut
900
901sub as_graphml {
a30ca502 902 my( $self, $options ) = @_;
3d14b48e 903 $self->calculate_ranks unless $self->_graphcalc_done;
904
a30ca502 905 my $start = $options->{'from'}
906 ? $self->reading( $options->{'from'} ) : $self->start;
907 my $end = $options->{'to'}
908 ? $self->reading( $options->{'to'} ) : $self->end;
909 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
910 throw( 'Start node must be before end node' );
911 }
912 # The readings need to be ranked for this to work.
913 $start = $self->start unless $start->has_rank;
914 $end = $self->end unless $end->has_rank;
414cc046 915 my $rankoffset = 0;
916 unless( $start eq $self->start ) {
917 $rankoffset = $start->rank - 1;
918 }
a30ca502 919 my %use_readings;
920
8e1394aa 921 # Some namespaces
922 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
923 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
924 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 925 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 926
927 # Create the document and root node
928 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
929 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
930 $graphml->setDocumentElement( $root );
931 $root->setNamespace( $xsi_ns, 'xsi', 0 );
932 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 933
934 # List of attribute types to save on our objects and their corresponding
935 # GraphML types
936 my %save_types = (
937 'Str' => 'string',
938 'Int' => 'int',
939 'Bool' => 'boolean',
10e4b1ac 940 'ReadingID' => 'string',
bbd064a9 941 'RelationshipType' => 'string',
942 'RelationshipScope' => 'string',
943 );
944
945 # List of attribute names *not* to save on our objects.
946 # We will also not save any attribute beginning with _.
947 my %skipsave;
948 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 949
bbd064a9 950 # Add the data keys for the graph. Include an extra key 'version' for the
951 # GraphML output version.
e309421a 952 my %graph_data_keys;
953 my $gdi = 0;
bbd064a9 954 my %graph_attributes = ( 'version' => 'string' );
955 # Graph attributes include those of Tradition and those of Collation.
956 my %gattr_from;
957 my $tmeta = $self->tradition->meta;
958 my $cmeta = $self->meta;
959 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
960 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
961 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
962 next if $attr->name =~ /^_/;
963 next if $skipsave{$attr->name};
964 next unless $save_types{$attr->type_constraint->name};
965 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
966 }
2a812726 967 # Extra custom key for the tradition stemma(ta)
968 $graph_attributes{'stemmata'} = 'string';
bbd064a9 969
970 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 971 $graph_data_keys{$datum} = 'dg'.$gdi++;
972 my $key = $root->addNewChild( $graphml_ns, 'key' );
973 $key->setAttribute( 'attr.name', $datum );
bbd064a9 974 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 975 $key->setAttribute( 'for', 'graph' );
976 $key->setAttribute( 'id', $graph_data_keys{$datum} );
977 }
f6066bac 978
bbd064a9 979 # Add the data keys for reading nodes
980 my %reading_attributes;
981 my $rmeta = Text::Tradition::Collation::Reading->meta;
982 foreach my $attr( $rmeta->get_all_attributes ) {
983 next if $attr->name =~ /^_/;
984 next if $skipsave{$attr->name};
985 next unless $save_types{$attr->type_constraint->name};
986 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
987 }
7cd9f181 988 # Extra custom key for the reading morphology
989 $reading_attributes{'lexemes'} = 'string';
990
ef9d481f 991 my %node_data_keys;
992 my $ndi = 0;
bbd064a9 993 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 994 $node_data_keys{$datum} = 'dn'.$ndi++;
995 my $key = $root->addNewChild( $graphml_ns, 'key' );
996 $key->setAttribute( 'attr.name', $datum );
bbd064a9 997 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 998 $key->setAttribute( 'for', 'node' );
999 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1000 }
1001
bbd064a9 1002 # Add the data keys for edges, that is, paths and relationships. Path
1003 # data does not come from a Moose class so is here manually.
ef9d481f 1004 my $edi = 0;
1005 my %edge_data_keys;
bbd064a9 1006 my %edge_attributes = (
3a2ebbf4 1007 witness => 'string', # ID/label for a path
3a2ebbf4 1008 extra => 'boolean', # Path key
3a2ebbf4 1009 );
bbd064a9 1010 my @path_attributes = keys %edge_attributes; # track our manual additions
1011 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1012 foreach my $attr( $pmeta->get_all_attributes ) {
1013 next if $attr->name =~ /^_/;
1014 next if $skipsave{$attr->name};
1015 next unless $save_types{$attr->type_constraint->name};
1016 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1017 }
1018 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1019 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1020 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1021 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1022 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1023 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1024 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1025 }
3a2ebbf4 1026
cc31ebaa 1027 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1028 my $xmlidname = $self->tradition->name;
1029 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1030 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1031 $xmlidname = '_'.$xmlidname;
1032 }
2c669bca 1033 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1034 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1035 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1036 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1037 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1038 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1039 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1040 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1041
2a812726 1042 # Tradition/collation attribute data
bbd064a9 1043 foreach my $datum ( keys %graph_attributes ) {
1044 my $value;
1045 if( $datum eq 'version' ) {
2a812726 1046 $value = '3.2';
1047 } elsif( $datum eq 'stemmata' ) {
1048 my @stemstrs;
1049 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1050 $self->tradition->stemmata;
1051 $value = join( "\n", @stemstrs );
bbd064a9 1052 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1053 $value = $self->tradition->$datum;
1054 } else {
1055 $value = $self->$datum;
1056 }
2c669bca 1057 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1058 }
8e1394aa 1059
1060 my $node_ctr = 0;
1061 my %node_hash;
22222af9 1062 # Add our readings to the graph
3a2ebbf4 1063 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1064 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1065 ( $n->rank < $start->rank || $n->rank > $end->rank );
1066 $use_readings{$n->id} = 1;
2c669bca 1067 # Add to the main graph
1068 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1069 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1070 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1071 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1072 foreach my $d ( keys %reading_attributes ) {
255875b8 1073 my $nval = $n->$d;
7cd9f181 1074 # Custom serialization
1075 if( $d eq 'lexemes' ) {
1076 # If nval is a true value, we have lexemes so we need to
1077 # serialize them. Otherwise set nval to undef so that the
1078 # key is excluded from this reading.
1079 $nval = $nval ? $n->_serialize_lexemes : undef;
1080 }
cc31ebaa 1081 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1082 # Adjust the ranks within the subgraph.
cc31ebaa 1083 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1084 : $nval - $rankoffset;
414cc046 1085 }
255875b8 1086 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1087 if defined $nval;
1088 }
b15511bf 1089 }
1090
2c669bca 1091 # Add the path edges to the sequence graph
df6d9812 1092 my $edge_ctr = 0;
3a2ebbf4 1093 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1094 # We add an edge in the graphml for every witness in $e.
a30ca502 1095 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1096 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1097 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1098 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1099 # Skip any path from start to end; that witness is not in the subgraph.
1100 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1101 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1102 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1103 $node_hash{ $e->[0] },
1104 $node_hash{ $e->[1] } );
2c669bca 1105 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1106 $edge_el->setAttribute( 'source', $from );
1107 $edge_el->setAttribute( 'target', $to );
1108 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1109
1110 # It's a witness path, so add the witness
1111 my $base = $wit;
1112 my $key = $edge_data_keys{'witness'};
1113 # Is this an ante-corr witness?
1114 my $aclabel = $self->ac_label;
1115 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1116 # Keep the base witness
1117 $base = $1;
1118 # ...and record that this is an 'extra' reading path
1119 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1120 }
1121 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1122 }
1123 }
1124
cc31ebaa 1125 # Report the actual number of nodes and edges that went in
1126 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1127 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1128
22222af9 1129 # Add the relationship graph to the XML
bbd064a9 1130 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1131 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1132 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1133
94c00c71 1134 # Save and return the thing
1135 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1136 return $result;
df6d9812 1137}
1138
b15511bf 1139sub _add_graphml_data {
1140 my( $el, $key, $value ) = @_;
b15511bf 1141 return unless defined $value;
c9bf3dbf 1142 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1143 $data_el->setAttribute( 'key', $key );
1144 $data_el->appendText( $value );
8e1394aa 1145}
1146
4e5a7b2c 1147=head2 as_csv
910a0a6d 1148
1149Returns a CSV alignment table representation of the collation graph, one
2c669bca 1150row per witness (or witness uncorrected.)
910a0a6d 1151
1152=cut
1153
1154sub as_csv {
3a2ebbf4 1155 my( $self ) = @_;
1dd07bda 1156 my $table = $self->alignment_table;
82fa4d57 1157 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1158 my @result;
2c669bca 1159 # Make the header row
1160 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1161 push( @result, decode_utf8( $csv->string ) );
1162 # Make the rest of the rows
1163 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1164 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1165 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1166 $csv->combine( @row );
910a0a6d 1167 push( @result, decode_utf8( $csv->string ) );
1168 }
3a2ebbf4 1169 return join( "\n", @result );
910a0a6d 1170}
1171
1dd07bda 1172=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1173
566f4595 1174Return a reference to an alignment table, in a slightly enhanced CollateX
1175format which looks like this:
1176
1177 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1178 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1179 { witness => "SIG2",
4e5a7b2c 1180 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1181 ... ],
1182 length => TEXTLEN };
1183
1184If $use_refs is set to 1, the reading object is returned in the table
1185instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1186
1187If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1188keys have a true hash value will be included.
2c669bca 1189
1190=cut
9f3ba6f7 1191
1dd07bda 1192sub alignment_table {
1193 my( $self ) = @_;
c1915ab9 1194 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1195 return $self->cached_table if $self->has_cached_table;
1196
0ecb975c 1197 # Make sure we can do this
1198 throw( "Need a linear graph in order to make an alignment table" )
1199 unless $self->linear;
1200 $self->calculate_ranks unless $self->end->has_rank;
1201
2c669bca 1202 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1203 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1204 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1205 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1206 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1207 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1208 push( @{$table->{'alignment'}},
1209 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1210 if( $wit->is_layered ) {
1211 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1212 $wit->sigil.$self->ac_label );
1dd07bda 1213 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1214 push( @{$table->{'alignment'}},
1215 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1216 }
1217 }
1dd07bda 1218 $self->cached_table( $table );
1219 return $table;
910a0a6d 1220}
1221
1222sub _make_witness_row {
1dd07bda 1223 my( $path, $positions ) = @_;
910a0a6d 1224 my %char_hash;
1225 map { $char_hash{$_} = undef } @$positions;
2c669bca 1226 my $debug = 0;
910a0a6d 1227 foreach my $rdg ( @$path ) {
eca16057 1228 my $rtext = $rdg->text;
1229 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1230 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1231 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1232 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1233 }
1234 my @row = map { $char_hash{$_} } @$positions;
eca16057 1235 # Fill in lacuna markers for undef spots in the row
1236 my $last_el = shift @row;
1237 my @filled_row = ( $last_el );
1238 foreach my $el ( @row ) {
0e476982 1239 # If we are using node reference, make the lacuna node appear many times
1240 # in the table. If not, use the lacuna tag.
1dd07bda 1241 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1242 $el = $last_el;
eca16057 1243 }
1244 push( @filled_row, $el );
1245 $last_el = $el;
1246 }
1247 return @filled_row;
910a0a6d 1248}
1249
4e5a7b2c 1250=head1 NAVIGATION METHODS
910a0a6d 1251
4e5a7b2c 1252=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1253
1254Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1255with $last, for the witness given in $sigil. If a $backup sigil is
1256specified (e.g. when walking a layered witness), it will be used wherever
1257no $sigil path exists. If there is a base text reading, that will be
1258used wherever no path exists for $sigil or $backup.
e2902068 1259
1260=cut
1261
910a0a6d 1262# TODO Think about returning some lazy-eval iterator.
b0b4421a 1263# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1264
e2902068 1265sub reading_sequence {
861c3e27 1266 my( $self, $start, $end, $witness ) = @_;
e2902068 1267
930ff666 1268 $witness = $self->baselabel unless $witness;
e2902068 1269 my @readings = ( $start );
1270 my %seen;
1271 my $n = $start;
3a2ebbf4 1272 while( $n && $n->id ne $end->id ) {
1273 if( exists( $seen{$n->id} ) ) {
63778331 1274 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1275 }
3a2ebbf4 1276 $seen{$n->id} = 1;
910a0a6d 1277
861c3e27 1278 my $next = $self->next_reading( $n, $witness );
44771cf2 1279 unless( $next ) {
63778331 1280 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1281 }
910a0a6d 1282 push( @readings, $next );
1283 $n = $next;
e2902068 1284 }
1285 # Check that the last reading is our end reading.
1286 my $last = $readings[$#readings];
63778331 1287 throw( "Last reading found from " . $start->text .
1288 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1289 unless $last->id eq $end->id;
e2902068 1290
1291 return @readings;
1292}
1293
4e5a7b2c 1294=head2 next_reading( $reading, $sigil );
8e1394aa 1295
4a8828f0 1296Returns the reading that follows the given reading along the given witness
930ff666 1297path.
8e1394aa 1298
1299=cut
1300
4a8828f0 1301sub next_reading {
e2902068 1302 # Return the successor via the corresponding path.
8e1394aa 1303 my $self = shift;
3a2ebbf4 1304 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1305 return undef unless $answer;
3a2ebbf4 1306 return $self->reading( $answer );
8e1394aa 1307}
1308
4e5a7b2c 1309=head2 prior_reading( $reading, $sigil )
8e1394aa 1310
4a8828f0 1311Returns the reading that precedes the given reading along the given witness
930ff666 1312path.
8e1394aa 1313
1314=cut
1315
4a8828f0 1316sub prior_reading {
e2902068 1317 # Return the predecessor via the corresponding path.
8e1394aa 1318 my $self = shift;
3a2ebbf4 1319 my $answer = $self->_find_linked_reading( 'prior', @_ );
1320 return $self->reading( $answer );
8e1394aa 1321}
1322
4a8828f0 1323sub _find_linked_reading {
861c3e27 1324 my( $self, $direction, $node, $path ) = @_;
1325
1326 # Get a backup if we are dealing with a layered witness
1327 my $alt_path;
1328 my $aclabel = $self->ac_label;
1329 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1330 $alt_path = $1;
1331 }
1332
e2902068 1333 my @linked_paths = $direction eq 'next'
3a2ebbf4 1334 ? $self->sequence->edges_from( $node )
1335 : $self->sequence->edges_to( $node );
e2902068 1336 return undef unless scalar( @linked_paths );
8e1394aa 1337
e2902068 1338 # We have to find the linked path that contains all of the
1339 # witnesses supplied in $path.
1340 my( @path_wits, @alt_path_wits );
4e5a7b2c 1341 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1342 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1343 my $base_le;
1344 my $alt_le;
1345 foreach my $le ( @linked_paths ) {
3a2ebbf4 1346 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1347 $base_le = $le;
910a0a6d 1348 }
508fd430 1349 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1350 if( _is_within( \@path_wits, \@le_wits ) ) {
1351 # This is the right path.
1352 return $direction eq 'next' ? $le->[1] : $le->[0];
1353 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1354 $alt_le = $le;
1355 }
8e1394aa 1356 }
e2902068 1357 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1358 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1359 if $alt_le;
e2902068 1360
1361 # Got this far? Return the base path if it exists.
3a2ebbf4 1362 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1363 if $base_le;
e2902068 1364
1365 # Got this far? We have no appropriate path.
2c669bca 1366 warn "Could not find $direction node from " . $node->id
910a0a6d 1367 . " along path $path";
8e1394aa 1368 return undef;
1369}
1370
4a8828f0 1371# Some set logic.
1372sub _is_within {
1373 my( $set1, $set2 ) = @_;
7854e12e 1374 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1375 foreach my $el ( @$set1 ) {
910a0a6d 1376 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1377 }
1378 return $ret;
1379}
1380
4e5a7b2c 1381# Return the string that joins together a list of witnesses for
1382# display on a single path.
1383sub _witnesses_of_label {
1384 my( $self, $label ) = @_;
1385 my $regex = $self->wit_list_separator;
1386 my @answer = split( /\Q$regex\E/, $label );
1387 return @answer;
b0b4421a 1388}
1389
d4b75f44 1390=head2 common_readings
1391
1392Returns the list of common readings in the graph (i.e. those readings that are
1393shared by all non-lacunose witnesses.)
1394
1395=cut
1396
1397sub common_readings {
1398 my $self = shift;
1399 my @common = grep { $_->is_common } $self->readings;
1400 return @common;
1401}
1402
fae52efd 1403=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1404
1405Returns the text of a witness (plus its backup, if we are using a layer)
1406as stored in the collation. The text is returned as a string, where the
1407individual readings are joined with spaces and the meta-readings (e.g.
1408lacunae) are omitted. Optional specification of $start and $end allows
1409the generation of a subset of the witness text.
4e5a7b2c 1410
b0b4421a 1411=cut
1412
1413sub path_text {
861c3e27 1414 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1415 $start = $self->start unless $start;
1416 $end = $self->end unless $end;
861c3e27 1417 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1418 my $pathtext = '';
1419 my $last;
1420 foreach my $r ( @path ) {
6ad2ce78 1421 unless ( $r->join_prior || !$last || $last->join_next ) {
1422 $pathtext .= ' ';
1423 }
1424 $pathtext .= $r->text;
629e27b0 1425 $last = $r;
1426 }
1427 return $pathtext;
b0b4421a 1428}
4e5a7b2c 1429
1430=head1 INITIALIZATION METHODS
1431
1432These are mostly for use by parsers.
1433
1434=head2 make_witness_path( $witness )
1435
1436Link the array of readings contained in $witness->path (and in
1437$witness->uncorrected_path if it exists) into collation paths.
1438Clear out the arrays when finished.
de51424a 1439
4e5a7b2c 1440=head2 make_witness_paths
1441
1442Call make_witness_path for all witnesses in the tradition.
1443
1444=cut
930ff666 1445
7e450e44 1446# For use when a collation is constructed from a base text and an apparatus.
1447# We have the sequences of readings and just need to add path edges.
1f7aa795 1448# When we are done, clear out the witness path attributes, as they are no
1449# longer needed.
1450# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1451
6a222840 1452sub make_witness_paths {
1453 my( $self ) = @_;
910a0a6d 1454 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1455 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1456 $self->make_witness_path( $wit );
7854e12e 1457 }
7854e12e 1458}
1459
6a222840 1460sub make_witness_path {
7854e12e 1461 my( $self, $wit ) = @_;
1462 my @chain = @{$wit->path};
15d2d3df 1463 my $sig = $wit->sigil;
fae52efd 1464 # Add start and end if necessary
1465 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1466 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1467 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1468 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1469 }
1f7aa795 1470 if( $wit->is_layered ) {
d9e873d0 1471 @chain = @{$wit->uncorrected_path};
fae52efd 1472 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1473 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1474 foreach my $idx( 0 .. $#chain-1 ) {
1475 my $source = $chain[$idx];
1476 my $target = $chain[$idx+1];
1477 $self->add_path( $source, $target, $sig.$self->ac_label )
1478 unless $self->has_path( $source, $target, $sig );
1479 }
15d2d3df 1480 }
1f7aa795 1481 $wit->clear_path;
1482 $wit->clear_uncorrected_path;
e2902068 1483}
1484
4e5a7b2c 1485=head2 calculate_ranks
1486
1487Calculate the reading ranks (that is, their aligned positions relative
1488to each other) for the graph. This can only be called on linear collations.
1489
b365fbae 1490=begin testing
1491
1492use Text::Tradition;
1493
1494my $cxfile = 't/data/Collatex-16.xml';
1495my $t = Text::Tradition->new(
1496 'name' => 'inline',
1497 'input' => 'CollateX',
1498 'file' => $cxfile,
1499 );
1500my $c = $t->collation;
1501
1502# Make an svg
bfcbcecb 1503my $table = $c->alignment_table;
1504ok( $c->has_cached_table, "Alignment table was cached" );
1505is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1506$c->calculate_ranks;
bfcbcecb 1507is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1508$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1509isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1510
1511=end testing
1512
4e5a7b2c 1513=cut
1514
910a0a6d 1515sub calculate_ranks {
1516 my $self = shift;
b365fbae 1517 # Save the existing ranks, in case we need to invalidate the cached SVG.
1518 my %existing_ranks;
ac4d7ac2 1519 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1520
1521 # Do the rankings based on the relationship equivalence graph, starting
1522 # with the start node.
1523 my $topo_start = $self->equivalence( $self->start->id );
c9bf3dbf 1524 my $node_ranks = { $topo_start => 0 };
910a0a6d 1525 my @curr_origin = ( $topo_start );
1526 # A little iterative function.
1527 while( @curr_origin ) {
359944f7 1528 @curr_origin = _assign_rank( $self->equivalence_graph,
1529 $node_ranks, @curr_origin );
910a0a6d 1530 }
1531 # Transfer our rankings from the topological graph to the real one.
1532 foreach my $r ( $self->readings ) {
359944f7 1533 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1534 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1535 } else {
63778331 1536 # Die. Find the last rank we calculated.
359944f7 1537 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1538 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1539 $self->readings;
1540 my $last = pop @all_defined;
1541 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1542 }
de51424a 1543 }
bfcbcecb 1544 # Do we need to invalidate the cached data?
1545 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1546 foreach my $r ( $self->readings ) {
7c293912 1547 next if defined( $existing_ranks{$r} )
1548 && $existing_ranks{$r} == $r->rank;
c1915ab9 1549 # Something has changed, so clear the cache
bfcbcecb 1550 $self->_clear_cache;
c1915ab9 1551 # ...and recalculate the common readings.
1552 $self->calculate_common_readings();
b365fbae 1553 last;
1554 }
1555 }
c1915ab9 1556 # The graph calculation information is now up to date.
1557 $self->_graphcalc_done(1);
8e1394aa 1558}
3a1f2523 1559
910a0a6d 1560sub _assign_rank {
c9bf3dbf 1561 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1562 # Look at each of the children of @current_nodes. If all the child's
1563 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1564 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1565 # parent gets a rank.
910a0a6d 1566 my @next_nodes;
1567 foreach my $c ( @current_nodes ) {
c9bf3dbf 1568 warn "Current reading $c has no rank!"
1569 unless exists $node_ranks->{$c};
1570 # print STDERR "Looking at child of node $c, rank "
1571 # . $node_ranks->{$c} . "\n";
1572 foreach my $child ( $graph->successors( $c ) ) {
1573 next if exists $node_ranks->{$child};
910a0a6d 1574 my $highest_rank = -1;
1575 my $skip = 0;
c9bf3dbf 1576 foreach my $parent ( $graph->predecessors( $child ) ) {
1577 if( exists $node_ranks->{$parent} ) {
1578 $highest_rank = $node_ranks->{$parent}
1579 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1580 } else {
1581 $skip = 1;
1582 last;
1583 }
1584 }
1585 next if $skip;
c9bf3dbf 1586 my $c_rank = $highest_rank + 1;
1587 # print STDERR "Assigning rank $c_rank to node $child \n";
1588 $node_ranks->{$child} = $c_rank;
910a0a6d 1589 push( @next_nodes, $child );
1590 }
1591 }
1592 return @next_nodes;
4cdd82f1 1593}
910a0a6d 1594
c1915ab9 1595sub _clear_cache {
1596 my $self = shift;
1597 $self->wipe_svg if $self->has_cached_svg;
1598 $self->wipe_table if $self->has_cached_table;
1599}
1600
1601
4e5a7b2c 1602=head2 flatten_ranks
1603
1604A convenience method for parsing collation data. Searches the graph for readings
1605with the same text at the same rank, and merges any that are found.
1606
1607=cut
1608
0e476982 1609sub flatten_ranks {
1610 my $self = shift;
1611 my %unique_rank_rdg;
bf6e338d 1612 my $changed;
0e476982 1613 foreach my $rdg ( $self->readings ) {
1614 next unless $rdg->has_rank;
1615 my $key = $rdg->rank . "||" . $rdg->text;
1616 if( exists $unique_rank_rdg{$key} ) {
1617 # Combine!
56eefa04 1618 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1619 $changed = 1;
0e476982 1620 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1621 # TODO see if this now makes a common point.
0e476982 1622 } else {
1623 $unique_rank_rdg{$key} = $rdg;
1624 }
1625 }
bf6e338d 1626 # If we merged readings, the ranks are still fine but the alignment
1627 # table is wrong. Wipe it.
1628 $self->wipe_table() if $changed;
0e476982 1629}
4633f9e4 1630
1631
d4b75f44 1632=head2 calculate_common_readings
1633
1634Goes through the graph identifying the readings that appear in every witness
1635(apart from those with lacunae at that spot.) Marks them as common and returns
1636the list.
1637
1638=begin testing
1639
1640use Text::Tradition;
1641
1642my $cxfile = 't/data/Collatex-16.xml';
1643my $t = Text::Tradition->new(
1644 'name' => 'inline',
1645 'input' => 'CollateX',
1646 'file' => $cxfile,
1647 );
1648my $c = $t->collation;
1649
1650my @common = $c->calculate_common_readings();
1651is( scalar @common, 8, "Found correct number of common readings" );
1652my @marked = sort $c->common_readings();
1653is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1654my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1655is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1656
1657=end testing
1658
1659=cut
1660
1661sub calculate_common_readings {
1662 my $self = shift;
1663 my @common;
c1915ab9 1664 map { $_->is_common( 0 ) } $self->readings;
1665 # Implicitly calls calculate_ranks
1dd07bda 1666 my $table = $self->alignment_table;
d4b75f44 1667 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1668 my @row = map { $_->{'tokens'}->[$idx]
1669 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1670 @{$table->{'alignment'}};
d4b75f44 1671 my %hash;
1672 foreach my $r ( @row ) {
1673 if( $r ) {
1674 $hash{$r->id} = $r unless $r->is_meta;
1675 } else {
1676 $hash{'UNDEF'} = $r;
1677 }
1678 }
1679 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1680 my( $r ) = values %hash;
1681 $r->is_common( 1 );
1682 push( @common, $r );
1683 }
1684 }
1685 return @common;
1686}
1687
861c3e27 1688=head2 text_from_paths
1689
1690Calculate the text array for all witnesses from the path, for later consistency
1691checking. Only to be used if there is no non-graph-based way to know the
1692original texts.
1693
1694=cut
1695
1696sub text_from_paths {
1697 my $self = shift;
1698 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1699 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1700 my @text;
1701 foreach my $r ( @readings ) {
1702 next if $r->is_meta;
1703 push( @text, $r->text );
1704 }
861c3e27 1705 $wit->text( \@text );
1706 if( $wit->is_layered ) {
5164a6f0 1707 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1708 $wit->sigil.$self->ac_label );
1709 my @uctext;
1710 foreach my $r ( @ucrdgs ) {
1711 next if $r->is_meta;
1712 push( @uctext, $r->text );
1713 }
1714 $wit->layertext( \@uctext );
861c3e27 1715 }
1716 }
1717}
0e476982 1718
4e5a7b2c 1719=head1 UTILITY FUNCTIONS
1720
1721=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1722
4e5a7b2c 1723Find the last reading that occurs in sequence before both the given readings.
414cc046 1724At the very least this should be $self->start.
4e5a7b2c 1725
1726=head2 common_successor( $reading_a, $reading_b )
1727
1728Find the first reading that occurs in sequence after both the given readings.
414cc046 1729At the very least this should be $self->end.
4e5a7b2c 1730
22222af9 1731=begin testing
1732
1733use Text::Tradition;
1734
1735my $cxfile = 't/data/Collatex-16.xml';
1736my $t = Text::Tradition->new(
1737 'name' => 'inline',
1738 'input' => 'CollateX',
1739 'file' => $cxfile,
1740 );
1741my $c = $t->collation;
1742
679f17e1 1743is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1744 'n20', "Found correct common predecessor" );
679f17e1 1745is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1746 '__END__', "Found correct common successor" );
22222af9 1747
4e5a7b2c 1748is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1749 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1750is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1751 '__END__', "Found correct common successor for readings on same path" );
22222af9 1752
1753=end testing
1754
1755=cut
1756
1757## Return the closest reading that is a predecessor of both the given readings.
1758sub common_predecessor {
1759 my $self = shift;
4e5a7b2c 1760 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1761 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1762}
1763
1764sub common_successor {
1765 my $self = shift;
4e5a7b2c 1766 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1767 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1768}
1769
414cc046 1770
1771# TODO think about how to do this without ranks...
027d819c 1772sub _common_in_path {
22222af9 1773 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1774 my $iter = $self->end->rank;
22222af9 1775 my @candidates;
414cc046 1776 my @last_r1 = ( $r1 );
1777 my @last_r2 = ( $r2 );
1778 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1779 my %all_seen;
414cc046 1780 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1781 while( !@candidates ) {
414cc046 1782 last unless $iter--; # Avoid looping infinitely
1783 # Iterate separately down the graph from r1 and r2
1784 my( @new_lc1, @new_lc2 );
1785 foreach my $lc ( @last_r1 ) {
1786 foreach my $p ( $lc->$dir ) {
1787 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1788 # print STDERR "Path candidate $p from $lc\n";
1789 push( @candidates, $p );
002e3600 1790 } elsif( !$all_seen{$p->id} ) {
414cc046 1791 $all_seen{$p->id} = 'r1';
1792 push( @new_lc1, $p );
1793 }
1794 }
1795 }
1796 foreach my $lc ( @last_r2 ) {
22222af9 1797 foreach my $p ( $lc->$dir ) {
414cc046 1798 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1799 # print STDERR "Path candidate $p from $lc\n";
22222af9 1800 push( @candidates, $p );
002e3600 1801 } elsif( !$all_seen{$p->id} ) {
414cc046 1802 $all_seen{$p->id} = 'r2';
1803 push( @new_lc2, $p );
22222af9 1804 }
1805 }
1806 }
414cc046 1807 @last_r1 = @new_lc1;
1808 @last_r2 = @new_lc2;
22222af9 1809 }
1810 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1811 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1812}
1813
63778331 1814sub throw {
1815 Text::Tradition::Error->throw(
1816 'ident' => 'Collation error',
1817 'message' => $_[0],
1818 );
1819}
1820
dd3b58b0 1821no Moose;
1822__PACKAGE__->meta->make_immutable;
e867486f 1823
027d819c 1824=head1 LICENSE
e867486f 1825
027d819c 1826This package is free software and is provided "as is" without express
1827or implied warranty. You can redistribute it and/or modify it under
1828the same terms as Perl itself.
e867486f 1829
027d819c 1830=head1 AUTHOR
e867486f 1831
027d819c 1832Tara L Andrews E<lt>aurum@cpan.orgE<gt>