refrain from recording normal form in graphml if it is unchanged
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 277 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
278 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 279}
784877d9 280
3a2ebbf4 281### Reading construct/destruct functions
282
283sub add_reading {
284 my( $self, $reading ) = @_;
285 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286 my %args = %$reading;
6ad2ce78 287 if( $self->tradition->has_language && !exists $args{'language'} ) {
288 $args{'language'} = $self->tradition->language;
289 }
3a2ebbf4 290 $reading = Text::Tradition::Collation::Reading->new(
291 'collation' => $self,
292 %args );
293 }
294 # First check to see if a reading with this ID exists.
295 if( $self->reading( $reading->id ) ) {
63778331 296 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 297 }
c1915ab9 298 $self->_graphcalc_done(0);
3a2ebbf4 299 $self->_add_reading( $reading->id => $reading );
300 # Once the reading has been added, put it in both graphs.
301 $self->sequence->add_vertex( $reading->id );
56772e8c 302 # All meta readings save 'start' and 'end' get disregarded for relationships.
303 unless( $reading->is_nonrel ) {
304 $self->relations->add_reading( $reading->id );
305 }
3a2ebbf4 306 return $reading;
eca16057 307};
308
3a2ebbf4 309around del_reading => sub {
310 my $orig = shift;
311 my $self = shift;
312 my $arg = shift;
313
56772e8c 314 unless( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
315 $arg = $self->reading( $arg )
3a2ebbf4 316 }
56772e8c 317 my $argid = $arg->id;
3a2ebbf4 318 # Remove the reading from the graphs.
c1915ab9 319 $self->_graphcalc_done(0);
4e483aa5 320 $self->_clear_cache; # Explicitly clear caches to GC the reading
56772e8c 321 $self->sequence->delete_vertex( $argid );
322 $self->relations->delete_reading( $argid )
323 unless $arg->is_nonrel;
3a2ebbf4 324
325 # Carry on.
56772e8c 326 $self->$orig( $argid );
3a2ebbf4 327};
7854e12e 328
4e483aa5 329=begin testing
330
331use Text::Tradition;
332
333my $cxfile = 't/data/Collatex-16.xml';
334my $t = Text::Tradition->new(
335 'name' => 'inline',
336 'input' => 'CollateX',
337 'file' => $cxfile,
338 );
339my $c = $t->collation;
340
341my $rno = scalar $c->readings;
342# Split n21 for testing purposes
343my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
344my $old_r = $c->reading( 'n21' );
345$old_r->alter_text( 'to' );
346$c->del_path( 'n20', 'n21', 'A' );
347$c->add_path( 'n20', 'n21p0', 'A' );
348$c->add_path( 'n21p0', 'n21', 'A' );
349$c->flatten_ranks();
350ok( $c->reading( 'n21p0' ), "New reading exists" );
351is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
352
679f17e1 353# Combine n3 and n4 ( with his )
4e483aa5 354$c->merge_readings( 'n3', 'n4', 1 );
355ok( !$c->reading('n4'), "Reading n4 is gone" );
356is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
357
679f17e1 358# Collapse n9 and n10 ( rood / root )
359$c->merge_readings( 'n9', 'n10' );
360ok( !$c->reading('n10'), "Reading n10 is gone" );
361is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 362
363# Combine n21 and n21p0
364my $remaining = $c->reading('n21');
365$remaining ||= $c->reading('n22'); # one of these should still exist
366$c->merge_readings( 'n21p0', $remaining, 1 );
367ok( !$c->reading('n21'), "Reading $remaining is gone" );
368is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
369
370=end testing
371
372=cut
7854e12e 373
3a2ebbf4 374sub merge_readings {
375 my $self = shift;
376
56772e8c 377 # Sanity check
378 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
379 my $mergemeta = $kept_obj->is_meta;
380 throw( "Cannot merge meta and non-meta reading" )
381 unless ( $mergemeta && $del_obj->is_meta )
382 || ( !$mergemeta && !$del_obj->is_meta );
383 if( $mergemeta ) {
384 throw( "Cannot merge with start or end node" )
385 if( $kept_obj eq $self->start || $kept_obj eq $self->end
386 || $del_obj eq $self->start || $del_obj eq $self->end );
387 }
3a2ebbf4 388 # We only need the IDs for adding paths to the graph, not the reading
389 # objects themselves.
56772e8c 390 my $kept = $kept_obj->id;
391 my $deleted = $del_obj->id;
c1915ab9 392 $self->_graphcalc_done(0);
10943ab0 393
3a2ebbf4 394 # The kept reading should inherit the paths and the relationships
395 # of the deleted reading.
396 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
397 my @vector = ( $kept );
398 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
399 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 400 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 401 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
402 $self->sequence->add_edge( @vector );
403 my $fwits = $self->sequence->get_edge_attributes( @vector );
404 @wits{keys %$fwits} = values %$fwits;
405 $self->sequence->set_edge_attributes( @vector, \%wits );
406 }
56772e8c 407 $self->relations->merge_readings( $kept, $deleted, $combine )
408 unless $mergemeta;
3a2ebbf4 409
410 # Do the deletion deed.
4e483aa5 411 if( $combine ) {
4e483aa5 412 my $joinstr = $combine_char;
413 unless( defined $joinstr ) {
414 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
415 $joinstr = $self->wordsep unless defined $joinstr;
416 }
417 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 418 }
3a2ebbf4 419 $self->del_reading( $deleted );
420}
7854e12e 421
3265b0ce 422
3a2ebbf4 423# Helper function for manipulating the graph.
424sub _stringify_args {
4e483aa5 425 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 426 $first = $first->id
427 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
428 $second = $second->id
429 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 430 return( $first, $second, @args );
3a2ebbf4 431}
df6d9812 432
4e5a7b2c 433# Helper function for manipulating the graph.
434sub _objectify_args {
435 my( $self, $first, $second, $arg ) = @_;
436 $first = $self->reading( $first )
437 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
438 $second = $self->reading( $second )
439 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
440 return( $first, $second, $arg );
441}
3a2ebbf4 442### Path logic
443
444sub add_path {
445 my $self = shift;
446
447 # We only need the IDs for adding paths to the graph, not the reading
448 # objects themselves.
56772e8c 449 my( $source, $target, $wit ) = $self->_objectify_args( @_ );
3a2ebbf4 450
c1915ab9 451 $self->_graphcalc_done(0);
3a2ebbf4 452 # Connect the readings
359944f7 453 unless( $self->sequence->has_edge( $source, $target ) ) {
454 $self->sequence->add_edge( $source, $target );
455 $self->relations->add_equivalence_edge( $source, $target );
456 }
3a2ebbf4 457 # Note the witness in question
458 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 459}
b15511bf 460
3a2ebbf4 461sub del_path {
462 my $self = shift;
49d4f2ac 463 my @args;
464 if( ref( $_[0] ) eq 'ARRAY' ) {
465 my $e = shift @_;
466 @args = ( @$e, @_ );
467 } else {
468 @args = @_;
469 }
3a2ebbf4 470
471 # We only need the IDs for adding paths to the graph, not the reading
472 # objects themselves.
49d4f2ac 473 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 474
c1915ab9 475 $self->_graphcalc_done(0);
3a2ebbf4 476 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 477 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 478 }
479 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
480 $self->sequence->delete_edge( $source, $target );
359944f7 481 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 482 }
784877d9 483}
484
3a2ebbf4 485
15d2d3df 486# Extra graph-alike utility
487sub has_path {
3a2ebbf4 488 my $self = shift;
489 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
490 return undef unless $self->sequence->has_edge( $source, $target );
491 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 492}
493
4e5a7b2c 494=head2 clear_witness( @sigil_list )
3a2ebbf4 495
4e5a7b2c 496Clear the given witnesses out of the collation entirely, removing references
497to them in paths, and removing readings that belong only to them. Should only
498be called via $tradition->del_witness.
3a2ebbf4 499
500=cut
501
4e5a7b2c 502sub clear_witness {
503 my( $self, @sigils ) = @_;
504
c1915ab9 505 $self->_graphcalc_done(0);
4e5a7b2c 506 # Clear the witness(es) out of the paths
507 foreach my $e ( $self->paths ) {
508 foreach my $sig ( @sigils ) {
509 $self->del_path( $e, $sig );
510 }
511 }
512
513 # Clear out the newly unused readings
514 foreach my $r ( $self->readings ) {
515 unless( $self->reading_witnesses( $r ) ) {
516 $self->del_reading( $r );
517 }
518 }
519}
3a2ebbf4 520
521sub add_relationship {
522 my $self = shift;
22222af9 523 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 524 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 525 $self->_graphcalc_done(0);
63778331 526 return @vectors;
22222af9 527}
ef9d481f 528
ca6e6095 529around qw/ get_relationship del_relationship / => sub {
530 my $orig = shift;
531 my $self = shift;
532 my @args = @_;
533 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
534 @args = @{$_[0]};
535 }
536 my( $source, $target ) = $self->_stringify_args( @args );
537 $self->$orig( $source, $target );
538};
539
22222af9 540=head2 reading_witnesses( $reading )
910a0a6d 541
22222af9 542Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 543
22222af9 544=cut
1d310495 545
1d310495 546sub reading_witnesses {
547 my( $self, $reading ) = @_;
548 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 549 # arbitrarily chosen "incoming". Thus, special-case the start node.
550 if( $reading eq $self->start ) {
551 return map { $_->sigil } $self->tradition->witnesses;
552 }
1d310495 553 my %all_witnesses;
554 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
555 my $wits = $self->sequence->get_edge_attributes( @$e );
556 @all_witnesses{ keys %$wits } = 1;
557 }
c12bb878 558 my $acstr = $self->ac_label;
559 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
560 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
561 }
1d310495 562 return keys %all_witnesses;
910a0a6d 563}
564
4e5a7b2c 565=head1 OUTPUT METHODS
8e1394aa 566
0ecb975c 567=head2 as_svg( \%options )
8e1394aa 568
0068967c 569Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 570See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 571
572=cut
573
574sub as_svg {
0ecb975c 575 my( $self, $opts ) = @_;
bfcbcecb 576 throw( "Need GraphViz installed to output SVG" )
577 unless File::Which::which( 'dot' );
e247aad1 578 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 579 $self->calculate_ranks()
580 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
e247aad1 581 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 582 my @cmd = qw/dot -Tsvg/;
583 my( $svg, $err );
584 my $dotfile = File::Temp->new();
585 ## USE FOR DEBUGGING
586 # $dotfile->unlink_on_destroy(0);
587 binmode $dotfile, ':utf8';
588 print $dotfile $self->as_dot( $opts );
589 push( @cmd, $dotfile->filename );
590 run( \@cmd, ">", binary(), \$svg );
e247aad1 591 $svg = decode_utf8( $svg );
592 $self->cached_svg( $svg ) unless $want_subgraph;
593 return $svg;
594 } else {
595 return $self->cached_svg;
b365fbae 596 }
8e1394aa 597}
598
b22576c6 599
0ecb975c 600=head2 as_dot( \%options )
b22576c6 601
0ecb975c 602Returns a string that is the collation graph expressed in dot
603(i.e. GraphViz) format. Options include:
b22576c6 604
0ecb975c 605=over 4
b22576c6 606
0ecb975c 607=item * from
b22576c6 608
0ecb975c 609=item * to
df6d9812 610
0ecb975c 611=item * color_common
612
613=back
df6d9812 614
615=cut
616
617sub as_dot {
0ecb975c 618 my( $self, $opts ) = @_;
619 my $startrank = $opts->{'from'} if $opts;
620 my $endrank = $opts->{'to'} if $opts;
621 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 622 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
623 && $self->end->rank > 100;
6648ee3d 624 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 625
b22576c6 626 # Check the arguments
627 if( $startrank ) {
628 return if $endrank && $startrank > $endrank;
629 return if $startrank > $self->end->rank;
630 }
631 if( defined $endrank ) {
632 return if $endrank < 0;
f1b3b33a 633 $endrank = undef if $endrank == $self->end->rank;
b22576c6 634 }
635
67da8d6c 636 my $graph_name = $self->tradition->name;
637 $graph_name =~ s/[^\w\s]//g;
638 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 639
640 my %graph_attrs = (
641 'rankdir' => 'LR',
642 'bgcolor' => 'none',
643 );
644 my %node_attrs = (
b8990398 645 'fontsize' => 14,
f13b5582 646 'fillcolor' => 'white',
647 'style' => 'filled',
648 'shape' => 'ellipse'
649 );
650 my %edge_attrs = (
651 'arrowhead' => 'open',
652 'color' => '#000000',
653 'fontcolor' => '#000000',
654 );
655
67da8d6c 656 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 657 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
658 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 659
b22576c6 660 # Output substitute start/end readings if necessary
661 if( $startrank ) {
43c94341 662 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 663 }
664 if( $endrank ) {
43c94341 665 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 666 }
b365fbae 667 if( $STRAIGHTENHACK ) {
668 ## HACK part 1
43c94341 669 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
670 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 671 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
672 }
b22576c6 673 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 674 # Sort the readings by rank if we have ranks; this speeds layout.
675 my @all_readings = $self->end->has_rank
676 ? sort { $a->rank <=> $b->rank } $self->readings
677 : $self->readings;
4633f9e4 678 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 679 foreach my $reading ( @all_readings ) {
b22576c6 680 # Only output readings within our rank range.
681 next if $startrank && $reading->rank < $startrank;
682 next if $endrank && $reading->rank > $endrank;
683 $used{$reading->id} = 1;
910a0a6d 684 # Need not output nodes without separate labels
3a2ebbf4 685 next if $reading->id eq $reading->text;
d4b75f44 686 my $rattrs;
30f0df34 687 my $label = $reading->text;
629e27b0 688 $label .= '-' if $reading->join_next;
689 $label = "-$label" if $reading->join_prior;
8f9cab7b 690 $label =~ s/\"/\\\"/g;
d4b75f44 691 $rattrs->{'label'} = $label;
10e4b1ac 692 $rattrs->{'id'} = $reading->id;
0ecb975c 693 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 694 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 695 }
3a2ebbf4 696
30ddc24c 697 # Add the real edges. Need to weight one edge per rank jump, in a
698 # continuous line.
b365fbae 699 # my $weighted = $self->_add_edge_weights;
b22576c6 700 my @edges = $self->paths;
3bdec618 701 my( %substart, %subend );
b22576c6 702 foreach my $edge ( @edges ) {
703 # Do we need to output this edge?
508fd430 704 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 705 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 706 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 707
b22576c6 708 # Account for the rank gap if necessary
30ddc24c 709 my $rank0 = $self->reading( $edge->[0] )->rank
710 if $self->reading( $edge->[0] )->has_rank;
711 my $rank1 = $self->reading( $edge->[1] )->rank
712 if $self->reading( $edge->[1] )->has_rank;
713 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
714 $variables->{'minlen'} = $rank1 - $rank0;
715 }
716
717 # Add the calculated edge weights
b365fbae 718 # if( exists $weighted->{$edge->[0]}
e247aad1 719 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
720 # # $variables->{'color'} = 'red';
721 # $variables->{'weight'} = 3.0;
722 # }
30ddc24c 723
508fd430 724 # EXPERIMENTAL: make edge width reflect no. of witnesses
725 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
726 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
727
f13b5582 728 my $varopts = _dot_attr_string( $variables );
729 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
730 $edge->[0], $edge->[1], $varopts );
3bdec618 731 } elsif( $used{$edge->[0]} ) {
732 $subend{$edge->[0]} = 1;
733 } elsif( $used{$edge->[1]} ) {
734 $substart{$edge->[1]} = 1;
b22576c6 735 }
df6d9812 736 }
3bdec618 737 # Add substitute start and end edges if necessary
738 foreach my $node ( keys %substart ) {
027d819c 739 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 740 my $variables = { %edge_attrs, 'label' => $witstr };
741 my $varopts = _dot_attr_string( $variables );
43c94341 742 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;";
3bdec618 743 }
744 foreach my $node ( keys %subend ) {
027d819c 745 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 746 my $variables = { %edge_attrs, 'label' => $witstr };
747 my $varopts = _dot_attr_string( $variables );
43c94341 748 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;";
3bdec618 749 }
b365fbae 750 # HACK part 2
751 if( $STRAIGHTENHACK ) {
43c94341 752 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
753 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 754 }
30ddc24c 755
df6d9812 756 $dot .= "}\n";
757 return $dot;
758}
759
f13b5582 760sub _dot_attr_string {
761 my( $hash ) = @_;
762 my @attrs;
763 foreach my $k ( sort keys %$hash ) {
764 my $v = $hash->{$k};
765 push( @attrs, $k.'="'.$v.'"' );
766 }
767 return( '[ ' . join( ', ', @attrs ) . ' ]' );
768}
769
30ddc24c 770sub _add_edge_weights {
771 my $self = shift;
772 # Walk the graph from START to END, choosing the successor node with
773 # the largest number of witness paths each time.
774 my $weighted = {};
775 my $curr = $self->start->id;
008fc8a6 776 my $ranked = $self->end->has_rank;
30ddc24c 777 while( $curr ne $self->end->id ) {
008fc8a6 778 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 779 my @succ = sort { $self->path_witnesses( $curr, $a )
780 <=> $self->path_witnesses( $curr, $b ) }
781 $self->sequence->successors( $curr );
782 my $next = pop @succ;
008fc8a6 783 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 784 # Try to avoid lacunae in the weighted path.
008fc8a6 785 while( @succ &&
786 ( $self->reading( $next )->is_lacuna ||
787 $nextrank - $rank > 1 ) ){
30ddc24c 788 $next = pop @succ;
789 }
790 $weighted->{$curr} = $next;
791 $curr = $next;
792 }
793 return $weighted;
794}
795
027d819c 796=head2 path_witnesses( $edge )
797
798Returns the list of sigils whose witnesses are associated with the given edge.
799The edge can be passed as either an array or an arrayref of ( $source, $target ).
800
801=cut
802
3a2ebbf4 803sub path_witnesses {
804 my( $self, @edge ) = @_;
805 # If edge is an arrayref, cope.
806 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
807 my $e = shift @edge;
808 @edge = @$e;
809 }
810 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 811 return @wits;
3a2ebbf4 812}
813
7f9f05e8 814# Helper function. Make a display label for the given witnesses, showing a.c.
815# witnesses only where the main witness is not also in the list.
027d819c 816sub _path_display_label {
508fd430 817 my $self = shift;
7f9f05e8 818 my %wits;
819 map { $wits{$_} = 1 } @_;
820
821 # If an a.c. wit is listed, remove it if the main wit is also listed.
822 # Otherwise keep it for explicit listing.
823 my $aclabel = $self->ac_label;
824 my @disp_ac;
825 foreach my $w ( sort keys %wits ) {
826 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
827 if( exists $wits{$1} ) {
828 delete $wits{$w};
829 } else {
830 push( @disp_ac, $w );
831 }
832 }
833 }
834
835 # See if we are in a majority situation.
8f9cab7b 836 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1ff82d4f 837 $maj = $maj > 5 ? $maj : 5;
7f9f05e8 838 if( scalar keys %wits > $maj ) {
839 unshift( @disp_ac, 'majority' );
840 return join( ', ', @disp_ac );
8f9cab7b 841 } else {
7f9f05e8 842 return join( ', ', sort keys %wits );
8f9cab7b 843 }
844}
1dd07bda 845
bf6e338d 846=head2 readings_at_rank( $rank )
1dd07bda 847
bf6e338d 848Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 849
850=cut
851
bf6e338d 852sub readings_at_rank {
1dd07bda 853 my( $self, $rank ) = @_;
bf6e338d 854 my $table = $self->alignment_table;
855 # Table rank is real rank - 1.
856 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
857 my %readings;
858 foreach my $e ( @elements ) {
859 next unless ref( $e ) eq 'HASH';
860 next unless exists $e->{'t'};
861 $readings{$e->{'t'}->id} = $e->{'t'};
862 }
863 return values %readings;
1dd07bda 864}
8f9cab7b 865
4e5a7b2c 866=head2 as_graphml
8e1394aa 867
4e5a7b2c 868Returns a GraphML representation of the collation. The GraphML will contain
869two graphs. The first expresses the attributes of the readings and the witness
870paths that link them; the second expresses the relationships that link the
871readings. This is the native transfer format for a tradition.
8e1394aa 872
56eefa04 873=begin testing
874
875use Text::Tradition;
876
877my $READINGS = 311;
878my $PATHS = 361;
879
880my $datafile = 't/data/florilegium_tei_ps.xml';
881my $tradition = Text::Tradition->new( 'input' => 'TEI',
882 'name' => 'test0',
883 'file' => $datafile,
884 'linear' => 1 );
885
886ok( $tradition, "Got a tradition object" );
887is( scalar $tradition->witnesses, 13, "Found all witnesses" );
888ok( $tradition->collation, "Tradition has a collation" );
889
890my $c = $tradition->collation;
891is( scalar $c->readings, $READINGS, "Collation has all readings" );
892is( scalar $c->paths, $PATHS, "Collation has all paths" );
893is( scalar $c->relationships, 0, "Collation has all relationships" );
894
895# Add a few relationships
896$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
897$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
898$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
899
900# Now write it to GraphML and parse it again.
901
902my $graphml = $c->as_graphml;
903my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
904is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
905is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
906is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
907
2a812726 908# Now add a stemma, write to GraphML, and parse again.
909my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
910is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
911is( $tradition->stemmata, 1, "Tradition now has the stemma" );
912$graphml = $c->as_graphml;
913like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
914
56eefa04 915=end testing
916
8e1394aa 917=cut
918
919sub as_graphml {
a30ca502 920 my( $self, $options ) = @_;
3d14b48e 921 $self->calculate_ranks unless $self->_graphcalc_done;
922
a30ca502 923 my $start = $options->{'from'}
924 ? $self->reading( $options->{'from'} ) : $self->start;
925 my $end = $options->{'to'}
926 ? $self->reading( $options->{'to'} ) : $self->end;
927 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
928 throw( 'Start node must be before end node' );
929 }
930 # The readings need to be ranked for this to work.
931 $start = $self->start unless $start->has_rank;
932 $end = $self->end unless $end->has_rank;
414cc046 933 my $rankoffset = 0;
934 unless( $start eq $self->start ) {
935 $rankoffset = $start->rank - 1;
936 }
a30ca502 937 my %use_readings;
938
8e1394aa 939 # Some namespaces
940 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
941 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
942 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 943 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 944
945 # Create the document and root node
946 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
947 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
948 $graphml->setDocumentElement( $root );
949 $root->setNamespace( $xsi_ns, 'xsi', 0 );
950 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 951
952 # List of attribute types to save on our objects and their corresponding
953 # GraphML types
954 my %save_types = (
955 'Str' => 'string',
956 'Int' => 'int',
957 'Bool' => 'boolean',
10e4b1ac 958 'ReadingID' => 'string',
bbd064a9 959 'RelationshipType' => 'string',
960 'RelationshipScope' => 'string',
961 );
962
963 # List of attribute names *not* to save on our objects.
964 # We will also not save any attribute beginning with _.
965 my %skipsave;
966 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 967
bbd064a9 968 # Add the data keys for the graph. Include an extra key 'version' for the
969 # GraphML output version.
e309421a 970 my %graph_data_keys;
971 my $gdi = 0;
bbd064a9 972 my %graph_attributes = ( 'version' => 'string' );
973 # Graph attributes include those of Tradition and those of Collation.
974 my %gattr_from;
975 my $tmeta = $self->tradition->meta;
976 my $cmeta = $self->meta;
977 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
978 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
979 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
980 next if $attr->name =~ /^_/;
981 next if $skipsave{$attr->name};
982 next unless $save_types{$attr->type_constraint->name};
983 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
984 }
2a812726 985 # Extra custom key for the tradition stemma(ta)
986 $graph_attributes{'stemmata'} = 'string';
bbd064a9 987
988 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 989 $graph_data_keys{$datum} = 'dg'.$gdi++;
990 my $key = $root->addNewChild( $graphml_ns, 'key' );
991 $key->setAttribute( 'attr.name', $datum );
bbd064a9 992 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 993 $key->setAttribute( 'for', 'graph' );
994 $key->setAttribute( 'id', $graph_data_keys{$datum} );
995 }
f6066bac 996
bbd064a9 997 # Add the data keys for reading nodes
998 my %reading_attributes;
999 my $rmeta = Text::Tradition::Collation::Reading->meta;
1000 foreach my $attr( $rmeta->get_all_attributes ) {
1001 next if $attr->name =~ /^_/;
1002 next if $skipsave{$attr->name};
1003 next unless $save_types{$attr->type_constraint->name};
1004 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1005 }
7cd9f181 1006 # Extra custom key for the reading morphology
1007 $reading_attributes{'lexemes'} = 'string';
1008
ef9d481f 1009 my %node_data_keys;
1010 my $ndi = 0;
bbd064a9 1011 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1012 $node_data_keys{$datum} = 'dn'.$ndi++;
1013 my $key = $root->addNewChild( $graphml_ns, 'key' );
1014 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1015 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1016 $key->setAttribute( 'for', 'node' );
1017 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1018 }
1019
bbd064a9 1020 # Add the data keys for edges, that is, paths and relationships. Path
1021 # data does not come from a Moose class so is here manually.
ef9d481f 1022 my $edi = 0;
1023 my %edge_data_keys;
bbd064a9 1024 my %edge_attributes = (
3a2ebbf4 1025 witness => 'string', # ID/label for a path
3a2ebbf4 1026 extra => 'boolean', # Path key
3a2ebbf4 1027 );
bbd064a9 1028 my @path_attributes = keys %edge_attributes; # track our manual additions
1029 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1030 foreach my $attr( $pmeta->get_all_attributes ) {
1031 next if $attr->name =~ /^_/;
1032 next if $skipsave{$attr->name};
1033 next unless $save_types{$attr->type_constraint->name};
1034 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1035 }
1036 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1037 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1038 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1039 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1040 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1041 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1042 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1043 }
3a2ebbf4 1044
cc31ebaa 1045 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1046 my $xmlidname = $self->tradition->name;
1047 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1048 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1049 $xmlidname = '_'.$xmlidname;
1050 }
2c669bca 1051 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1052 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1053 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1054 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1055 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1056 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1057 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1058 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1059
2a812726 1060 # Tradition/collation attribute data
bbd064a9 1061 foreach my $datum ( keys %graph_attributes ) {
1062 my $value;
1063 if( $datum eq 'version' ) {
2a812726 1064 $value = '3.2';
1065 } elsif( $datum eq 'stemmata' ) {
1066 my @stemstrs;
1067 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1068 $self->tradition->stemmata;
1069 $value = join( "\n", @stemstrs );
bbd064a9 1070 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1071 $value = $self->tradition->$datum;
1072 } else {
1073 $value = $self->$datum;
1074 }
2c669bca 1075 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1076 }
8e1394aa 1077
1078 my $node_ctr = 0;
1079 my %node_hash;
22222af9 1080 # Add our readings to the graph
3a2ebbf4 1081 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1082 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1083 ( $n->rank < $start->rank || $n->rank > $end->rank );
1084 $use_readings{$n->id} = 1;
2c669bca 1085 # Add to the main graph
1086 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1087 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1088 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1089 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1090 foreach my $d ( keys %reading_attributes ) {
255875b8 1091 my $nval = $n->$d;
7cd9f181 1092 # Custom serialization
1093 if( $d eq 'lexemes' ) {
1094 # If nval is a true value, we have lexemes so we need to
1095 # serialize them. Otherwise set nval to undef so that the
1096 # key is excluded from this reading.
1097 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1098 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1099 $nval = undef;
7cd9f181 1100 }
cc31ebaa 1101 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1102 # Adjust the ranks within the subgraph.
cc31ebaa 1103 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1104 : $nval - $rankoffset;
414cc046 1105 }
255875b8 1106 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1107 if defined $nval;
1108 }
b15511bf 1109 }
1110
2c669bca 1111 # Add the path edges to the sequence graph
df6d9812 1112 my $edge_ctr = 0;
3a2ebbf4 1113 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1114 # We add an edge in the graphml for every witness in $e.
a30ca502 1115 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1116 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1117 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1118 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1119 # Skip any path from start to end; that witness is not in the subgraph.
1120 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1121 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1122 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1123 $node_hash{ $e->[0] },
1124 $node_hash{ $e->[1] } );
2c669bca 1125 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1126 $edge_el->setAttribute( 'source', $from );
1127 $edge_el->setAttribute( 'target', $to );
1128 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1129
1130 # It's a witness path, so add the witness
1131 my $base = $wit;
1132 my $key = $edge_data_keys{'witness'};
1133 # Is this an ante-corr witness?
1134 my $aclabel = $self->ac_label;
1135 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1136 # Keep the base witness
1137 $base = $1;
1138 # ...and record that this is an 'extra' reading path
1139 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1140 }
1141 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1142 }
1143 }
1144
cc31ebaa 1145 # Report the actual number of nodes and edges that went in
1146 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1147 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1148
22222af9 1149 # Add the relationship graph to the XML
bbd064a9 1150 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1151 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1152 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1153
94c00c71 1154 # Save and return the thing
1155 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1156 return $result;
df6d9812 1157}
1158
b15511bf 1159sub _add_graphml_data {
1160 my( $el, $key, $value ) = @_;
b15511bf 1161 return unless defined $value;
c9bf3dbf 1162 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1163 $data_el->setAttribute( 'key', $key );
1164 $data_el->appendText( $value );
8e1394aa 1165}
1166
4e5a7b2c 1167=head2 as_csv
910a0a6d 1168
1169Returns a CSV alignment table representation of the collation graph, one
2c669bca 1170row per witness (or witness uncorrected.)
910a0a6d 1171
1172=cut
1173
1174sub as_csv {
3a2ebbf4 1175 my( $self ) = @_;
1dd07bda 1176 my $table = $self->alignment_table;
82fa4d57 1177 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1178 my @result;
2c669bca 1179 # Make the header row
1180 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1181 push( @result, decode_utf8( $csv->string ) );
1182 # Make the rest of the rows
1183 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1184 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1185 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1186 $csv->combine( @row );
910a0a6d 1187 push( @result, decode_utf8( $csv->string ) );
1188 }
3a2ebbf4 1189 return join( "\n", @result );
910a0a6d 1190}
1191
1dd07bda 1192=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1193
566f4595 1194Return a reference to an alignment table, in a slightly enhanced CollateX
1195format which looks like this:
1196
1197 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1198 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1199 { witness => "SIG2",
4e5a7b2c 1200 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1201 ... ],
1202 length => TEXTLEN };
1203
1204If $use_refs is set to 1, the reading object is returned in the table
1205instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1206
1207If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1208keys have a true hash value will be included.
2c669bca 1209
1210=cut
9f3ba6f7 1211
1dd07bda 1212sub alignment_table {
1213 my( $self ) = @_;
c1915ab9 1214 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1215 return $self->cached_table if $self->has_cached_table;
1216
0ecb975c 1217 # Make sure we can do this
1218 throw( "Need a linear graph in order to make an alignment table" )
1219 unless $self->linear;
1220 $self->calculate_ranks unless $self->end->has_rank;
1221
2c669bca 1222 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1223 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1224 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1225 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1226 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1227 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1228 push( @{$table->{'alignment'}},
1229 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1230 if( $wit->is_layered ) {
1231 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1232 $wit->sigil.$self->ac_label );
1dd07bda 1233 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1234 push( @{$table->{'alignment'}},
1235 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1236 }
1237 }
1dd07bda 1238 $self->cached_table( $table );
1239 return $table;
910a0a6d 1240}
1241
1242sub _make_witness_row {
1dd07bda 1243 my( $path, $positions ) = @_;
910a0a6d 1244 my %char_hash;
1245 map { $char_hash{$_} = undef } @$positions;
2c669bca 1246 my $debug = 0;
910a0a6d 1247 foreach my $rdg ( @$path ) {
eca16057 1248 my $rtext = $rdg->text;
1249 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1250 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1251 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1252 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1253 }
1254 my @row = map { $char_hash{$_} } @$positions;
eca16057 1255 # Fill in lacuna markers for undef spots in the row
1256 my $last_el = shift @row;
1257 my @filled_row = ( $last_el );
1258 foreach my $el ( @row ) {
0e476982 1259 # If we are using node reference, make the lacuna node appear many times
1260 # in the table. If not, use the lacuna tag.
1dd07bda 1261 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1262 $el = $last_el;
eca16057 1263 }
1264 push( @filled_row, $el );
1265 $last_el = $el;
1266 }
1267 return @filled_row;
910a0a6d 1268}
1269
4e5a7b2c 1270=head1 NAVIGATION METHODS
910a0a6d 1271
4e5a7b2c 1272=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1273
1274Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1275with $last, for the witness given in $sigil. If a $backup sigil is
1276specified (e.g. when walking a layered witness), it will be used wherever
1277no $sigil path exists. If there is a base text reading, that will be
1278used wherever no path exists for $sigil or $backup.
e2902068 1279
1280=cut
1281
910a0a6d 1282# TODO Think about returning some lazy-eval iterator.
b0b4421a 1283# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1284
e2902068 1285sub reading_sequence {
861c3e27 1286 my( $self, $start, $end, $witness ) = @_;
e2902068 1287
930ff666 1288 $witness = $self->baselabel unless $witness;
e2902068 1289 my @readings = ( $start );
1290 my %seen;
1291 my $n = $start;
3a2ebbf4 1292 while( $n && $n->id ne $end->id ) {
1293 if( exists( $seen{$n->id} ) ) {
63778331 1294 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1295 }
3a2ebbf4 1296 $seen{$n->id} = 1;
910a0a6d 1297
861c3e27 1298 my $next = $self->next_reading( $n, $witness );
44771cf2 1299 unless( $next ) {
63778331 1300 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1301 }
910a0a6d 1302 push( @readings, $next );
1303 $n = $next;
e2902068 1304 }
1305 # Check that the last reading is our end reading.
1306 my $last = $readings[$#readings];
63778331 1307 throw( "Last reading found from " . $start->text .
1308 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1309 unless $last->id eq $end->id;
e2902068 1310
1311 return @readings;
1312}
1313
4e5a7b2c 1314=head2 next_reading( $reading, $sigil );
8e1394aa 1315
4a8828f0 1316Returns the reading that follows the given reading along the given witness
930ff666 1317path.
8e1394aa 1318
1319=cut
1320
4a8828f0 1321sub next_reading {
e2902068 1322 # Return the successor via the corresponding path.
8e1394aa 1323 my $self = shift;
3a2ebbf4 1324 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1325 return undef unless $answer;
3a2ebbf4 1326 return $self->reading( $answer );
8e1394aa 1327}
1328
4e5a7b2c 1329=head2 prior_reading( $reading, $sigil )
8e1394aa 1330
4a8828f0 1331Returns the reading that precedes the given reading along the given witness
930ff666 1332path.
8e1394aa 1333
1334=cut
1335
4a8828f0 1336sub prior_reading {
e2902068 1337 # Return the predecessor via the corresponding path.
8e1394aa 1338 my $self = shift;
3a2ebbf4 1339 my $answer = $self->_find_linked_reading( 'prior', @_ );
1340 return $self->reading( $answer );
8e1394aa 1341}
1342
4a8828f0 1343sub _find_linked_reading {
861c3e27 1344 my( $self, $direction, $node, $path ) = @_;
1345
1346 # Get a backup if we are dealing with a layered witness
1347 my $alt_path;
1348 my $aclabel = $self->ac_label;
1349 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1350 $alt_path = $1;
1351 }
1352
e2902068 1353 my @linked_paths = $direction eq 'next'
3a2ebbf4 1354 ? $self->sequence->edges_from( $node )
1355 : $self->sequence->edges_to( $node );
e2902068 1356 return undef unless scalar( @linked_paths );
8e1394aa 1357
e2902068 1358 # We have to find the linked path that contains all of the
1359 # witnesses supplied in $path.
1360 my( @path_wits, @alt_path_wits );
4e5a7b2c 1361 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1362 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1363 my $base_le;
1364 my $alt_le;
1365 foreach my $le ( @linked_paths ) {
3a2ebbf4 1366 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1367 $base_le = $le;
910a0a6d 1368 }
508fd430 1369 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1370 if( _is_within( \@path_wits, \@le_wits ) ) {
1371 # This is the right path.
1372 return $direction eq 'next' ? $le->[1] : $le->[0];
1373 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1374 $alt_le = $le;
1375 }
8e1394aa 1376 }
e2902068 1377 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1378 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1379 if $alt_le;
e2902068 1380
1381 # Got this far? Return the base path if it exists.
3a2ebbf4 1382 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1383 if $base_le;
e2902068 1384
1385 # Got this far? We have no appropriate path.
2c669bca 1386 warn "Could not find $direction node from " . $node->id
910a0a6d 1387 . " along path $path";
8e1394aa 1388 return undef;
1389}
1390
4a8828f0 1391# Some set logic.
1392sub _is_within {
1393 my( $set1, $set2 ) = @_;
7854e12e 1394 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1395 foreach my $el ( @$set1 ) {
910a0a6d 1396 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1397 }
1398 return $ret;
1399}
1400
4e5a7b2c 1401# Return the string that joins together a list of witnesses for
1402# display on a single path.
1403sub _witnesses_of_label {
1404 my( $self, $label ) = @_;
1405 my $regex = $self->wit_list_separator;
1406 my @answer = split( /\Q$regex\E/, $label );
1407 return @answer;
b0b4421a 1408}
1409
d4b75f44 1410=head2 common_readings
1411
1412Returns the list of common readings in the graph (i.e. those readings that are
1413shared by all non-lacunose witnesses.)
1414
1415=cut
1416
1417sub common_readings {
1418 my $self = shift;
1419 my @common = grep { $_->is_common } $self->readings;
1420 return @common;
1421}
1422
fae52efd 1423=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1424
1425Returns the text of a witness (plus its backup, if we are using a layer)
1426as stored in the collation. The text is returned as a string, where the
1427individual readings are joined with spaces and the meta-readings (e.g.
1428lacunae) are omitted. Optional specification of $start and $end allows
1429the generation of a subset of the witness text.
4e5a7b2c 1430
b0b4421a 1431=cut
1432
1433sub path_text {
861c3e27 1434 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1435 $start = $self->start unless $start;
1436 $end = $self->end unless $end;
861c3e27 1437 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1438 my $pathtext = '';
1439 my $last;
1440 foreach my $r ( @path ) {
6ad2ce78 1441 unless ( $r->join_prior || !$last || $last->join_next ) {
1442 $pathtext .= ' ';
1443 }
1444 $pathtext .= $r->text;
629e27b0 1445 $last = $r;
1446 }
1447 return $pathtext;
b0b4421a 1448}
4e5a7b2c 1449
1450=head1 INITIALIZATION METHODS
1451
1452These are mostly for use by parsers.
1453
1454=head2 make_witness_path( $witness )
1455
1456Link the array of readings contained in $witness->path (and in
1457$witness->uncorrected_path if it exists) into collation paths.
1458Clear out the arrays when finished.
de51424a 1459
4e5a7b2c 1460=head2 make_witness_paths
1461
1462Call make_witness_path for all witnesses in the tradition.
1463
1464=cut
930ff666 1465
7e450e44 1466# For use when a collation is constructed from a base text and an apparatus.
1467# We have the sequences of readings and just need to add path edges.
1f7aa795 1468# When we are done, clear out the witness path attributes, as they are no
1469# longer needed.
1470# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1471
6a222840 1472sub make_witness_paths {
1473 my( $self ) = @_;
910a0a6d 1474 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1475 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1476 $self->make_witness_path( $wit );
7854e12e 1477 }
7854e12e 1478}
1479
6a222840 1480sub make_witness_path {
7854e12e 1481 my( $self, $wit ) = @_;
1482 my @chain = @{$wit->path};
15d2d3df 1483 my $sig = $wit->sigil;
fae52efd 1484 # Add start and end if necessary
1485 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1486 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1487 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1488 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1489 }
1f7aa795 1490 if( $wit->is_layered ) {
d9e873d0 1491 @chain = @{$wit->uncorrected_path};
fae52efd 1492 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1493 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1494 foreach my $idx( 0 .. $#chain-1 ) {
1495 my $source = $chain[$idx];
1496 my $target = $chain[$idx+1];
1497 $self->add_path( $source, $target, $sig.$self->ac_label )
1498 unless $self->has_path( $source, $target, $sig );
1499 }
15d2d3df 1500 }
1f7aa795 1501 $wit->clear_path;
1502 $wit->clear_uncorrected_path;
e2902068 1503}
1504
4e5a7b2c 1505=head2 calculate_ranks
1506
1507Calculate the reading ranks (that is, their aligned positions relative
1508to each other) for the graph. This can only be called on linear collations.
1509
b365fbae 1510=begin testing
1511
1512use Text::Tradition;
1513
1514my $cxfile = 't/data/Collatex-16.xml';
1515my $t = Text::Tradition->new(
1516 'name' => 'inline',
1517 'input' => 'CollateX',
1518 'file' => $cxfile,
1519 );
1520my $c = $t->collation;
1521
1522# Make an svg
bfcbcecb 1523my $table = $c->alignment_table;
1524ok( $c->has_cached_table, "Alignment table was cached" );
1525is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1526$c->calculate_ranks;
bfcbcecb 1527is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1528$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1529isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1530
1531=end testing
1532
4e5a7b2c 1533=cut
1534
910a0a6d 1535sub calculate_ranks {
1536 my $self = shift;
b365fbae 1537 # Save the existing ranks, in case we need to invalidate the cached SVG.
1538 my %existing_ranks;
ac4d7ac2 1539 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1540
1541 # Do the rankings based on the relationship equivalence graph, starting
1542 # with the start node.
56772e8c 1543 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1544
910a0a6d 1545 # Transfer our rankings from the topological graph to the real one.
1546 foreach my $r ( $self->readings ) {
56772e8c 1547 if( $r->is_nonrel ) {
1548 # These are not in the equivalence graph. Grab the rank of the highest
1549 # predecessor + 1.
1550 my @preds = $self->sequence->predecessors( $r );
1551 my $mrank = 0;
1552 map { my $rk = $node_ranks->{$self->equivalence( $_ )} + 1;
1553 $mrank = $rk > $mrank ? $rk : $mrank; }
1554 $self->sequence->predecessors( $r );
1555 throw( "All predecessors of $r unranked!" ) unless $mrank;
1556 $r->rank( $mrank );
1557 } elsif( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 1558 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1559 } else {
63778331 1560 # Die. Find the last rank we calculated.
359944f7 1561 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1562 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1563 $self->readings;
1564 my $last = pop @all_defined;
1565 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1566 }
de51424a 1567 }
bfcbcecb 1568 # Do we need to invalidate the cached data?
1569 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1570 foreach my $r ( $self->readings ) {
7c293912 1571 next if defined( $existing_ranks{$r} )
1572 && $existing_ranks{$r} == $r->rank;
c1915ab9 1573 # Something has changed, so clear the cache
bfcbcecb 1574 $self->_clear_cache;
c1915ab9 1575 # ...and recalculate the common readings.
1576 $self->calculate_common_readings();
b365fbae 1577 last;
1578 }
1579 }
c1915ab9 1580 # The graph calculation information is now up to date.
1581 $self->_graphcalc_done(1);
8e1394aa 1582}
3a1f2523 1583
c1915ab9 1584sub _clear_cache {
1585 my $self = shift;
1586 $self->wipe_svg if $self->has_cached_svg;
1587 $self->wipe_table if $self->has_cached_table;
1588}
1589
1590
4e5a7b2c 1591=head2 flatten_ranks
1592
1593A convenience method for parsing collation data. Searches the graph for readings
1594with the same text at the same rank, and merges any that are found.
1595
1596=cut
1597
0e476982 1598sub flatten_ranks {
1599 my $self = shift;
1600 my %unique_rank_rdg;
bf6e338d 1601 my $changed;
0e476982 1602 foreach my $rdg ( $self->readings ) {
1603 next unless $rdg->has_rank;
1604 my $key = $rdg->rank . "||" . $rdg->text;
1605 if( exists $unique_rank_rdg{$key} ) {
1606 # Combine!
56eefa04 1607 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1608 $changed = 1;
0e476982 1609 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1610 # TODO see if this now makes a common point.
0e476982 1611 } else {
1612 $unique_rank_rdg{$key} = $rdg;
1613 }
1614 }
bf6e338d 1615 # If we merged readings, the ranks are still fine but the alignment
1616 # table is wrong. Wipe it.
1617 $self->wipe_table() if $changed;
0e476982 1618}
4633f9e4 1619
1620
d4b75f44 1621=head2 calculate_common_readings
1622
1623Goes through the graph identifying the readings that appear in every witness
1624(apart from those with lacunae at that spot.) Marks them as common and returns
1625the list.
1626
1627=begin testing
1628
1629use Text::Tradition;
1630
1631my $cxfile = 't/data/Collatex-16.xml';
1632my $t = Text::Tradition->new(
1633 'name' => 'inline',
1634 'input' => 'CollateX',
1635 'file' => $cxfile,
1636 );
1637my $c = $t->collation;
1638
1639my @common = $c->calculate_common_readings();
1640is( scalar @common, 8, "Found correct number of common readings" );
1641my @marked = sort $c->common_readings();
1642is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1643my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1644is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1645
1646=end testing
1647
1648=cut
1649
1650sub calculate_common_readings {
1651 my $self = shift;
1652 my @common;
c1915ab9 1653 map { $_->is_common( 0 ) } $self->readings;
1654 # Implicitly calls calculate_ranks
1dd07bda 1655 my $table = $self->alignment_table;
d4b75f44 1656 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1657 my @row = map { $_->{'tokens'}->[$idx]
1658 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1659 @{$table->{'alignment'}};
d4b75f44 1660 my %hash;
1661 foreach my $r ( @row ) {
1662 if( $r ) {
1663 $hash{$r->id} = $r unless $r->is_meta;
1664 } else {
1665 $hash{'UNDEF'} = $r;
1666 }
1667 }
1668 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1669 my( $r ) = values %hash;
1670 $r->is_common( 1 );
1671 push( @common, $r );
1672 }
1673 }
1674 return @common;
1675}
1676
861c3e27 1677=head2 text_from_paths
1678
1679Calculate the text array for all witnesses from the path, for later consistency
1680checking. Only to be used if there is no non-graph-based way to know the
1681original texts.
1682
1683=cut
1684
1685sub text_from_paths {
1686 my $self = shift;
1687 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1688 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1689 my @text;
1690 foreach my $r ( @readings ) {
1691 next if $r->is_meta;
1692 push( @text, $r->text );
1693 }
861c3e27 1694 $wit->text( \@text );
1695 if( $wit->is_layered ) {
5164a6f0 1696 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1697 $wit->sigil.$self->ac_label );
1698 my @uctext;
1699 foreach my $r ( @ucrdgs ) {
1700 next if $r->is_meta;
1701 push( @uctext, $r->text );
1702 }
1703 $wit->layertext( \@uctext );
861c3e27 1704 }
1705 }
1706}
0e476982 1707
4e5a7b2c 1708=head1 UTILITY FUNCTIONS
1709
1710=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1711
4e5a7b2c 1712Find the last reading that occurs in sequence before both the given readings.
414cc046 1713At the very least this should be $self->start.
4e5a7b2c 1714
1715=head2 common_successor( $reading_a, $reading_b )
1716
1717Find the first reading that occurs in sequence after both the given readings.
414cc046 1718At the very least this should be $self->end.
4e5a7b2c 1719
22222af9 1720=begin testing
1721
1722use Text::Tradition;
1723
1724my $cxfile = 't/data/Collatex-16.xml';
1725my $t = Text::Tradition->new(
1726 'name' => 'inline',
1727 'input' => 'CollateX',
1728 'file' => $cxfile,
1729 );
1730my $c = $t->collation;
1731
679f17e1 1732is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1733 'n20', "Found correct common predecessor" );
679f17e1 1734is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1735 '__END__', "Found correct common successor" );
22222af9 1736
4e5a7b2c 1737is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1738 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1739is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1740 '__END__', "Found correct common successor for readings on same path" );
22222af9 1741
1742=end testing
1743
1744=cut
1745
1746## Return the closest reading that is a predecessor of both the given readings.
1747sub common_predecessor {
1748 my $self = shift;
4e5a7b2c 1749 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1750 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1751}
1752
1753sub common_successor {
1754 my $self = shift;
4e5a7b2c 1755 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1756 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1757}
1758
414cc046 1759
1760# TODO think about how to do this without ranks...
027d819c 1761sub _common_in_path {
22222af9 1762 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1763 my $iter = $self->end->rank;
22222af9 1764 my @candidates;
414cc046 1765 my @last_r1 = ( $r1 );
1766 my @last_r2 = ( $r2 );
1767 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1768 my %all_seen;
414cc046 1769 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1770 while( !@candidates ) {
414cc046 1771 last unless $iter--; # Avoid looping infinitely
1772 # Iterate separately down the graph from r1 and r2
1773 my( @new_lc1, @new_lc2 );
1774 foreach my $lc ( @last_r1 ) {
1775 foreach my $p ( $lc->$dir ) {
1776 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1777 # print STDERR "Path candidate $p from $lc\n";
1778 push( @candidates, $p );
002e3600 1779 } elsif( !$all_seen{$p->id} ) {
414cc046 1780 $all_seen{$p->id} = 'r1';
1781 push( @new_lc1, $p );
1782 }
1783 }
1784 }
1785 foreach my $lc ( @last_r2 ) {
22222af9 1786 foreach my $p ( $lc->$dir ) {
414cc046 1787 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1788 # print STDERR "Path candidate $p from $lc\n";
22222af9 1789 push( @candidates, $p );
002e3600 1790 } elsif( !$all_seen{$p->id} ) {
414cc046 1791 $all_seen{$p->id} = 'r2';
1792 push( @new_lc2, $p );
22222af9 1793 }
1794 }
1795 }
414cc046 1796 @last_r1 = @new_lc1;
1797 @last_r2 = @new_lc2;
22222af9 1798 }
1799 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1800 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1801}
1802
63778331 1803sub throw {
1804 Text::Tradition::Error->throw(
1805 'ident' => 'Collation error',
1806 'message' => $_[0],
1807 );
1808}
1809
dd3b58b0 1810no Moose;
1811__PACKAGE__->meta->make_immutable;
e867486f 1812
027d819c 1813=head1 LICENSE
e867486f 1814
027d819c 1815This package is free software and is provided "as is" without express
1816or implied warranty. You can redistribute it and/or modify it under
1817the same terms as Perl itself.
e867486f 1818
027d819c 1819=head1 AUTHOR
e867486f 1820
027d819c 1821Tara L Andrews E<lt>aurum@cpan.orgE<gt>