fix test failure
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
6771a1b1 3use feature 'say';
910a0a6d 4use Encode qw( decode_utf8 );
5use File::Temp;
bfcbcecb 6use File::Which;
c9bf3dbf 7use Graph;
8e1394aa 8use IPC::Run qw( run binary );
82fa4d57 9use Text::CSV;
5c0072ef 10use Text::Tradition::Collation::Data;
b15511bf 11use Text::Tradition::Collation::Reading;
22222af9 12use Text::Tradition::Collation::RelationshipStore;
63778331 13use Text::Tradition::Error;
cc31ebaa 14use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
5c0072ef 15use XML::LibXML;
16use XML::LibXML::XPathContext;
dd3b58b0 17use Moose;
18
5c0072ef 19has _data => (
20 isa => 'Text::Tradition::Collation::Data',
21 is => 'ro',
22 required => 1,
23 handles => [ qw(
24 sequence
25 paths
26 _set_relations
27 relations
28 _set_start
29 _set_end
30 ac_label
31 has_cached_table
32 relationships
33 related_readings
34 get_relationship
35 del_relationship
36 equivalence
37 equivalence_graph
38 readings
39 reading
40 _add_reading
41 del_reading
42 has_reading
43 wit_list_separator
44 baselabel
45 linear
46 wordsep
47 start
48 end
49 cached_table
50 _graphcalc_done
51 has_cached_svg
52 wipe_table
53 )]
54);
dd3b58b0 55
3a2ebbf4 56has 'tradition' => (
57 is => 'ro',
d047cd52 58 isa => 'Text::Tradition',
8cfd99c4 59 writer => '_set_tradition',
8d9a1cd8 60 weak_ref => 1,
d047cd52 61 );
dd3b58b0 62
4e5a7b2c 63=head1 NAME
64
65Text::Tradition::Collation - a software model for a text collation
66
67=head1 SYNOPSIS
68
69 use Text::Tradition;
70 my $t = Text::Tradition->new(
71 'name' => 'this is a text',
72 'input' => 'TEI',
73 'file' => '/path/to/tei_parallel_seg_file.xml' );
74
75 my $c = $t->collation;
76 my @readings = $c->readings;
77 my @paths = $c->paths;
78 my @relationships = $c->relationships;
79
80 my $svg_variant_graph = $t->collation->as_svg();
81
82=head1 DESCRIPTION
83
84Text::Tradition is a library for representation and analysis of collated
85texts, particularly medieval ones. The Collation is the central feature of
86a Tradition, where the text, its sequence of readings, and its relationships
87between readings are actually kept.
88
89=head1 CONSTRUCTOR
90
91=head2 new
92
93The constructor. Takes a hash or hashref of the following arguments:
94
95=over
96
97=item * tradition - The Text::Tradition object to which the collation
98belongs. Required.
99
100=item * linear - Whether the collation should be linear; that is, whether
101transposed readings should be treated as two linked readings rather than one,
102and therefore whether the collation graph is acyclic. Defaults to true.
103
4e5a7b2c 104=item * baselabel - The default label for the path taken by a base text
105(if any). Defaults to 'base text'.
106
107=item * wit_list_separator - The string to join a list of witnesses for
108purposes of making labels in display graphs. Defaults to ', '.
109
110=item * ac_label - The extra label to tack onto a witness sigil when
111representing another layer of path for the given witness - that is, when
112a text has more than one possible reading due to scribal corrections or
113the like. Defaults to ' (a.c.)'.
114
4e483aa5 115=item * wordsep - The string used to separate words in the original text.
116Defaults to ' '.
117
4e5a7b2c 118=back
119
120=head1 ACCESSORS
121
122=head2 tradition
123
124=head2 linear
125
4e5a7b2c 126=head2 wit_list_separator
127
128=head2 baselabel
129
130=head2 ac_label
131
4e483aa5 132=head2 wordsep
133
4e5a7b2c 134Simple accessors for collation attributes.
135
136=head2 start
137
138The meta-reading at the start of every witness path.
139
140=head2 end
141
142The meta-reading at the end of every witness path.
143
144=head2 readings
145
146Returns all Reading objects in the graph.
147
148=head2 reading( $id )
149
150Returns the Reading object corresponding to the given ID.
151
152=head2 add_reading( $reading_args )
153
154Adds a new reading object to the collation.
155See L<Text::Tradition::Collation::Reading> for the available arguments.
156
157=head2 del_reading( $object_or_id )
158
159Removes the given reading from the collation, implicitly removing its
160paths and relationships.
161
4e5a7b2c 162=head2 has_reading( $id )
163
164Predicate to see whether a given reading ID is in the graph.
165
166=head2 reading_witnesses( $object_or_id )
167
168Returns a list of sigils whose witnesses contain the reading.
169
170=head2 paths
171
172Returns all reading paths within the document - that is, all edges in the
173collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
174
175=head2 add_path( $source, $target, $sigil )
176
177Links the given readings in the collation in sequence, under the given witness
178sigil. The readings may be specified by object or ID.
179
180=head2 del_path( $source, $target, $sigil )
181
182Links the given readings in the collation in sequence, under the given witness
183sigil. The readings may be specified by object or ID.
184
185=head2 has_path( $source, $target );
186
187Returns true if the two readings are linked in sequence in any witness.
188The readings may be specified by object or ID.
189
190=head2 relationships
191
192Returns all Relationship objects in the collation.
193
194=head2 add_relationship( $reading, $other_reading, $options )
195
196Adds a new relationship of the type given in $options between the two readings,
197which may be specified by object or ID. Returns a value of ( $status, @vectors)
198where $status is true on success, and @vectors is a list of relationship edges
199that were ultimately added.
200See L<Text::Tradition::Collation::Relationship> for the available options.
201
202=cut
dd3b58b0 203
5c0072ef 204sub BUILDARGS {
205 my ( $class, @args ) = @_;
206 my %args = @args == 1 ? %{ $args[0] } : @args;
207 # TODO determine these from the Moose::Meta object
208 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
209 linear wordsep start end cached_table _graphcalc_done);
210 my %data_args;
211 for my $attr (@delegate_attrs) {
212 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
213 }
214 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
215 return \%args;
216}
217
d047cd52 218sub BUILD {
3a2ebbf4 219 my $self = shift;
22222af9 220 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
e4b73942 221 $self->_set_start( $self->add_reading(
222 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
223 $self->_set_end( $self->add_reading(
224 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
d047cd52 225}
784877d9 226
24efa55d 227sub register_relationship_type {
228 my $self = shift;
229 my %args = @_ == 1 ? %{$_[0]} : @_;
230 if( $self->relations->has_type( $args{name} ) ) {
231 throw( 'Relationship type ' . $args{name} . ' already registered' );
232 }
233 $self->relations->add_type( %args );
234}
235
39d1ddc7 236sub get_relationship_type {
237 my( $self, $name ) = @_;
238 return $self->relations->has_type( $name )
239 ? $self->relations->type( $name ) : undef;
240}
241
3a2ebbf4 242### Reading construct/destruct functions
243
244sub add_reading {
245 my( $self, $reading ) = @_;
246 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
247 my %args = %$reading;
e4b73942 248 if( $args{'init'} ) {
249 # If we are initializing an empty collation, don't assume that we
250 # have set a tradition.
251 delete $args{'init'};
8943ff68 252 } elsif( $self->tradition->can('language') && $self->tradition->has_language
253 && !exists $args{'language'} ) {
6ad2ce78 254 $args{'language'} = $self->tradition->language;
255 }
3a2ebbf4 256 $reading = Text::Tradition::Collation::Reading->new(
257 'collation' => $self,
258 %args );
259 }
260 # First check to see if a reading with this ID exists.
261 if( $self->reading( $reading->id ) ) {
63778331 262 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 263 }
c1915ab9 264 $self->_graphcalc_done(0);
3a2ebbf4 265 $self->_add_reading( $reading->id => $reading );
266 # Once the reading has been added, put it in both graphs.
267 $self->sequence->add_vertex( $reading->id );
cecbe56d 268 $self->relations->add_reading( $reading->id );
3a2ebbf4 269 return $reading;
eca16057 270};
271
3a2ebbf4 272around del_reading => sub {
273 my $orig = shift;
274 my $self = shift;
275 my $arg = shift;
276
cecbe56d 277 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
278 $arg = $arg->id;
3a2ebbf4 279 }
3a2ebbf4 280 # Remove the reading from the graphs.
c1915ab9 281 $self->_graphcalc_done(0);
4e483aa5 282 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 283 $self->sequence->delete_vertex( $arg );
284 $self->relations->delete_reading( $arg );
3a2ebbf4 285
286 # Carry on.
cecbe56d 287 $self->$orig( $arg );
3a2ebbf4 288};
7854e12e 289
3c234eb6 290=head2 merge_readings( $main, $second, $concatenate, $with_str )
291
292Merges the $second reading into the $main one. If $concatenate is true, then
293the merged node will carry the text of both readings, concatenated with either
294$with_str (if specified) or a sensible default (the empty string if the
295appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
296
297The first two arguments may be either readings or reading IDs.
298
4e483aa5 299=begin testing
300
301use Text::Tradition;
302
303my $cxfile = 't/data/Collatex-16.xml';
304my $t = Text::Tradition->new(
305 'name' => 'inline',
306 'input' => 'CollateX',
307 'file' => $cxfile,
308 );
309my $c = $t->collation;
310
311my $rno = scalar $c->readings;
4ef65ab4 312# Split n21 ('unto') for testing purposes
4e483aa5 313my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
314my $old_r = $c->reading( 'n21' );
315$old_r->alter_text( 'to' );
316$c->del_path( 'n20', 'n21', 'A' );
317$c->add_path( 'n20', 'n21p0', 'A' );
318$c->add_path( 'n21p0', 'n21', 'A' );
7a0956c1 319$c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
4e483aa5 320$c->flatten_ranks();
321ok( $c->reading( 'n21p0' ), "New reading exists" );
322is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
323
679f17e1 324# Combine n3 and n4 ( with his )
4e483aa5 325$c->merge_readings( 'n3', 'n4', 1 );
326ok( !$c->reading('n4'), "Reading n4 is gone" );
327is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
328
679f17e1 329# Collapse n9 and n10 ( rood / root )
330$c->merge_readings( 'n9', 'n10' );
331ok( !$c->reading('n10'), "Reading n10 is gone" );
332is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 333
334# Combine n21 and n21p0
335my $remaining = $c->reading('n21');
336$remaining ||= $c->reading('n22'); # one of these should still exist
337$c->merge_readings( 'n21p0', $remaining, 1 );
338ok( !$c->reading('n21'), "Reading $remaining is gone" );
339is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
340
341=end testing
342
343=cut
7854e12e 344
3a2ebbf4 345sub merge_readings {
346 my $self = shift;
347
56772e8c 348 # Sanity check
349 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
350 my $mergemeta = $kept_obj->is_meta;
351 throw( "Cannot merge meta and non-meta reading" )
352 unless ( $mergemeta && $del_obj->is_meta )
353 || ( !$mergemeta && !$del_obj->is_meta );
354 if( $mergemeta ) {
355 throw( "Cannot merge with start or end node" )
356 if( $kept_obj eq $self->start || $kept_obj eq $self->end
357 || $del_obj eq $self->start || $del_obj eq $self->end );
a445ce40 358 throw( "Cannot combine text of meta readings" ) if $combine;
56772e8c 359 }
3a2ebbf4 360 # We only need the IDs for adding paths to the graph, not the reading
361 # objects themselves.
56772e8c 362 my $kept = $kept_obj->id;
363 my $deleted = $del_obj->id;
c1915ab9 364 $self->_graphcalc_done(0);
10943ab0 365
3a2ebbf4 366 # The kept reading should inherit the paths and the relationships
367 # of the deleted reading.
368 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
369 my @vector = ( $kept );
370 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
371 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 372 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 373 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
374 $self->sequence->add_edge( @vector );
375 my $fwits = $self->sequence->get_edge_attributes( @vector );
376 @wits{keys %$fwits} = values %$fwits;
377 $self->sequence->set_edge_attributes( @vector, \%wits );
378 }
cecbe56d 379 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 380
381 # Do the deletion deed.
4e483aa5 382 if( $combine ) {
869a1ada 383 # Combine the text of the readings
4e483aa5 384 my $joinstr = $combine_char;
385 unless( defined $joinstr ) {
386 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
387 $joinstr = $self->wordsep unless defined $joinstr;
388 }
a445ce40 389 $kept_obj->_combine( $del_obj, $joinstr );
49d4f2ac 390 }
3a2ebbf4 391 $self->del_reading( $deleted );
392}
7854e12e 393
8d9494a8 394=head2 merge_related( @relationship_types )
395
396Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
397
398WARNING: This operation cannot be undone.
399
400=cut
401
402=begin testing
403
404use Text::Tradition;
405use TryCatch;
406
407my $t = Text::Tradition->new(
408 'name' => 'inline',
409 'input' => 'Self',
410 'file' => 't/data/legendfrag.xml',
411 );
412my $c = $t->collation;
413
414my %rdg_ids;
415map { $rdg_ids{$_} = 1 } $c->readings;
416$c->merge_related( 'orthographic' );
417is( scalar( $c->readings ), keys( %rdg_ids ) - 8,
418 "Successfully collapsed orthographic variation" );
419map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.1 /;
420foreach my $rid ( keys %rdg_ids ) {
421 my $exp = $rdg_ids{$rid};
422 is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
423 ( $exp ? "retained" : "removed" ) );
424}
425ok( $c->linear, "Graph is still linear" );
426try {
427 $c->calculate_ranks; # This should succeed
428 ok( 1, "Can still calculate ranks on the new graph" );
429} catch {
430 ok( 0, "Rank calculation on merged graph failed: $@" );
431}
432
433# Now add some transpositions
434$c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
435$c->merge_related( 'transposition' );
436is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
437 "Transposed relationship is merged away" );
438ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
439ok( !$c->linear, "Graph is no longer linear" );
440try {
441 $c->calculate_ranks; # This should fail
442 ok( 0, "Rank calculation happened on nonlinear graph?!" );
443} catch ( Text::Tradition::Error $e ) {
444 is( $e->message, 'Cannot calculate ranks on a non-linear graph',
445 "Rank calculation on merged graph threw an error" );
446}
447
448
449
450=end testing
451
452=cut
453
454# TODO: there should be a way to display merged without affecting the underlying data!
455
456sub merge_related {
457 my $self = shift;
458 my %reltypehash;
459 map { $reltypehash{$_} = 1 } @_;
460
461 # Set up the filter for finding related readings
462 my $filter = sub {
463 exists $reltypehash{$_[0]->type};
464 };
465
466 my $linear = 1;
467 # Go through all readings looking for related ones
468 foreach my $r ( $self->readings ) {
469 next unless $self->reading( "$r" ); # might have been deleted meanwhile
470 my @related = $self->related_readings( $r, $filter );
471 if( @related ) {
472 push( @related, $r );
473 @related = sort {
474 scalar $b->witnesses <=> scalar $a->witnesses
475 } @related;
476 my $keep = shift @related;
477 foreach my $delr ( @related ) {
478 $linear = undef
479 unless( $self->get_relationship( $keep, $delr )->colocated );
480 $self->merge_readings( $keep, $delr );
481 }
482 }
483 }
484 $self->linear( $linear );
485}
486
6771a1b1 487=head2 compress_readings
488
489Where possible in the graph, compresses plain sequences of readings into a
490single reading. The sequences must consist of readings with no
491relationships to other readings, with only a single witness path between
492them and no other witness paths from either that would skip the other. The
493readings must also not be marked as nonsense or bad grammar.
494
495WARNING: This operation cannot be undone.
496
497=cut
498
499sub compress_readings {
500 my $self = shift;
501 # Anywhere in the graph that there is a reading that joins only to a single
502 # successor, and neither of these have any relationships, just join the two
503 # readings.
6771a1b1 504 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
428bcf0b 505 # Now look for readings that can be joined to their successors.
a445ce40 506 next unless $rdg->is_combinable;
6771a1b1 507 my %seen;
508 while( $self->sequence->successors( $rdg ) == 1 ) {
509 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
510 throw( "Infinite loop" ) if $seen{$next->id};
511 $seen{$next->id} = 1;
512 last if $self->sequence->predecessors( $next ) > 1;
a445ce40 513 last unless $next->is_combinable;
6771a1b1 514 say "Joining readings $rdg and $next";
515 $self->merge_readings( $rdg, $next, 1 );
516 }
517 }
518 # Make sure we haven't screwed anything up
519 foreach my $wit ( $self->tradition->witnesses ) {
520 my $pathtext = $self->path_text( $wit->sigil );
521 my $origtext = join( ' ', @{$wit->text} );
522 throw( "Text differs for witness " . $wit->sigil )
523 unless $pathtext eq $origtext;
524 if( $wit->is_layered ) {
525 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
526 $origtext = join( ' ', @{$wit->layertext} );
527 throw( "Ante-corr text differs for witness " . $wit->sigil )
528 unless $pathtext eq $origtext;
529 }
530 }
531
532 $self->relations->rebuild_equivalence();
533 $self->calculate_ranks();
534}
3265b0ce 535
3a2ebbf4 536# Helper function for manipulating the graph.
537sub _stringify_args {
4e483aa5 538 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 539 $first = $first->id
540 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
541 $second = $second->id
542 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 543 return( $first, $second, @args );
3a2ebbf4 544}
df6d9812 545
4e5a7b2c 546# Helper function for manipulating the graph.
547sub _objectify_args {
548 my( $self, $first, $second, $arg ) = @_;
549 $first = $self->reading( $first )
550 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
551 $second = $self->reading( $second )
552 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
553 return( $first, $second, $arg );
554}
f97ef19e 555
556=head2 duplicate_reading( $reading, @witlist )
557
558Split the given reading into two, so that the new reading is in the path for
ef73c20a 559the witnesses given in @witlist. If the result is that certain non-colocated
560relationships (e.g. transpositions) are no longer valid, these will be removed.
561Returns the newly-created reading.
f97ef19e 562
563=begin testing
564
68e48c06 565use Test::More::UTF8;
f97ef19e 566use Text::Tradition;
68e48c06 567use TryCatch;
f97ef19e 568
569my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
570is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
571ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
572
573my $sc = $st->collation;
574my $numr = 17;
575ok( $sc->reading('n131'), "Tradition has the affected reading" );
576is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
577is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
578
579# Detach the erroneously collated reading
2dcb5d11 580my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
ef73c20a 581ok( $newr, "New reading was created" );
f97ef19e 582ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
583is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
584is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
3c234eb6 585my $csucc = $sc->common_successor( 'n131', 'n131_0' );
586is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
f97ef19e 587
588# Check that the bad transposition is gone
2dcb5d11 589is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
f97ef19e 590is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
591
e19635f8 592# The collation should not be fixed
593my @pairs = $sc->identical_readings();
594is( scalar @pairs, 0, "Not re-collated yet" );
f97ef19e 595# Fix the collation
3c234eb6 596ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
e19635f8 597@pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
3c234eb6 598is( scalar @pairs, 3, "Found three more identical readings" );
e19635f8 599is( $sc->end->rank, 11, "The ranks shifted appropriately" );
3c234eb6 600$sc->flatten_ranks();
f97ef19e 601is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
602
68e48c06 603# Check that we can't "duplicate" a reading with no wits or with all wits
604try {
605 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
606 ok( 0, "Reading duplication without witnesses throws an error" );
607} catch( Text::Tradition::Error $e ) {
608 like( $e->message, qr/Must specify one or more witnesses/,
609 "Reading duplication without witnesses throws the expected error" );
610} catch {
611 ok( 0, "Reading duplication without witnesses threw the wrong error" );
612}
613
614try {
615 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
616 ok( 0, "Reading duplication with all witnesses throws an error" );
617} catch( Text::Tradition::Error $e ) {
618 like( $e->message, qr/Cannot join all witnesses/,
619 "Reading duplication with all witnesses throws the expected error" );
620} catch {
621 ok( 0, "Reading duplication with all witnesses threw the wrong error" );
622}
623
f97ef19e 624=end testing
625
626=cut
627
628sub duplicate_reading {
629 my( $self, $r, @wits ) = @_;
68e48c06 630 # Check that we are not doing anything unwise.
631 throw( "Must specify one or more witnesses for the duplicated reading" )
632 unless @wits;
f97ef19e 633 unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
634 $r = $self->reading( $r );
635 }
636 throw( "Cannot duplicate a meta-reading" )
637 if $r->is_meta;
68e48c06 638 throw( "Cannot join all witnesses to the new reading" )
47cd4d16 639 if scalar( @wits ) == scalar( $r->witnesses );
68e48c06 640
f97ef19e 641 # Get all the reading attributes and duplicate them.
642 my $rmeta = Text::Tradition::Collation::Reading->meta;
643 my %args;
644 foreach my $attr( $rmeta->get_all_attributes ) {
645 next if $attr->name =~ /^_/;
646 my $acc = $attr->get_read_method;
647 if( !$acc && $attr->has_applied_traits ) {
648 my $tr = $attr->applied_traits;
649 if( $tr->[0] =~ /::(Array|Hash)$/ ) {
650 my $which = $1;
651 my %methods = reverse %{$attr->handles};
652 $acc = $methods{elements};
653 $args{$attr->name} = $which eq 'Array'
654 ? [ $r->$acc ] : { $r->$acc };
655 }
656 } else {
657 $args{$attr->name} = $r->$acc if $acc;
658 }
659 }
660 # By definition the new reading will no longer be common.
661 $args{is_common} = 0;
662 # The new reading also needs its own ID.
663 $args{id} = $self->_generate_dup_id( $r->id );
664
665 # Try to make the new reading.
666 my $newr = $self->add_reading( \%args );
667 # The old reading is also no longer common.
668 $r->is_common( 0 );
669
670 # For each of the witnesses, dissociate from the old reading and
671 # associate with the new.
672 foreach my $wit ( @wits ) {
673 my $prior = $self->prior_reading( $r, $wit );
674 my $next = $self->next_reading( $r, $wit );
675 $self->del_path( $prior, $r, $wit );
676 $self->add_path( $prior, $newr, $wit );
677 $self->del_path( $r, $next, $wit );
678 $self->add_path( $newr, $next, $wit );
679 }
680
3c234eb6 681 # If the graph is ranked, we need to look for relationships that are now
682 # invalid (i.e. 'non-colocation' types that might now be colocated) and
683 # remove them. If not, we can skip it.
f97ef19e 684 my $succ;
3c234eb6 685 my %rrk;
2dcb5d11 686 my @deleted_relations;
f97ef19e 687 if( $self->end->has_rank ) {
3c234eb6 688 # Find the point where we can stop checking
f97ef19e 689 $succ = $self->common_successor( $r, $newr );
3c234eb6 690
691 # Hash the existing ranks
f97ef19e 692 foreach my $rdg ( $self->readings ) {
693 $rrk{$rdg->id} = $rdg->rank;
694 }
3c234eb6 695 # Calculate the new ranks
696 $self->calculate_ranks();
f97ef19e 697
3c234eb6 698 # Check for invalid non-colocated relationships among changed-rank readings
699 # from where the ranks start changing up to $succ
f97ef19e 700 my $lastrank = $succ->rank;
701 foreach my $rdg ( $self->readings ) {
702 next if $rdg->rank > $lastrank;
703 next if $rdg->rank == $rrk{$rdg->id};
704 my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
705 next unless @noncolo;
706 foreach my $nc ( @noncolo ) {
2dcb5d11 707 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
f6a1d5f0 708 push( @deleted_relations, [ $rdg->id, $nc->id ] );
2dcb5d11 709 }
f97ef19e 710 }
711 }
ef73c20a 712 }
2dcb5d11 713 return ( $newr, @deleted_relations );
f97ef19e 714}
715
716sub _generate_dup_id {
717 my( $self, $rid ) = @_;
718 my $newid;
719 my $i = 0;
720 while( !$newid ) {
721 $newid = $rid."_$i";
722 if( $self->has_reading( $newid ) ) {
723 $newid = '';
724 $i++;
725 }
726 }
727 return $newid;
728}
729
3a2ebbf4 730### Path logic
731
732sub add_path {
733 my $self = shift;
734
735 # We only need the IDs for adding paths to the graph, not the reading
736 # objects themselves.
cecbe56d 737 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 738
c1915ab9 739 $self->_graphcalc_done(0);
3a2ebbf4 740 # Connect the readings
359944f7 741 unless( $self->sequence->has_edge( $source, $target ) ) {
742 $self->sequence->add_edge( $source, $target );
743 $self->relations->add_equivalence_edge( $source, $target );
744 }
3a2ebbf4 745 # Note the witness in question
746 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 747}
b15511bf 748
3a2ebbf4 749sub del_path {
750 my $self = shift;
49d4f2ac 751 my @args;
752 if( ref( $_[0] ) eq 'ARRAY' ) {
753 my $e = shift @_;
754 @args = ( @$e, @_ );
755 } else {
756 @args = @_;
757 }
3a2ebbf4 758
f97ef19e 759 # We only need the IDs for removing paths from the graph, not the reading
3a2ebbf4 760 # objects themselves.
49d4f2ac 761 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 762
c1915ab9 763 $self->_graphcalc_done(0);
3a2ebbf4 764 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 765 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 766 }
30897024 767 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
3a2ebbf4 768 $self->sequence->delete_edge( $source, $target );
359944f7 769 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 770 }
784877d9 771}
772
3a2ebbf4 773
15d2d3df 774# Extra graph-alike utility
775sub has_path {
3a2ebbf4 776 my $self = shift;
777 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
778 return undef unless $self->sequence->has_edge( $source, $target );
779 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 780}
781
4e5a7b2c 782=head2 clear_witness( @sigil_list )
3a2ebbf4 783
4e5a7b2c 784Clear the given witnesses out of the collation entirely, removing references
785to them in paths, and removing readings that belong only to them. Should only
786be called via $tradition->del_witness.
3a2ebbf4 787
788=cut
789
4e5a7b2c 790sub clear_witness {
791 my( $self, @sigils ) = @_;
792
c1915ab9 793 $self->_graphcalc_done(0);
4e5a7b2c 794 # Clear the witness(es) out of the paths
795 foreach my $e ( $self->paths ) {
796 foreach my $sig ( @sigils ) {
797 $self->del_path( $e, $sig );
798 }
799 }
800
801 # Clear out the newly unused readings
802 foreach my $r ( $self->readings ) {
803 unless( $self->reading_witnesses( $r ) ) {
804 $self->del_reading( $r );
805 }
806 }
807}
3a2ebbf4 808
809sub add_relationship {
810 my $self = shift;
22222af9 811 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 812 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
864ee4bf 813 foreach my $v ( @vectors ) {
814 next unless $self->get_relationship( $v )->colocated;
815 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
816 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
817 $self->_graphcalc_done(0);
818 $self->_clear_cache;
819 last;
820 }
821 }
63778331 822 return @vectors;
22222af9 823}
ef9d481f 824
ca6e6095 825around qw/ get_relationship del_relationship / => sub {
826 my $orig = shift;
827 my $self = shift;
828 my @args = @_;
829 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
830 @args = @{$_[0]};
831 }
e19635f8 832 my @stringargs = $self->_stringify_args( @args );
833 $self->$orig( @stringargs );
ca6e6095 834};
835
22222af9 836=head2 reading_witnesses( $reading )
910a0a6d 837
22222af9 838Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 839
22222af9 840=cut
1d310495 841
1d310495 842sub reading_witnesses {
843 my( $self, $reading ) = @_;
844 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 845 # arbitrarily chosen "incoming". Thus, special-case the start node.
846 if( $reading eq $self->start ) {
bed6ce83 847 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
96dc90ec 848 }
1d310495 849 my %all_witnesses;
850 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
851 my $wits = $self->sequence->get_edge_attributes( @$e );
852 @all_witnesses{ keys %$wits } = 1;
853 }
c12bb878 854 my $acstr = $self->ac_label;
855 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
856 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
857 }
1d310495 858 return keys %all_witnesses;
910a0a6d 859}
860
4e5a7b2c 861=head1 OUTPUT METHODS
8e1394aa 862
0ecb975c 863=head2 as_svg( \%options )
8e1394aa 864
0068967c 865Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 866See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 867
868=cut
869
870sub as_svg {
0ecb975c 871 my( $self, $opts ) = @_;
bfcbcecb 872 throw( "Need GraphViz installed to output SVG" )
873 unless File::Which::which( 'dot' );
e247aad1 874 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 875 $self->calculate_ranks()
876 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
be3af600 877 my @cmd = qw/dot -Tsvg/;
878 my( $svg, $err );
879 my $dotfile = File::Temp->new();
880 ## USE FOR DEBUGGING
881 # $dotfile->unlink_on_destroy(0);
882 binmode $dotfile, ':utf8';
883 print $dotfile $self->as_dot( $opts );
884 push( @cmd, $dotfile->filename );
885 run( \@cmd, ">", binary(), \$svg );
886 $svg = decode_utf8( $svg );
887 return $svg;
8e1394aa 888}
889
b22576c6 890
0ecb975c 891=head2 as_dot( \%options )
b22576c6 892
0ecb975c 893Returns a string that is the collation graph expressed in dot
894(i.e. GraphViz) format. Options include:
b22576c6 895
0ecb975c 896=over 4
b22576c6 897
0ecb975c 898=item * from
b22576c6 899
0ecb975c 900=item * to
df6d9812 901
0ecb975c 902=item * color_common
903
904=back
df6d9812 905
906=cut
907
908sub as_dot {
0ecb975c 909 my( $self, $opts ) = @_;
910 my $startrank = $opts->{'from'} if $opts;
911 my $endrank = $opts->{'to'} if $opts;
912 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 913 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
914 && $self->end->rank > 100;
6648ee3d 915 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 916
b22576c6 917 # Check the arguments
918 if( $startrank ) {
919 return if $endrank && $startrank > $endrank;
920 return if $startrank > $self->end->rank;
921 }
922 if( defined $endrank ) {
923 return if $endrank < 0;
f1b3b33a 924 $endrank = undef if $endrank == $self->end->rank;
b22576c6 925 }
926
67da8d6c 927 my $graph_name = $self->tradition->name;
928 $graph_name =~ s/[^\w\s]//g;
929 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 930
931 my %graph_attrs = (
932 'rankdir' => 'LR',
933 'bgcolor' => 'none',
934 );
935 my %node_attrs = (
b8990398 936 'fontsize' => 14,
f13b5582 937 'fillcolor' => 'white',
938 'style' => 'filled',
939 'shape' => 'ellipse'
940 );
941 my %edge_attrs = (
942 'arrowhead' => 'open',
943 'color' => '#000000',
944 'fontcolor' => '#000000',
945 );
946
67da8d6c 947 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 948 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
949 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 950
b22576c6 951 # Output substitute start/end readings if necessary
952 if( $startrank ) {
43c94341 953 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 954 }
955 if( $endrank ) {
43c94341 956 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 957 }
b365fbae 958 if( $STRAIGHTENHACK ) {
959 ## HACK part 1
43c94341 960 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
961 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 962 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
963 }
b22576c6 964 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 965 # Sort the readings by rank if we have ranks; this speeds layout.
966 my @all_readings = $self->end->has_rank
967 ? sort { $a->rank <=> $b->rank } $self->readings
968 : $self->readings;
4633f9e4 969 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 970 foreach my $reading ( @all_readings ) {
b22576c6 971 # Only output readings within our rank range.
972 next if $startrank && $reading->rank < $startrank;
973 next if $endrank && $reading->rank > $endrank;
974 $used{$reading->id} = 1;
910a0a6d 975 # Need not output nodes without separate labels
3a2ebbf4 976 next if $reading->id eq $reading->text;
d4b75f44 977 my $rattrs;
30f0df34 978 my $label = $reading->text;
629e27b0 979 $label .= '-' if $reading->join_next;
980 $label = "-$label" if $reading->join_prior;
8f9cab7b 981 $label =~ s/\"/\\\"/g;
d4b75f44 982 $rattrs->{'label'} = $label;
10e4b1ac 983 $rattrs->{'id'} = $reading->id;
0ecb975c 984 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 985 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 986 }
3a2ebbf4 987
30ddc24c 988 # Add the real edges. Need to weight one edge per rank jump, in a
989 # continuous line.
b365fbae 990 # my $weighted = $self->_add_edge_weights;
b22576c6 991 my @edges = $self->paths;
3bdec618 992 my( %substart, %subend );
b22576c6 993 foreach my $edge ( @edges ) {
994 # Do we need to output this edge?
508fd430 995 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
57560672 996 my $label = $self->_path_display_label( $opts,
997 $self->path_witnesses( $edge ) );
f13b5582 998 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 999
b22576c6 1000 # Account for the rank gap if necessary
30ddc24c 1001 my $rank0 = $self->reading( $edge->[0] )->rank
1002 if $self->reading( $edge->[0] )->has_rank;
1003 my $rank1 = $self->reading( $edge->[1] )->rank
1004 if $self->reading( $edge->[1] )->has_rank;
1005 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1006 $variables->{'minlen'} = $rank1 - $rank0;
1007 }
1008
1009 # Add the calculated edge weights
b365fbae 1010 # if( exists $weighted->{$edge->[0]}
e247aad1 1011 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
1012 # # $variables->{'color'} = 'red';
1013 # $variables->{'weight'} = 3.0;
1014 # }
30ddc24c 1015
508fd430 1016 # EXPERIMENTAL: make edge width reflect no. of witnesses
1017 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1018 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1019
f13b5582 1020 my $varopts = _dot_attr_string( $variables );
1021 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1022 $edge->[0], $edge->[1], $varopts );
3bdec618 1023 } elsif( $used{$edge->[0]} ) {
96ba0418 1024 $subend{$edge->[0]} = $edge->[1];
3bdec618 1025 } elsif( $used{$edge->[1]} ) {
96ba0418 1026 $substart{$edge->[1]} = $edge->[0];
b22576c6 1027 }
df6d9812 1028 }
bed6ce83 1029
1030 # If we are asked to, add relationship links
1031 if( exists $opts->{show_relations} ) {
1032 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1033 if( $filter eq 'transposition' ) {
1034 $filter =~ qr/^transposition$/;
1035 }
1036 foreach my $redge ( $self->relationships ) {
1037 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
1038 if( $filter ne 'all' ) {
1039 my $rel = $self->get_relationship( $redge );
1040 next unless $rel->type =~ /$filter/;
1041 my $variables = {
1042 arrowhead => 'none',
1043 color => '#FFA14F',
1044 constraint => 'false',
1045 label => uc( substr( $rel->type, 0, 4 ) ),
1046 penwidth => '3',
1047 };
1048 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1049 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
1050 }
1051 }
1052 }
1053 }
1054
3bdec618 1055 # Add substitute start and end edges if necessary
1056 foreach my $node ( keys %substart ) {
57560672 1057 my $witstr = $self->_path_display_label( $opts,
1058 $self->path_witnesses( $substart{$node}, $node ) );
f13b5582 1059 my $variables = { %edge_attrs, 'label' => $witstr };
96ba0418 1060 my $nrdg = $self->reading( $node );
1061 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1062 # Substart is actually one lower than $startrank
1063 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1064 }
f13b5582 1065 my $varopts = _dot_attr_string( $variables );
96ba0418 1066 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
3bdec618 1067 }
1068 foreach my $node ( keys %subend ) {
57560672 1069 my $witstr = $self->_path_display_label( $opts,
1070 $self->path_witnesses( $node, $subend{$node} ) );
f13b5582 1071 my $variables = { %edge_attrs, 'label' => $witstr };
1072 my $varopts = _dot_attr_string( $variables );
96ba0418 1073 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
3bdec618 1074 }
b365fbae 1075 # HACK part 2
1076 if( $STRAIGHTENHACK ) {
43c94341 1077 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1078 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 1079 }
30ddc24c 1080
df6d9812 1081 $dot .= "}\n";
1082 return $dot;
1083}
1084
f13b5582 1085sub _dot_attr_string {
1086 my( $hash ) = @_;
1087 my @attrs;
1088 foreach my $k ( sort keys %$hash ) {
1089 my $v = $hash->{$k};
1090 push( @attrs, $k.'="'.$v.'"' );
1091 }
1092 return( '[ ' . join( ', ', @attrs ) . ' ]' );
1093}
1094
30ddc24c 1095sub _add_edge_weights {
1096 my $self = shift;
1097 # Walk the graph from START to END, choosing the successor node with
1098 # the largest number of witness paths each time.
1099 my $weighted = {};
1100 my $curr = $self->start->id;
008fc8a6 1101 my $ranked = $self->end->has_rank;
30ddc24c 1102 while( $curr ne $self->end->id ) {
008fc8a6 1103 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 1104 my @succ = sort { $self->path_witnesses( $curr, $a )
1105 <=> $self->path_witnesses( $curr, $b ) }
1106 $self->sequence->successors( $curr );
1107 my $next = pop @succ;
008fc8a6 1108 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 1109 # Try to avoid lacunae in the weighted path.
008fc8a6 1110 while( @succ &&
1111 ( $self->reading( $next )->is_lacuna ||
1112 $nextrank - $rank > 1 ) ){
30ddc24c 1113 $next = pop @succ;
1114 }
1115 $weighted->{$curr} = $next;
1116 $curr = $next;
1117 }
1118 return $weighted;
1119}
1120
027d819c 1121=head2 path_witnesses( $edge )
1122
1123Returns the list of sigils whose witnesses are associated with the given edge.
1124The edge can be passed as either an array or an arrayref of ( $source, $target ).
1125
1126=cut
1127
3a2ebbf4 1128sub path_witnesses {
1129 my( $self, @edge ) = @_;
1130 # If edge is an arrayref, cope.
1131 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1132 my $e = shift @edge;
1133 @edge = @$e;
1134 }
1135 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 1136 return @wits;
3a2ebbf4 1137}
1138
7f9f05e8 1139# Helper function. Make a display label for the given witnesses, showing a.c.
1140# witnesses only where the main witness is not also in the list.
027d819c 1141sub _path_display_label {
508fd430 1142 my $self = shift;
57560672 1143 my $opts = shift;
7f9f05e8 1144 my %wits;
1145 map { $wits{$_} = 1 } @_;
1146
1147 # If an a.c. wit is listed, remove it if the main wit is also listed.
1148 # Otherwise keep it for explicit listing.
1149 my $aclabel = $self->ac_label;
1150 my @disp_ac;
1151 foreach my $w ( sort keys %wits ) {
1152 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1153 if( exists $wits{$1} ) {
1154 delete $wits{$w};
1155 } else {
1156 push( @disp_ac, $w );
1157 }
1158 }
1159 }
1160
57560672 1161 if( $opts->{'explicit_wits'} ) {
7f9f05e8 1162 return join( ', ', sort keys %wits );
57560672 1163 } else {
1164 # See if we are in a majority situation.
1165 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1166 $maj = $maj > 5 ? $maj : 5;
1167 if( scalar keys %wits > $maj ) {
1168 unshift( @disp_ac, 'majority' );
1169 return join( ', ', @disp_ac );
1170 } else {
1171 return join( ', ', sort keys %wits );
1172 }
8f9cab7b 1173 }
1174}
1dd07bda 1175
bf6e338d 1176=head2 readings_at_rank( $rank )
1dd07bda 1177
bf6e338d 1178Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 1179
1180=cut
1181
bf6e338d 1182sub readings_at_rank {
1dd07bda 1183 my( $self, $rank ) = @_;
bf6e338d 1184 my $table = $self->alignment_table;
1185 # Table rank is real rank - 1.
1186 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1187 my %readings;
1188 foreach my $e ( @elements ) {
1189 next unless ref( $e ) eq 'HASH';
1190 next unless exists $e->{'t'};
1191 $readings{$e->{'t'}->id} = $e->{'t'};
1192 }
1193 return values %readings;
1dd07bda 1194}
8f9cab7b 1195
4e5a7b2c 1196=head2 as_graphml
8e1394aa 1197
4e5a7b2c 1198Returns a GraphML representation of the collation. The GraphML will contain
1199two graphs. The first expresses the attributes of the readings and the witness
1200paths that link them; the second expresses the relationships that link the
1201readings. This is the native transfer format for a tradition.
8e1394aa 1202
56eefa04 1203=begin testing
1204
1205use Text::Tradition;
951ddfe8 1206use TryCatch;
56eefa04 1207
1208my $READINGS = 311;
1209my $PATHS = 361;
1210
1211my $datafile = 't/data/florilegium_tei_ps.xml';
1212my $tradition = Text::Tradition->new( 'input' => 'TEI',
1213 'name' => 'test0',
1214 'file' => $datafile,
1215 'linear' => 1 );
1216
1217ok( $tradition, "Got a tradition object" );
1218is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1219ok( $tradition->collation, "Tradition has a collation" );
1220
1221my $c = $tradition->collation;
1222is( scalar $c->readings, $READINGS, "Collation has all readings" );
1223is( scalar $c->paths, $PATHS, "Collation has all paths" );
1224is( scalar $c->relationships, 0, "Collation has all relationships" );
1225
1226# Add a few relationships
1227$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1228$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
1229$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
1230
1231# Now write it to GraphML and parse it again.
1232
1233my $graphml = $c->as_graphml;
1234my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1235is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1236is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1237is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
1238
9fef629b 1239# Now add a stemma, write to GraphML, and look at the output.
951ddfe8 1240SKIP: {
37bf09f4 1241 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
951ddfe8 1242 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1243 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1244 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1245 $graphml = $c->as_graphml;
1246 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1247}
2a812726 1248
56eefa04 1249=end testing
1250
8e1394aa 1251=cut
1252
a445ce40 1253## TODO MOVE this to Tradition.pm and modularize it better
8e1394aa 1254sub as_graphml {
a30ca502 1255 my( $self, $options ) = @_;
3d14b48e 1256 $self->calculate_ranks unless $self->_graphcalc_done;
1257
a30ca502 1258 my $start = $options->{'from'}
1259 ? $self->reading( $options->{'from'} ) : $self->start;
1260 my $end = $options->{'to'}
1261 ? $self->reading( $options->{'to'} ) : $self->end;
1262 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1263 throw( 'Start node must be before end node' );
1264 }
1265 # The readings need to be ranked for this to work.
1266 $start = $self->start unless $start->has_rank;
1267 $end = $self->end unless $end->has_rank;
414cc046 1268 my $rankoffset = 0;
1269 unless( $start eq $self->start ) {
1270 $rankoffset = $start->rank - 1;
1271 }
a30ca502 1272 my %use_readings;
1273
8e1394aa 1274 # Some namespaces
1275 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1276 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1277 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 1278 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 1279
1280 # Create the document and root node
428bcf0b 1281 require XML::LibXML;
8e1394aa 1282 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1283 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1284 $graphml->setDocumentElement( $root );
1285 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1286 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 1287
1288 # List of attribute types to save on our objects and their corresponding
1289 # GraphML types
1290 my %save_types = (
1291 'Str' => 'string',
1292 'Int' => 'int',
1293 'Bool' => 'boolean',
10e4b1ac 1294 'ReadingID' => 'string',
bbd064a9 1295 'RelationshipType' => 'string',
1296 'RelationshipScope' => 'string',
1297 );
1298
bbd064a9 1299 # Add the data keys for the graph. Include an extra key 'version' for the
1300 # GraphML output version.
e309421a 1301 my %graph_data_keys;
1302 my $gdi = 0;
bbd064a9 1303 my %graph_attributes = ( 'version' => 'string' );
1304 # Graph attributes include those of Tradition and those of Collation.
1305 my %gattr_from;
f97ef19e 1306 # TODO Use meta introspection method from duplicate_reading to do this
1307 # instead of naming custom keys.
bbd064a9 1308 my $tmeta = $self->tradition->meta;
1309 my $cmeta = $self->meta;
1310 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1311 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1312 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1313 next if $attr->name =~ /^_/;
bbd064a9 1314 next unless $save_types{$attr->type_constraint->name};
1315 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1316 }
9fef629b 1317 # Extra custom keys for complex objects that should be saved in some form.
1318 # The subroutine should return a string, or undef/empty.
951ddfe8 1319 if( $tmeta->has_method('stemmata') ) {
1320 $graph_attributes{'stemmata'} = sub {
1321 my @stemstrs;
1322 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1323 $self->tradition->stemmata;
1324 join( "\n", @stemstrs );
1325 };
1326 }
1327
8943ff68 1328 if( $tmeta->has_method('user') ) {
1329 $graph_attributes{'user'} = sub {
1330 $self->tradition->user ? $self->tradition->user->id : undef
1331 };
1332 }
bbd064a9 1333
1334 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 1335 $graph_data_keys{$datum} = 'dg'.$gdi++;
1336 my $key = $root->addNewChild( $graphml_ns, 'key' );
9fef629b 1337 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1338 : $graph_attributes{$datum};
e309421a 1339 $key->setAttribute( 'attr.name', $datum );
9fef629b 1340 $key->setAttribute( 'attr.type', $dtype );
e309421a 1341 $key->setAttribute( 'for', 'graph' );
1342 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1343 }
f6066bac 1344
bbd064a9 1345 # Add the data keys for reading nodes
1346 my %reading_attributes;
1347 my $rmeta = Text::Tradition::Collation::Reading->meta;
1348 foreach my $attr( $rmeta->get_all_attributes ) {
1349 next if $attr->name =~ /^_/;
bbd064a9 1350 next unless $save_types{$attr->type_constraint->name};
1351 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1352 }
a445ce40 1353 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1354 # Extra custom key for the reading morphology
1355 $reading_attributes{'lexemes'} = 'string';
1356 }
7cd9f181 1357
ef9d481f 1358 my %node_data_keys;
1359 my $ndi = 0;
bbd064a9 1360 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1361 $node_data_keys{$datum} = 'dn'.$ndi++;
1362 my $key = $root->addNewChild( $graphml_ns, 'key' );
1363 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1364 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1365 $key->setAttribute( 'for', 'node' );
1366 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1367 }
1368
bbd064a9 1369 # Add the data keys for edges, that is, paths and relationships. Path
1370 # data does not come from a Moose class so is here manually.
ef9d481f 1371 my $edi = 0;
1372 my %edge_data_keys;
bbd064a9 1373 my %edge_attributes = (
3a2ebbf4 1374 witness => 'string', # ID/label for a path
3a2ebbf4 1375 extra => 'boolean', # Path key
3a2ebbf4 1376 );
bbd064a9 1377 my @path_attributes = keys %edge_attributes; # track our manual additions
1378 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1379 foreach my $attr( $pmeta->get_all_attributes ) {
1380 next if $attr->name =~ /^_/;
bbd064a9 1381 next unless $save_types{$attr->type_constraint->name};
1382 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1383 }
1384 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1385 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1386 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1387 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1388 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1389 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1390 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1391 }
3a2ebbf4 1392
cc31ebaa 1393 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1394 my $xmlidname = $self->tradition->name;
1395 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1396 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1397 $xmlidname = '_'.$xmlidname;
1398 }
2c669bca 1399 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1400 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1401 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1402 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1403 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1404 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1405 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1406 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1407
2a812726 1408 # Tradition/collation attribute data
bbd064a9 1409 foreach my $datum ( keys %graph_attributes ) {
1410 my $value;
1411 if( $datum eq 'version' ) {
2a812726 1412 $value = '3.2';
9fef629b 1413 } elsif( ref( $graph_attributes{$datum} ) ) {
1414 my $sub = $graph_attributes{$datum};
1415 $value = &$sub();
bbd064a9 1416 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1417 $value = $self->tradition->$datum;
1418 } else {
1419 $value = $self->$datum;
1420 }
2c669bca 1421 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1422 }
8e1394aa 1423
1424 my $node_ctr = 0;
1425 my %node_hash;
22222af9 1426 # Add our readings to the graph
3a2ebbf4 1427 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1428 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1429 ( $n->rank < $start->rank || $n->rank > $end->rank );
1430 $use_readings{$n->id} = 1;
2c669bca 1431 # Add to the main graph
1432 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1433 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1434 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1435 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1436 foreach my $d ( keys %reading_attributes ) {
255875b8 1437 my $nval = $n->$d;
7cd9f181 1438 # Custom serialization
1439 if( $d eq 'lexemes' ) {
1440 # If nval is a true value, we have lexemes so we need to
1441 # serialize them. Otherwise set nval to undef so that the
1442 # key is excluded from this reading.
1443 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1444 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1445 $nval = undef;
7cd9f181 1446 }
cc31ebaa 1447 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1448 # Adjust the ranks within the subgraph.
cc31ebaa 1449 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1450 : $nval - $rankoffset;
414cc046 1451 }
255875b8 1452 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1453 if defined $nval;
1454 }
b15511bf 1455 }
1456
2c669bca 1457 # Add the path edges to the sequence graph
df6d9812 1458 my $edge_ctr = 0;
3a2ebbf4 1459 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1460 # We add an edge in the graphml for every witness in $e.
a30ca502 1461 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1462 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1463 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1464 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1465 # Skip any path from start to end; that witness is not in the subgraph.
1466 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1467 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1468 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1469 $node_hash{ $e->[0] },
1470 $node_hash{ $e->[1] } );
2c669bca 1471 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1472 $edge_el->setAttribute( 'source', $from );
1473 $edge_el->setAttribute( 'target', $to );
1474 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1475
1476 # It's a witness path, so add the witness
1477 my $base = $wit;
1478 my $key = $edge_data_keys{'witness'};
1479 # Is this an ante-corr witness?
1480 my $aclabel = $self->ac_label;
1481 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1482 # Keep the base witness
1483 $base = $1;
1484 # ...and record that this is an 'extra' reading path
1485 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1486 }
1487 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1488 }
1489 }
1490
cc31ebaa 1491 # Report the actual number of nodes and edges that went in
1492 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1493 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1494
22222af9 1495 # Add the relationship graph to the XML
bbd064a9 1496 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1497 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1498 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1499
94c00c71 1500 # Save and return the thing
1501 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1502 return $result;
df6d9812 1503}
1504
b15511bf 1505sub _add_graphml_data {
1506 my( $el, $key, $value ) = @_;
b15511bf 1507 return unless defined $value;
c9bf3dbf 1508 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1509 $data_el->setAttribute( 'key', $key );
1510 $data_el->appendText( $value );
8e1394aa 1511}
1512
4e5a7b2c 1513=head2 as_csv
910a0a6d 1514
1515Returns a CSV alignment table representation of the collation graph, one
2c669bca 1516row per witness (or witness uncorrected.)
910a0a6d 1517
ce5966fb 1518=head2 as_tsv
1519
1520Returns a tab-separated alignment table representation of the collation graph,
1521one row per witness (or witness uncorrected.)
1522
16203db5 1523=begin testing
1524
1525use Text::Tradition;
34ca808b 1526use Text::CSV;
16203db5 1527
1528my $READINGS = 311;
1529my $PATHS = 361;
34ca808b 1530my $WITS = 13;
1531my $WITAC = 4;
16203db5 1532
1533my $datafile = 't/data/florilegium_tei_ps.xml';
1534my $tradition = Text::Tradition->new( 'input' => 'TEI',
1535 'name' => 'test0',
1536 'file' => $datafile,
1537 'linear' => 1 );
1538
1539my $c = $tradition->collation;
1540# Export the thing to CSV
1541my $csvstr = $c->as_csv();
34ca808b 1542# Count the columns
1543my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1544my @lines = split(/\n/, $csvstr );
1545ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1546is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
cbc8e08f 1547my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1548ok( @q_ac, "Found a layered witness" );
1549
16203db5 1550my $t2 = Text::Tradition->new( input => 'Tabular',
1551 name => 'test2',
1552 string => $csvstr,
1553 sep_char => ',' );
1554is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1555is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1556
1557# Now do it with TSV
1558my $tsvstr = $c->as_tsv();
1559my $t3 = Text::Tradition->new( input => 'Tabular',
1560 name => 'test3',
1561 string => $tsvstr,
1562 sep_char => "\t" );
1563is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1564is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1565
4e64b669 1566my $table = $c->alignment_table;
34ca808b 1567my $noaccsv = $c->as_csv({ noac => 1 });
1568my @noaclines = split(/\n/, $noaccsv );
1569ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1570is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
4e64b669 1571is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
34ca808b 1572
cbc8e08f 1573my $safecsv = $c->as_csv({ safe_ac => 1});
1574my @safelines = split(/\n/, $safecsv );
1575ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1576is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1577@q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1578ok( @q_ac, "Found a sanitized layered witness" );
1579is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
16203db5 1580
1581=end testing
1582
910a0a6d 1583=cut
1584
ce5966fb 1585sub _tabular {
34ca808b 1586 my( $self, $opts ) = @_;
1587 my $table = $self->alignment_table( $opts );
16203db5 1588 my $csv_options = { binary => 1, quote_null => 0 };
34ca808b 1589 $csv_options->{'sep_char'} = $opts->{fieldsep};
1590 if( $opts->{fieldsep} eq "\t" ) {
16203db5 1591 # If it is really tab separated, nothing is an escape char.
1592 $csv_options->{'quote_char'} = undef;
1593 $csv_options->{'escape_char'} = '';
1594 }
1595 my $csv = Text::CSV->new( $csv_options );
910a0a6d 1596 my @result;
2c669bca 1597 # Make the header row
1598 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
43800a64 1599 push( @result, $csv->string );
2c669bca 1600 # Make the rest of the rows
1601 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1602 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1603 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1604 $csv->combine( @row );
43800a64 1605 push( @result, $csv->string );
910a0a6d 1606 }
3a2ebbf4 1607 return join( "\n", @result );
910a0a6d 1608}
1609
ce5966fb 1610sub as_csv {
1611 my $self = shift;
34ca808b 1612 my $opts = shift || {};
1613 $opts->{fieldsep} = ',';
1614 return $self->_tabular( $opts );
ce5966fb 1615}
1616
1617sub as_tsv {
1618 my $self = shift;
34ca808b 1619 my $opts = shift || {};
1620 $opts->{fieldsep} = "\t";
1621 return $self->_tabular( $opts );
ce5966fb 1622}
1623
248276a2 1624=head2 alignment_table
2c669bca 1625
566f4595 1626Return a reference to an alignment table, in a slightly enhanced CollateX
1627format which looks like this:
1628
1629 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1630 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1631 { witness => "SIG2",
4e5a7b2c 1632 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1633 ... ],
1634 length => TEXTLEN };
1635
2c669bca 1636=cut
9f3ba6f7 1637
1dd07bda 1638sub alignment_table {
34ca808b 1639 my( $self, $opts ) = @_;
4e64b669 1640 if( $self->has_cached_table ) {
cbc8e08f 1641 return $self->cached_table
1642 unless $opts->{noac} || $opts->{safe_ac};
4e64b669 1643 }
1dd07bda 1644
0ecb975c 1645 # Make sure we can do this
1646 throw( "Need a linear graph in order to make an alignment table" )
1647 unless $self->linear;
b39e7cb5 1648 $self->calculate_ranks()
1649 unless $self->_graphcalc_done && $self->end->has_rank;
1650
2c669bca 1651 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1652 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1653 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
6771a1b1 1654 # say STDERR "Making witness row(s) for " . $wit->sigil;
1f7aa795 1655 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1656 my @row = _make_witness_row( \@wit_path, \@all_pos );
bed6ce83 1657 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1658 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1659 push( @{$table->{'alignment'}}, $witobj );
34ca808b 1660 if( $wit->is_layered && !$opts->{noac} ) {
1f7aa795 1661 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1662 $wit->sigil.$self->ac_label );
1dd07bda 1663 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
cbc8e08f 1664 my $witlabel = $opts->{safe_ac}
1665 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1666 my $witacobj = { 'witness' => $witlabel,
bed6ce83 1667 'tokens' => \@ac_row };
1668 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1669 push( @{$table->{'alignment'}}, $witacobj );
910a0a6d 1670 }
1671 }
cbc8e08f 1672 unless( $opts->{noac} || $opts->{safe_ac} ) {
4e64b669 1673 $self->cached_table( $table );
1674 }
1dd07bda 1675 return $table;
910a0a6d 1676}
1677
1678sub _make_witness_row {
1dd07bda 1679 my( $path, $positions ) = @_;
910a0a6d 1680 my %char_hash;
1681 map { $char_hash{$_} = undef } @$positions;
2c669bca 1682 my $debug = 0;
910a0a6d 1683 foreach my $rdg ( @$path ) {
6771a1b1 1684 say STDERR "rank " . $rdg->rank if $debug;
1685 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1dd07bda 1686 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1687 }
1688 my @row = map { $char_hash{$_} } @$positions;
eca16057 1689 # Fill in lacuna markers for undef spots in the row
1690 my $last_el = shift @row;
1691 my @filled_row = ( $last_el );
1692 foreach my $el ( @row ) {
0e476982 1693 # If we are using node reference, make the lacuna node appear many times
1694 # in the table. If not, use the lacuna tag.
1dd07bda 1695 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1696 $el = $last_el;
eca16057 1697 }
1698 push( @filled_row, $el );
1699 $last_el = $el;
1700 }
1701 return @filled_row;
910a0a6d 1702}
1703
248276a2 1704
4e5a7b2c 1705=head1 NAVIGATION METHODS
910a0a6d 1706
4e5a7b2c 1707=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1708
1709Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1710with $last, for the witness given in $sigil. If a $backup sigil is
1711specified (e.g. when walking a layered witness), it will be used wherever
1712no $sigil path exists. If there is a base text reading, that will be
1713used wherever no path exists for $sigil or $backup.
e2902068 1714
1715=cut
1716
910a0a6d 1717# TODO Think about returning some lazy-eval iterator.
b0b4421a 1718# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1719
e2902068 1720sub reading_sequence {
861c3e27 1721 my( $self, $start, $end, $witness ) = @_;
e2902068 1722
930ff666 1723 $witness = $self->baselabel unless $witness;
e2902068 1724 my @readings = ( $start );
1725 my %seen;
1726 my $n = $start;
3a2ebbf4 1727 while( $n && $n->id ne $end->id ) {
1728 if( exists( $seen{$n->id} ) ) {
63778331 1729 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1730 }
3a2ebbf4 1731 $seen{$n->id} = 1;
910a0a6d 1732
861c3e27 1733 my $next = $self->next_reading( $n, $witness );
44771cf2 1734 unless( $next ) {
63778331 1735 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1736 }
910a0a6d 1737 push( @readings, $next );
1738 $n = $next;
e2902068 1739 }
1740 # Check that the last reading is our end reading.
1741 my $last = $readings[$#readings];
63778331 1742 throw( "Last reading found from " . $start->text .
1743 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1744 unless $last->id eq $end->id;
e2902068 1745
1746 return @readings;
1747}
1748
4e5a7b2c 1749=head2 next_reading( $reading, $sigil );
8e1394aa 1750
4a8828f0 1751Returns the reading that follows the given reading along the given witness
930ff666 1752path.
8e1394aa 1753
1754=cut
1755
4a8828f0 1756sub next_reading {
e2902068 1757 # Return the successor via the corresponding path.
8e1394aa 1758 my $self = shift;
3a2ebbf4 1759 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1760 return undef unless $answer;
3a2ebbf4 1761 return $self->reading( $answer );
8e1394aa 1762}
1763
4e5a7b2c 1764=head2 prior_reading( $reading, $sigil )
8e1394aa 1765
4a8828f0 1766Returns the reading that precedes the given reading along the given witness
930ff666 1767path.
8e1394aa 1768
1769=cut
1770
4a8828f0 1771sub prior_reading {
e2902068 1772 # Return the predecessor via the corresponding path.
8e1394aa 1773 my $self = shift;
3a2ebbf4 1774 my $answer = $self->_find_linked_reading( 'prior', @_ );
1775 return $self->reading( $answer );
8e1394aa 1776}
1777
4a8828f0 1778sub _find_linked_reading {
861c3e27 1779 my( $self, $direction, $node, $path ) = @_;
1780
1781 # Get a backup if we are dealing with a layered witness
1782 my $alt_path;
1783 my $aclabel = $self->ac_label;
1784 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1785 $alt_path = $1;
1786 }
1787
e2902068 1788 my @linked_paths = $direction eq 'next'
3a2ebbf4 1789 ? $self->sequence->edges_from( $node )
1790 : $self->sequence->edges_to( $node );
e2902068 1791 return undef unless scalar( @linked_paths );
8e1394aa 1792
e2902068 1793 # We have to find the linked path that contains all of the
1794 # witnesses supplied in $path.
1795 my( @path_wits, @alt_path_wits );
4e5a7b2c 1796 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1797 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1798 my $base_le;
1799 my $alt_le;
1800 foreach my $le ( @linked_paths ) {
3a2ebbf4 1801 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1802 $base_le = $le;
910a0a6d 1803 }
508fd430 1804 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1805 if( _is_within( \@path_wits, \@le_wits ) ) {
1806 # This is the right path.
1807 return $direction eq 'next' ? $le->[1] : $le->[0];
1808 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1809 $alt_le = $le;
1810 }
8e1394aa 1811 }
e2902068 1812 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1813 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1814 if $alt_le;
e2902068 1815
1816 # Got this far? Return the base path if it exists.
3a2ebbf4 1817 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1818 if $base_le;
e2902068 1819
1820 # Got this far? We have no appropriate path.
2c669bca 1821 warn "Could not find $direction node from " . $node->id
910a0a6d 1822 . " along path $path";
8e1394aa 1823 return undef;
1824}
1825
4a8828f0 1826# Some set logic.
1827sub _is_within {
1828 my( $set1, $set2 ) = @_;
7854e12e 1829 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1830 foreach my $el ( @$set1 ) {
910a0a6d 1831 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1832 }
1833 return $ret;
1834}
1835
4e5a7b2c 1836# Return the string that joins together a list of witnesses for
1837# display on a single path.
1838sub _witnesses_of_label {
1839 my( $self, $label ) = @_;
1840 my $regex = $self->wit_list_separator;
1841 my @answer = split( /\Q$regex\E/, $label );
1842 return @answer;
b0b4421a 1843}
1844
d4b75f44 1845=head2 common_readings
1846
1847Returns the list of common readings in the graph (i.e. those readings that are
1848shared by all non-lacunose witnesses.)
1849
1850=cut
1851
1852sub common_readings {
1853 my $self = shift;
1854 my @common = grep { $_->is_common } $self->readings;
1855 return @common;
1856}
1857
fae52efd 1858=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1859
1860Returns the text of a witness (plus its backup, if we are using a layer)
1861as stored in the collation. The text is returned as a string, where the
1862individual readings are joined with spaces and the meta-readings (e.g.
1863lacunae) are omitted. Optional specification of $start and $end allows
1864the generation of a subset of the witness text.
4e5a7b2c 1865
b0b4421a 1866=cut
1867
1868sub path_text {
861c3e27 1869 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1870 $start = $self->start unless $start;
1871 $end = $self->end unless $end;
861c3e27 1872 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1873 my $pathtext = '';
1874 my $last;
1875 foreach my $r ( @path ) {
6ad2ce78 1876 unless ( $r->join_prior || !$last || $last->join_next ) {
1877 $pathtext .= ' ';
1878 }
1879 $pathtext .= $r->text;
629e27b0 1880 $last = $r;
1881 }
1882 return $pathtext;
b0b4421a 1883}
4e5a7b2c 1884
1885=head1 INITIALIZATION METHODS
1886
1887These are mostly for use by parsers.
1888
1889=head2 make_witness_path( $witness )
1890
1891Link the array of readings contained in $witness->path (and in
1892$witness->uncorrected_path if it exists) into collation paths.
1893Clear out the arrays when finished.
de51424a 1894
4e5a7b2c 1895=head2 make_witness_paths
1896
1897Call make_witness_path for all witnesses in the tradition.
1898
1899=cut
930ff666 1900
7e450e44 1901# For use when a collation is constructed from a base text and an apparatus.
1902# We have the sequences of readings and just need to add path edges.
1f7aa795 1903# When we are done, clear out the witness path attributes, as they are no
1904# longer needed.
1905# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1906
6a222840 1907sub make_witness_paths {
1908 my( $self ) = @_;
910a0a6d 1909 foreach my $wit ( $self->tradition->witnesses ) {
6771a1b1 1910 # say STDERR "Making path for " . $wit->sigil;
910a0a6d 1911 $self->make_witness_path( $wit );
7854e12e 1912 }
7854e12e 1913}
1914
6a222840 1915sub make_witness_path {
7854e12e 1916 my( $self, $wit ) = @_;
1917 my @chain = @{$wit->path};
15d2d3df 1918 my $sig = $wit->sigil;
fae52efd 1919 # Add start and end if necessary
1920 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1921 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1922 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1923 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1924 }
1f7aa795 1925 if( $wit->is_layered ) {
d9e873d0 1926 @chain = @{$wit->uncorrected_path};
fae52efd 1927 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1928 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1929 foreach my $idx( 0 .. $#chain-1 ) {
1930 my $source = $chain[$idx];
1931 my $target = $chain[$idx+1];
1932 $self->add_path( $source, $target, $sig.$self->ac_label )
1933 unless $self->has_path( $source, $target, $sig );
1934 }
15d2d3df 1935 }
1f7aa795 1936 $wit->clear_path;
1937 $wit->clear_uncorrected_path;
e2902068 1938}
1939
4e5a7b2c 1940=head2 calculate_ranks
1941
1942Calculate the reading ranks (that is, their aligned positions relative
1943to each other) for the graph. This can only be called on linear collations.
1944
b365fbae 1945=begin testing
1946
1947use Text::Tradition;
1948
1949my $cxfile = 't/data/Collatex-16.xml';
1950my $t = Text::Tradition->new(
1951 'name' => 'inline',
1952 'input' => 'CollateX',
1953 'file' => $cxfile,
1954 );
1955my $c = $t->collation;
1956
1957# Make an svg
bfcbcecb 1958my $table = $c->alignment_table;
1959ok( $c->has_cached_table, "Alignment table was cached" );
1960is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1961$c->calculate_ranks;
bfcbcecb 1962is( $c->alignment_table, $table, "Cached table retained with no rank change" );
864ee4bf 1963$c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
1964is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
1965$c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
1966isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
b365fbae 1967
1968=end testing
1969
4e5a7b2c 1970=cut
1971
910a0a6d 1972sub calculate_ranks {
1973 my $self = shift;
b365fbae 1974 # Save the existing ranks, in case we need to invalidate the cached SVG.
8d9494a8 1975 throw( "Cannot calculate ranks on a non-linear graph" )
1976 unless $self->linear;
b365fbae 1977 my %existing_ranks;
ac4d7ac2 1978 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1979
1980 # Do the rankings based on the relationship equivalence graph, starting
1981 # with the start node.
56772e8c 1982 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1983
910a0a6d 1984 # Transfer our rankings from the topological graph to the real one.
1985 foreach my $r ( $self->readings ) {
cecbe56d 1986 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 1987 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1988 } else {
63778331 1989 # Die. Find the last rank we calculated.
359944f7 1990 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1991 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1992 $self->readings;
1993 my $last = pop @all_defined;
1994 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1995 }
de51424a 1996 }
bfcbcecb 1997 # Do we need to invalidate the cached data?
be3af600 1998 if( $self->has_cached_table ) {
b365fbae 1999 foreach my $r ( $self->readings ) {
7c293912 2000 next if defined( $existing_ranks{$r} )
2001 && $existing_ranks{$r} == $r->rank;
c1915ab9 2002 # Something has changed, so clear the cache
bfcbcecb 2003 $self->_clear_cache;
c1915ab9 2004 # ...and recalculate the common readings.
2005 $self->calculate_common_readings();
b365fbae 2006 last;
2007 }
2008 }
c1915ab9 2009 # The graph calculation information is now up to date.
2010 $self->_graphcalc_done(1);
8e1394aa 2011}
3a1f2523 2012
c1915ab9 2013sub _clear_cache {
2014 my $self = shift;
c1915ab9 2015 $self->wipe_table if $self->has_cached_table;
2016}
2017
2018
4e5a7b2c 2019=head2 flatten_ranks
2020
2021A convenience method for parsing collation data. Searches the graph for readings
2022with the same text at the same rank, and merges any that are found.
2023
2024=cut
2025
0e476982 2026sub flatten_ranks {
4ef65ab4 2027 my ( $self, %args ) = shift;
0e476982 2028 my %unique_rank_rdg;
bf6e338d 2029 my $changed;
4ef65ab4 2030 foreach my $p ( $self->identical_readings( %args ) ) {
2031 # say STDERR "Combining readings at same rank: @$p";
2032 $changed = 1;
2033 $self->merge_readings( @$p );
2034 # TODO see if this now makes a common point.
7a0956c1 2035 }
2036 # If we merged readings, the ranks are still fine but the alignment
2037 # table is wrong. Wipe it.
2038 $self->wipe_table() if $changed;
2039}
2040
2041=head2 identical_readings
2042=head2 identical_readings( start => $startnode, end => $endnode )
2043=head2 identical_readings( startrank => $startrank, endrank => $endrank )
2044
2045Goes through the graph identifying all pairs of readings that appear to be
2046identical, and therefore able to be merged into a single reading. Returns the
2047relevant identical pairs. Can be restricted to run over only a part of the
2048graph, specified either by node or by rank.
2049
2050=cut
2051
2052sub identical_readings {
2053 my ( $self, %args ) = @_;
2054 # Find where we should start and end.
2055 my $startrank = $args{startrank} || 0;
2056 if( $args{start} ) {
2057 throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2058 && $self->reading( $args{start} )->has_rank;
2059 $startrank = $self->reading( $args{start} )->rank;
2060 }
2061 my $endrank = $args{endrank} || $self->end->rank;
2062 if( $args{end} ) {
2063 throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2064 && $self->reading( $args{end} )->has_rank;
3c234eb6 2065 $endrank = $self->reading( $args{end} )->rank;
7a0956c1 2066 }
2067
2068 # Make sure the ranks are correct.
2069 unless( $self->_graphcalc_done ) {
2070 $self->calculate_ranks;
2071 }
2072 # Go through the readings looking for duplicates.
2073 my %unique_rank_rdg;
2074 my @pairs;
0e476982 2075 foreach my $rdg ( $self->readings ) {
2076 next unless $rdg->has_rank;
7a0956c1 2077 my $rk = $rdg->rank;
2078 next if $rk > $endrank || $rk < $startrank;
2079 my $key = $rk . "||" . $rdg->text;
0e476982 2080 if( exists $unique_rank_rdg{$key} ) {
07e6765f 2081 # Make sure they don't have different grammatical forms
2082 my $ur = $unique_rank_rdg{$key};
a445ce40 2083 if( $rdg->is_identical( $ur ) ) {
7a0956c1 2084 push( @pairs, [ $ur, $rdg ] );
07e6765f 2085 }
0e476982 2086 } else {
2087 $unique_rank_rdg{$key} = $rdg;
2088 }
7a0956c1 2089 }
2090
2091 return @pairs;
0e476982 2092}
4633f9e4 2093
2094
d4b75f44 2095=head2 calculate_common_readings
2096
2097Goes through the graph identifying the readings that appear in every witness
2098(apart from those with lacunae at that spot.) Marks them as common and returns
2099the list.
2100
2101=begin testing
2102
2103use Text::Tradition;
2104
2105my $cxfile = 't/data/Collatex-16.xml';
2106my $t = Text::Tradition->new(
2107 'name' => 'inline',
2108 'input' => 'CollateX',
2109 'file' => $cxfile,
2110 );
2111my $c = $t->collation;
2112
2113my @common = $c->calculate_common_readings();
2114is( scalar @common, 8, "Found correct number of common readings" );
2115my @marked = sort $c->common_readings();
2116is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 2117my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 2118is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2119
2120=end testing
2121
2122=cut
2123
2124sub calculate_common_readings {
2125 my $self = shift;
2126 my @common;
c1915ab9 2127 map { $_->is_common( 0 ) } $self->readings;
2128 # Implicitly calls calculate_ranks
1dd07bda 2129 my $table = $self->alignment_table;
d4b75f44 2130 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 2131 my @row = map { $_->{'tokens'}->[$idx]
2132 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2133 @{$table->{'alignment'}};
d4b75f44 2134 my %hash;
2135 foreach my $r ( @row ) {
2136 if( $r ) {
2137 $hash{$r->id} = $r unless $r->is_meta;
2138 } else {
2139 $hash{'UNDEF'} = $r;
2140 }
2141 }
2142 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2143 my( $r ) = values %hash;
2144 $r->is_common( 1 );
2145 push( @common, $r );
2146 }
2147 }
2148 return @common;
2149}
2150
861c3e27 2151=head2 text_from_paths
2152
2153Calculate the text array for all witnesses from the path, for later consistency
2154checking. Only to be used if there is no non-graph-based way to know the
2155original texts.
2156
2157=cut
2158
2159sub text_from_paths {
2160 my $self = shift;
2161 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 2162 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2163 my @text;
2164 foreach my $r ( @readings ) {
2165 next if $r->is_meta;
2166 push( @text, $r->text );
2167 }
861c3e27 2168 $wit->text( \@text );
2169 if( $wit->is_layered ) {
5164a6f0 2170 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2171 $wit->sigil.$self->ac_label );
2172 my @uctext;
2173 foreach my $r ( @ucrdgs ) {
2174 next if $r->is_meta;
2175 push( @uctext, $r->text );
2176 }
2177 $wit->layertext( \@uctext );
861c3e27 2178 }
2179 }
2180}
0e476982 2181
4e5a7b2c 2182=head1 UTILITY FUNCTIONS
2183
2184=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 2185
4e5a7b2c 2186Find the last reading that occurs in sequence before both the given readings.
414cc046 2187At the very least this should be $self->start.
4e5a7b2c 2188
2189=head2 common_successor( $reading_a, $reading_b )
2190
2191Find the first reading that occurs in sequence after both the given readings.
414cc046 2192At the very least this should be $self->end.
4e5a7b2c 2193
22222af9 2194=begin testing
2195
2196use Text::Tradition;
2197
2198my $cxfile = 't/data/Collatex-16.xml';
2199my $t = Text::Tradition->new(
2200 'name' => 'inline',
2201 'input' => 'CollateX',
2202 'file' => $cxfile,
2203 );
2204my $c = $t->collation;
2205
679f17e1 2206is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 2207 'n20', "Found correct common predecessor" );
679f17e1 2208is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 2209 '__END__', "Found correct common successor" );
22222af9 2210
4e5a7b2c 2211is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 2212 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 2213is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 2214 '__END__', "Found correct common successor for readings on same path" );
22222af9 2215
2216=end testing
2217
2218=cut
2219
2220## Return the closest reading that is a predecessor of both the given readings.
2221sub common_predecessor {
2222 my $self = shift;
4e5a7b2c 2223 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 2224 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 2225}
2226
2227sub common_successor {
2228 my $self = shift;
4e5a7b2c 2229 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 2230 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 2231}
2232
414cc046 2233
2234# TODO think about how to do this without ranks...
027d819c 2235sub _common_in_path {
22222af9 2236 my( $self, $r1, $r2, $dir ) = @_;
414cc046 2237 my $iter = $self->end->rank;
22222af9 2238 my @candidates;
414cc046 2239 my @last_r1 = ( $r1 );
2240 my @last_r2 = ( $r2 );
2241 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 2242 my %all_seen;
6771a1b1 2243 # say STDERR "Finding common $dir for $r1, $r2";
22222af9 2244 while( !@candidates ) {
414cc046 2245 last unless $iter--; # Avoid looping infinitely
2246 # Iterate separately down the graph from r1 and r2
2247 my( @new_lc1, @new_lc2 );
2248 foreach my $lc ( @last_r1 ) {
2249 foreach my $p ( $lc->$dir ) {
2250 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
6771a1b1 2251 # say STDERR "Path candidate $p from $lc";
414cc046 2252 push( @candidates, $p );
002e3600 2253 } elsif( !$all_seen{$p->id} ) {
414cc046 2254 $all_seen{$p->id} = 'r1';
2255 push( @new_lc1, $p );
2256 }
2257 }
2258 }
2259 foreach my $lc ( @last_r2 ) {
22222af9 2260 foreach my $p ( $lc->$dir ) {
414cc046 2261 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
6771a1b1 2262 # say STDERR "Path candidate $p from $lc";
22222af9 2263 push( @candidates, $p );
002e3600 2264 } elsif( !$all_seen{$p->id} ) {
414cc046 2265 $all_seen{$p->id} = 'r2';
2266 push( @new_lc2, $p );
22222af9 2267 }
2268 }
2269 }
414cc046 2270 @last_r1 = @new_lc1;
2271 @last_r2 = @new_lc2;
22222af9 2272 }
2273 my @answer = sort { $a->rank <=> $b->rank } @candidates;
2274 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2275}
2276
63778331 2277sub throw {
2278 Text::Tradition::Error->throw(
2279 'ident' => 'Collation error',
2280 'message' => $_[0],
2281 );
2282}
2283
dd3b58b0 2284no Moose;
2285__PACKAGE__->meta->make_immutable;
e867486f 2286
a445ce40 2287=head1 BUGS/TODO
2288
2289=over
2290
2291=item * Rework XML serialization in a more modular way
2292
2293=back
2294
027d819c 2295=head1 LICENSE
e867486f 2296
027d819c 2297This package is free software and is provided "as is" without express
2298or implied warranty. You can redistribute it and/or modify it under
2299the same terms as Perl itself.
e867486f 2300
027d819c 2301=head1 AUTHOR
e867486f 2302
027d819c 2303Tara L Andrews E<lt>aurum@cpan.orgE<gt>