save Ternary data type to graphml too
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
6771a1b1 3use feature 'say';
910a0a6d 4use Encode qw( decode_utf8 );
5use File::Temp;
bfcbcecb 6use File::Which;
c9bf3dbf 7use Graph;
8e1394aa 8use IPC::Run qw( run binary );
82fa4d57 9use Text::CSV;
5c0072ef 10use Text::Tradition::Collation::Data;
b15511bf 11use Text::Tradition::Collation::Reading;
22222af9 12use Text::Tradition::Collation::RelationshipStore;
63778331 13use Text::Tradition::Error;
cc31ebaa 14use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
5c0072ef 15use XML::LibXML;
16use XML::LibXML::XPathContext;
dd3b58b0 17use Moose;
18
5c0072ef 19has _data => (
20 isa => 'Text::Tradition::Collation::Data',
21 is => 'ro',
22 required => 1,
23 handles => [ qw(
24 sequence
25 paths
26 _set_relations
27 relations
28 _set_start
29 _set_end
30 ac_label
31 has_cached_table
32 relationships
33 related_readings
34 get_relationship
35 del_relationship
36 equivalence
37 equivalence_graph
38 readings
39 reading
40 _add_reading
41 del_reading
42 has_reading
43 wit_list_separator
44 baselabel
45 linear
46 wordsep
47 start
48 end
49 cached_table
50 _graphcalc_done
51 has_cached_svg
52 wipe_table
53 )]
54);
dd3b58b0 55
3a2ebbf4 56has 'tradition' => (
57 is => 'ro',
d047cd52 58 isa => 'Text::Tradition',
8cfd99c4 59 writer => '_set_tradition',
8d9a1cd8 60 weak_ref => 1,
d047cd52 61 );
dd3b58b0 62
4e5a7b2c 63=head1 NAME
64
65Text::Tradition::Collation - a software model for a text collation
66
67=head1 SYNOPSIS
68
69 use Text::Tradition;
70 my $t = Text::Tradition->new(
71 'name' => 'this is a text',
72 'input' => 'TEI',
73 'file' => '/path/to/tei_parallel_seg_file.xml' );
74
75 my $c = $t->collation;
76 my @readings = $c->readings;
77 my @paths = $c->paths;
78 my @relationships = $c->relationships;
79
80 my $svg_variant_graph = $t->collation->as_svg();
81
82=head1 DESCRIPTION
83
84Text::Tradition is a library for representation and analysis of collated
85texts, particularly medieval ones. The Collation is the central feature of
86a Tradition, where the text, its sequence of readings, and its relationships
87between readings are actually kept.
88
89=head1 CONSTRUCTOR
90
91=head2 new
92
93The constructor. Takes a hash or hashref of the following arguments:
94
95=over
96
97=item * tradition - The Text::Tradition object to which the collation
98belongs. Required.
99
100=item * linear - Whether the collation should be linear; that is, whether
101transposed readings should be treated as two linked readings rather than one,
102and therefore whether the collation graph is acyclic. Defaults to true.
103
4e5a7b2c 104=item * baselabel - The default label for the path taken by a base text
105(if any). Defaults to 'base text'.
106
107=item * wit_list_separator - The string to join a list of witnesses for
108purposes of making labels in display graphs. Defaults to ', '.
109
110=item * ac_label - The extra label to tack onto a witness sigil when
111representing another layer of path for the given witness - that is, when
112a text has more than one possible reading due to scribal corrections or
113the like. Defaults to ' (a.c.)'.
114
4e483aa5 115=item * wordsep - The string used to separate words in the original text.
116Defaults to ' '.
117
4e5a7b2c 118=back
119
120=head1 ACCESSORS
121
122=head2 tradition
123
124=head2 linear
125
4e5a7b2c 126=head2 wit_list_separator
127
128=head2 baselabel
129
130=head2 ac_label
131
4e483aa5 132=head2 wordsep
133
4e5a7b2c 134Simple accessors for collation attributes.
135
136=head2 start
137
138The meta-reading at the start of every witness path.
139
140=head2 end
141
142The meta-reading at the end of every witness path.
143
144=head2 readings
145
146Returns all Reading objects in the graph.
147
148=head2 reading( $id )
149
150Returns the Reading object corresponding to the given ID.
151
152=head2 add_reading( $reading_args )
153
154Adds a new reading object to the collation.
155See L<Text::Tradition::Collation::Reading> for the available arguments.
156
157=head2 del_reading( $object_or_id )
158
159Removes the given reading from the collation, implicitly removing its
160paths and relationships.
161
4e5a7b2c 162=head2 has_reading( $id )
163
164Predicate to see whether a given reading ID is in the graph.
165
166=head2 reading_witnesses( $object_or_id )
167
168Returns a list of sigils whose witnesses contain the reading.
169
170=head2 paths
171
172Returns all reading paths within the document - that is, all edges in the
173collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
174
175=head2 add_path( $source, $target, $sigil )
176
177Links the given readings in the collation in sequence, under the given witness
178sigil. The readings may be specified by object or ID.
179
180=head2 del_path( $source, $target, $sigil )
181
182Links the given readings in the collation in sequence, under the given witness
183sigil. The readings may be specified by object or ID.
184
185=head2 has_path( $source, $target );
186
187Returns true if the two readings are linked in sequence in any witness.
188The readings may be specified by object or ID.
189
190=head2 relationships
191
192Returns all Relationship objects in the collation.
193
194=head2 add_relationship( $reading, $other_reading, $options )
195
196Adds a new relationship of the type given in $options between the two readings,
197which may be specified by object or ID. Returns a value of ( $status, @vectors)
198where $status is true on success, and @vectors is a list of relationship edges
199that were ultimately added.
200See L<Text::Tradition::Collation::Relationship> for the available options.
201
202=cut
dd3b58b0 203
5c0072ef 204sub BUILDARGS {
205 my ( $class, @args ) = @_;
206 my %args = @args == 1 ? %{ $args[0] } : @args;
207 # TODO determine these from the Moose::Meta object
208 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
209 linear wordsep start end cached_table _graphcalc_done);
210 my %data_args;
211 for my $attr (@delegate_attrs) {
212 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
213 }
214 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
215 return \%args;
216}
217
d047cd52 218sub BUILD {
3a2ebbf4 219 my $self = shift;
22222af9 220 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
e4b73942 221 $self->_set_start( $self->add_reading(
222 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
223 $self->_set_end( $self->add_reading(
224 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
d047cd52 225}
784877d9 226
24efa55d 227sub register_relationship_type {
228 my $self = shift;
229 my %args = @_ == 1 ? %{$_[0]} : @_;
230 if( $self->relations->has_type( $args{name} ) ) {
231 throw( 'Relationship type ' . $args{name} . ' already registered' );
232 }
233 $self->relations->add_type( %args );
234}
235
39d1ddc7 236sub get_relationship_type {
237 my( $self, $name ) = @_;
238 return $self->relations->has_type( $name )
239 ? $self->relations->type( $name ) : undef;
240}
241
3a2ebbf4 242### Reading construct/destruct functions
243
244sub add_reading {
245 my( $self, $reading ) = @_;
246 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
247 my %args = %$reading;
e4b73942 248 if( $args{'init'} ) {
249 # If we are initializing an empty collation, don't assume that we
250 # have set a tradition.
251 delete $args{'init'};
8943ff68 252 } elsif( $self->tradition->can('language') && $self->tradition->has_language
253 && !exists $args{'language'} ) {
6ad2ce78 254 $args{'language'} = $self->tradition->language;
255 }
3a2ebbf4 256 $reading = Text::Tradition::Collation::Reading->new(
257 'collation' => $self,
258 %args );
259 }
260 # First check to see if a reading with this ID exists.
261 if( $self->reading( $reading->id ) ) {
63778331 262 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 263 }
c1915ab9 264 $self->_graphcalc_done(0);
3a2ebbf4 265 $self->_add_reading( $reading->id => $reading );
266 # Once the reading has been added, put it in both graphs.
267 $self->sequence->add_vertex( $reading->id );
cecbe56d 268 $self->relations->add_reading( $reading->id );
3a2ebbf4 269 return $reading;
eca16057 270};
271
3a2ebbf4 272around del_reading => sub {
273 my $orig = shift;
274 my $self = shift;
275 my $arg = shift;
276
cecbe56d 277 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
278 $arg = $arg->id;
3a2ebbf4 279 }
3a2ebbf4 280 # Remove the reading from the graphs.
c1915ab9 281 $self->_graphcalc_done(0);
4e483aa5 282 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 283 $self->sequence->delete_vertex( $arg );
284 $self->relations->delete_reading( $arg );
3a2ebbf4 285
286 # Carry on.
cecbe56d 287 $self->$orig( $arg );
3a2ebbf4 288};
7854e12e 289
3c234eb6 290=head2 merge_readings( $main, $second, $concatenate, $with_str )
291
292Merges the $second reading into the $main one. If $concatenate is true, then
293the merged node will carry the text of both readings, concatenated with either
294$with_str (if specified) or a sensible default (the empty string if the
295appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
296
297The first two arguments may be either readings or reading IDs.
298
4e483aa5 299=begin testing
300
301use Text::Tradition;
58568d5c 302use TryCatch;
4e483aa5 303
304my $cxfile = 't/data/Collatex-16.xml';
305my $t = Text::Tradition->new(
306 'name' => 'inline',
307 'input' => 'CollateX',
308 'file' => $cxfile,
309 );
310my $c = $t->collation;
311
312my $rno = scalar $c->readings;
4ef65ab4 313# Split n21 ('unto') for testing purposes
4e483aa5 314my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
315my $old_r = $c->reading( 'n21' );
316$old_r->alter_text( 'to' );
317$c->del_path( 'n20', 'n21', 'A' );
318$c->add_path( 'n20', 'n21p0', 'A' );
319$c->add_path( 'n21p0', 'n21', 'A' );
7a0956c1 320$c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
4e483aa5 321$c->flatten_ranks();
322ok( $c->reading( 'n21p0' ), "New reading exists" );
323is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
324
679f17e1 325# Combine n3 and n4 ( with his )
4e483aa5 326$c->merge_readings( 'n3', 'n4', 1 );
327ok( !$c->reading('n4'), "Reading n4 is gone" );
328is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
329
679f17e1 330# Collapse n9 and n10 ( rood / root )
331$c->merge_readings( 'n9', 'n10' );
332ok( !$c->reading('n10'), "Reading n10 is gone" );
333is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 334
58568d5c 335# Try to combine n21 and n21p0. This should break.
4e483aa5 336my $remaining = $c->reading('n21');
337$remaining ||= $c->reading('n22'); # one of these should still exist
58568d5c 338try {
339 $c->merge_readings( 'n21p0', $remaining, 1 );
340 ok( 0, "Bad reading merge changed the graph" );
341} catch( Text::Tradition::Error $e ) {
342 like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
343} catch {
344 ok( 0, "Unexpected error on bad reading merge: $@" );
345}
346
347try {
348 $c->calculate_ranks();
349 ok( 1, "Graph is still evidently whole" );
350} catch( Text::Tradition::Error $e ) {
351 ok( 0, "Caught a rank exception: " . $e->message );
352}
4e483aa5 353
354=end testing
355
356=cut
7854e12e 357
3a2ebbf4 358sub merge_readings {
359 my $self = shift;
360
56772e8c 361 # Sanity check
362 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
363 my $mergemeta = $kept_obj->is_meta;
364 throw( "Cannot merge meta and non-meta reading" )
365 unless ( $mergemeta && $del_obj->is_meta )
366 || ( !$mergemeta && !$del_obj->is_meta );
367 if( $mergemeta ) {
368 throw( "Cannot merge with start or end node" )
369 if( $kept_obj eq $self->start || $kept_obj eq $self->end
370 || $del_obj eq $self->start || $del_obj eq $self->end );
a445ce40 371 throw( "Cannot combine text of meta readings" ) if $combine;
56772e8c 372 }
e4f00901 373 # We can only merge readings in a linear graph if:
374 # - they are contiguous with only one edge between them, OR
375 # - they are at equivalent ranks in the graph.
376 if( $self->linear ) {
377 my @delpred = $del_obj->predecessors;
378 my @keptsuc = $kept_obj->successors;
379 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
380 && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
381 my( $is_ok, $msg ) = $self->relations->relationship_valid(
382 $kept_obj, $del_obj, 'collated' );
383 unless( $is_ok ) {
384 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
385 }
386 }
387 }
388
3a2ebbf4 389 # We only need the IDs for adding paths to the graph, not the reading
390 # objects themselves.
56772e8c 391 my $kept = $kept_obj->id;
392 my $deleted = $del_obj->id;
c1915ab9 393 $self->_graphcalc_done(0);
10943ab0 394
3a2ebbf4 395 # The kept reading should inherit the paths and the relationships
396 # of the deleted reading.
397 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
398 my @vector = ( $kept );
399 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
400 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 401 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 402 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
403 $self->sequence->add_edge( @vector );
404 my $fwits = $self->sequence->get_edge_attributes( @vector );
405 @wits{keys %$fwits} = values %$fwits;
406 $self->sequence->set_edge_attributes( @vector, \%wits );
407 }
cecbe56d 408 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 409
410 # Do the deletion deed.
4e483aa5 411 if( $combine ) {
869a1ada 412 # Combine the text of the readings
4e483aa5 413 my $joinstr = $combine_char;
414 unless( defined $joinstr ) {
415 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
416 $joinstr = $self->wordsep unless defined $joinstr;
417 }
a445ce40 418 $kept_obj->_combine( $del_obj, $joinstr );
49d4f2ac 419 }
3a2ebbf4 420 $self->del_reading( $deleted );
421}
7854e12e 422
8d9494a8 423=head2 merge_related( @relationship_types )
424
425Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
426
427WARNING: This operation cannot be undone.
428
429=cut
430
431=begin testing
432
58568d5c 433use Test::Warn;
8d9494a8 434use Text::Tradition;
435use TryCatch;
436
58568d5c 437my $t;
438warnings_exist {
439 $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
440} [qr/Cannot set relationship on a meta reading/],
441 "Got expected relationship drop warning on parse";
442
8d9494a8 443my $c = $t->collation;
444
445my %rdg_ids;
446map { $rdg_ids{$_} = 1 } $c->readings;
447$c->merge_related( 'orthographic' );
448is( scalar( $c->readings ), keys( %rdg_ids ) - 8,
449 "Successfully collapsed orthographic variation" );
450map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.1 /;
451foreach my $rid ( keys %rdg_ids ) {
452 my $exp = $rdg_ids{$rid};
453 is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
454 ( $exp ? "retained" : "removed" ) );
455}
456ok( $c->linear, "Graph is still linear" );
457try {
458 $c->calculate_ranks; # This should succeed
459 ok( 1, "Can still calculate ranks on the new graph" );
460} catch {
461 ok( 0, "Rank calculation on merged graph failed: $@" );
462}
463
464# Now add some transpositions
465$c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
466$c->merge_related( 'transposition' );
467is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
468 "Transposed relationship is merged away" );
469ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
470ok( !$c->linear, "Graph is no longer linear" );
471try {
472 $c->calculate_ranks; # This should fail
473 ok( 0, "Rank calculation happened on nonlinear graph?!" );
474} catch ( Text::Tradition::Error $e ) {
475 is( $e->message, 'Cannot calculate ranks on a non-linear graph',
476 "Rank calculation on merged graph threw an error" );
477}
478
8d9494a8 479=end testing
480
481=cut
482
483# TODO: there should be a way to display merged without affecting the underlying data!
484
485sub merge_related {
486 my $self = shift;
487 my %reltypehash;
488 map { $reltypehash{$_} = 1 } @_;
489
490 # Set up the filter for finding related readings
491 my $filter = sub {
492 exists $reltypehash{$_[0]->type};
493 };
494
8d9494a8 495 # Go through all readings looking for related ones
496 foreach my $r ( $self->readings ) {
497 next unless $self->reading( "$r" ); # might have been deleted meanwhile
498 my @related = $self->related_readings( $r, $filter );
499 if( @related ) {
500 push( @related, $r );
501 @related = sort {
502 scalar $b->witnesses <=> scalar $a->witnesses
503 } @related;
504 my $keep = shift @related;
505 foreach my $delr ( @related ) {
58568d5c 506 $self->linear( 0 )
8d9494a8 507 unless( $self->get_relationship( $keep, $delr )->colocated );
508 $self->merge_readings( $keep, $delr );
509 }
510 }
511 }
8d9494a8 512}
513
6771a1b1 514=head2 compress_readings
515
516Where possible in the graph, compresses plain sequences of readings into a
517single reading. The sequences must consist of readings with no
518relationships to other readings, with only a single witness path between
519them and no other witness paths from either that would skip the other. The
520readings must also not be marked as nonsense or bad grammar.
521
522WARNING: This operation cannot be undone.
523
524=cut
525
526sub compress_readings {
527 my $self = shift;
528 # Anywhere in the graph that there is a reading that joins only to a single
529 # successor, and neither of these have any relationships, just join the two
530 # readings.
6771a1b1 531 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
428bcf0b 532 # Now look for readings that can be joined to their successors.
a445ce40 533 next unless $rdg->is_combinable;
6771a1b1 534 my %seen;
535 while( $self->sequence->successors( $rdg ) == 1 ) {
536 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
537 throw( "Infinite loop" ) if $seen{$next->id};
538 $seen{$next->id} = 1;
539 last if $self->sequence->predecessors( $next ) > 1;
a445ce40 540 last unless $next->is_combinable;
6771a1b1 541 say "Joining readings $rdg and $next";
542 $self->merge_readings( $rdg, $next, 1 );
543 }
544 }
545 # Make sure we haven't screwed anything up
546 foreach my $wit ( $self->tradition->witnesses ) {
547 my $pathtext = $self->path_text( $wit->sigil );
548 my $origtext = join( ' ', @{$wit->text} );
549 throw( "Text differs for witness " . $wit->sigil )
550 unless $pathtext eq $origtext;
551 if( $wit->is_layered ) {
552 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
553 $origtext = join( ' ', @{$wit->layertext} );
554 throw( "Ante-corr text differs for witness " . $wit->sigil )
555 unless $pathtext eq $origtext;
556 }
557 }
558
559 $self->relations->rebuild_equivalence();
560 $self->calculate_ranks();
561}
3265b0ce 562
3a2ebbf4 563# Helper function for manipulating the graph.
564sub _stringify_args {
4e483aa5 565 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 566 $first = $first->id
567 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
568 $second = $second->id
569 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 570 return( $first, $second, @args );
3a2ebbf4 571}
df6d9812 572
4e5a7b2c 573# Helper function for manipulating the graph.
574sub _objectify_args {
575 my( $self, $first, $second, $arg ) = @_;
576 $first = $self->reading( $first )
577 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
578 $second = $self->reading( $second )
579 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
580 return( $first, $second, $arg );
581}
f97ef19e 582
583=head2 duplicate_reading( $reading, @witlist )
584
585Split the given reading into two, so that the new reading is in the path for
ef73c20a 586the witnesses given in @witlist. If the result is that certain non-colocated
587relationships (e.g. transpositions) are no longer valid, these will be removed.
588Returns the newly-created reading.
f97ef19e 589
590=begin testing
591
68e48c06 592use Test::More::UTF8;
f97ef19e 593use Text::Tradition;
68e48c06 594use TryCatch;
f97ef19e 595
596my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
597is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
598ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
599
600my $sc = $st->collation;
601my $numr = 17;
602ok( $sc->reading('n131'), "Tradition has the affected reading" );
603is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
604is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
605
606# Detach the erroneously collated reading
2dcb5d11 607my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
ef73c20a 608ok( $newr, "New reading was created" );
f97ef19e 609ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
610is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
611is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
3c234eb6 612my $csucc = $sc->common_successor( 'n131', 'n131_0' );
613is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
f97ef19e 614
615# Check that the bad transposition is gone
2dcb5d11 616is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
f97ef19e 617is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
618
e19635f8 619# The collation should not be fixed
620my @pairs = $sc->identical_readings();
621is( scalar @pairs, 0, "Not re-collated yet" );
f97ef19e 622# Fix the collation
3c234eb6 623ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
e19635f8 624@pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
3c234eb6 625is( scalar @pairs, 3, "Found three more identical readings" );
e19635f8 626is( $sc->end->rank, 11, "The ranks shifted appropriately" );
3c234eb6 627$sc->flatten_ranks();
f97ef19e 628is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
629
68e48c06 630# Check that we can't "duplicate" a reading with no wits or with all wits
631try {
632 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
633 ok( 0, "Reading duplication without witnesses throws an error" );
634} catch( Text::Tradition::Error $e ) {
635 like( $e->message, qr/Must specify one or more witnesses/,
636 "Reading duplication without witnesses throws the expected error" );
637} catch {
638 ok( 0, "Reading duplication without witnesses threw the wrong error" );
639}
640
641try {
642 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
643 ok( 0, "Reading duplication with all witnesses throws an error" );
644} catch( Text::Tradition::Error $e ) {
645 like( $e->message, qr/Cannot join all witnesses/,
646 "Reading duplication with all witnesses throws the expected error" );
647} catch {
648 ok( 0, "Reading duplication with all witnesses threw the wrong error" );
649}
650
58568d5c 651try {
652 $sc->calculate_ranks();
653 ok( 1, "Graph is still evidently whole" );
654} catch( Text::Tradition::Error $e ) {
655 ok( 0, "Caught a rank exception: " . $e->message );
656}
657
f97ef19e 658=end testing
659
660=cut
661
662sub duplicate_reading {
663 my( $self, $r, @wits ) = @_;
68e48c06 664 # Check that we are not doing anything unwise.
665 throw( "Must specify one or more witnesses for the duplicated reading" )
666 unless @wits;
f97ef19e 667 unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
668 $r = $self->reading( $r );
669 }
670 throw( "Cannot duplicate a meta-reading" )
671 if $r->is_meta;
68e48c06 672 throw( "Cannot join all witnesses to the new reading" )
47cd4d16 673 if scalar( @wits ) == scalar( $r->witnesses );
68e48c06 674
f97ef19e 675 # Get all the reading attributes and duplicate them.
676 my $rmeta = Text::Tradition::Collation::Reading->meta;
677 my %args;
678 foreach my $attr( $rmeta->get_all_attributes ) {
679 next if $attr->name =~ /^_/;
680 my $acc = $attr->get_read_method;
681 if( !$acc && $attr->has_applied_traits ) {
682 my $tr = $attr->applied_traits;
683 if( $tr->[0] =~ /::(Array|Hash)$/ ) {
684 my $which = $1;
685 my %methods = reverse %{$attr->handles};
686 $acc = $methods{elements};
687 $args{$attr->name} = $which eq 'Array'
688 ? [ $r->$acc ] : { $r->$acc };
689 }
690 } else {
691 $args{$attr->name} = $r->$acc if $acc;
692 }
693 }
694 # By definition the new reading will no longer be common.
695 $args{is_common} = 0;
696 # The new reading also needs its own ID.
697 $args{id} = $self->_generate_dup_id( $r->id );
698
699 # Try to make the new reading.
700 my $newr = $self->add_reading( \%args );
701 # The old reading is also no longer common.
702 $r->is_common( 0 );
703
704 # For each of the witnesses, dissociate from the old reading and
705 # associate with the new.
706 foreach my $wit ( @wits ) {
707 my $prior = $self->prior_reading( $r, $wit );
708 my $next = $self->next_reading( $r, $wit );
709 $self->del_path( $prior, $r, $wit );
710 $self->add_path( $prior, $newr, $wit );
711 $self->del_path( $r, $next, $wit );
712 $self->add_path( $newr, $next, $wit );
713 }
714
3c234eb6 715 # If the graph is ranked, we need to look for relationships that are now
716 # invalid (i.e. 'non-colocation' types that might now be colocated) and
717 # remove them. If not, we can skip it.
f97ef19e 718 my $succ;
3c234eb6 719 my %rrk;
2dcb5d11 720 my @deleted_relations;
f97ef19e 721 if( $self->end->has_rank ) {
3c234eb6 722 # Find the point where we can stop checking
f97ef19e 723 $succ = $self->common_successor( $r, $newr );
3c234eb6 724
725 # Hash the existing ranks
f97ef19e 726 foreach my $rdg ( $self->readings ) {
727 $rrk{$rdg->id} = $rdg->rank;
728 }
3c234eb6 729 # Calculate the new ranks
730 $self->calculate_ranks();
f97ef19e 731
3c234eb6 732 # Check for invalid non-colocated relationships among changed-rank readings
733 # from where the ranks start changing up to $succ
f97ef19e 734 my $lastrank = $succ->rank;
735 foreach my $rdg ( $self->readings ) {
736 next if $rdg->rank > $lastrank;
737 next if $rdg->rank == $rrk{$rdg->id};
738 my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
739 next unless @noncolo;
740 foreach my $nc ( @noncolo ) {
2dcb5d11 741 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
f6a1d5f0 742 push( @deleted_relations, [ $rdg->id, $nc->id ] );
2dcb5d11 743 }
f97ef19e 744 }
745 }
ef73c20a 746 }
2dcb5d11 747 return ( $newr, @deleted_relations );
f97ef19e 748}
749
750sub _generate_dup_id {
751 my( $self, $rid ) = @_;
752 my $newid;
753 my $i = 0;
754 while( !$newid ) {
755 $newid = $rid."_$i";
756 if( $self->has_reading( $newid ) ) {
757 $newid = '';
758 $i++;
759 }
760 }
761 return $newid;
762}
763
3a2ebbf4 764### Path logic
765
766sub add_path {
767 my $self = shift;
768
769 # We only need the IDs for adding paths to the graph, not the reading
770 # objects themselves.
cecbe56d 771 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 772
c1915ab9 773 $self->_graphcalc_done(0);
3a2ebbf4 774 # Connect the readings
359944f7 775 unless( $self->sequence->has_edge( $source, $target ) ) {
776 $self->sequence->add_edge( $source, $target );
777 $self->relations->add_equivalence_edge( $source, $target );
778 }
3a2ebbf4 779 # Note the witness in question
780 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 781}
b15511bf 782
3a2ebbf4 783sub del_path {
784 my $self = shift;
49d4f2ac 785 my @args;
786 if( ref( $_[0] ) eq 'ARRAY' ) {
787 my $e = shift @_;
788 @args = ( @$e, @_ );
789 } else {
790 @args = @_;
791 }
3a2ebbf4 792
f97ef19e 793 # We only need the IDs for removing paths from the graph, not the reading
3a2ebbf4 794 # objects themselves.
49d4f2ac 795 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 796
c1915ab9 797 $self->_graphcalc_done(0);
3a2ebbf4 798 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 799 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 800 }
30897024 801 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
3a2ebbf4 802 $self->sequence->delete_edge( $source, $target );
359944f7 803 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 804 }
784877d9 805}
806
3a2ebbf4 807
15d2d3df 808# Extra graph-alike utility
809sub has_path {
3a2ebbf4 810 my $self = shift;
811 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
812 return undef unless $self->sequence->has_edge( $source, $target );
813 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 814}
815
4e5a7b2c 816=head2 clear_witness( @sigil_list )
3a2ebbf4 817
4e5a7b2c 818Clear the given witnesses out of the collation entirely, removing references
819to them in paths, and removing readings that belong only to them. Should only
820be called via $tradition->del_witness.
3a2ebbf4 821
822=cut
823
4e5a7b2c 824sub clear_witness {
825 my( $self, @sigils ) = @_;
826
c1915ab9 827 $self->_graphcalc_done(0);
4e5a7b2c 828 # Clear the witness(es) out of the paths
829 foreach my $e ( $self->paths ) {
830 foreach my $sig ( @sigils ) {
831 $self->del_path( $e, $sig );
832 }
833 }
834
835 # Clear out the newly unused readings
836 foreach my $r ( $self->readings ) {
837 unless( $self->reading_witnesses( $r ) ) {
838 $self->del_reading( $r );
839 }
840 }
841}
3a2ebbf4 842
843sub add_relationship {
844 my $self = shift;
22222af9 845 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 846 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
864ee4bf 847 foreach my $v ( @vectors ) {
848 next unless $self->get_relationship( $v )->colocated;
849 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
850 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
851 $self->_graphcalc_done(0);
852 $self->_clear_cache;
853 last;
854 }
855 }
63778331 856 return @vectors;
22222af9 857}
ef9d481f 858
ca6e6095 859around qw/ get_relationship del_relationship / => sub {
860 my $orig = shift;
861 my $self = shift;
862 my @args = @_;
863 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
864 @args = @{$_[0]};
865 }
e19635f8 866 my @stringargs = $self->_stringify_args( @args );
867 $self->$orig( @stringargs );
ca6e6095 868};
869
22222af9 870=head2 reading_witnesses( $reading )
910a0a6d 871
22222af9 872Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 873
22222af9 874=cut
1d310495 875
1d310495 876sub reading_witnesses {
877 my( $self, $reading ) = @_;
878 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 879 # arbitrarily chosen "incoming". Thus, special-case the start node.
880 if( $reading eq $self->start ) {
bed6ce83 881 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
96dc90ec 882 }
1d310495 883 my %all_witnesses;
884 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
885 my $wits = $self->sequence->get_edge_attributes( @$e );
886 @all_witnesses{ keys %$wits } = 1;
887 }
c12bb878 888 my $acstr = $self->ac_label;
889 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
890 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
891 }
1d310495 892 return keys %all_witnesses;
910a0a6d 893}
894
4e5a7b2c 895=head1 OUTPUT METHODS
8e1394aa 896
0ecb975c 897=head2 as_svg( \%options )
8e1394aa 898
0068967c 899Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 900See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 901
902=cut
903
904sub as_svg {
0ecb975c 905 my( $self, $opts ) = @_;
bfcbcecb 906 throw( "Need GraphViz installed to output SVG" )
907 unless File::Which::which( 'dot' );
e247aad1 908 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 909 $self->calculate_ranks()
910 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
be3af600 911 my @cmd = qw/dot -Tsvg/;
912 my( $svg, $err );
913 my $dotfile = File::Temp->new();
914 ## USE FOR DEBUGGING
915 # $dotfile->unlink_on_destroy(0);
916 binmode $dotfile, ':utf8';
917 print $dotfile $self->as_dot( $opts );
918 push( @cmd, $dotfile->filename );
919 run( \@cmd, ">", binary(), \$svg );
920 $svg = decode_utf8( $svg );
921 return $svg;
8e1394aa 922}
923
b22576c6 924
0ecb975c 925=head2 as_dot( \%options )
b22576c6 926
0ecb975c 927Returns a string that is the collation graph expressed in dot
928(i.e. GraphViz) format. Options include:
b22576c6 929
0ecb975c 930=over 4
b22576c6 931
0ecb975c 932=item * from
b22576c6 933
0ecb975c 934=item * to
df6d9812 935
0ecb975c 936=item * color_common
937
938=back
df6d9812 939
940=cut
941
942sub as_dot {
0ecb975c 943 my( $self, $opts ) = @_;
944 my $startrank = $opts->{'from'} if $opts;
945 my $endrank = $opts->{'to'} if $opts;
946 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 947 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
948 && $self->end->rank > 100;
6648ee3d 949 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 950
b22576c6 951 # Check the arguments
952 if( $startrank ) {
953 return if $endrank && $startrank > $endrank;
954 return if $startrank > $self->end->rank;
955 }
956 if( defined $endrank ) {
957 return if $endrank < 0;
f1b3b33a 958 $endrank = undef if $endrank == $self->end->rank;
b22576c6 959 }
960
67da8d6c 961 my $graph_name = $self->tradition->name;
962 $graph_name =~ s/[^\w\s]//g;
963 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 964
965 my %graph_attrs = (
966 'rankdir' => 'LR',
967 'bgcolor' => 'none',
968 );
969 my %node_attrs = (
b8990398 970 'fontsize' => 14,
f13b5582 971 'fillcolor' => 'white',
972 'style' => 'filled',
973 'shape' => 'ellipse'
974 );
975 my %edge_attrs = (
976 'arrowhead' => 'open',
977 'color' => '#000000',
978 'fontcolor' => '#000000',
979 );
980
67da8d6c 981 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 982 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
983 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 984
b22576c6 985 # Output substitute start/end readings if necessary
986 if( $startrank ) {
43c94341 987 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 988 }
989 if( $endrank ) {
43c94341 990 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 991 }
b365fbae 992 if( $STRAIGHTENHACK ) {
993 ## HACK part 1
43c94341 994 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
995 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 996 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
997 }
b22576c6 998 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 999 # Sort the readings by rank if we have ranks; this speeds layout.
1000 my @all_readings = $self->end->has_rank
1001 ? sort { $a->rank <=> $b->rank } $self->readings
1002 : $self->readings;
4633f9e4 1003 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 1004 foreach my $reading ( @all_readings ) {
b22576c6 1005 # Only output readings within our rank range.
1006 next if $startrank && $reading->rank < $startrank;
1007 next if $endrank && $reading->rank > $endrank;
1008 $used{$reading->id} = 1;
910a0a6d 1009 # Need not output nodes without separate labels
3a2ebbf4 1010 next if $reading->id eq $reading->text;
d4b75f44 1011 my $rattrs;
30f0df34 1012 my $label = $reading->text;
629e27b0 1013 $label .= '-' if $reading->join_next;
1014 $label = "-$label" if $reading->join_prior;
8f9cab7b 1015 $label =~ s/\"/\\\"/g;
d4b75f44 1016 $rattrs->{'label'} = $label;
10e4b1ac 1017 $rattrs->{'id'} = $reading->id;
0ecb975c 1018 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 1019 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 1020 }
3a2ebbf4 1021
30ddc24c 1022 # Add the real edges. Need to weight one edge per rank jump, in a
1023 # continuous line.
b365fbae 1024 # my $weighted = $self->_add_edge_weights;
b22576c6 1025 my @edges = $self->paths;
3bdec618 1026 my( %substart, %subend );
b22576c6 1027 foreach my $edge ( @edges ) {
1028 # Do we need to output this edge?
508fd430 1029 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
57560672 1030 my $label = $self->_path_display_label( $opts,
1031 $self->path_witnesses( $edge ) );
f13b5582 1032 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 1033
b22576c6 1034 # Account for the rank gap if necessary
30ddc24c 1035 my $rank0 = $self->reading( $edge->[0] )->rank
1036 if $self->reading( $edge->[0] )->has_rank;
1037 my $rank1 = $self->reading( $edge->[1] )->rank
1038 if $self->reading( $edge->[1] )->has_rank;
1039 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1040 $variables->{'minlen'} = $rank1 - $rank0;
1041 }
1042
1043 # Add the calculated edge weights
b365fbae 1044 # if( exists $weighted->{$edge->[0]}
e247aad1 1045 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
1046 # # $variables->{'color'} = 'red';
1047 # $variables->{'weight'} = 3.0;
1048 # }
30ddc24c 1049
508fd430 1050 # EXPERIMENTAL: make edge width reflect no. of witnesses
1051 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1052 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1053
f13b5582 1054 my $varopts = _dot_attr_string( $variables );
1055 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1056 $edge->[0], $edge->[1], $varopts );
3bdec618 1057 } elsif( $used{$edge->[0]} ) {
96ba0418 1058 $subend{$edge->[0]} = $edge->[1];
3bdec618 1059 } elsif( $used{$edge->[1]} ) {
96ba0418 1060 $substart{$edge->[1]} = $edge->[0];
b22576c6 1061 }
df6d9812 1062 }
bed6ce83 1063
1064 # If we are asked to, add relationship links
1065 if( exists $opts->{show_relations} ) {
1066 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1067 if( $filter eq 'transposition' ) {
1068 $filter =~ qr/^transposition$/;
1069 }
9e54e11b 1070 my %typecolors;
1071 my @types = sort( map { $_->name } $self->relations->types );
1072 if( exists $opts->{graphcolors} ) {
1073 foreach my $tdx ( 0 .. $#types ) {
1074 $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1075 }
1076 } else {
1077 map { $typecolors{$_} = '#FFA14F' } @types;
1078 }
bed6ce83 1079 foreach my $redge ( $self->relationships ) {
1080 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
9e54e11b 1081 my $rel = $self->get_relationship( $redge );
1082 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1083 my $variables = {
1084 arrowhead => 'none',
1085 color => $typecolors{$rel->type},
1086 constraint => 'false',
1087 penwidth => '3',
1088 };
1089 unless( exists $opts->{graphcolors} ) {
1090 $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
bed6ce83 1091 }
9e54e11b 1092 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1093 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
bed6ce83 1094 }
1095 }
1096 }
1097
3bdec618 1098 # Add substitute start and end edges if necessary
1099 foreach my $node ( keys %substart ) {
57560672 1100 my $witstr = $self->_path_display_label( $opts,
1101 $self->path_witnesses( $substart{$node}, $node ) );
f13b5582 1102 my $variables = { %edge_attrs, 'label' => $witstr };
96ba0418 1103 my $nrdg = $self->reading( $node );
1104 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1105 # Substart is actually one lower than $startrank
1106 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1107 }
f13b5582 1108 my $varopts = _dot_attr_string( $variables );
96ba0418 1109 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
3bdec618 1110 }
1111 foreach my $node ( keys %subend ) {
57560672 1112 my $witstr = $self->_path_display_label( $opts,
1113 $self->path_witnesses( $node, $subend{$node} ) );
f13b5582 1114 my $variables = { %edge_attrs, 'label' => $witstr };
1115 my $varopts = _dot_attr_string( $variables );
96ba0418 1116 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
3bdec618 1117 }
b365fbae 1118 # HACK part 2
1119 if( $STRAIGHTENHACK ) {
43c94341 1120 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1121 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 1122 }
30ddc24c 1123
df6d9812 1124 $dot .= "}\n";
1125 return $dot;
1126}
1127
f13b5582 1128sub _dot_attr_string {
1129 my( $hash ) = @_;
1130 my @attrs;
1131 foreach my $k ( sort keys %$hash ) {
1132 my $v = $hash->{$k};
1133 push( @attrs, $k.'="'.$v.'"' );
1134 }
1135 return( '[ ' . join( ', ', @attrs ) . ' ]' );
1136}
1137
30ddc24c 1138sub _add_edge_weights {
1139 my $self = shift;
1140 # Walk the graph from START to END, choosing the successor node with
1141 # the largest number of witness paths each time.
1142 my $weighted = {};
1143 my $curr = $self->start->id;
008fc8a6 1144 my $ranked = $self->end->has_rank;
30ddc24c 1145 while( $curr ne $self->end->id ) {
008fc8a6 1146 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 1147 my @succ = sort { $self->path_witnesses( $curr, $a )
1148 <=> $self->path_witnesses( $curr, $b ) }
1149 $self->sequence->successors( $curr );
1150 my $next = pop @succ;
008fc8a6 1151 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 1152 # Try to avoid lacunae in the weighted path.
008fc8a6 1153 while( @succ &&
1154 ( $self->reading( $next )->is_lacuna ||
1155 $nextrank - $rank > 1 ) ){
30ddc24c 1156 $next = pop @succ;
1157 }
1158 $weighted->{$curr} = $next;
1159 $curr = $next;
1160 }
1161 return $weighted;
1162}
1163
027d819c 1164=head2 path_witnesses( $edge )
1165
1166Returns the list of sigils whose witnesses are associated with the given edge.
1167The edge can be passed as either an array or an arrayref of ( $source, $target ).
1168
1169=cut
1170
3a2ebbf4 1171sub path_witnesses {
1172 my( $self, @edge ) = @_;
1173 # If edge is an arrayref, cope.
1174 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1175 my $e = shift @edge;
1176 @edge = @$e;
1177 }
1178 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 1179 return @wits;
3a2ebbf4 1180}
1181
7f9f05e8 1182# Helper function. Make a display label for the given witnesses, showing a.c.
1183# witnesses only where the main witness is not also in the list.
027d819c 1184sub _path_display_label {
508fd430 1185 my $self = shift;
57560672 1186 my $opts = shift;
7f9f05e8 1187 my %wits;
1188 map { $wits{$_} = 1 } @_;
1189
1190 # If an a.c. wit is listed, remove it if the main wit is also listed.
1191 # Otherwise keep it for explicit listing.
1192 my $aclabel = $self->ac_label;
1193 my @disp_ac;
1194 foreach my $w ( sort keys %wits ) {
1195 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1196 if( exists $wits{$1} ) {
1197 delete $wits{$w};
1198 } else {
1199 push( @disp_ac, $w );
1200 }
1201 }
1202 }
1203
57560672 1204 if( $opts->{'explicit_wits'} ) {
7f9f05e8 1205 return join( ', ', sort keys %wits );
57560672 1206 } else {
1207 # See if we are in a majority situation.
1208 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1209 $maj = $maj > 5 ? $maj : 5;
1210 if( scalar keys %wits > $maj ) {
1211 unshift( @disp_ac, 'majority' );
1212 return join( ', ', @disp_ac );
1213 } else {
1214 return join( ', ', sort keys %wits );
1215 }
8f9cab7b 1216 }
1217}
1dd07bda 1218
bf6e338d 1219=head2 readings_at_rank( $rank )
1dd07bda 1220
bf6e338d 1221Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 1222
1223=cut
1224
bf6e338d 1225sub readings_at_rank {
1dd07bda 1226 my( $self, $rank ) = @_;
bf6e338d 1227 my $table = $self->alignment_table;
1228 # Table rank is real rank - 1.
1229 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1230 my %readings;
1231 foreach my $e ( @elements ) {
1232 next unless ref( $e ) eq 'HASH';
1233 next unless exists $e->{'t'};
1234 $readings{$e->{'t'}->id} = $e->{'t'};
1235 }
1236 return values %readings;
1dd07bda 1237}
8f9cab7b 1238
4e5a7b2c 1239=head2 as_graphml
8e1394aa 1240
4e5a7b2c 1241Returns a GraphML representation of the collation. The GraphML will contain
1242two graphs. The first expresses the attributes of the readings and the witness
1243paths that link them; the second expresses the relationships that link the
1244readings. This is the native transfer format for a tradition.
8e1394aa 1245
56eefa04 1246=begin testing
1247
1248use Text::Tradition;
951ddfe8 1249use TryCatch;
56eefa04 1250
1251my $READINGS = 311;
1252my $PATHS = 361;
1253
1254my $datafile = 't/data/florilegium_tei_ps.xml';
1255my $tradition = Text::Tradition->new( 'input' => 'TEI',
1256 'name' => 'test0',
1257 'file' => $datafile,
1258 'linear' => 1 );
1259
1260ok( $tradition, "Got a tradition object" );
1261is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1262ok( $tradition->collation, "Tradition has a collation" );
1263
1264my $c = $tradition->collation;
1265is( scalar $c->readings, $READINGS, "Collation has all readings" );
1266is( scalar $c->paths, $PATHS, "Collation has all paths" );
1267is( scalar $c->relationships, 0, "Collation has all relationships" );
1268
1269# Add a few relationships
1270$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1271$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
b71e7ea8 1272$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1273 'is_significant' => 'yes' } );
56eefa04 1274
1275# Now write it to GraphML and parse it again.
1276
1277my $graphml = $c->as_graphml;
1278my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1279is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1280is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1281is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
b71e7ea8 1282my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1283is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
56eefa04 1284
9fef629b 1285# Now add a stemma, write to GraphML, and look at the output.
951ddfe8 1286SKIP: {
37bf09f4 1287 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
951ddfe8 1288 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1289 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1290 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1291 $graphml = $c->as_graphml;
1292 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1293}
2a812726 1294
56eefa04 1295=end testing
1296
8e1394aa 1297=cut
1298
a445ce40 1299## TODO MOVE this to Tradition.pm and modularize it better
8e1394aa 1300sub as_graphml {
a30ca502 1301 my( $self, $options ) = @_;
3d14b48e 1302 $self->calculate_ranks unless $self->_graphcalc_done;
1303
a30ca502 1304 my $start = $options->{'from'}
1305 ? $self->reading( $options->{'from'} ) : $self->start;
1306 my $end = $options->{'to'}
1307 ? $self->reading( $options->{'to'} ) : $self->end;
1308 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1309 throw( 'Start node must be before end node' );
1310 }
1311 # The readings need to be ranked for this to work.
1312 $start = $self->start unless $start->has_rank;
1313 $end = $self->end unless $end->has_rank;
414cc046 1314 my $rankoffset = 0;
1315 unless( $start eq $self->start ) {
1316 $rankoffset = $start->rank - 1;
1317 }
a30ca502 1318 my %use_readings;
1319
8e1394aa 1320 # Some namespaces
1321 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1322 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1323 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 1324 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 1325
1326 # Create the document and root node
428bcf0b 1327 require XML::LibXML;
8e1394aa 1328 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1329 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1330 $graphml->setDocumentElement( $root );
1331 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1332 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 1333
1334 # List of attribute types to save on our objects and their corresponding
1335 # GraphML types
1336 my %save_types = (
1337 'Str' => 'string',
1338 'Int' => 'int',
1339 'Bool' => 'boolean',
10e4b1ac 1340 'ReadingID' => 'string',
bbd064a9 1341 'RelationshipType' => 'string',
1342 'RelationshipScope' => 'string',
b71e7ea8 1343 'Ternary' => 'string',
bbd064a9 1344 );
1345
bbd064a9 1346 # Add the data keys for the graph. Include an extra key 'version' for the
1347 # GraphML output version.
e309421a 1348 my %graph_data_keys;
1349 my $gdi = 0;
bbd064a9 1350 my %graph_attributes = ( 'version' => 'string' );
1351 # Graph attributes include those of Tradition and those of Collation.
1352 my %gattr_from;
f97ef19e 1353 # TODO Use meta introspection method from duplicate_reading to do this
1354 # instead of naming custom keys.
bbd064a9 1355 my $tmeta = $self->tradition->meta;
1356 my $cmeta = $self->meta;
1357 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1358 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1359 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1360 next if $attr->name =~ /^_/;
bbd064a9 1361 next unless $save_types{$attr->type_constraint->name};
1362 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1363 }
9fef629b 1364 # Extra custom keys for complex objects that should be saved in some form.
1365 # The subroutine should return a string, or undef/empty.
951ddfe8 1366 if( $tmeta->has_method('stemmata') ) {
1367 $graph_attributes{'stemmata'} = sub {
1368 my @stemstrs;
1369 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1370 $self->tradition->stemmata;
1371 join( "\n", @stemstrs );
1372 };
1373 }
1374
8943ff68 1375 if( $tmeta->has_method('user') ) {
1376 $graph_attributes{'user'} = sub {
1377 $self->tradition->user ? $self->tradition->user->id : undef
1378 };
1379 }
bbd064a9 1380
1381 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 1382 $graph_data_keys{$datum} = 'dg'.$gdi++;
1383 my $key = $root->addNewChild( $graphml_ns, 'key' );
9fef629b 1384 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1385 : $graph_attributes{$datum};
e309421a 1386 $key->setAttribute( 'attr.name', $datum );
9fef629b 1387 $key->setAttribute( 'attr.type', $dtype );
e309421a 1388 $key->setAttribute( 'for', 'graph' );
1389 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1390 }
f6066bac 1391
bbd064a9 1392 # Add the data keys for reading nodes
1393 my %reading_attributes;
1394 my $rmeta = Text::Tradition::Collation::Reading->meta;
1395 foreach my $attr( $rmeta->get_all_attributes ) {
1396 next if $attr->name =~ /^_/;
bbd064a9 1397 next unless $save_types{$attr->type_constraint->name};
1398 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1399 }
a445ce40 1400 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1401 # Extra custom key for the reading morphology
1402 $reading_attributes{'lexemes'} = 'string';
1403 }
7cd9f181 1404
ef9d481f 1405 my %node_data_keys;
1406 my $ndi = 0;
bbd064a9 1407 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1408 $node_data_keys{$datum} = 'dn'.$ndi++;
1409 my $key = $root->addNewChild( $graphml_ns, 'key' );
1410 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1411 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1412 $key->setAttribute( 'for', 'node' );
1413 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1414 }
1415
bbd064a9 1416 # Add the data keys for edges, that is, paths and relationships. Path
1417 # data does not come from a Moose class so is here manually.
ef9d481f 1418 my $edi = 0;
1419 my %edge_data_keys;
bbd064a9 1420 my %edge_attributes = (
3a2ebbf4 1421 witness => 'string', # ID/label for a path
3a2ebbf4 1422 extra => 'boolean', # Path key
3a2ebbf4 1423 );
bbd064a9 1424 my @path_attributes = keys %edge_attributes; # track our manual additions
1425 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1426 foreach my $attr( $pmeta->get_all_attributes ) {
1427 next if $attr->name =~ /^_/;
bbd064a9 1428 next unless $save_types{$attr->type_constraint->name};
1429 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1430 }
1431 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1432 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1433 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1434 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1435 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1436 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1437 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1438 }
3a2ebbf4 1439
cc31ebaa 1440 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1441 my $xmlidname = $self->tradition->name;
1442 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1443 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1444 $xmlidname = '_'.$xmlidname;
1445 }
2c669bca 1446 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1447 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1448 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1449 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1450 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1451 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1452 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1453 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1454
2a812726 1455 # Tradition/collation attribute data
bbd064a9 1456 foreach my $datum ( keys %graph_attributes ) {
1457 my $value;
1458 if( $datum eq 'version' ) {
2a812726 1459 $value = '3.2';
9fef629b 1460 } elsif( ref( $graph_attributes{$datum} ) ) {
1461 my $sub = $graph_attributes{$datum};
1462 $value = &$sub();
bbd064a9 1463 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1464 $value = $self->tradition->$datum;
1465 } else {
1466 $value = $self->$datum;
1467 }
2c669bca 1468 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1469 }
8e1394aa 1470
1471 my $node_ctr = 0;
1472 my %node_hash;
22222af9 1473 # Add our readings to the graph
3a2ebbf4 1474 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1475 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1476 ( $n->rank < $start->rank || $n->rank > $end->rank );
1477 $use_readings{$n->id} = 1;
2c669bca 1478 # Add to the main graph
1479 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1480 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1481 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1482 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1483 foreach my $d ( keys %reading_attributes ) {
255875b8 1484 my $nval = $n->$d;
7cd9f181 1485 # Custom serialization
1486 if( $d eq 'lexemes' ) {
1487 # If nval is a true value, we have lexemes so we need to
1488 # serialize them. Otherwise set nval to undef so that the
1489 # key is excluded from this reading.
1490 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1491 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1492 $nval = undef;
7cd9f181 1493 }
cc31ebaa 1494 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1495 # Adjust the ranks within the subgraph.
cc31ebaa 1496 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1497 : $nval - $rankoffset;
414cc046 1498 }
255875b8 1499 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1500 if defined $nval;
1501 }
b15511bf 1502 }
1503
2c669bca 1504 # Add the path edges to the sequence graph
df6d9812 1505 my $edge_ctr = 0;
3a2ebbf4 1506 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1507 # We add an edge in the graphml for every witness in $e.
a30ca502 1508 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1509 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1510 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1511 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1512 # Skip any path from start to end; that witness is not in the subgraph.
1513 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1514 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1515 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1516 $node_hash{ $e->[0] },
1517 $node_hash{ $e->[1] } );
2c669bca 1518 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1519 $edge_el->setAttribute( 'source', $from );
1520 $edge_el->setAttribute( 'target', $to );
1521 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1522
1523 # It's a witness path, so add the witness
1524 my $base = $wit;
1525 my $key = $edge_data_keys{'witness'};
1526 # Is this an ante-corr witness?
1527 my $aclabel = $self->ac_label;
1528 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1529 # Keep the base witness
1530 $base = $1;
1531 # ...and record that this is an 'extra' reading path
1532 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1533 }
1534 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1535 }
1536 }
1537
cc31ebaa 1538 # Report the actual number of nodes and edges that went in
1539 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1540 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1541
22222af9 1542 # Add the relationship graph to the XML
bbd064a9 1543 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1544 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1545 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1546
94c00c71 1547 # Save and return the thing
1548 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1549 return $result;
df6d9812 1550}
1551
b15511bf 1552sub _add_graphml_data {
1553 my( $el, $key, $value ) = @_;
b15511bf 1554 return unless defined $value;
c9bf3dbf 1555 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1556 $data_el->setAttribute( 'key', $key );
1557 $data_el->appendText( $value );
8e1394aa 1558}
1559
4e5a7b2c 1560=head2 as_csv
910a0a6d 1561
1562Returns a CSV alignment table representation of the collation graph, one
2c669bca 1563row per witness (or witness uncorrected.)
910a0a6d 1564
ce5966fb 1565=head2 as_tsv
1566
1567Returns a tab-separated alignment table representation of the collation graph,
1568one row per witness (or witness uncorrected.)
1569
16203db5 1570=begin testing
1571
1572use Text::Tradition;
34ca808b 1573use Text::CSV;
16203db5 1574
1575my $READINGS = 311;
1576my $PATHS = 361;
34ca808b 1577my $WITS = 13;
1578my $WITAC = 4;
16203db5 1579
1580my $datafile = 't/data/florilegium_tei_ps.xml';
1581my $tradition = Text::Tradition->new( 'input' => 'TEI',
1582 'name' => 'test0',
1583 'file' => $datafile,
1584 'linear' => 1 );
1585
1586my $c = $tradition->collation;
1587# Export the thing to CSV
1588my $csvstr = $c->as_csv();
34ca808b 1589# Count the columns
1590my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1591my @lines = split(/\n/, $csvstr );
1592ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1593is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
cbc8e08f 1594my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1595ok( @q_ac, "Found a layered witness" );
1596
16203db5 1597my $t2 = Text::Tradition->new( input => 'Tabular',
1598 name => 'test2',
1599 string => $csvstr,
1600 sep_char => ',' );
1601is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1602is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1603
1604# Now do it with TSV
1605my $tsvstr = $c->as_tsv();
1606my $t3 = Text::Tradition->new( input => 'Tabular',
1607 name => 'test3',
1608 string => $tsvstr,
1609 sep_char => "\t" );
1610is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1611is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1612
4e64b669 1613my $table = $c->alignment_table;
34ca808b 1614my $noaccsv = $c->as_csv({ noac => 1 });
1615my @noaclines = split(/\n/, $noaccsv );
1616ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1617is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
4e64b669 1618is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
34ca808b 1619
cbc8e08f 1620my $safecsv = $c->as_csv({ safe_ac => 1});
1621my @safelines = split(/\n/, $safecsv );
1622ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1623is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1624@q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1625ok( @q_ac, "Found a sanitized layered witness" );
1626is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
16203db5 1627
1628=end testing
1629
910a0a6d 1630=cut
1631
ce5966fb 1632sub _tabular {
34ca808b 1633 my( $self, $opts ) = @_;
1634 my $table = $self->alignment_table( $opts );
16203db5 1635 my $csv_options = { binary => 1, quote_null => 0 };
34ca808b 1636 $csv_options->{'sep_char'} = $opts->{fieldsep};
1637 if( $opts->{fieldsep} eq "\t" ) {
16203db5 1638 # If it is really tab separated, nothing is an escape char.
1639 $csv_options->{'quote_char'} = undef;
1640 $csv_options->{'escape_char'} = '';
1641 }
1642 my $csv = Text::CSV->new( $csv_options );
910a0a6d 1643 my @result;
2c669bca 1644 # Make the header row
1645 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
43800a64 1646 push( @result, $csv->string );
2c669bca 1647 # Make the rest of the rows
1648 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1649 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1650 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1651 $csv->combine( @row );
43800a64 1652 push( @result, $csv->string );
910a0a6d 1653 }
3a2ebbf4 1654 return join( "\n", @result );
910a0a6d 1655}
1656
ce5966fb 1657sub as_csv {
1658 my $self = shift;
34ca808b 1659 my $opts = shift || {};
1660 $opts->{fieldsep} = ',';
1661 return $self->_tabular( $opts );
ce5966fb 1662}
1663
1664sub as_tsv {
1665 my $self = shift;
34ca808b 1666 my $opts = shift || {};
1667 $opts->{fieldsep} = "\t";
1668 return $self->_tabular( $opts );
ce5966fb 1669}
1670
248276a2 1671=head2 alignment_table
2c669bca 1672
566f4595 1673Return a reference to an alignment table, in a slightly enhanced CollateX
1674format which looks like this:
1675
1676 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1677 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1678 { witness => "SIG2",
4e5a7b2c 1679 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1680 ... ],
1681 length => TEXTLEN };
1682
2c669bca 1683=cut
9f3ba6f7 1684
1dd07bda 1685sub alignment_table {
34ca808b 1686 my( $self, $opts ) = @_;
4e64b669 1687 if( $self->has_cached_table ) {
cbc8e08f 1688 return $self->cached_table
1689 unless $opts->{noac} || $opts->{safe_ac};
4e64b669 1690 }
1dd07bda 1691
0ecb975c 1692 # Make sure we can do this
1693 throw( "Need a linear graph in order to make an alignment table" )
1694 unless $self->linear;
b39e7cb5 1695 $self->calculate_ranks()
1696 unless $self->_graphcalc_done && $self->end->has_rank;
1697
2c669bca 1698 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1699 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1700 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
6771a1b1 1701 # say STDERR "Making witness row(s) for " . $wit->sigil;
1f7aa795 1702 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1703 my @row = _make_witness_row( \@wit_path, \@all_pos );
bed6ce83 1704 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1705 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1706 push( @{$table->{'alignment'}}, $witobj );
34ca808b 1707 if( $wit->is_layered && !$opts->{noac} ) {
1f7aa795 1708 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1709 $wit->sigil.$self->ac_label );
1dd07bda 1710 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
cbc8e08f 1711 my $witlabel = $opts->{safe_ac}
1712 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1713 my $witacobj = { 'witness' => $witlabel,
bed6ce83 1714 'tokens' => \@ac_row };
1715 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1716 push( @{$table->{'alignment'}}, $witacobj );
910a0a6d 1717 }
1718 }
cbc8e08f 1719 unless( $opts->{noac} || $opts->{safe_ac} ) {
4e64b669 1720 $self->cached_table( $table );
1721 }
1dd07bda 1722 return $table;
910a0a6d 1723}
1724
1725sub _make_witness_row {
1dd07bda 1726 my( $path, $positions ) = @_;
910a0a6d 1727 my %char_hash;
1728 map { $char_hash{$_} = undef } @$positions;
2c669bca 1729 my $debug = 0;
910a0a6d 1730 foreach my $rdg ( @$path ) {
6771a1b1 1731 say STDERR "rank " . $rdg->rank if $debug;
1732 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1dd07bda 1733 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1734 }
1735 my @row = map { $char_hash{$_} } @$positions;
eca16057 1736 # Fill in lacuna markers for undef spots in the row
1737 my $last_el = shift @row;
1738 my @filled_row = ( $last_el );
1739 foreach my $el ( @row ) {
0e476982 1740 # If we are using node reference, make the lacuna node appear many times
1741 # in the table. If not, use the lacuna tag.
1dd07bda 1742 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1743 $el = $last_el;
eca16057 1744 }
1745 push( @filled_row, $el );
1746 $last_el = $el;
1747 }
1748 return @filled_row;
910a0a6d 1749}
1750
248276a2 1751
4e5a7b2c 1752=head1 NAVIGATION METHODS
910a0a6d 1753
4e5a7b2c 1754=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1755
1756Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1757with $last, for the witness given in $sigil. If a $backup sigil is
1758specified (e.g. when walking a layered witness), it will be used wherever
1759no $sigil path exists. If there is a base text reading, that will be
1760used wherever no path exists for $sigil or $backup.
e2902068 1761
1762=cut
1763
910a0a6d 1764# TODO Think about returning some lazy-eval iterator.
b0b4421a 1765# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1766
e2902068 1767sub reading_sequence {
861c3e27 1768 my( $self, $start, $end, $witness ) = @_;
e2902068 1769
930ff666 1770 $witness = $self->baselabel unless $witness;
e2902068 1771 my @readings = ( $start );
1772 my %seen;
1773 my $n = $start;
3a2ebbf4 1774 while( $n && $n->id ne $end->id ) {
1775 if( exists( $seen{$n->id} ) ) {
63778331 1776 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1777 }
3a2ebbf4 1778 $seen{$n->id} = 1;
910a0a6d 1779
861c3e27 1780 my $next = $self->next_reading( $n, $witness );
44771cf2 1781 unless( $next ) {
63778331 1782 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1783 }
910a0a6d 1784 push( @readings, $next );
1785 $n = $next;
e2902068 1786 }
1787 # Check that the last reading is our end reading.
1788 my $last = $readings[$#readings];
63778331 1789 throw( "Last reading found from " . $start->text .
1790 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1791 unless $last->id eq $end->id;
e2902068 1792
1793 return @readings;
1794}
1795
4e5a7b2c 1796=head2 next_reading( $reading, $sigil );
8e1394aa 1797
4a8828f0 1798Returns the reading that follows the given reading along the given witness
930ff666 1799path.
8e1394aa 1800
1801=cut
1802
4a8828f0 1803sub next_reading {
e2902068 1804 # Return the successor via the corresponding path.
8e1394aa 1805 my $self = shift;
3a2ebbf4 1806 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1807 return undef unless $answer;
3a2ebbf4 1808 return $self->reading( $answer );
8e1394aa 1809}
1810
4e5a7b2c 1811=head2 prior_reading( $reading, $sigil )
8e1394aa 1812
4a8828f0 1813Returns the reading that precedes the given reading along the given witness
930ff666 1814path.
8e1394aa 1815
1816=cut
1817
4a8828f0 1818sub prior_reading {
e2902068 1819 # Return the predecessor via the corresponding path.
8e1394aa 1820 my $self = shift;
3a2ebbf4 1821 my $answer = $self->_find_linked_reading( 'prior', @_ );
1822 return $self->reading( $answer );
8e1394aa 1823}
1824
4a8828f0 1825sub _find_linked_reading {
861c3e27 1826 my( $self, $direction, $node, $path ) = @_;
1827
1828 # Get a backup if we are dealing with a layered witness
1829 my $alt_path;
1830 my $aclabel = $self->ac_label;
1831 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1832 $alt_path = $1;
1833 }
1834
e2902068 1835 my @linked_paths = $direction eq 'next'
3a2ebbf4 1836 ? $self->sequence->edges_from( $node )
1837 : $self->sequence->edges_to( $node );
e2902068 1838 return undef unless scalar( @linked_paths );
8e1394aa 1839
e2902068 1840 # We have to find the linked path that contains all of the
1841 # witnesses supplied in $path.
1842 my( @path_wits, @alt_path_wits );
4e5a7b2c 1843 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1844 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1845 my $base_le;
1846 my $alt_le;
1847 foreach my $le ( @linked_paths ) {
3a2ebbf4 1848 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1849 $base_le = $le;
910a0a6d 1850 }
508fd430 1851 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1852 if( _is_within( \@path_wits, \@le_wits ) ) {
1853 # This is the right path.
1854 return $direction eq 'next' ? $le->[1] : $le->[0];
1855 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1856 $alt_le = $le;
1857 }
8e1394aa 1858 }
e2902068 1859 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1860 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1861 if $alt_le;
e2902068 1862
1863 # Got this far? Return the base path if it exists.
3a2ebbf4 1864 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1865 if $base_le;
e2902068 1866
1867 # Got this far? We have no appropriate path.
2c669bca 1868 warn "Could not find $direction node from " . $node->id
910a0a6d 1869 . " along path $path";
8e1394aa 1870 return undef;
1871}
1872
4a8828f0 1873# Some set logic.
1874sub _is_within {
1875 my( $set1, $set2 ) = @_;
7854e12e 1876 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1877 foreach my $el ( @$set1 ) {
910a0a6d 1878 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1879 }
1880 return $ret;
1881}
1882
4e5a7b2c 1883# Return the string that joins together a list of witnesses for
1884# display on a single path.
1885sub _witnesses_of_label {
1886 my( $self, $label ) = @_;
1887 my $regex = $self->wit_list_separator;
1888 my @answer = split( /\Q$regex\E/, $label );
1889 return @answer;
b0b4421a 1890}
1891
d4b75f44 1892=head2 common_readings
1893
1894Returns the list of common readings in the graph (i.e. those readings that are
1895shared by all non-lacunose witnesses.)
1896
1897=cut
1898
1899sub common_readings {
1900 my $self = shift;
1901 my @common = grep { $_->is_common } $self->readings;
1902 return @common;
1903}
1904
fae52efd 1905=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1906
1907Returns the text of a witness (plus its backup, if we are using a layer)
1908as stored in the collation. The text is returned as a string, where the
1909individual readings are joined with spaces and the meta-readings (e.g.
1910lacunae) are omitted. Optional specification of $start and $end allows
1911the generation of a subset of the witness text.
4e5a7b2c 1912
b0b4421a 1913=cut
1914
1915sub path_text {
861c3e27 1916 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1917 $start = $self->start unless $start;
1918 $end = $self->end unless $end;
861c3e27 1919 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1920 my $pathtext = '';
1921 my $last;
1922 foreach my $r ( @path ) {
6ad2ce78 1923 unless ( $r->join_prior || !$last || $last->join_next ) {
1924 $pathtext .= ' ';
1925 }
1926 $pathtext .= $r->text;
629e27b0 1927 $last = $r;
1928 }
1929 return $pathtext;
b0b4421a 1930}
4e5a7b2c 1931
1932=head1 INITIALIZATION METHODS
1933
1934These are mostly for use by parsers.
1935
1936=head2 make_witness_path( $witness )
1937
1938Link the array of readings contained in $witness->path (and in
1939$witness->uncorrected_path if it exists) into collation paths.
1940Clear out the arrays when finished.
de51424a 1941
4e5a7b2c 1942=head2 make_witness_paths
1943
1944Call make_witness_path for all witnesses in the tradition.
1945
1946=cut
930ff666 1947
7e450e44 1948# For use when a collation is constructed from a base text and an apparatus.
1949# We have the sequences of readings and just need to add path edges.
1f7aa795 1950# When we are done, clear out the witness path attributes, as they are no
1951# longer needed.
1952# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1953
6a222840 1954sub make_witness_paths {
1955 my( $self ) = @_;
910a0a6d 1956 foreach my $wit ( $self->tradition->witnesses ) {
6771a1b1 1957 # say STDERR "Making path for " . $wit->sigil;
910a0a6d 1958 $self->make_witness_path( $wit );
7854e12e 1959 }
7854e12e 1960}
1961
6a222840 1962sub make_witness_path {
7854e12e 1963 my( $self, $wit ) = @_;
1964 my @chain = @{$wit->path};
15d2d3df 1965 my $sig = $wit->sigil;
fae52efd 1966 # Add start and end if necessary
1967 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1968 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1969 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1970 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1971 }
1f7aa795 1972 if( $wit->is_layered ) {
d9e873d0 1973 @chain = @{$wit->uncorrected_path};
fae52efd 1974 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1975 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1976 foreach my $idx( 0 .. $#chain-1 ) {
1977 my $source = $chain[$idx];
1978 my $target = $chain[$idx+1];
1979 $self->add_path( $source, $target, $sig.$self->ac_label )
1980 unless $self->has_path( $source, $target, $sig );
1981 }
15d2d3df 1982 }
1f7aa795 1983 $wit->clear_path;
1984 $wit->clear_uncorrected_path;
e2902068 1985}
1986
4e5a7b2c 1987=head2 calculate_ranks
1988
1989Calculate the reading ranks (that is, their aligned positions relative
1990to each other) for the graph. This can only be called on linear collations.
1991
b365fbae 1992=begin testing
1993
1994use Text::Tradition;
1995
1996my $cxfile = 't/data/Collatex-16.xml';
1997my $t = Text::Tradition->new(
1998 'name' => 'inline',
1999 'input' => 'CollateX',
2000 'file' => $cxfile,
2001 );
2002my $c = $t->collation;
2003
2004# Make an svg
bfcbcecb 2005my $table = $c->alignment_table;
2006ok( $c->has_cached_table, "Alignment table was cached" );
2007is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 2008$c->calculate_ranks;
bfcbcecb 2009is( $c->alignment_table, $table, "Cached table retained with no rank change" );
864ee4bf 2010$c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2011is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2012$c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2013isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
b365fbae 2014
2015=end testing
2016
4e5a7b2c 2017=cut
2018
910a0a6d 2019sub calculate_ranks {
2020 my $self = shift;
b365fbae 2021 # Save the existing ranks, in case we need to invalidate the cached SVG.
8d9494a8 2022 throw( "Cannot calculate ranks on a non-linear graph" )
2023 unless $self->linear;
b365fbae 2024 my %existing_ranks;
ac4d7ac2 2025 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 2026
2027 # Do the rankings based on the relationship equivalence graph, starting
2028 # with the start node.
56772e8c 2029 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2030
910a0a6d 2031 # Transfer our rankings from the topological graph to the real one.
2032 foreach my $r ( $self->readings ) {
cecbe56d 2033 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 2034 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 2035 } else {
63778331 2036 # Die. Find the last rank we calculated.
359944f7 2037 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2038 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 2039 $self->readings;
2040 my $last = pop @all_defined;
2041 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 2042 }
de51424a 2043 }
bfcbcecb 2044 # Do we need to invalidate the cached data?
be3af600 2045 if( $self->has_cached_table ) {
b365fbae 2046 foreach my $r ( $self->readings ) {
7c293912 2047 next if defined( $existing_ranks{$r} )
2048 && $existing_ranks{$r} == $r->rank;
c1915ab9 2049 # Something has changed, so clear the cache
bfcbcecb 2050 $self->_clear_cache;
c1915ab9 2051 # ...and recalculate the common readings.
2052 $self->calculate_common_readings();
b365fbae 2053 last;
2054 }
2055 }
c1915ab9 2056 # The graph calculation information is now up to date.
2057 $self->_graphcalc_done(1);
8e1394aa 2058}
3a1f2523 2059
c1915ab9 2060sub _clear_cache {
2061 my $self = shift;
c1915ab9 2062 $self->wipe_table if $self->has_cached_table;
2063}
2064
2065
4e5a7b2c 2066=head2 flatten_ranks
2067
2068A convenience method for parsing collation data. Searches the graph for readings
2069with the same text at the same rank, and merges any that are found.
2070
2071=cut
2072
0e476982 2073sub flatten_ranks {
4ef65ab4 2074 my ( $self, %args ) = shift;
0e476982 2075 my %unique_rank_rdg;
bf6e338d 2076 my $changed;
4ef65ab4 2077 foreach my $p ( $self->identical_readings( %args ) ) {
2078 # say STDERR "Combining readings at same rank: @$p";
2079 $changed = 1;
2080 $self->merge_readings( @$p );
2081 # TODO see if this now makes a common point.
7a0956c1 2082 }
2083 # If we merged readings, the ranks are still fine but the alignment
2084 # table is wrong. Wipe it.
2085 $self->wipe_table() if $changed;
2086}
2087
2088=head2 identical_readings
2089=head2 identical_readings( start => $startnode, end => $endnode )
2090=head2 identical_readings( startrank => $startrank, endrank => $endrank )
2091
2092Goes through the graph identifying all pairs of readings that appear to be
2093identical, and therefore able to be merged into a single reading. Returns the
2094relevant identical pairs. Can be restricted to run over only a part of the
2095graph, specified either by node or by rank.
2096
2097=cut
2098
2099sub identical_readings {
2100 my ( $self, %args ) = @_;
2101 # Find where we should start and end.
2102 my $startrank = $args{startrank} || 0;
2103 if( $args{start} ) {
2104 throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2105 && $self->reading( $args{start} )->has_rank;
2106 $startrank = $self->reading( $args{start} )->rank;
2107 }
2108 my $endrank = $args{endrank} || $self->end->rank;
2109 if( $args{end} ) {
2110 throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2111 && $self->reading( $args{end} )->has_rank;
3c234eb6 2112 $endrank = $self->reading( $args{end} )->rank;
7a0956c1 2113 }
2114
2115 # Make sure the ranks are correct.
2116 unless( $self->_graphcalc_done ) {
2117 $self->calculate_ranks;
2118 }
2119 # Go through the readings looking for duplicates.
2120 my %unique_rank_rdg;
2121 my @pairs;
0e476982 2122 foreach my $rdg ( $self->readings ) {
2123 next unless $rdg->has_rank;
7a0956c1 2124 my $rk = $rdg->rank;
2125 next if $rk > $endrank || $rk < $startrank;
2126 my $key = $rk . "||" . $rdg->text;
0e476982 2127 if( exists $unique_rank_rdg{$key} ) {
07e6765f 2128 # Make sure they don't have different grammatical forms
2129 my $ur = $unique_rank_rdg{$key};
a445ce40 2130 if( $rdg->is_identical( $ur ) ) {
7a0956c1 2131 push( @pairs, [ $ur, $rdg ] );
07e6765f 2132 }
0e476982 2133 } else {
2134 $unique_rank_rdg{$key} = $rdg;
2135 }
7a0956c1 2136 }
2137
2138 return @pairs;
0e476982 2139}
4633f9e4 2140
2141
d4b75f44 2142=head2 calculate_common_readings
2143
2144Goes through the graph identifying the readings that appear in every witness
2145(apart from those with lacunae at that spot.) Marks them as common and returns
2146the list.
2147
2148=begin testing
2149
2150use Text::Tradition;
2151
2152my $cxfile = 't/data/Collatex-16.xml';
2153my $t = Text::Tradition->new(
2154 'name' => 'inline',
2155 'input' => 'CollateX',
2156 'file' => $cxfile,
2157 );
2158my $c = $t->collation;
2159
2160my @common = $c->calculate_common_readings();
2161is( scalar @common, 8, "Found correct number of common readings" );
2162my @marked = sort $c->common_readings();
2163is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 2164my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 2165is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2166
2167=end testing
2168
2169=cut
2170
2171sub calculate_common_readings {
2172 my $self = shift;
2173 my @common;
c1915ab9 2174 map { $_->is_common( 0 ) } $self->readings;
2175 # Implicitly calls calculate_ranks
1dd07bda 2176 my $table = $self->alignment_table;
d4b75f44 2177 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 2178 my @row = map { $_->{'tokens'}->[$idx]
2179 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2180 @{$table->{'alignment'}};
d4b75f44 2181 my %hash;
2182 foreach my $r ( @row ) {
2183 if( $r ) {
2184 $hash{$r->id} = $r unless $r->is_meta;
2185 } else {
2186 $hash{'UNDEF'} = $r;
2187 }
2188 }
2189 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2190 my( $r ) = values %hash;
2191 $r->is_common( 1 );
2192 push( @common, $r );
2193 }
2194 }
2195 return @common;
2196}
2197
861c3e27 2198=head2 text_from_paths
2199
2200Calculate the text array for all witnesses from the path, for later consistency
2201checking. Only to be used if there is no non-graph-based way to know the
2202original texts.
2203
2204=cut
2205
2206sub text_from_paths {
2207 my $self = shift;
2208 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 2209 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2210 my @text;
2211 foreach my $r ( @readings ) {
2212 next if $r->is_meta;
2213 push( @text, $r->text );
2214 }
861c3e27 2215 $wit->text( \@text );
2216 if( $wit->is_layered ) {
5164a6f0 2217 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2218 $wit->sigil.$self->ac_label );
2219 my @uctext;
2220 foreach my $r ( @ucrdgs ) {
2221 next if $r->is_meta;
2222 push( @uctext, $r->text );
2223 }
2224 $wit->layertext( \@uctext );
861c3e27 2225 }
2226 }
2227}
0e476982 2228
4e5a7b2c 2229=head1 UTILITY FUNCTIONS
2230
2231=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 2232
4e5a7b2c 2233Find the last reading that occurs in sequence before both the given readings.
414cc046 2234At the very least this should be $self->start.
4e5a7b2c 2235
2236=head2 common_successor( $reading_a, $reading_b )
2237
2238Find the first reading that occurs in sequence after both the given readings.
414cc046 2239At the very least this should be $self->end.
4e5a7b2c 2240
22222af9 2241=begin testing
2242
2243use Text::Tradition;
2244
2245my $cxfile = 't/data/Collatex-16.xml';
2246my $t = Text::Tradition->new(
2247 'name' => 'inline',
2248 'input' => 'CollateX',
2249 'file' => $cxfile,
2250 );
2251my $c = $t->collation;
2252
679f17e1 2253is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 2254 'n20', "Found correct common predecessor" );
679f17e1 2255is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 2256 '__END__', "Found correct common successor" );
22222af9 2257
4e5a7b2c 2258is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 2259 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 2260is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 2261 '__END__', "Found correct common successor for readings on same path" );
22222af9 2262
2263=end testing
2264
2265=cut
2266
2267## Return the closest reading that is a predecessor of both the given readings.
2268sub common_predecessor {
2269 my $self = shift;
4e5a7b2c 2270 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 2271 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 2272}
2273
2274sub common_successor {
2275 my $self = shift;
4e5a7b2c 2276 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 2277 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 2278}
2279
414cc046 2280
2281# TODO think about how to do this without ranks...
027d819c 2282sub _common_in_path {
22222af9 2283 my( $self, $r1, $r2, $dir ) = @_;
414cc046 2284 my $iter = $self->end->rank;
22222af9 2285 my @candidates;
414cc046 2286 my @last_r1 = ( $r1 );
2287 my @last_r2 = ( $r2 );
2288 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 2289 my %all_seen;
6771a1b1 2290 # say STDERR "Finding common $dir for $r1, $r2";
22222af9 2291 while( !@candidates ) {
414cc046 2292 last unless $iter--; # Avoid looping infinitely
2293 # Iterate separately down the graph from r1 and r2
2294 my( @new_lc1, @new_lc2 );
2295 foreach my $lc ( @last_r1 ) {
2296 foreach my $p ( $lc->$dir ) {
2297 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
6771a1b1 2298 # say STDERR "Path candidate $p from $lc";
414cc046 2299 push( @candidates, $p );
002e3600 2300 } elsif( !$all_seen{$p->id} ) {
414cc046 2301 $all_seen{$p->id} = 'r1';
2302 push( @new_lc1, $p );
2303 }
2304 }
2305 }
2306 foreach my $lc ( @last_r2 ) {
22222af9 2307 foreach my $p ( $lc->$dir ) {
414cc046 2308 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
6771a1b1 2309 # say STDERR "Path candidate $p from $lc";
22222af9 2310 push( @candidates, $p );
002e3600 2311 } elsif( !$all_seen{$p->id} ) {
414cc046 2312 $all_seen{$p->id} = 'r2';
2313 push( @new_lc2, $p );
22222af9 2314 }
2315 }
2316 }
414cc046 2317 @last_r1 = @new_lc1;
2318 @last_r2 = @new_lc2;
22222af9 2319 }
2320 my @answer = sort { $a->rank <=> $b->rank } @candidates;
2321 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2322}
2323
63778331 2324sub throw {
2325 Text::Tradition::Error->throw(
2326 'ident' => 'Collation error',
2327 'message' => $_[0],
2328 );
2329}
2330
dd3b58b0 2331no Moose;
2332__PACKAGE__->meta->make_immutable;
e867486f 2333
a445ce40 2334=head1 BUGS/TODO
2335
2336=over
2337
2338=item * Rework XML serialization in a more modular way
2339
2340=back
2341
027d819c 2342=head1 LICENSE
e867486f 2343
027d819c 2344This package is free software and is provided "as is" without express
2345or implied warranty. You can redistribute it and/or modify it under
2346the same terms as Perl itself.
e867486f 2347
027d819c 2348=head1 AUTHOR
e867486f 2349
027d819c 2350Tara L Andrews E<lt>aurum@cpan.orgE<gt>