fix bugs in adjacency list generation; add tests
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
6771a1b1 3use feature 'say';
910a0a6d 4use Encode qw( decode_utf8 );
5use File::Temp;
bfcbcecb 6use File::Which;
c9bf3dbf 7use Graph;
8e1394aa 8use IPC::Run qw( run binary );
522c1314 9use JSON qw/ to_json /;
82fa4d57 10use Text::CSV;
5c0072ef 11use Text::Tradition::Collation::Data;
b15511bf 12use Text::Tradition::Collation::Reading;
22222af9 13use Text::Tradition::Collation::RelationshipStore;
63778331 14use Text::Tradition::Error;
cc31ebaa 15use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
5c0072ef 16use XML::LibXML;
17use XML::LibXML::XPathContext;
dd3b58b0 18use Moose;
19
5c0072ef 20has _data => (
21 isa => 'Text::Tradition::Collation::Data',
22 is => 'ro',
23 required => 1,
24 handles => [ qw(
25 sequence
26 paths
27 _set_relations
28 relations
29 _set_start
30 _set_end
31 ac_label
32 has_cached_table
33 relationships
0bbea47d 34 relationship_types
5c0072ef 35 related_readings
36 get_relationship
37 del_relationship
38 equivalence
39 equivalence_graph
40 readings
41 reading
42 _add_reading
43 del_reading
44 has_reading
45 wit_list_separator
46 baselabel
47 linear
48 wordsep
49 start
50 end
51 cached_table
52 _graphcalc_done
53 has_cached_svg
54 wipe_table
55 )]
56);
dd3b58b0 57
3a2ebbf4 58has 'tradition' => (
59 is => 'ro',
d047cd52 60 isa => 'Text::Tradition',
8cfd99c4 61 writer => '_set_tradition',
8d9a1cd8 62 weak_ref => 1,
d047cd52 63 );
dd3b58b0 64
6b6c7098 65=encoding utf8
66
4e5a7b2c 67=head1 NAME
68
69Text::Tradition::Collation - a software model for a text collation
70
71=head1 SYNOPSIS
72
73 use Text::Tradition;
74 my $t = Text::Tradition->new(
75 'name' => 'this is a text',
76 'input' => 'TEI',
77 'file' => '/path/to/tei_parallel_seg_file.xml' );
78
79 my $c = $t->collation;
80 my @readings = $c->readings;
81 my @paths = $c->paths;
82 my @relationships = $c->relationships;
83
84 my $svg_variant_graph = $t->collation->as_svg();
85
86=head1 DESCRIPTION
87
88Text::Tradition is a library for representation and analysis of collated
89texts, particularly medieval ones. The Collation is the central feature of
90a Tradition, where the text, its sequence of readings, and its relationships
91between readings are actually kept.
92
93=head1 CONSTRUCTOR
94
95=head2 new
96
97The constructor. Takes a hash or hashref of the following arguments:
98
99=over
100
101=item * tradition - The Text::Tradition object to which the collation
102belongs. Required.
103
104=item * linear - Whether the collation should be linear; that is, whether
105transposed readings should be treated as two linked readings rather than one,
106and therefore whether the collation graph is acyclic. Defaults to true.
107
4e5a7b2c 108=item * baselabel - The default label for the path taken by a base text
109(if any). Defaults to 'base text'.
110
111=item * wit_list_separator - The string to join a list of witnesses for
112purposes of making labels in display graphs. Defaults to ', '.
113
114=item * ac_label - The extra label to tack onto a witness sigil when
115representing another layer of path for the given witness - that is, when
116a text has more than one possible reading due to scribal corrections or
117the like. Defaults to ' (a.c.)'.
118
4e483aa5 119=item * wordsep - The string used to separate words in the original text.
120Defaults to ' '.
121
4e5a7b2c 122=back
123
124=head1 ACCESSORS
125
126=head2 tradition
127
128=head2 linear
129
4e5a7b2c 130=head2 wit_list_separator
131
132=head2 baselabel
133
134=head2 ac_label
135
4e483aa5 136=head2 wordsep
137
4e5a7b2c 138Simple accessors for collation attributes.
139
140=head2 start
141
142The meta-reading at the start of every witness path.
143
144=head2 end
145
146The meta-reading at the end of every witness path.
147
148=head2 readings
149
150Returns all Reading objects in the graph.
151
152=head2 reading( $id )
153
154Returns the Reading object corresponding to the given ID.
155
156=head2 add_reading( $reading_args )
157
158Adds a new reading object to the collation.
159See L<Text::Tradition::Collation::Reading> for the available arguments.
160
161=head2 del_reading( $object_or_id )
162
163Removes the given reading from the collation, implicitly removing its
164paths and relationships.
165
4e5a7b2c 166=head2 has_reading( $id )
167
168Predicate to see whether a given reading ID is in the graph.
169
170=head2 reading_witnesses( $object_or_id )
171
172Returns a list of sigils whose witnesses contain the reading.
173
174=head2 paths
175
176Returns all reading paths within the document - that is, all edges in the
177collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
178
179=head2 add_path( $source, $target, $sigil )
180
181Links the given readings in the collation in sequence, under the given witness
182sigil. The readings may be specified by object or ID.
183
184=head2 del_path( $source, $target, $sigil )
185
186Links the given readings in the collation in sequence, under the given witness
187sigil. The readings may be specified by object or ID.
188
189=head2 has_path( $source, $target );
190
191Returns true if the two readings are linked in sequence in any witness.
192The readings may be specified by object or ID.
193
194=head2 relationships
195
196Returns all Relationship objects in the collation.
197
198=head2 add_relationship( $reading, $other_reading, $options )
199
200Adds a new relationship of the type given in $options between the two readings,
201which may be specified by object or ID. Returns a value of ( $status, @vectors)
202where $status is true on success, and @vectors is a list of relationship edges
203that were ultimately added.
204See L<Text::Tradition::Collation::Relationship> for the available options.
205
206=cut
dd3b58b0 207
5c0072ef 208sub BUILDARGS {
209 my ( $class, @args ) = @_;
210 my %args = @args == 1 ? %{ $args[0] } : @args;
211 # TODO determine these from the Moose::Meta object
212 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
213 linear wordsep start end cached_table _graphcalc_done);
214 my %data_args;
215 for my $attr (@delegate_attrs) {
216 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
217 }
218 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
219 return \%args;
220}
221
d047cd52 222sub BUILD {
3a2ebbf4 223 my $self = shift;
22222af9 224 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
e4b73942 225 $self->_set_start( $self->add_reading(
226 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
227 $self->_set_end( $self->add_reading(
228 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
d047cd52 229}
784877d9 230
b34fd19d 231=head2 register_relationship_type( %relationship_definition )
232
233Add a relationship type definition to this collation. The argument can be either a
234hash or a hashref, defining the properties of the relationship. For relationship types
235and their properties, see L<Text::Tradition::Collation::RelationshipType>.
236
237=head2 get_relationship_type( $relationship_name )
238
239Retrieve the RelationshipType object for the relationship with the given name.
240
241=cut
242
24efa55d 243sub register_relationship_type {
244 my $self = shift;
245 my %args = @_ == 1 ? %{$_[0]} : @_;
246 if( $self->relations->has_type( $args{name} ) ) {
247 throw( 'Relationship type ' . $args{name} . ' already registered' );
248 }
249 $self->relations->add_type( %args );
250}
251
39d1ddc7 252sub get_relationship_type {
253 my( $self, $name ) = @_;
254 return $self->relations->has_type( $name )
255 ? $self->relations->type( $name ) : undef;
256}
257
3a2ebbf4 258### Reading construct/destruct functions
259
260sub add_reading {
261 my( $self, $reading ) = @_;
262 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
263 my %args = %$reading;
e4b73942 264 if( $args{'init'} ) {
265 # If we are initializing an empty collation, don't assume that we
266 # have set a tradition.
267 delete $args{'init'};
8943ff68 268 } elsif( $self->tradition->can('language') && $self->tradition->has_language
269 && !exists $args{'language'} ) {
6ad2ce78 270 $args{'language'} = $self->tradition->language;
271 }
3a2ebbf4 272 $reading = Text::Tradition::Collation::Reading->new(
273 'collation' => $self,
274 %args );
275 }
276 # First check to see if a reading with this ID exists.
277 if( $self->reading( $reading->id ) ) {
63778331 278 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 279 }
c1915ab9 280 $self->_graphcalc_done(0);
3a2ebbf4 281 $self->_add_reading( $reading->id => $reading );
282 # Once the reading has been added, put it in both graphs.
283 $self->sequence->add_vertex( $reading->id );
cecbe56d 284 $self->relations->add_reading( $reading->id );
3a2ebbf4 285 return $reading;
eca16057 286};
287
3a2ebbf4 288around del_reading => sub {
289 my $orig = shift;
290 my $self = shift;
291 my $arg = shift;
292
cecbe56d 293 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
294 $arg = $arg->id;
3a2ebbf4 295 }
3a2ebbf4 296 # Remove the reading from the graphs.
c1915ab9 297 $self->_graphcalc_done(0);
4e483aa5 298 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 299 $self->sequence->delete_vertex( $arg );
300 $self->relations->delete_reading( $arg );
3a2ebbf4 301
302 # Carry on.
cecbe56d 303 $self->$orig( $arg );
3a2ebbf4 304};
7854e12e 305
3c234eb6 306=head2 merge_readings( $main, $second, $concatenate, $with_str )
307
308Merges the $second reading into the $main one. If $concatenate is true, then
309the merged node will carry the text of both readings, concatenated with either
310$with_str (if specified) or a sensible default (the empty string if the
311appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
312
313The first two arguments may be either readings or reading IDs.
314
4e483aa5 315=begin testing
316
317use Text::Tradition;
58568d5c 318use TryCatch;
4e483aa5 319
320my $cxfile = 't/data/Collatex-16.xml';
321my $t = Text::Tradition->new(
322 'name' => 'inline',
323 'input' => 'CollateX',
324 'file' => $cxfile,
325 );
326my $c = $t->collation;
327
328my $rno = scalar $c->readings;
4ef65ab4 329# Split n21 ('unto') for testing purposes
4e483aa5 330my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
331my $old_r = $c->reading( 'n21' );
332$old_r->alter_text( 'to' );
333$c->del_path( 'n20', 'n21', 'A' );
334$c->add_path( 'n20', 'n21p0', 'A' );
335$c->add_path( 'n21p0', 'n21', 'A' );
7a0956c1 336$c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
4e483aa5 337$c->flatten_ranks();
338ok( $c->reading( 'n21p0' ), "New reading exists" );
339is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
340
679f17e1 341# Combine n3 and n4 ( with his )
4e483aa5 342$c->merge_readings( 'n3', 'n4', 1 );
343ok( !$c->reading('n4'), "Reading n4 is gone" );
344is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
345
679f17e1 346# Collapse n9 and n10 ( rood / root )
347$c->merge_readings( 'n9', 'n10' );
348ok( !$c->reading('n10'), "Reading n10 is gone" );
349is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 350
58568d5c 351# Try to combine n21 and n21p0. This should break.
4e483aa5 352my $remaining = $c->reading('n21');
353$remaining ||= $c->reading('n22'); # one of these should still exist
58568d5c 354try {
355 $c->merge_readings( 'n21p0', $remaining, 1 );
356 ok( 0, "Bad reading merge changed the graph" );
357} catch( Text::Tradition::Error $e ) {
358 like( $e->message, qr/neither concatenated nor collated/, "Expected exception from bad concatenation" );
359} catch {
360 ok( 0, "Unexpected error on bad reading merge: $@" );
361}
362
363try {
364 $c->calculate_ranks();
365 ok( 1, "Graph is still evidently whole" );
366} catch( Text::Tradition::Error $e ) {
367 ok( 0, "Caught a rank exception: " . $e->message );
368}
4e483aa5 369
370=end testing
371
372=cut
7854e12e 373
3a2ebbf4 374sub merge_readings {
375 my $self = shift;
376
56772e8c 377 # Sanity check
378 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
379 my $mergemeta = $kept_obj->is_meta;
380 throw( "Cannot merge meta and non-meta reading" )
381 unless ( $mergemeta && $del_obj->is_meta )
382 || ( !$mergemeta && !$del_obj->is_meta );
383 if( $mergemeta ) {
384 throw( "Cannot merge with start or end node" )
385 if( $kept_obj eq $self->start || $kept_obj eq $self->end
386 || $del_obj eq $self->start || $del_obj eq $self->end );
a445ce40 387 throw( "Cannot combine text of meta readings" ) if $combine;
56772e8c 388 }
e4f00901 389 # We can only merge readings in a linear graph if:
390 # - they are contiguous with only one edge between them, OR
391 # - they are at equivalent ranks in the graph.
392 if( $self->linear ) {
393 my @delpred = $del_obj->predecessors;
394 my @keptsuc = $kept_obj->successors;
395 unless ( @delpred == 1 && $delpred[0] eq $kept_obj
396 && @keptsuc == 1 && $keptsuc[0] eq $del_obj ) {
397 my( $is_ok, $msg ) = $self->relations->relationship_valid(
398 $kept_obj, $del_obj, 'collated' );
399 unless( $is_ok ) {
400 throw( "Readings $kept_obj and $del_obj can be neither concatenated nor collated" );
401 }
402 }
403 }
404
3a2ebbf4 405 # We only need the IDs for adding paths to the graph, not the reading
406 # objects themselves.
56772e8c 407 my $kept = $kept_obj->id;
408 my $deleted = $del_obj->id;
c1915ab9 409 $self->_graphcalc_done(0);
10943ab0 410
3a2ebbf4 411 # The kept reading should inherit the paths and the relationships
412 # of the deleted reading.
413 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
414 my @vector = ( $kept );
415 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
416 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 417 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 418 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
419 $self->sequence->add_edge( @vector );
420 my $fwits = $self->sequence->get_edge_attributes( @vector );
421 @wits{keys %$fwits} = values %$fwits;
422 $self->sequence->set_edge_attributes( @vector, \%wits );
423 }
cecbe56d 424 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 425
426 # Do the deletion deed.
4e483aa5 427 if( $combine ) {
869a1ada 428 # Combine the text of the readings
4e483aa5 429 my $joinstr = $combine_char;
430 unless( defined $joinstr ) {
431 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
432 $joinstr = $self->wordsep unless defined $joinstr;
433 }
a445ce40 434 $kept_obj->_combine( $del_obj, $joinstr );
49d4f2ac 435 }
3a2ebbf4 436 $self->del_reading( $deleted );
437}
7854e12e 438
8d9494a8 439=head2 merge_related( @relationship_types )
440
441Merge all readings linked with the relationship types given. If any of the selected type(s) is not a colocation, the graph will no longer be linear. The majority/plurality reading in each case will be the one kept.
442
443WARNING: This operation cannot be undone.
444
445=cut
446
447=begin testing
448
58568d5c 449use Test::Warn;
8d9494a8 450use Text::Tradition;
451use TryCatch;
452
58568d5c 453my $t;
454warnings_exist {
455 $t = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
456} [qr/Cannot set relationship on a meta reading/],
457 "Got expected relationship drop warning on parse";
458
8d9494a8 459my $c = $t->collation;
460
461my %rdg_ids;
462map { $rdg_ids{$_} = 1 } $c->readings;
463$c->merge_related( 'orthographic' );
751ee528 464is( scalar( $c->readings ), keys( %rdg_ids ) - 9,
8d9494a8 465 "Successfully collapsed orthographic variation" );
751ee528 466map { $rdg_ids{$_} = undef } qw/ r13.3 r11.4 r8.5 r8.2 r7.7 r7.5 r7.4 r7.3 r7.1 /;
8d9494a8 467foreach my $rid ( keys %rdg_ids ) {
468 my $exp = $rdg_ids{$rid};
469 is( !$c->reading( $rid ), !$exp, "Reading $rid correctly " .
470 ( $exp ? "retained" : "removed" ) );
471}
472ok( $c->linear, "Graph is still linear" );
473try {
474 $c->calculate_ranks; # This should succeed
475 ok( 1, "Can still calculate ranks on the new graph" );
476} catch {
477 ok( 0, "Rank calculation on merged graph failed: $@" );
478}
479
480# Now add some transpositions
481$c->add_relationship( 'r8.4', 'r10.4', { type => 'transposition' } );
482$c->merge_related( 'transposition' );
751ee528 483is( scalar( $c->readings ), keys( %rdg_ids ) - 10,
8d9494a8 484 "Transposed relationship is merged away" );
485ok( !$c->reading('r8.4'), "Correct transposed reading removed" );
486ok( !$c->linear, "Graph is no longer linear" );
487try {
488 $c->calculate_ranks; # This should fail
489 ok( 0, "Rank calculation happened on nonlinear graph?!" );
490} catch ( Text::Tradition::Error $e ) {
491 is( $e->message, 'Cannot calculate ranks on a non-linear graph',
492 "Rank calculation on merged graph threw an error" );
493}
494
8d9494a8 495=end testing
496
497=cut
498
499# TODO: there should be a way to display merged without affecting the underlying data!
500
501sub merge_related {
502 my $self = shift;
503 my %reltypehash;
504 map { $reltypehash{$_} = 1 } @_;
505
506 # Set up the filter for finding related readings
507 my $filter = sub {
508 exists $reltypehash{$_[0]->type};
509 };
510
8d9494a8 511 # Go through all readings looking for related ones
512 foreach my $r ( $self->readings ) {
513 next unless $self->reading( "$r" ); # might have been deleted meanwhile
751ee528 514 while( my @related = $self->related_readings( $r, $filter ) ) {
8d9494a8 515 push( @related, $r );
516 @related = sort {
517 scalar $b->witnesses <=> scalar $a->witnesses
518 } @related;
519 my $keep = shift @related;
520 foreach my $delr ( @related ) {
58568d5c 521 $self->linear( 0 )
8d9494a8 522 unless( $self->get_relationship( $keep, $delr )->colocated );
523 $self->merge_readings( $keep, $delr );
524 }
525 }
526 }
8d9494a8 527}
528
6771a1b1 529=head2 compress_readings
530
531Where possible in the graph, compresses plain sequences of readings into a
532single reading. The sequences must consist of readings with no
533relationships to other readings, with only a single witness path between
534them and no other witness paths from either that would skip the other. The
535readings must also not be marked as nonsense or bad grammar.
536
537WARNING: This operation cannot be undone.
538
539=cut
540
541sub compress_readings {
542 my $self = shift;
f5e442f9 543 # Sanity check: first save the original text of each witness.
544 my %origtext;
545 foreach my $wit ( $self->tradition->witnesses ) {
546 $origtext{$wit->sigil} = $self->path_text( $wit->sigil );
547 if( $wit->is_layered ) {
548 my $acsig = $wit->sigil . $self->ac_label;
549 $origtext{$acsig} = $self->path_text( $acsig );
550 }
551 }
552
553 # Now do the deed.
6771a1b1 554 # Anywhere in the graph that there is a reading that joins only to a single
555 # successor, and neither of these have any relationships, just join the two
556 # readings.
6771a1b1 557 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
428bcf0b 558 # Now look for readings that can be joined to their successors.
a445ce40 559 next unless $rdg->is_combinable;
6771a1b1 560 my %seen;
561 while( $self->sequence->successors( $rdg ) == 1 ) {
562 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
563 throw( "Infinite loop" ) if $seen{$next->id};
564 $seen{$next->id} = 1;
565 last if $self->sequence->predecessors( $next ) > 1;
a445ce40 566 last unless $next->is_combinable;
6771a1b1 567 say "Joining readings $rdg and $next";
568 $self->merge_readings( $rdg, $next, 1 );
569 }
570 }
f5e442f9 571
572 # Finally, make sure we haven't screwed anything up.
6771a1b1 573 foreach my $wit ( $self->tradition->witnesses ) {
574 my $pathtext = $self->path_text( $wit->sigil );
6771a1b1 575 throw( "Text differs for witness " . $wit->sigil )
f5e442f9 576 unless $pathtext eq $origtext{$wit->sigil};
6771a1b1 577 if( $wit->is_layered ) {
f5e442f9 578 my $acsig = $wit->sigil . $self->ac_label;
579 $pathtext = $self->path_text( $acsig );
580 throw( "Layered text differs for witness " . $wit->sigil )
581 unless $pathtext eq $origtext{$acsig};
6771a1b1 582 }
583 }
584
585 $self->relations->rebuild_equivalence();
586 $self->calculate_ranks();
587}
3265b0ce 588
3a2ebbf4 589# Helper function for manipulating the graph.
590sub _stringify_args {
4e483aa5 591 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 592 $first = $first->id
593 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
594 $second = $second->id
595 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 596 return( $first, $second, @args );
3a2ebbf4 597}
df6d9812 598
4e5a7b2c 599# Helper function for manipulating the graph.
600sub _objectify_args {
601 my( $self, $first, $second, $arg ) = @_;
602 $first = $self->reading( $first )
603 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
604 $second = $self->reading( $second )
605 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
606 return( $first, $second, $arg );
607}
f97ef19e 608
609=head2 duplicate_reading( $reading, @witlist )
610
611Split the given reading into two, so that the new reading is in the path for
ef73c20a 612the witnesses given in @witlist. If the result is that certain non-colocated
613relationships (e.g. transpositions) are no longer valid, these will be removed.
614Returns the newly-created reading.
f97ef19e 615
616=begin testing
617
68e48c06 618use Test::More::UTF8;
f97ef19e 619use Text::Tradition;
68e48c06 620use TryCatch;
f97ef19e 621
622my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
623is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
624ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
625
626my $sc = $st->collation;
627my $numr = 17;
628ok( $sc->reading('n131'), "Tradition has the affected reading" );
629is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
630is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
631
632# Detach the erroneously collated reading
2dcb5d11 633my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
ef73c20a 634ok( $newr, "New reading was created" );
f97ef19e 635ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
636is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
637is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
3c234eb6 638my $csucc = $sc->common_successor( 'n131', 'n131_0' );
639is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
f97ef19e 640
641# Check that the bad transposition is gone
2dcb5d11 642is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
f97ef19e 643is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
644
e19635f8 645# The collation should not be fixed
646my @pairs = $sc->identical_readings();
647is( scalar @pairs, 0, "Not re-collated yet" );
f97ef19e 648# Fix the collation
3c234eb6 649ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
e19635f8 650@pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
3c234eb6 651is( scalar @pairs, 3, "Found three more identical readings" );
e19635f8 652is( $sc->end->rank, 11, "The ranks shifted appropriately" );
3c234eb6 653$sc->flatten_ranks();
f97ef19e 654is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
655
68e48c06 656# Check that we can't "duplicate" a reading with no wits or with all wits
657try {
658 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124' );
659 ok( 0, "Reading duplication without witnesses throws an error" );
660} catch( Text::Tradition::Error $e ) {
661 like( $e->message, qr/Must specify one or more witnesses/,
662 "Reading duplication without witnesses throws the expected error" );
663} catch {
664 ok( 0, "Reading duplication without witnesses threw the wrong error" );
665}
666
667try {
668 my( $badr, @del_rdgs ) = $sc->duplicate_reading( 'n124', 'Ba96', 'Mü11475' );
669 ok( 0, "Reading duplication with all witnesses throws an error" );
670} catch( Text::Tradition::Error $e ) {
671 like( $e->message, qr/Cannot join all witnesses/,
672 "Reading duplication with all witnesses throws the expected error" );
673} catch {
674 ok( 0, "Reading duplication with all witnesses threw the wrong error" );
675}
676
58568d5c 677try {
678 $sc->calculate_ranks();
679 ok( 1, "Graph is still evidently whole" );
680} catch( Text::Tradition::Error $e ) {
681 ok( 0, "Caught a rank exception: " . $e->message );
682}
683
f97ef19e 684=end testing
685
686=cut
687
688sub duplicate_reading {
689 my( $self, $r, @wits ) = @_;
68e48c06 690 # Check that we are not doing anything unwise.
691 throw( "Must specify one or more witnesses for the duplicated reading" )
692 unless @wits;
f97ef19e 693 unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
694 $r = $self->reading( $r );
695 }
696 throw( "Cannot duplicate a meta-reading" )
697 if $r->is_meta;
68e48c06 698 throw( "Cannot join all witnesses to the new reading" )
47cd4d16 699 if scalar( @wits ) == scalar( $r->witnesses );
68e48c06 700
f97ef19e 701 # Get all the reading attributes and duplicate them.
702 my $rmeta = Text::Tradition::Collation::Reading->meta;
703 my %args;
704 foreach my $attr( $rmeta->get_all_attributes ) {
705 next if $attr->name =~ /^_/;
706 my $acc = $attr->get_read_method;
707 if( !$acc && $attr->has_applied_traits ) {
708 my $tr = $attr->applied_traits;
709 if( $tr->[0] =~ /::(Array|Hash)$/ ) {
710 my $which = $1;
711 my %methods = reverse %{$attr->handles};
712 $acc = $methods{elements};
713 $args{$attr->name} = $which eq 'Array'
714 ? [ $r->$acc ] : { $r->$acc };
715 }
716 } else {
717 $args{$attr->name} = $r->$acc if $acc;
718 }
719 }
720 # By definition the new reading will no longer be common.
721 $args{is_common} = 0;
722 # The new reading also needs its own ID.
723 $args{id} = $self->_generate_dup_id( $r->id );
724
725 # Try to make the new reading.
726 my $newr = $self->add_reading( \%args );
727 # The old reading is also no longer common.
728 $r->is_common( 0 );
729
730 # For each of the witnesses, dissociate from the old reading and
731 # associate with the new.
732 foreach my $wit ( @wits ) {
733 my $prior = $self->prior_reading( $r, $wit );
734 my $next = $self->next_reading( $r, $wit );
735 $self->del_path( $prior, $r, $wit );
736 $self->add_path( $prior, $newr, $wit );
737 $self->del_path( $r, $next, $wit );
738 $self->add_path( $newr, $next, $wit );
739 }
740
3c234eb6 741 # If the graph is ranked, we need to look for relationships that are now
742 # invalid (i.e. 'non-colocation' types that might now be colocated) and
743 # remove them. If not, we can skip it.
f97ef19e 744 my $succ;
3c234eb6 745 my %rrk;
2dcb5d11 746 my @deleted_relations;
f97ef19e 747 if( $self->end->has_rank ) {
3c234eb6 748 # Find the point where we can stop checking
f97ef19e 749 $succ = $self->common_successor( $r, $newr );
3c234eb6 750
751 # Hash the existing ranks
f97ef19e 752 foreach my $rdg ( $self->readings ) {
753 $rrk{$rdg->id} = $rdg->rank;
754 }
3c234eb6 755 # Calculate the new ranks
756 $self->calculate_ranks();
f97ef19e 757
3c234eb6 758 # Check for invalid non-colocated relationships among changed-rank readings
759 # from where the ranks start changing up to $succ
f97ef19e 760 my $lastrank = $succ->rank;
761 foreach my $rdg ( $self->readings ) {
762 next if $rdg->rank > $lastrank;
763 next if $rdg->rank == $rrk{$rdg->id};
764 my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
765 next unless @noncolo;
766 foreach my $nc ( @noncolo ) {
2dcb5d11 767 unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
f6a1d5f0 768 push( @deleted_relations, [ $rdg->id, $nc->id ] );
2dcb5d11 769 }
f97ef19e 770 }
771 }
ef73c20a 772 }
2dcb5d11 773 return ( $newr, @deleted_relations );
f97ef19e 774}
775
776sub _generate_dup_id {
777 my( $self, $rid ) = @_;
778 my $newid;
779 my $i = 0;
780 while( !$newid ) {
781 $newid = $rid."_$i";
782 if( $self->has_reading( $newid ) ) {
783 $newid = '';
784 $i++;
785 }
786 }
787 return $newid;
788}
789
3a2ebbf4 790### Path logic
791
792sub add_path {
793 my $self = shift;
794
795 # We only need the IDs for adding paths to the graph, not the reading
796 # objects themselves.
cecbe56d 797 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 798
c1915ab9 799 $self->_graphcalc_done(0);
3a2ebbf4 800 # Connect the readings
359944f7 801 unless( $self->sequence->has_edge( $source, $target ) ) {
802 $self->sequence->add_edge( $source, $target );
803 $self->relations->add_equivalence_edge( $source, $target );
804 }
3a2ebbf4 805 # Note the witness in question
806 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 807}
b15511bf 808
3a2ebbf4 809sub del_path {
810 my $self = shift;
49d4f2ac 811 my @args;
812 if( ref( $_[0] ) eq 'ARRAY' ) {
813 my $e = shift @_;
814 @args = ( @$e, @_ );
815 } else {
816 @args = @_;
817 }
3a2ebbf4 818
f97ef19e 819 # We only need the IDs for removing paths from the graph, not the reading
3a2ebbf4 820 # objects themselves.
49d4f2ac 821 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 822
c1915ab9 823 $self->_graphcalc_done(0);
3a2ebbf4 824 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 825 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 826 }
30897024 827 unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
3a2ebbf4 828 $self->sequence->delete_edge( $source, $target );
359944f7 829 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 830 }
784877d9 831}
832
3a2ebbf4 833
15d2d3df 834# Extra graph-alike utility
835sub has_path {
3a2ebbf4 836 my $self = shift;
837 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
838 return undef unless $self->sequence->has_edge( $source, $target );
839 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 840}
841
4e5a7b2c 842=head2 clear_witness( @sigil_list )
3a2ebbf4 843
4e5a7b2c 844Clear the given witnesses out of the collation entirely, removing references
845to them in paths, and removing readings that belong only to them. Should only
846be called via $tradition->del_witness.
3a2ebbf4 847
848=cut
849
4e5a7b2c 850sub clear_witness {
851 my( $self, @sigils ) = @_;
852
c1915ab9 853 $self->_graphcalc_done(0);
4e5a7b2c 854 # Clear the witness(es) out of the paths
855 foreach my $e ( $self->paths ) {
856 foreach my $sig ( @sigils ) {
857 $self->del_path( $e, $sig );
858 }
859 }
860
861 # Clear out the newly unused readings
862 foreach my $r ( $self->readings ) {
863 unless( $self->reading_witnesses( $r ) ) {
864 $self->del_reading( $r );
865 }
866 }
867}
3a2ebbf4 868
869sub add_relationship {
870 my $self = shift;
22222af9 871 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 872 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
864ee4bf 873 foreach my $v ( @vectors ) {
874 next unless $self->get_relationship( $v )->colocated;
875 if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
876 && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
877 $self->_graphcalc_done(0);
878 $self->_clear_cache;
879 last;
880 }
881 }
63778331 882 return @vectors;
22222af9 883}
ef9d481f 884
ca6e6095 885around qw/ get_relationship del_relationship / => sub {
886 my $orig = shift;
887 my $self = shift;
888 my @args = @_;
889 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
890 @args = @{$_[0]};
891 }
e19635f8 892 my @stringargs = $self->_stringify_args( @args );
893 $self->$orig( @stringargs );
ca6e6095 894};
895
22222af9 896=head2 reading_witnesses( $reading )
910a0a6d 897
22222af9 898Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 899
22222af9 900=cut
1d310495 901
1d310495 902sub reading_witnesses {
903 my( $self, $reading ) = @_;
904 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 905 # arbitrarily chosen "incoming". Thus, special-case the start node.
906 if( $reading eq $self->start ) {
bed6ce83 907 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
96dc90ec 908 }
1d310495 909 my %all_witnesses;
910 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
911 my $wits = $self->sequence->get_edge_attributes( @$e );
912 @all_witnesses{ keys %$wits } = 1;
913 }
c12bb878 914 my $acstr = $self->ac_label;
915 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
916 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
917 }
1d310495 918 return keys %all_witnesses;
910a0a6d 919}
920
4e5a7b2c 921=head1 OUTPUT METHODS
8e1394aa 922
0ecb975c 923=head2 as_svg( \%options )
8e1394aa 924
0068967c 925Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 926See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 927
928=cut
929
930sub as_svg {
0ecb975c 931 my( $self, $opts ) = @_;
bfcbcecb 932 throw( "Need GraphViz installed to output SVG" )
933 unless File::Which::which( 'dot' );
e247aad1 934 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 935 $self->calculate_ranks()
936 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
be3af600 937 my @cmd = qw/dot -Tsvg/;
938 my( $svg, $err );
939 my $dotfile = File::Temp->new();
940 ## USE FOR DEBUGGING
941 # $dotfile->unlink_on_destroy(0);
942 binmode $dotfile, ':utf8';
943 print $dotfile $self->as_dot( $opts );
944 push( @cmd, $dotfile->filename );
945 run( \@cmd, ">", binary(), \$svg );
946 $svg = decode_utf8( $svg );
947 return $svg;
8e1394aa 948}
949
b22576c6 950
0ecb975c 951=head2 as_dot( \%options )
b22576c6 952
0ecb975c 953Returns a string that is the collation graph expressed in dot
954(i.e. GraphViz) format. Options include:
b22576c6 955
0ecb975c 956=over 4
b22576c6 957
0ecb975c 958=item * from
b22576c6 959
0ecb975c 960=item * to
df6d9812 961
0ecb975c 962=item * color_common
963
964=back
df6d9812 965
966=cut
967
968sub as_dot {
0ecb975c 969 my( $self, $opts ) = @_;
970 my $startrank = $opts->{'from'} if $opts;
971 my $endrank = $opts->{'to'} if $opts;
972 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 973 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
974 && $self->end->rank > 100;
6648ee3d 975 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 976
b22576c6 977 # Check the arguments
978 if( $startrank ) {
979 return if $endrank && $startrank > $endrank;
980 return if $startrank > $self->end->rank;
981 }
982 if( defined $endrank ) {
983 return if $endrank < 0;
f1b3b33a 984 $endrank = undef if $endrank == $self->end->rank;
b22576c6 985 }
986
67da8d6c 987 my $graph_name = $self->tradition->name;
988 $graph_name =~ s/[^\w\s]//g;
989 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 990
991 my %graph_attrs = (
992 'rankdir' => 'LR',
993 'bgcolor' => 'none',
994 );
995 my %node_attrs = (
b8990398 996 'fontsize' => 14,
f13b5582 997 'fillcolor' => 'white',
998 'style' => 'filled',
999 'shape' => 'ellipse'
1000 );
1001 my %edge_attrs = (
1002 'arrowhead' => 'open',
1003 'color' => '#000000',
1004 'fontcolor' => '#000000',
1005 );
1006
67da8d6c 1007 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 1008 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
1009 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 1010
b22576c6 1011 # Output substitute start/end readings if necessary
1012 if( $startrank ) {
43c94341 1013 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 1014 }
1015 if( $endrank ) {
43c94341 1016 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 1017 }
b365fbae 1018 if( $STRAIGHTENHACK ) {
1019 ## HACK part 1
43c94341 1020 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
1021 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 1022 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
1023 }
b22576c6 1024 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 1025 # Sort the readings by rank if we have ranks; this speeds layout.
1026 my @all_readings = $self->end->has_rank
1027 ? sort { $a->rank <=> $b->rank } $self->readings
1028 : $self->readings;
4633f9e4 1029 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 1030 foreach my $reading ( @all_readings ) {
b22576c6 1031 # Only output readings within our rank range.
1032 next if $startrank && $reading->rank < $startrank;
1033 next if $endrank && $reading->rank > $endrank;
1034 $used{$reading->id} = 1;
910a0a6d 1035 # Need not output nodes without separate labels
3a2ebbf4 1036 next if $reading->id eq $reading->text;
d4b75f44 1037 my $rattrs;
30f0df34 1038 my $label = $reading->text;
6b6c7098 1039 unless( $label =~ /^[[:punct:]]+$/ ) {
1040 $label .= '-' if $reading->join_next;
1041 $label = "-$label" if $reading->join_prior;
1042 }
8f9cab7b 1043 $label =~ s/\"/\\\"/g;
d4b75f44 1044 $rattrs->{'label'} = $label;
10e4b1ac 1045 $rattrs->{'id'} = $reading->id;
0ecb975c 1046 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 1047 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 1048 }
3a2ebbf4 1049
522c1314 1050 # Add the real edges.
b22576c6 1051 my @edges = $self->paths;
3bdec618 1052 my( %substart, %subend );
b22576c6 1053 foreach my $edge ( @edges ) {
1054 # Do we need to output this edge?
508fd430 1055 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
57560672 1056 my $label = $self->_path_display_label( $opts,
1057 $self->path_witnesses( $edge ) );
f13b5582 1058 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 1059
b22576c6 1060 # Account for the rank gap if necessary
30ddc24c 1061 my $rank0 = $self->reading( $edge->[0] )->rank
1062 if $self->reading( $edge->[0] )->has_rank;
1063 my $rank1 = $self->reading( $edge->[1] )->rank
1064 if $self->reading( $edge->[1] )->has_rank;
1065 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
1066 $variables->{'minlen'} = $rank1 - $rank0;
1067 }
1068
508fd430 1069 # EXPERIMENTAL: make edge width reflect no. of witnesses
1070 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
1071 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
1072
f13b5582 1073 my $varopts = _dot_attr_string( $variables );
1074 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1075 $edge->[0], $edge->[1], $varopts );
3bdec618 1076 } elsif( $used{$edge->[0]} ) {
96ba0418 1077 $subend{$edge->[0]} = $edge->[1];
3bdec618 1078 } elsif( $used{$edge->[1]} ) {
96ba0418 1079 $substart{$edge->[1]} = $edge->[0];
b22576c6 1080 }
df6d9812 1081 }
bed6ce83 1082
1083 # If we are asked to, add relationship links
1084 if( exists $opts->{show_relations} ) {
1085 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
1086 if( $filter eq 'transposition' ) {
1087 $filter =~ qr/^transposition$/;
1088 }
9e54e11b 1089 my %typecolors;
1090 my @types = sort( map { $_->name } $self->relations->types );
1091 if( exists $opts->{graphcolors} ) {
1092 foreach my $tdx ( 0 .. $#types ) {
1093 $typecolors{$types[$tdx]} = $opts->{graphcolors}->[$tdx];
1094 }
1095 } else {
1096 map { $typecolors{$_} = '#FFA14F' } @types;
1097 }
bed6ce83 1098 foreach my $redge ( $self->relationships ) {
1099 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
9e54e11b 1100 my $rel = $self->get_relationship( $redge );
1101 next unless $filter eq 'all' || $rel->type =~ /$filter/;
1102 my $variables = {
1103 arrowhead => 'none',
1104 color => $typecolors{$rel->type},
1105 constraint => 'false',
1106 penwidth => '3',
1107 };
1108 unless( exists $opts->{graphcolors} ) {
1109 $variables->{label} = uc( substr( $rel->type, 0, 4 ) ),
bed6ce83 1110 }
9e54e11b 1111 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
1112 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
bed6ce83 1113 }
1114 }
1115 }
1116
3bdec618 1117 # Add substitute start and end edges if necessary
1118 foreach my $node ( keys %substart ) {
57560672 1119 my $witstr = $self->_path_display_label( $opts,
1120 $self->path_witnesses( $substart{$node}, $node ) );
f13b5582 1121 my $variables = { %edge_attrs, 'label' => $witstr };
96ba0418 1122 my $nrdg = $self->reading( $node );
1123 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
1124 # Substart is actually one lower than $startrank
1125 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
1126 }
f13b5582 1127 my $varopts = _dot_attr_string( $variables );
96ba0418 1128 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
3bdec618 1129 }
1130 foreach my $node ( keys %subend ) {
57560672 1131 my $witstr = $self->_path_display_label( $opts,
1132 $self->path_witnesses( $node, $subend{$node} ) );
f13b5582 1133 my $variables = { %edge_attrs, 'label' => $witstr };
1134 my $varopts = _dot_attr_string( $variables );
96ba0418 1135 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
3bdec618 1136 }
b365fbae 1137 # HACK part 2
1138 if( $STRAIGHTENHACK ) {
43c94341 1139 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
1140 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 1141 }
30ddc24c 1142
df6d9812 1143 $dot .= "}\n";
1144 return $dot;
1145}
1146
f13b5582 1147sub _dot_attr_string {
1148 my( $hash ) = @_;
1149 my @attrs;
1150 foreach my $k ( sort keys %$hash ) {
1151 my $v = $hash->{$k};
1152 push( @attrs, $k.'="'.$v.'"' );
1153 }
1154 return( '[ ' . join( ', ', @attrs ) . ' ]' );
1155}
1156
027d819c 1157=head2 path_witnesses( $edge )
1158
1159Returns the list of sigils whose witnesses are associated with the given edge.
1160The edge can be passed as either an array or an arrayref of ( $source, $target ).
1161
1162=cut
1163
3a2ebbf4 1164sub path_witnesses {
1165 my( $self, @edge ) = @_;
1166 # If edge is an arrayref, cope.
1167 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
1168 my $e = shift @edge;
1169 @edge = @$e;
1170 }
1171 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 1172 return @wits;
3a2ebbf4 1173}
1174
7f9f05e8 1175# Helper function. Make a display label for the given witnesses, showing a.c.
1176# witnesses only where the main witness is not also in the list.
027d819c 1177sub _path_display_label {
508fd430 1178 my $self = shift;
57560672 1179 my $opts = shift;
7f9f05e8 1180 my %wits;
1181 map { $wits{$_} = 1 } @_;
1182
1183 # If an a.c. wit is listed, remove it if the main wit is also listed.
1184 # Otherwise keep it for explicit listing.
1185 my $aclabel = $self->ac_label;
1186 my @disp_ac;
1187 foreach my $w ( sort keys %wits ) {
1188 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
1189 if( exists $wits{$1} ) {
1190 delete $wits{$w};
1191 } else {
1192 push( @disp_ac, $w );
1193 }
1194 }
1195 }
1196
57560672 1197 if( $opts->{'explicit_wits'} ) {
7f9f05e8 1198 return join( ', ', sort keys %wits );
57560672 1199 } else {
1200 # See if we are in a majority situation.
1201 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1202 $maj = $maj > 5 ? $maj : 5;
1203 if( scalar keys %wits > $maj ) {
1204 unshift( @disp_ac, 'majority' );
1205 return join( ', ', @disp_ac );
1206 } else {
1207 return join( ', ', sort keys %wits );
1208 }
8f9cab7b 1209 }
1210}
1dd07bda 1211
522c1314 1212=head2 as_adjacency_list
1dd07bda 1213
522c1314 1214Returns a JSON structure that represents the collation sequence graph.
1dd07bda 1215
8a9a8200 1216=begin testing
1217
1218use JSON qw/ from_json /;
1219use Text::Tradition;
1220
1221my $t = Text::Tradition->new(
1222 'input' => 'Self',
1223 'file' => 't/data/florilegium_graphml.xml' );
1224my $c = $t->collation;
1225
1226# Make a connection so we can test rank preservation
1227$c->add_relationship( 'w91', 'w92', { type => 'grammatical' } );
1228
1229# Create an adjacency list of the whole thing; test the output.
1230my $adj_whole = from_json( $c->as_adjacency_list() );
1231is( scalar @$adj_whole, scalar $c->readings(),
1232 "Same number of nodes in graph and adjacency list" );
1233my @adj_whole_edges;
1234map { push( @adj_whole_edges, @{$_->{adjacent}} ) } @$adj_whole;
1235is( scalar @adj_whole_edges, scalar $c->sequence->edges,
1236 "Same number of edges in graph and adjacency list" );
1237# Find the reading whose rank should be preserved
1238my( $test_rdg ) = grep { $_->{id} eq 'w89' } @$adj_whole;
1239my( $test_edge ) = grep { $_->{id} eq 'w92' } @{$test_rdg->{adjacent}};
1240is( $test_edge->{minlen}, 2, "Rank of test reading is preserved" );
1241
1242# Now create an adjacency list of just a portion. w76 to w122
1243my $adj_part = from_json( $c->as_adjacency_list(
1244 { from => $c->reading('w76')->rank,
1245 to => $c->reading('w122')->rank }));
1246is( scalar @$adj_part, 48, "Correct number of nodes in partial graph" );
1247my @adj_part_edges;
1248map { push( @adj_part_edges, @{$_->{adjacent}} ) } @$adj_part;
1249is( scalar @adj_part_edges, 58,
1250 "Same number of edges in partial graph and adjacency list" );
1251# Check for consistency
1252my %part_nodes;
1253map { $part_nodes{$_->{id}} = 1 } @$adj_part;
1254foreach my $edge ( @adj_part_edges ) {
1255 my $testid = $edge->{id};
1256 ok( $part_nodes{$testid}, "ID $testid referenced in edge is given as node" );
1257}
1258
1259=end testing
1260
1dd07bda 1261=cut
1262
522c1314 1263sub as_adjacency_list {
1264 my( $self, $opts ) = @_;
1265 # Make a structure that contains all the nodes, the nodes they point to,
1266 # and the attributes of the edges that connect them.
1267 # [ { id: 'n0', label: 'Gallia', adjacent: [
1268 # { id: 'n1', label: 'P Q' } ,
1269 # { id: 'n2', label: 'R S', minlen: 2 } ] },
1270 # { id: 'n1', label: 'est', adjacent: [ ... ] },
1271 # ... ]
1272 my $startrank = $opts->{'from'} || 0;
1273 my $endrank = $opts->{'to'} || $self->end->rank;
1274
8a9a8200 1275 $self->calculate_ranks()
1276 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
522c1314 1277 my $list = [];
1278 foreach my $rdg ( $self->readings ) {
1279 my @successors;
1280 my $phony = '';
8a9a8200 1281 # Figure out what the node's successors should be.
522c1314 1282 if( $rdg eq $self->start && $startrank > 0 ) {
1283 # Connect the start node with all the nodes at startrank.
8a9a8200 1284 # Lacunas should be included only if the node really has that rank.
1285 @successors = $self->readings_at_rank( $startrank, 1 );
522c1314 1286 $phony = 'start';
1287 } elsif( $rdg->rank < $startrank
1288 || $rdg->rank > $endrank && $rdg ne $self->end ) {
1289 next;
522c1314 1290 } else {
1291 @successors = $rdg->successors;
1292 }
8a9a8200 1293 # Make sure that the end node is at the end of the successors
1294 # list if it is needed.
1295 if( grep { $_ eq $self->end } @successors ) {
1296 my @ts = grep { $_ ne $self->end } @successors;
1297 @successors = ( @ts, $self->end );
1298 } elsif ( grep { $_->rank > $endrank } @successors ) {
1299 push( @successors, $self->end );
1300 }
522c1314 1301
1302 my $listitem = { id => $rdg->id, label => $rdg->text };
1303 my $adjacent = [];
8a9a8200 1304 my @endwits;
522c1314 1305 foreach my $succ ( @successors ) {
1306 my @edgewits;
1307 if( $phony eq 'start' ) {
1308 @edgewits = $succ->witnesses;
8a9a8200 1309 } elsif( $self->sequence->has_edge( $rdg->id, $succ->id ) ) {
522c1314 1310 @edgewits = $self->path_witnesses( $rdg->id, $succ->id );
1311 }
8a9a8200 1312
1313 if( $succ eq $self->end ) {
1314 @edgewits = @endwits;
1315 } elsif( $succ->rank > $endrank ) {
1316 # These witnesses will point to 'end' instead, not to the
1317 # actual successor.
1318 push( @endwits, @edgewits );
1319 next;
1320 }
522c1314 1321 my $edgelabel = $self->_path_display_label( $opts, @edgewits );
1322 my $edgedef = { id => $succ->id, label => $edgelabel };
1323 my $rankoffset = $succ->rank - $rdg->rank;
1324 if( $rankoffset > 1 and $succ ne $self->end ) {
1325 $edgedef->{minlen} = $rankoffset;
1326 }
1327 push( @$adjacent, $edgedef );
1328 }
1329 $listitem->{adjacent} = $adjacent;
1330 push( @$list, $listitem );
bf6e338d 1331 }
522c1314 1332 return to_json( $list );
1333}
8f9cab7b 1334
4e5a7b2c 1335=head2 as_graphml
8e1394aa 1336
4e5a7b2c 1337Returns a GraphML representation of the collation. The GraphML will contain
1338two graphs. The first expresses the attributes of the readings and the witness
1339paths that link them; the second expresses the relationships that link the
1340readings. This is the native transfer format for a tradition.
8e1394aa 1341
56eefa04 1342=begin testing
1343
1344use Text::Tradition;
951ddfe8 1345use TryCatch;
56eefa04 1346
1347my $READINGS = 311;
1348my $PATHS = 361;
1349
1350my $datafile = 't/data/florilegium_tei_ps.xml';
1351my $tradition = Text::Tradition->new( 'input' => 'TEI',
1352 'name' => 'test0',
1353 'file' => $datafile,
1354 'linear' => 1 );
1355
1356ok( $tradition, "Got a tradition object" );
1357is( scalar $tradition->witnesses, 13, "Found all witnesses" );
1358ok( $tradition->collation, "Tradition has a collation" );
1359
1360my $c = $tradition->collation;
1361is( scalar $c->readings, $READINGS, "Collation has all readings" );
1362is( scalar $c->paths, $PATHS, "Collation has all paths" );
1363is( scalar $c->relationships, 0, "Collation has all relationships" );
1364
1365# Add a few relationships
1366$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
1367$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
b71e7ea8 1368$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition',
1369 'is_significant' => 'yes' } );
56eefa04 1370
1371# Now write it to GraphML and parse it again.
1372
1373my $graphml = $c->as_graphml;
1374my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
1375is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
1376is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
1377is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
b71e7ea8 1378my $sigrel = $st->collation->get_relationship( 'w257', 'w262' );
1379is( $sigrel->is_significant, 'yes', "Ternary attribute value was restored" );
56eefa04 1380
9fef629b 1381# Now add a stemma, write to GraphML, and look at the output.
951ddfe8 1382SKIP: {
37bf09f4 1383 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
951ddfe8 1384 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
1385 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
1386 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
1387 $graphml = $c->as_graphml;
1388 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
1389}
2a812726 1390
56eefa04 1391=end testing
1392
8e1394aa 1393=cut
1394
a445ce40 1395## TODO MOVE this to Tradition.pm and modularize it better
8e1394aa 1396sub as_graphml {
a30ca502 1397 my( $self, $options ) = @_;
3d14b48e 1398 $self->calculate_ranks unless $self->_graphcalc_done;
1399
a30ca502 1400 my $start = $options->{'from'}
1401 ? $self->reading( $options->{'from'} ) : $self->start;
1402 my $end = $options->{'to'}
1403 ? $self->reading( $options->{'to'} ) : $self->end;
1404 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
1405 throw( 'Start node must be before end node' );
1406 }
1407 # The readings need to be ranked for this to work.
1408 $start = $self->start unless $start->has_rank;
1409 $end = $self->end unless $end->has_rank;
414cc046 1410 my $rankoffset = 0;
1411 unless( $start eq $self->start ) {
1412 $rankoffset = $start->rank - 1;
1413 }
a30ca502 1414 my %use_readings;
1415
8e1394aa 1416 # Some namespaces
1417 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
1418 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
1419 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 1420 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 1421
1422 # Create the document and root node
428bcf0b 1423 require XML::LibXML;
8e1394aa 1424 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
1425 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
1426 $graphml->setDocumentElement( $root );
1427 $root->setNamespace( $xsi_ns, 'xsi', 0 );
1428 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 1429
1430 # List of attribute types to save on our objects and their corresponding
1431 # GraphML types
1432 my %save_types = (
1433 'Str' => 'string',
1434 'Int' => 'int',
1435 'Bool' => 'boolean',
10e4b1ac 1436 'ReadingID' => 'string',
bbd064a9 1437 'RelationshipType' => 'string',
1438 'RelationshipScope' => 'string',
b71e7ea8 1439 'Ternary' => 'string',
bbd064a9 1440 );
1441
bbd064a9 1442 # Add the data keys for the graph. Include an extra key 'version' for the
1443 # GraphML output version.
e309421a 1444 my %graph_data_keys;
1445 my $gdi = 0;
bbd064a9 1446 my %graph_attributes = ( 'version' => 'string' );
1447 # Graph attributes include those of Tradition and those of Collation.
1448 my %gattr_from;
f97ef19e 1449 # TODO Use meta introspection method from duplicate_reading to do this
1450 # instead of naming custom keys.
bbd064a9 1451 my $tmeta = $self->tradition->meta;
1452 my $cmeta = $self->meta;
1453 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1454 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1455 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1456 next if $attr->name =~ /^_/;
bbd064a9 1457 next unless $save_types{$attr->type_constraint->name};
1458 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1459 }
9fef629b 1460 # Extra custom keys for complex objects that should be saved in some form.
1461 # The subroutine should return a string, or undef/empty.
951ddfe8 1462 if( $tmeta->has_method('stemmata') ) {
1463 $graph_attributes{'stemmata'} = sub {
1464 my @stemstrs;
1465 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1466 $self->tradition->stemmata;
1467 join( "\n", @stemstrs );
1468 };
1469 }
1470
8943ff68 1471 if( $tmeta->has_method('user') ) {
1472 $graph_attributes{'user'} = sub {
1473 $self->tradition->user ? $self->tradition->user->id : undef
1474 };
1475 }
bbd064a9 1476
1477 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 1478 $graph_data_keys{$datum} = 'dg'.$gdi++;
1479 my $key = $root->addNewChild( $graphml_ns, 'key' );
9fef629b 1480 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1481 : $graph_attributes{$datum};
e309421a 1482 $key->setAttribute( 'attr.name', $datum );
9fef629b 1483 $key->setAttribute( 'attr.type', $dtype );
e309421a 1484 $key->setAttribute( 'for', 'graph' );
1485 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1486 }
f6066bac 1487
bbd064a9 1488 # Add the data keys for reading nodes
1489 my %reading_attributes;
1490 my $rmeta = Text::Tradition::Collation::Reading->meta;
1491 foreach my $attr( $rmeta->get_all_attributes ) {
1492 next if $attr->name =~ /^_/;
bbd064a9 1493 next unless $save_types{$attr->type_constraint->name};
1494 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1495 }
a445ce40 1496 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1497 # Extra custom key for the reading morphology
1498 $reading_attributes{'lexemes'} = 'string';
1499 }
7cd9f181 1500
ef9d481f 1501 my %node_data_keys;
1502 my $ndi = 0;
bbd064a9 1503 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1504 $node_data_keys{$datum} = 'dn'.$ndi++;
1505 my $key = $root->addNewChild( $graphml_ns, 'key' );
1506 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1507 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1508 $key->setAttribute( 'for', 'node' );
1509 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1510 }
1511
bbd064a9 1512 # Add the data keys for edges, that is, paths and relationships. Path
1513 # data does not come from a Moose class so is here manually.
ef9d481f 1514 my $edi = 0;
1515 my %edge_data_keys;
bbd064a9 1516 my %edge_attributes = (
3a2ebbf4 1517 witness => 'string', # ID/label for a path
3a2ebbf4 1518 extra => 'boolean', # Path key
3a2ebbf4 1519 );
bbd064a9 1520 my @path_attributes = keys %edge_attributes; # track our manual additions
1521 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1522 foreach my $attr( $pmeta->get_all_attributes ) {
1523 next if $attr->name =~ /^_/;
bbd064a9 1524 next unless $save_types{$attr->type_constraint->name};
1525 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1526 }
1527 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1528 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1529 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1530 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1531 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1532 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1533 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1534 }
3a2ebbf4 1535
cc31ebaa 1536 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1537 my $xmlidname = $self->tradition->name;
1538 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1539 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1540 $xmlidname = '_'.$xmlidname;
1541 }
2c669bca 1542 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1543 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1544 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1545 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1546 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1547 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1548 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1549 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1550
2a812726 1551 # Tradition/collation attribute data
bbd064a9 1552 foreach my $datum ( keys %graph_attributes ) {
1553 my $value;
1554 if( $datum eq 'version' ) {
2a812726 1555 $value = '3.2';
9fef629b 1556 } elsif( ref( $graph_attributes{$datum} ) ) {
1557 my $sub = $graph_attributes{$datum};
1558 $value = &$sub();
bbd064a9 1559 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1560 $value = $self->tradition->$datum;
1561 } else {
1562 $value = $self->$datum;
1563 }
2c669bca 1564 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1565 }
8e1394aa 1566
1567 my $node_ctr = 0;
1568 my %node_hash;
22222af9 1569 # Add our readings to the graph
3a2ebbf4 1570 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1571 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1572 ( $n->rank < $start->rank || $n->rank > $end->rank );
1573 $use_readings{$n->id} = 1;
2c669bca 1574 # Add to the main graph
1575 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1576 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1577 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1578 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1579 foreach my $d ( keys %reading_attributes ) {
255875b8 1580 my $nval = $n->$d;
7cd9f181 1581 # Custom serialization
1582 if( $d eq 'lexemes' ) {
1583 # If nval is a true value, we have lexemes so we need to
1584 # serialize them. Otherwise set nval to undef so that the
1585 # key is excluded from this reading.
1586 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1587 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1588 $nval = undef;
7cd9f181 1589 }
cc31ebaa 1590 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1591 # Adjust the ranks within the subgraph.
cc31ebaa 1592 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1593 : $nval - $rankoffset;
414cc046 1594 }
255875b8 1595 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1596 if defined $nval;
1597 }
b15511bf 1598 }
1599
2c669bca 1600 # Add the path edges to the sequence graph
df6d9812 1601 my $edge_ctr = 0;
3a2ebbf4 1602 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1603 # We add an edge in the graphml for every witness in $e.
a30ca502 1604 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1605 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1606 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1607 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1608 # Skip any path from start to end; that witness is not in the subgraph.
1609 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1610 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1611 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1612 $node_hash{ $e->[0] },
1613 $node_hash{ $e->[1] } );
2c669bca 1614 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1615 $edge_el->setAttribute( 'source', $from );
1616 $edge_el->setAttribute( 'target', $to );
1617 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1618
1619 # It's a witness path, so add the witness
1620 my $base = $wit;
1621 my $key = $edge_data_keys{'witness'};
1622 # Is this an ante-corr witness?
1623 my $aclabel = $self->ac_label;
1624 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1625 # Keep the base witness
1626 $base = $1;
1627 # ...and record that this is an 'extra' reading path
1628 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1629 }
1630 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1631 }
1632 }
1633
cc31ebaa 1634 # Report the actual number of nodes and edges that went in
1635 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1636 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1637
22222af9 1638 # Add the relationship graph to the XML
bbd064a9 1639 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1640 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1641 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1642
94c00c71 1643 # Save and return the thing
1644 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1645 return $result;
df6d9812 1646}
1647
b15511bf 1648sub _add_graphml_data {
1649 my( $el, $key, $value ) = @_;
b15511bf 1650 return unless defined $value;
c9bf3dbf 1651 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1652 $data_el->setAttribute( 'key', $key );
1653 $data_el->appendText( $value );
8e1394aa 1654}
1655
4e5a7b2c 1656=head2 as_csv
910a0a6d 1657
1658Returns a CSV alignment table representation of the collation graph, one
2c669bca 1659row per witness (or witness uncorrected.)
910a0a6d 1660
ce5966fb 1661=head2 as_tsv
1662
1663Returns a tab-separated alignment table representation of the collation graph,
1664one row per witness (or witness uncorrected.)
1665
16203db5 1666=begin testing
1667
1668use Text::Tradition;
34ca808b 1669use Text::CSV;
16203db5 1670
1671my $READINGS = 311;
1672my $PATHS = 361;
34ca808b 1673my $WITS = 13;
1674my $WITAC = 4;
16203db5 1675
1676my $datafile = 't/data/florilegium_tei_ps.xml';
1677my $tradition = Text::Tradition->new( 'input' => 'TEI',
1678 'name' => 'test0',
1679 'file' => $datafile,
1680 'linear' => 1 );
1681
1682my $c = $tradition->collation;
1683# Export the thing to CSV
1684my $csvstr = $c->as_csv();
34ca808b 1685# Count the columns
1686my $csv = Text::CSV->new({ sep_char => ',', binary => 1 });
1687my @lines = split(/\n/, $csvstr );
1688ok( $csv->parse( $lines[0] ), "Successfully parsed first line of CSV" );
1689is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
cbc8e08f 1690my @q_ac = grep { $_ eq 'Q'.$c->ac_label } $csv->fields;
1691ok( @q_ac, "Found a layered witness" );
1692
16203db5 1693my $t2 = Text::Tradition->new( input => 'Tabular',
1694 name => 'test2',
1695 string => $csvstr,
1696 sep_char => ',' );
1697is( scalar $t2->collation->readings, $READINGS, "Reparsed CSV collation has all readings" );
1698is( scalar $t2->collation->paths, $PATHS, "Reparsed CSV collation has all paths" );
1699
1700# Now do it with TSV
1701my $tsvstr = $c->as_tsv();
1702my $t3 = Text::Tradition->new( input => 'Tabular',
1703 name => 'test3',
1704 string => $tsvstr,
1705 sep_char => "\t" );
1706is( scalar $t3->collation->readings, $READINGS, "Reparsed TSV collation has all readings" );
1707is( scalar $t3->collation->paths, $PATHS, "Reparsed TSV collation has all paths" );
1708
4e64b669 1709my $table = $c->alignment_table;
34ca808b 1710my $noaccsv = $c->as_csv({ noac => 1 });
1711my @noaclines = split(/\n/, $noaccsv );
1712ok( $csv->parse( $noaclines[0] ), "Successfully parsed first line of no-ac CSV" );
1713is( scalar( $csv->fields ), $WITS, "CSV has correct number of witness columns" );
4e64b669 1714is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
34ca808b 1715
cbc8e08f 1716my $safecsv = $c->as_csv({ safe_ac => 1});
1717my @safelines = split(/\n/, $safecsv );
1718ok( $csv->parse( $safelines[0] ), "Successfully parsed first line of safe CSV" );
1719is( scalar( $csv->fields ), $WITS + $WITAC, "CSV has correct number of witness columns" );
1720@q_ac = grep { $_ eq 'Q__L' } $csv->fields;
1721ok( @q_ac, "Found a sanitized layered witness" );
1722is( $c->alignment_table, $table, "Request for CSV did not alter the alignment table" );
16203db5 1723
de20588d 1724# Test relationship collapse
1725$c->add_relationship( $c->readings_at_rank( 37 ), { type => 'spelling' } );
1726$c->add_relationship( $c->readings_at_rank( 60 ), { type => 'spelling' } );
1727
1728my $mergedtsv = $c->as_tsv({mergetypes => [ 'spelling', 'orthographic' ] });
1729my $t4 = Text::Tradition->new( input => 'Tabular',
1730 name => 'test4',
1731 string => $mergedtsv,
1732 sep_char => "\t" );
1733is( scalar $t4->collation->readings, $READINGS - 2, "Reparsed TSV merge collation has fewer readings" );
1734is( scalar $t4->collation->paths, $PATHS - 4, "Reparsed TSV merge collation has fewer paths" );
1735
16203db5 1736=end testing
1737
910a0a6d 1738=cut
1739
ce5966fb 1740sub _tabular {
34ca808b 1741 my( $self, $opts ) = @_;
1742 my $table = $self->alignment_table( $opts );
16203db5 1743 my $csv_options = { binary => 1, quote_null => 0 };
34ca808b 1744 $csv_options->{'sep_char'} = $opts->{fieldsep};
1745 if( $opts->{fieldsep} eq "\t" ) {
16203db5 1746 # If it is really tab separated, nothing is an escape char.
1747 $csv_options->{'quote_char'} = undef;
1748 $csv_options->{'escape_char'} = '';
1749 }
1750 my $csv = Text::CSV->new( $csv_options );
910a0a6d 1751 my @result;
2c669bca 1752 # Make the header row
1753 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
43800a64 1754 push( @result, $csv->string );
2c669bca 1755 # Make the rest of the rows
1756 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1757 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1758 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
de20588d 1759 # Quick and dirty collapse of requested relationship types
1760 if( ref( $opts->{mergetypes} ) eq 'ARRAY' ) {
1761 # Now substitute the reading in the relevant index of @row
1762 # for its merge-related reading
1763 my %substitutes;
1764 while( @rowobjs ) {
1765 my $thisr = shift @rowobjs;
1766 next unless $thisr;
1767 next if exists $substitutes{$thisr->{t}->text};
1768 # Make sure we don't have A <-> B substitutions.
1769 $substitutes{$thisr->{t}->text} = $thisr->{t}->text;
1770 foreach my $thatr ( @rowobjs ) {
1771 next unless $thatr;
1772 next if exists $substitutes{$thatr->{t}->text};
1773 my $ttrel = $self->get_relationship( $thisr->{t}, $thatr->{t} );
1774 next unless $ttrel;
1775 next unless grep { $ttrel->type eq $_ } @{$opts->{mergetypes}};
1776 # If we have got this far then we need to merge them.
1777 $substitutes{$thatr->{t}->text} = $thisr->{t}->text;
1778 }
1779 }
1780 @row = map { $_ && exists $substitutes{$_} ? $substitutes{$_} : $_ } @row;
1781 }
2c669bca 1782 $csv->combine( @row );
43800a64 1783 push( @result, $csv->string );
910a0a6d 1784 }
3a2ebbf4 1785 return join( "\n", @result );
910a0a6d 1786}
1787
ce5966fb 1788sub as_csv {
1789 my $self = shift;
34ca808b 1790 my $opts = shift || {};
1791 $opts->{fieldsep} = ',';
1792 return $self->_tabular( $opts );
ce5966fb 1793}
1794
1795sub as_tsv {
1796 my $self = shift;
34ca808b 1797 my $opts = shift || {};
1798 $opts->{fieldsep} = "\t";
1799 return $self->_tabular( $opts );
ce5966fb 1800}
1801
248276a2 1802=head2 alignment_table
2c669bca 1803
566f4595 1804Return a reference to an alignment table, in a slightly enhanced CollateX
1805format which looks like this:
1806
1807 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1808 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1809 { witness => "SIG2",
4e5a7b2c 1810 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1811 ... ],
1812 length => TEXTLEN };
1813
2c669bca 1814=cut
9f3ba6f7 1815
1dd07bda 1816sub alignment_table {
34ca808b 1817 my( $self, $opts ) = @_;
4e64b669 1818 if( $self->has_cached_table ) {
cbc8e08f 1819 return $self->cached_table
1820 unless $opts->{noac} || $opts->{safe_ac};
4e64b669 1821 }
1dd07bda 1822
0ecb975c 1823 # Make sure we can do this
1824 throw( "Need a linear graph in order to make an alignment table" )
1825 unless $self->linear;
b39e7cb5 1826 $self->calculate_ranks()
1827 unless $self->_graphcalc_done && $self->end->has_rank;
1828
2c669bca 1829 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1830 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1831 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
6771a1b1 1832 # say STDERR "Making witness row(s) for " . $wit->sigil;
1f7aa795 1833 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1834 my @row = _make_witness_row( \@wit_path, \@all_pos );
bed6ce83 1835 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1836 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1837 push( @{$table->{'alignment'}}, $witobj );
34ca808b 1838 if( $wit->is_layered && !$opts->{noac} ) {
1f7aa795 1839 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1840 $wit->sigil.$self->ac_label );
1dd07bda 1841 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
cbc8e08f 1842 my $witlabel = $opts->{safe_ac}
1843 ? $wit->sigil . '__L' : $wit->sigil.$self->ac_label;
1844 my $witacobj = { 'witness' => $witlabel,
bed6ce83 1845 'tokens' => \@ac_row };
1846 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1847 push( @{$table->{'alignment'}}, $witacobj );
910a0a6d 1848 }
1849 }
cbc8e08f 1850 unless( $opts->{noac} || $opts->{safe_ac} ) {
4e64b669 1851 $self->cached_table( $table );
1852 }
1dd07bda 1853 return $table;
910a0a6d 1854}
1855
1856sub _make_witness_row {
1dd07bda 1857 my( $path, $positions ) = @_;
910a0a6d 1858 my %char_hash;
1859 map { $char_hash{$_} = undef } @$positions;
2c669bca 1860 my $debug = 0;
910a0a6d 1861 foreach my $rdg ( @$path ) {
6771a1b1 1862 say STDERR "rank " . $rdg->rank if $debug;
1863 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1dd07bda 1864 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1865 }
1866 my @row = map { $char_hash{$_} } @$positions;
eca16057 1867 # Fill in lacuna markers for undef spots in the row
1868 my $last_el = shift @row;
1869 my @filled_row = ( $last_el );
1870 foreach my $el ( @row ) {
0e476982 1871 # If we are using node reference, make the lacuna node appear many times
1872 # in the table. If not, use the lacuna tag.
1dd07bda 1873 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1874 $el = $last_el;
eca16057 1875 }
1876 push( @filled_row, $el );
1877 $last_el = $el;
1878 }
1879 return @filled_row;
910a0a6d 1880}
1881
248276a2 1882
4e5a7b2c 1883=head1 NAVIGATION METHODS
910a0a6d 1884
4e5a7b2c 1885=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1886
1887Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1888with $last, for the witness given in $sigil. If a $backup sigil is
1889specified (e.g. when walking a layered witness), it will be used wherever
1890no $sigil path exists. If there is a base text reading, that will be
1891used wherever no path exists for $sigil or $backup.
e2902068 1892
1893=cut
1894
910a0a6d 1895# TODO Think about returning some lazy-eval iterator.
b0b4421a 1896# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1897
e2902068 1898sub reading_sequence {
861c3e27 1899 my( $self, $start, $end, $witness ) = @_;
e2902068 1900
930ff666 1901 $witness = $self->baselabel unless $witness;
e2902068 1902 my @readings = ( $start );
1903 my %seen;
1904 my $n = $start;
3a2ebbf4 1905 while( $n && $n->id ne $end->id ) {
1906 if( exists( $seen{$n->id} ) ) {
63778331 1907 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1908 }
3a2ebbf4 1909 $seen{$n->id} = 1;
910a0a6d 1910
861c3e27 1911 my $next = $self->next_reading( $n, $witness );
44771cf2 1912 unless( $next ) {
63778331 1913 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1914 }
910a0a6d 1915 push( @readings, $next );
1916 $n = $next;
e2902068 1917 }
1918 # Check that the last reading is our end reading.
1919 my $last = $readings[$#readings];
63778331 1920 throw( "Last reading found from " . $start->text .
1921 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1922 unless $last->id eq $end->id;
e2902068 1923
1924 return @readings;
1925}
1926
522c1314 1927=head2 readings_at_rank( $rank )
1928
1929Returns a list of readings at a given rank, taken from the alignment table.
1930
1931=cut
1932
1933sub readings_at_rank {
8a9a8200 1934 my( $self, $rank, $nolacuna ) = @_;
522c1314 1935 my $table = $self->alignment_table;
1936 # Table rank is real rank - 1.
1937 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
1938 my %readings;
1939 foreach my $e ( @elements ) {
1940 next unless ref( $e ) eq 'HASH';
1941 next unless exists $e->{'t'};
8a9a8200 1942 my $rdg = $e->{'t'};
1943 next if $nolacuna && $rdg->is_lacuna && $rdg->rank ne $rank;
522c1314 1944 $readings{$e->{'t'}->id} = $e->{'t'};
1945 }
1946 return values %readings;
1947}
1948
4e5a7b2c 1949=head2 next_reading( $reading, $sigil );
8e1394aa 1950
4a8828f0 1951Returns the reading that follows the given reading along the given witness
930ff666 1952path.
8e1394aa 1953
1954=cut
1955
4a8828f0 1956sub next_reading {
e2902068 1957 # Return the successor via the corresponding path.
8e1394aa 1958 my $self = shift;
3a2ebbf4 1959 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1960 return undef unless $answer;
3a2ebbf4 1961 return $self->reading( $answer );
8e1394aa 1962}
1963
4e5a7b2c 1964=head2 prior_reading( $reading, $sigil )
8e1394aa 1965
4a8828f0 1966Returns the reading that precedes the given reading along the given witness
930ff666 1967path.
8e1394aa 1968
1969=cut
1970
4a8828f0 1971sub prior_reading {
e2902068 1972 # Return the predecessor via the corresponding path.
8e1394aa 1973 my $self = shift;
3a2ebbf4 1974 my $answer = $self->_find_linked_reading( 'prior', @_ );
1975 return $self->reading( $answer );
8e1394aa 1976}
1977
4a8828f0 1978sub _find_linked_reading {
861c3e27 1979 my( $self, $direction, $node, $path ) = @_;
1980
1981 # Get a backup if we are dealing with a layered witness
1982 my $alt_path;
1983 my $aclabel = $self->ac_label;
1984 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1985 $alt_path = $1;
1986 }
1987
e2902068 1988 my @linked_paths = $direction eq 'next'
3a2ebbf4 1989 ? $self->sequence->edges_from( $node )
1990 : $self->sequence->edges_to( $node );
e2902068 1991 return undef unless scalar( @linked_paths );
8e1394aa 1992
e2902068 1993 # We have to find the linked path that contains all of the
1994 # witnesses supplied in $path.
1995 my( @path_wits, @alt_path_wits );
4e5a7b2c 1996 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1997 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1998 my $base_le;
1999 my $alt_le;
2000 foreach my $le ( @linked_paths ) {
3a2ebbf4 2001 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 2002 $base_le = $le;
910a0a6d 2003 }
508fd430 2004 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 2005 if( _is_within( \@path_wits, \@le_wits ) ) {
2006 # This is the right path.
2007 return $direction eq 'next' ? $le->[1] : $le->[0];
2008 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
2009 $alt_le = $le;
2010 }
8e1394aa 2011 }
e2902068 2012 # Got this far? Return the alternate path if it exists.
3a2ebbf4 2013 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 2014 if $alt_le;
e2902068 2015
2016 # Got this far? Return the base path if it exists.
3a2ebbf4 2017 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 2018 if $base_le;
e2902068 2019
2020 # Got this far? We have no appropriate path.
2c669bca 2021 warn "Could not find $direction node from " . $node->id
910a0a6d 2022 . " along path $path";
8e1394aa 2023 return undef;
2024}
2025
4a8828f0 2026# Some set logic.
2027sub _is_within {
2028 my( $set1, $set2 ) = @_;
7854e12e 2029 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 2030 foreach my $el ( @$set1 ) {
910a0a6d 2031 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 2032 }
2033 return $ret;
2034}
2035
4e5a7b2c 2036# Return the string that joins together a list of witnesses for
2037# display on a single path.
2038sub _witnesses_of_label {
2039 my( $self, $label ) = @_;
2040 my $regex = $self->wit_list_separator;
2041 my @answer = split( /\Q$regex\E/, $label );
2042 return @answer;
b0b4421a 2043}
2044
d4b75f44 2045=head2 common_readings
2046
2047Returns the list of common readings in the graph (i.e. those readings that are
2048shared by all non-lacunose witnesses.)
2049
2050=cut
2051
2052sub common_readings {
2053 my $self = shift;
2054 my @common = grep { $_->is_common } $self->readings;
2055 return @common;
2056}
2057
fae52efd 2058=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 2059
2060Returns the text of a witness (plus its backup, if we are using a layer)
2061as stored in the collation. The text is returned as a string, where the
2062individual readings are joined with spaces and the meta-readings (e.g.
2063lacunae) are omitted. Optional specification of $start and $end allows
2064the generation of a subset of the witness text.
4e5a7b2c 2065
b0b4421a 2066=cut
2067
2068sub path_text {
861c3e27 2069 my( $self, $wit, $start, $end ) = @_;
b0b4421a 2070 $start = $self->start unless $start;
2071 $end = $self->end unless $end;
861c3e27 2072 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 2073 my $pathtext = '';
2074 my $last;
2075 foreach my $r ( @path ) {
6ad2ce78 2076 unless ( $r->join_prior || !$last || $last->join_next ) {
2077 $pathtext .= ' ';
2078 }
2079 $pathtext .= $r->text;
629e27b0 2080 $last = $r;
2081 }
2082 return $pathtext;
b0b4421a 2083}
4e5a7b2c 2084
2085=head1 INITIALIZATION METHODS
2086
2087These are mostly for use by parsers.
2088
2089=head2 make_witness_path( $witness )
2090
2091Link the array of readings contained in $witness->path (and in
2092$witness->uncorrected_path if it exists) into collation paths.
2093Clear out the arrays when finished.
de51424a 2094
4e5a7b2c 2095=head2 make_witness_paths
2096
2097Call make_witness_path for all witnesses in the tradition.
2098
2099=cut
930ff666 2100
7e450e44 2101# For use when a collation is constructed from a base text and an apparatus.
2102# We have the sequences of readings and just need to add path edges.
1f7aa795 2103# When we are done, clear out the witness path attributes, as they are no
2104# longer needed.
2105# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 2106
6a222840 2107sub make_witness_paths {
2108 my( $self ) = @_;
910a0a6d 2109 foreach my $wit ( $self->tradition->witnesses ) {
6771a1b1 2110 # say STDERR "Making path for " . $wit->sigil;
910a0a6d 2111 $self->make_witness_path( $wit );
7854e12e 2112 }
7854e12e 2113}
2114
6a222840 2115sub make_witness_path {
7854e12e 2116 my( $self, $wit ) = @_;
2117 my @chain = @{$wit->path};
15d2d3df 2118 my $sig = $wit->sigil;
fae52efd 2119 # Add start and end if necessary
2120 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2121 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 2122 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 2123 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 2124 }
1f7aa795 2125 if( $wit->is_layered ) {
d9e873d0 2126 @chain = @{$wit->uncorrected_path};
fae52efd 2127 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
2128 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 2129 foreach my $idx( 0 .. $#chain-1 ) {
2130 my $source = $chain[$idx];
2131 my $target = $chain[$idx+1];
2132 $self->add_path( $source, $target, $sig.$self->ac_label )
2133 unless $self->has_path( $source, $target, $sig );
2134 }
15d2d3df 2135 }
1f7aa795 2136 $wit->clear_path;
2137 $wit->clear_uncorrected_path;
e2902068 2138}
2139
4e5a7b2c 2140=head2 calculate_ranks
2141
2142Calculate the reading ranks (that is, their aligned positions relative
2143to each other) for the graph. This can only be called on linear collations.
2144
b365fbae 2145=begin testing
2146
2147use Text::Tradition;
2148
2149my $cxfile = 't/data/Collatex-16.xml';
2150my $t = Text::Tradition->new(
2151 'name' => 'inline',
2152 'input' => 'CollateX',
2153 'file' => $cxfile,
2154 );
2155my $c = $t->collation;
2156
2157# Make an svg
bfcbcecb 2158my $table = $c->alignment_table;
2159ok( $c->has_cached_table, "Alignment table was cached" );
2160is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 2161$c->calculate_ranks;
bfcbcecb 2162is( $c->alignment_table, $table, "Cached table retained with no rank change" );
864ee4bf 2163$c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
2164is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
2165$c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
2166isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
b365fbae 2167
2168=end testing
2169
4e5a7b2c 2170=cut
2171
910a0a6d 2172sub calculate_ranks {
2173 my $self = shift;
b365fbae 2174 # Save the existing ranks, in case we need to invalidate the cached SVG.
8d9494a8 2175 throw( "Cannot calculate ranks on a non-linear graph" )
2176 unless $self->linear;
b365fbae 2177 my %existing_ranks;
ac4d7ac2 2178 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 2179
2180 # Do the rankings based on the relationship equivalence graph, starting
2181 # with the start node.
56772e8c 2182 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
2183
910a0a6d 2184 # Transfer our rankings from the topological graph to the real one.
2185 foreach my $r ( $self->readings ) {
cecbe56d 2186 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 2187 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 2188 } else {
63778331 2189 # Die. Find the last rank we calculated.
359944f7 2190 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
2191 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 2192 $self->readings;
2193 my $last = pop @all_defined;
2194 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 2195 }
de51424a 2196 }
bfcbcecb 2197 # Do we need to invalidate the cached data?
be3af600 2198 if( $self->has_cached_table ) {
b365fbae 2199 foreach my $r ( $self->readings ) {
7c293912 2200 next if defined( $existing_ranks{$r} )
2201 && $existing_ranks{$r} == $r->rank;
c1915ab9 2202 # Something has changed, so clear the cache
bfcbcecb 2203 $self->_clear_cache;
c1915ab9 2204 # ...and recalculate the common readings.
2205 $self->calculate_common_readings();
b365fbae 2206 last;
2207 }
2208 }
c1915ab9 2209 # The graph calculation information is now up to date.
2210 $self->_graphcalc_done(1);
8e1394aa 2211}
3a1f2523 2212
c1915ab9 2213sub _clear_cache {
2214 my $self = shift;
c1915ab9 2215 $self->wipe_table if $self->has_cached_table;
2216}
2217
2218
4e5a7b2c 2219=head2 flatten_ranks
2220
2221A convenience method for parsing collation data. Searches the graph for readings
2222with the same text at the same rank, and merges any that are found.
2223
2224=cut
2225
0e476982 2226sub flatten_ranks {
4ef65ab4 2227 my ( $self, %args ) = shift;
0e476982 2228 my %unique_rank_rdg;
bf6e338d 2229 my $changed;
4ef65ab4 2230 foreach my $p ( $self->identical_readings( %args ) ) {
2231 # say STDERR "Combining readings at same rank: @$p";
2232 $changed = 1;
2233 $self->merge_readings( @$p );
2234 # TODO see if this now makes a common point.
7a0956c1 2235 }
2236 # If we merged readings, the ranks are still fine but the alignment
2237 # table is wrong. Wipe it.
2238 $self->wipe_table() if $changed;
2239}
2240
2241=head2 identical_readings
2242=head2 identical_readings( start => $startnode, end => $endnode )
2243=head2 identical_readings( startrank => $startrank, endrank => $endrank )
2244
2245Goes through the graph identifying all pairs of readings that appear to be
2246identical, and therefore able to be merged into a single reading. Returns the
2247relevant identical pairs. Can be restricted to run over only a part of the
2248graph, specified either by node or by rank.
2249
2250=cut
2251
2252sub identical_readings {
2253 my ( $self, %args ) = @_;
2254 # Find where we should start and end.
2255 my $startrank = $args{startrank} || 0;
2256 if( $args{start} ) {
2257 throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
2258 && $self->reading( $args{start} )->has_rank;
2259 $startrank = $self->reading( $args{start} )->rank;
2260 }
2261 my $endrank = $args{endrank} || $self->end->rank;
2262 if( $args{end} ) {
2263 throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
2264 && $self->reading( $args{end} )->has_rank;
3c234eb6 2265 $endrank = $self->reading( $args{end} )->rank;
7a0956c1 2266 }
2267
2268 # Make sure the ranks are correct.
2269 unless( $self->_graphcalc_done ) {
2270 $self->calculate_ranks;
2271 }
2272 # Go through the readings looking for duplicates.
2273 my %unique_rank_rdg;
2274 my @pairs;
0e476982 2275 foreach my $rdg ( $self->readings ) {
2276 next unless $rdg->has_rank;
7a0956c1 2277 my $rk = $rdg->rank;
2278 next if $rk > $endrank || $rk < $startrank;
2279 my $key = $rk . "||" . $rdg->text;
0e476982 2280 if( exists $unique_rank_rdg{$key} ) {
07e6765f 2281 # Make sure they don't have different grammatical forms
2282 my $ur = $unique_rank_rdg{$key};
a445ce40 2283 if( $rdg->is_identical( $ur ) ) {
7a0956c1 2284 push( @pairs, [ $ur, $rdg ] );
07e6765f 2285 }
0e476982 2286 } else {
2287 $unique_rank_rdg{$key} = $rdg;
2288 }
7a0956c1 2289 }
2290
2291 return @pairs;
0e476982 2292}
4633f9e4 2293
2294
d4b75f44 2295=head2 calculate_common_readings
2296
2297Goes through the graph identifying the readings that appear in every witness
2298(apart from those with lacunae at that spot.) Marks them as common and returns
2299the list.
2300
2301=begin testing
2302
2303use Text::Tradition;
2304
2305my $cxfile = 't/data/Collatex-16.xml';
2306my $t = Text::Tradition->new(
2307 'name' => 'inline',
2308 'input' => 'CollateX',
2309 'file' => $cxfile,
2310 );
2311my $c = $t->collation;
2312
2313my @common = $c->calculate_common_readings();
2314is( scalar @common, 8, "Found correct number of common readings" );
2315my @marked = sort $c->common_readings();
2316is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 2317my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 2318is_deeply( \@marked, \@expected, "Found correct list of common readings" );
2319
2320=end testing
2321
2322=cut
2323
2324sub calculate_common_readings {
2325 my $self = shift;
2326 my @common;
c1915ab9 2327 map { $_->is_common( 0 ) } $self->readings;
2328 # Implicitly calls calculate_ranks
1dd07bda 2329 my $table = $self->alignment_table;
d4b75f44 2330 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 2331 my @row = map { $_->{'tokens'}->[$idx]
2332 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
2333 @{$table->{'alignment'}};
d4b75f44 2334 my %hash;
2335 foreach my $r ( @row ) {
2336 if( $r ) {
2337 $hash{$r->id} = $r unless $r->is_meta;
2338 } else {
2339 $hash{'UNDEF'} = $r;
2340 }
2341 }
2342 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
2343 my( $r ) = values %hash;
2344 $r->is_common( 1 );
2345 push( @common, $r );
2346 }
2347 }
2348 return @common;
2349}
2350
861c3e27 2351=head2 text_from_paths
2352
2353Calculate the text array for all witnesses from the path, for later consistency
2354checking. Only to be used if there is no non-graph-based way to know the
2355original texts.
2356
2357=cut
2358
2359sub text_from_paths {
2360 my $self = shift;
2361 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 2362 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
2363 my @text;
2364 foreach my $r ( @readings ) {
2365 next if $r->is_meta;
2366 push( @text, $r->text );
2367 }
861c3e27 2368 $wit->text( \@text );
2369 if( $wit->is_layered ) {
5164a6f0 2370 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
2371 $wit->sigil.$self->ac_label );
2372 my @uctext;
2373 foreach my $r ( @ucrdgs ) {
2374 next if $r->is_meta;
2375 push( @uctext, $r->text );
2376 }
2377 $wit->layertext( \@uctext );
861c3e27 2378 }
2379 }
2380}
0e476982 2381
4e5a7b2c 2382=head1 UTILITY FUNCTIONS
2383
2384=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 2385
4e5a7b2c 2386Find the last reading that occurs in sequence before both the given readings.
414cc046 2387At the very least this should be $self->start.
4e5a7b2c 2388
2389=head2 common_successor( $reading_a, $reading_b )
2390
2391Find the first reading that occurs in sequence after both the given readings.
414cc046 2392At the very least this should be $self->end.
4e5a7b2c 2393
22222af9 2394=begin testing
2395
2396use Text::Tradition;
2397
2398my $cxfile = 't/data/Collatex-16.xml';
2399my $t = Text::Tradition->new(
2400 'name' => 'inline',
2401 'input' => 'CollateX',
2402 'file' => $cxfile,
2403 );
2404my $c = $t->collation;
2405
679f17e1 2406is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 2407 'n20', "Found correct common predecessor" );
679f17e1 2408is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 2409 '__END__', "Found correct common successor" );
22222af9 2410
4e5a7b2c 2411is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 2412 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 2413is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 2414 '__END__', "Found correct common successor for readings on same path" );
22222af9 2415
2416=end testing
2417
2418=cut
2419
2420## Return the closest reading that is a predecessor of both the given readings.
2421sub common_predecessor {
2422 my $self = shift;
4e5a7b2c 2423 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 2424 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 2425}
2426
2427sub common_successor {
2428 my $self = shift;
4e5a7b2c 2429 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 2430 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 2431}
2432
414cc046 2433
2434# TODO think about how to do this without ranks...
027d819c 2435sub _common_in_path {
22222af9 2436 my( $self, $r1, $r2, $dir ) = @_;
414cc046 2437 my $iter = $self->end->rank;
22222af9 2438 my @candidates;
414cc046 2439 my @last_r1 = ( $r1 );
2440 my @last_r2 = ( $r2 );
2441 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 2442 my %all_seen;
6771a1b1 2443 # say STDERR "Finding common $dir for $r1, $r2";
22222af9 2444 while( !@candidates ) {
414cc046 2445 last unless $iter--; # Avoid looping infinitely
2446 # Iterate separately down the graph from r1 and r2
2447 my( @new_lc1, @new_lc2 );
2448 foreach my $lc ( @last_r1 ) {
2449 foreach my $p ( $lc->$dir ) {
2450 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
6771a1b1 2451 # say STDERR "Path candidate $p from $lc";
414cc046 2452 push( @candidates, $p );
002e3600 2453 } elsif( !$all_seen{$p->id} ) {
414cc046 2454 $all_seen{$p->id} = 'r1';
2455 push( @new_lc1, $p );
2456 }
2457 }
2458 }
2459 foreach my $lc ( @last_r2 ) {
22222af9 2460 foreach my $p ( $lc->$dir ) {
414cc046 2461 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
6771a1b1 2462 # say STDERR "Path candidate $p from $lc";
22222af9 2463 push( @candidates, $p );
002e3600 2464 } elsif( !$all_seen{$p->id} ) {
414cc046 2465 $all_seen{$p->id} = 'r2';
2466 push( @new_lc2, $p );
22222af9 2467 }
2468 }
2469 }
414cc046 2470 @last_r1 = @new_lc1;
2471 @last_r2 = @new_lc2;
22222af9 2472 }
2473 my @answer = sort { $a->rank <=> $b->rank } @candidates;
2474 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
2475}
2476
63778331 2477sub throw {
2478 Text::Tradition::Error->throw(
2479 'ident' => 'Collation error',
2480 'message' => $_[0],
2481 );
2482}
2483
dd3b58b0 2484no Moose;
2485__PACKAGE__->meta->make_immutable;
e867486f 2486
a445ce40 2487=head1 BUGS/TODO
2488
2489=over
2490
2491=item * Rework XML serialization in a more modular way
2492
2493=back
2494
027d819c 2495=head1 LICENSE
e867486f 2496
027d819c 2497This package is free software and is provided "as is" without express
2498or implied warranty. You can redistribute it and/or modify it under
2499the same terms as Perl itself.
e867486f 2500
027d819c 2501=head1 AUTHOR
e867486f 2502
027d819c 2503Tara L Andrews E<lt>aurum@cpan.orgE<gt>