start to add a proper and extensible relationship typology
[scpubgit/stemmatology.git] / base / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
6771a1b1 3use feature 'say';
910a0a6d 4use Encode qw( decode_utf8 );
5use File::Temp;
bfcbcecb 6use File::Which;
c9bf3dbf 7use Graph;
8e1394aa 8use IPC::Run qw( run binary );
82fa4d57 9use Text::CSV;
5c0072ef 10use Text::Tradition::Collation::Data;
b15511bf 11use Text::Tradition::Collation::Reading;
22222af9 12use Text::Tradition::Collation::RelationshipStore;
63778331 13use Text::Tradition::Error;
cc31ebaa 14use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
5c0072ef 15use XML::LibXML;
16use XML::LibXML::XPathContext;
dd3b58b0 17use Moose;
18
5c0072ef 19has _data => (
20 isa => 'Text::Tradition::Collation::Data',
21 is => 'ro',
22 required => 1,
23 handles => [ qw(
24 sequence
25 paths
26 _set_relations
27 relations
28 _set_start
29 _set_end
30 ac_label
31 has_cached_table
32 relationships
33 related_readings
34 get_relationship
35 del_relationship
36 equivalence
37 equivalence_graph
38 readings
39 reading
40 _add_reading
41 del_reading
42 has_reading
43 wit_list_separator
44 baselabel
45 linear
46 wordsep
47 start
48 end
49 cached_table
50 _graphcalc_done
51 has_cached_svg
52 wipe_table
53 )]
54);
dd3b58b0 55
3a2ebbf4 56has 'tradition' => (
57 is => 'ro',
d047cd52 58 isa => 'Text::Tradition',
8cfd99c4 59 writer => '_set_tradition',
8d9a1cd8 60 weak_ref => 1,
d047cd52 61 );
dd3b58b0 62
4e5a7b2c 63=head1 NAME
64
65Text::Tradition::Collation - a software model for a text collation
66
67=head1 SYNOPSIS
68
69 use Text::Tradition;
70 my $t = Text::Tradition->new(
71 'name' => 'this is a text',
72 'input' => 'TEI',
73 'file' => '/path/to/tei_parallel_seg_file.xml' );
74
75 my $c = $t->collation;
76 my @readings = $c->readings;
77 my @paths = $c->paths;
78 my @relationships = $c->relationships;
79
80 my $svg_variant_graph = $t->collation->as_svg();
81
82=head1 DESCRIPTION
83
84Text::Tradition is a library for representation and analysis of collated
85texts, particularly medieval ones. The Collation is the central feature of
86a Tradition, where the text, its sequence of readings, and its relationships
87between readings are actually kept.
88
89=head1 CONSTRUCTOR
90
91=head2 new
92
93The constructor. Takes a hash or hashref of the following arguments:
94
95=over
96
97=item * tradition - The Text::Tradition object to which the collation
98belongs. Required.
99
100=item * linear - Whether the collation should be linear; that is, whether
101transposed readings should be treated as two linked readings rather than one,
102and therefore whether the collation graph is acyclic. Defaults to true.
103
4e5a7b2c 104=item * baselabel - The default label for the path taken by a base text
105(if any). Defaults to 'base text'.
106
107=item * wit_list_separator - The string to join a list of witnesses for
108purposes of making labels in display graphs. Defaults to ', '.
109
110=item * ac_label - The extra label to tack onto a witness sigil when
111representing another layer of path for the given witness - that is, when
112a text has more than one possible reading due to scribal corrections or
113the like. Defaults to ' (a.c.)'.
114
4e483aa5 115=item * wordsep - The string used to separate words in the original text.
116Defaults to ' '.
117
4e5a7b2c 118=back
119
120=head1 ACCESSORS
121
122=head2 tradition
123
124=head2 linear
125
4e5a7b2c 126=head2 wit_list_separator
127
128=head2 baselabel
129
130=head2 ac_label
131
4e483aa5 132=head2 wordsep
133
4e5a7b2c 134Simple accessors for collation attributes.
135
136=head2 start
137
138The meta-reading at the start of every witness path.
139
140=head2 end
141
142The meta-reading at the end of every witness path.
143
144=head2 readings
145
146Returns all Reading objects in the graph.
147
148=head2 reading( $id )
149
150Returns the Reading object corresponding to the given ID.
151
152=head2 add_reading( $reading_args )
153
154Adds a new reading object to the collation.
155See L<Text::Tradition::Collation::Reading> for the available arguments.
156
157=head2 del_reading( $object_or_id )
158
159Removes the given reading from the collation, implicitly removing its
160paths and relationships.
161
4e483aa5 162=head2 merge_readings( $main, $second, $concatenate, $with_str )
163
164Merges the $second reading into the $main one. If $concatenate is true, then
165the merged node will carry the text of both readings, concatenated with either
166$with_str (if specified) or a sensible default (the empty string if the
167appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 168
4e483aa5 169The first two arguments may be either readings or reading IDs.
4e5a7b2c 170
171=head2 has_reading( $id )
172
173Predicate to see whether a given reading ID is in the graph.
174
175=head2 reading_witnesses( $object_or_id )
176
177Returns a list of sigils whose witnesses contain the reading.
178
179=head2 paths
180
181Returns all reading paths within the document - that is, all edges in the
182collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
183
184=head2 add_path( $source, $target, $sigil )
185
186Links the given readings in the collation in sequence, under the given witness
187sigil. The readings may be specified by object or ID.
188
189=head2 del_path( $source, $target, $sigil )
190
191Links the given readings in the collation in sequence, under the given witness
192sigil. The readings may be specified by object or ID.
193
194=head2 has_path( $source, $target );
195
196Returns true if the two readings are linked in sequence in any witness.
197The readings may be specified by object or ID.
198
199=head2 relationships
200
201Returns all Relationship objects in the collation.
202
203=head2 add_relationship( $reading, $other_reading, $options )
204
205Adds a new relationship of the type given in $options between the two readings,
206which may be specified by object or ID. Returns a value of ( $status, @vectors)
207where $status is true on success, and @vectors is a list of relationship edges
208that were ultimately added.
209See L<Text::Tradition::Collation::Relationship> for the available options.
210
211=cut
dd3b58b0 212
5c0072ef 213sub BUILDARGS {
214 my ( $class, @args ) = @_;
215 my %args = @args == 1 ? %{ $args[0] } : @args;
216 # TODO determine these from the Moose::Meta object
217 my @delegate_attrs = qw(sequence relations readings wit_list_separator baselabel
218 linear wordsep start end cached_table _graphcalc_done);
219 my %data_args;
220 for my $attr (@delegate_attrs) {
221 $data_args{$attr} = delete $args{$attr} if exists $args{$attr};
222 }
223 $args{_data} = Text::Tradition::Collation::Data->new(%data_args);
224 return \%args;
225}
226
d047cd52 227sub BUILD {
3a2ebbf4 228 my $self = shift;
22222af9 229 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
e4b73942 230 $self->_set_start( $self->add_reading(
231 { 'collation' => $self, 'is_start' => 1, 'init' => 1 } ) );
232 $self->_set_end( $self->add_reading(
233 { 'collation' => $self, 'is_end' => 1, 'init' => 1 } ) );
d047cd52 234}
784877d9 235
24efa55d 236sub register_relationship_type {
237 my $self = shift;
238 my %args = @_ == 1 ? %{$_[0]} : @_;
239 if( $self->relations->has_type( $args{name} ) ) {
240 throw( 'Relationship type ' . $args{name} . ' already registered' );
241 }
242 $self->relations->add_type( %args );
243}
244
3a2ebbf4 245### Reading construct/destruct functions
246
247sub add_reading {
248 my( $self, $reading ) = @_;
249 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
250 my %args = %$reading;
e4b73942 251 if( $args{'init'} ) {
252 # If we are initializing an empty collation, don't assume that we
253 # have set a tradition.
254 delete $args{'init'};
8943ff68 255 } elsif( $self->tradition->can('language') && $self->tradition->has_language
256 && !exists $args{'language'} ) {
6ad2ce78 257 $args{'language'} = $self->tradition->language;
258 }
3a2ebbf4 259 $reading = Text::Tradition::Collation::Reading->new(
260 'collation' => $self,
261 %args );
262 }
263 # First check to see if a reading with this ID exists.
264 if( $self->reading( $reading->id ) ) {
63778331 265 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 266 }
c1915ab9 267 $self->_graphcalc_done(0);
3a2ebbf4 268 $self->_add_reading( $reading->id => $reading );
269 # Once the reading has been added, put it in both graphs.
270 $self->sequence->add_vertex( $reading->id );
cecbe56d 271 $self->relations->add_reading( $reading->id );
3a2ebbf4 272 return $reading;
eca16057 273};
274
3a2ebbf4 275around del_reading => sub {
276 my $orig = shift;
277 my $self = shift;
278 my $arg = shift;
279
cecbe56d 280 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
281 $arg = $arg->id;
3a2ebbf4 282 }
3a2ebbf4 283 # Remove the reading from the graphs.
c1915ab9 284 $self->_graphcalc_done(0);
4e483aa5 285 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 286 $self->sequence->delete_vertex( $arg );
287 $self->relations->delete_reading( $arg );
3a2ebbf4 288
289 # Carry on.
cecbe56d 290 $self->$orig( $arg );
3a2ebbf4 291};
7854e12e 292
4e483aa5 293=begin testing
294
295use Text::Tradition;
296
297my $cxfile = 't/data/Collatex-16.xml';
298my $t = Text::Tradition->new(
299 'name' => 'inline',
300 'input' => 'CollateX',
301 'file' => $cxfile,
302 );
303my $c = $t->collation;
304
305my $rno = scalar $c->readings;
306# Split n21 for testing purposes
307my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
308my $old_r = $c->reading( 'n21' );
309$old_r->alter_text( 'to' );
310$c->del_path( 'n20', 'n21', 'A' );
311$c->add_path( 'n20', 'n21p0', 'A' );
312$c->add_path( 'n21p0', 'n21', 'A' );
313$c->flatten_ranks();
314ok( $c->reading( 'n21p0' ), "New reading exists" );
315is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
316
679f17e1 317# Combine n3 and n4 ( with his )
4e483aa5 318$c->merge_readings( 'n3', 'n4', 1 );
319ok( !$c->reading('n4'), "Reading n4 is gone" );
320is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
321
679f17e1 322# Collapse n9 and n10 ( rood / root )
323$c->merge_readings( 'n9', 'n10' );
324ok( !$c->reading('n10'), "Reading n10 is gone" );
325is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 326
327# Combine n21 and n21p0
328my $remaining = $c->reading('n21');
329$remaining ||= $c->reading('n22'); # one of these should still exist
330$c->merge_readings( 'n21p0', $remaining, 1 );
331ok( !$c->reading('n21'), "Reading $remaining is gone" );
332is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
333
334=end testing
335
336=cut
7854e12e 337
3a2ebbf4 338sub merge_readings {
339 my $self = shift;
340
56772e8c 341 # Sanity check
342 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
343 my $mergemeta = $kept_obj->is_meta;
344 throw( "Cannot merge meta and non-meta reading" )
345 unless ( $mergemeta && $del_obj->is_meta )
346 || ( !$mergemeta && !$del_obj->is_meta );
347 if( $mergemeta ) {
348 throw( "Cannot merge with start or end node" )
349 if( $kept_obj eq $self->start || $kept_obj eq $self->end
350 || $del_obj eq $self->start || $del_obj eq $self->end );
a445ce40 351 throw( "Cannot combine text of meta readings" ) if $combine;
56772e8c 352 }
3a2ebbf4 353 # We only need the IDs for adding paths to the graph, not the reading
354 # objects themselves.
56772e8c 355 my $kept = $kept_obj->id;
356 my $deleted = $del_obj->id;
c1915ab9 357 $self->_graphcalc_done(0);
10943ab0 358
3a2ebbf4 359 # The kept reading should inherit the paths and the relationships
360 # of the deleted reading.
361 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
362 my @vector = ( $kept );
363 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
364 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 365 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 366 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
367 $self->sequence->add_edge( @vector );
368 my $fwits = $self->sequence->get_edge_attributes( @vector );
369 @wits{keys %$fwits} = values %$fwits;
370 $self->sequence->set_edge_attributes( @vector, \%wits );
371 }
cecbe56d 372 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 373
374 # Do the deletion deed.
4e483aa5 375 if( $combine ) {
869a1ada 376 # Combine the text of the readings
4e483aa5 377 my $joinstr = $combine_char;
378 unless( defined $joinstr ) {
379 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
380 $joinstr = $self->wordsep unless defined $joinstr;
381 }
a445ce40 382 $kept_obj->_combine( $del_obj, $joinstr );
49d4f2ac 383 }
3a2ebbf4 384 $self->del_reading( $deleted );
385}
7854e12e 386
6771a1b1 387=head2 compress_readings
388
389Where possible in the graph, compresses plain sequences of readings into a
390single reading. The sequences must consist of readings with no
391relationships to other readings, with only a single witness path between
392them and no other witness paths from either that would skip the other. The
393readings must also not be marked as nonsense or bad grammar.
394
395WARNING: This operation cannot be undone.
396
397=cut
398
399sub compress_readings {
400 my $self = shift;
401 # Anywhere in the graph that there is a reading that joins only to a single
402 # successor, and neither of these have any relationships, just join the two
403 # readings.
6771a1b1 404 foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
428bcf0b 405 # Now look for readings that can be joined to their successors.
a445ce40 406 next unless $rdg->is_combinable;
6771a1b1 407 my %seen;
408 while( $self->sequence->successors( $rdg ) == 1 ) {
409 my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
410 throw( "Infinite loop" ) if $seen{$next->id};
411 $seen{$next->id} = 1;
412 last if $self->sequence->predecessors( $next ) > 1;
a445ce40 413 last unless $next->is_combinable;
6771a1b1 414 say "Joining readings $rdg and $next";
415 $self->merge_readings( $rdg, $next, 1 );
416 }
417 }
418 # Make sure we haven't screwed anything up
419 foreach my $wit ( $self->tradition->witnesses ) {
420 my $pathtext = $self->path_text( $wit->sigil );
421 my $origtext = join( ' ', @{$wit->text} );
422 throw( "Text differs for witness " . $wit->sigil )
423 unless $pathtext eq $origtext;
424 if( $wit->is_layered ) {
425 $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
426 $origtext = join( ' ', @{$wit->layertext} );
427 throw( "Ante-corr text differs for witness " . $wit->sigil )
428 unless $pathtext eq $origtext;
429 }
430 }
431
432 $self->relations->rebuild_equivalence();
433 $self->calculate_ranks();
434}
3265b0ce 435
3a2ebbf4 436# Helper function for manipulating the graph.
437sub _stringify_args {
4e483aa5 438 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 439 $first = $first->id
440 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
441 $second = $second->id
442 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 443 return( $first, $second, @args );
3a2ebbf4 444}
df6d9812 445
4e5a7b2c 446# Helper function for manipulating the graph.
447sub _objectify_args {
448 my( $self, $first, $second, $arg ) = @_;
449 $first = $self->reading( $first )
450 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
451 $second = $self->reading( $second )
452 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
453 return( $first, $second, $arg );
454}
3a2ebbf4 455### Path logic
456
457sub add_path {
458 my $self = shift;
459
460 # We only need the IDs for adding paths to the graph, not the reading
461 # objects themselves.
cecbe56d 462 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 463
c1915ab9 464 $self->_graphcalc_done(0);
3a2ebbf4 465 # Connect the readings
359944f7 466 unless( $self->sequence->has_edge( $source, $target ) ) {
467 $self->sequence->add_edge( $source, $target );
468 $self->relations->add_equivalence_edge( $source, $target );
469 }
3a2ebbf4 470 # Note the witness in question
471 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 472}
b15511bf 473
3a2ebbf4 474sub del_path {
475 my $self = shift;
49d4f2ac 476 my @args;
477 if( ref( $_[0] ) eq 'ARRAY' ) {
478 my $e = shift @_;
479 @args = ( @$e, @_ );
480 } else {
481 @args = @_;
482 }
3a2ebbf4 483
484 # We only need the IDs for adding paths to the graph, not the reading
485 # objects themselves.
49d4f2ac 486 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 487
c1915ab9 488 $self->_graphcalc_done(0);
3a2ebbf4 489 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 490 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 491 }
492 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
493 $self->sequence->delete_edge( $source, $target );
359944f7 494 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 495 }
784877d9 496}
497
3a2ebbf4 498
15d2d3df 499# Extra graph-alike utility
500sub has_path {
3a2ebbf4 501 my $self = shift;
502 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
503 return undef unless $self->sequence->has_edge( $source, $target );
504 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 505}
506
4e5a7b2c 507=head2 clear_witness( @sigil_list )
3a2ebbf4 508
4e5a7b2c 509Clear the given witnesses out of the collation entirely, removing references
510to them in paths, and removing readings that belong only to them. Should only
511be called via $tradition->del_witness.
3a2ebbf4 512
513=cut
514
4e5a7b2c 515sub clear_witness {
516 my( $self, @sigils ) = @_;
517
c1915ab9 518 $self->_graphcalc_done(0);
4e5a7b2c 519 # Clear the witness(es) out of the paths
520 foreach my $e ( $self->paths ) {
521 foreach my $sig ( @sigils ) {
522 $self->del_path( $e, $sig );
523 }
524 }
525
526 # Clear out the newly unused readings
527 foreach my $r ( $self->readings ) {
528 unless( $self->reading_witnesses( $r ) ) {
529 $self->del_reading( $r );
530 }
531 }
532}
3a2ebbf4 533
534sub add_relationship {
535 my $self = shift;
22222af9 536 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 537 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 538 $self->_graphcalc_done(0);
63778331 539 return @vectors;
22222af9 540}
ef9d481f 541
ca6e6095 542around qw/ get_relationship del_relationship / => sub {
543 my $orig = shift;
544 my $self = shift;
545 my @args = @_;
546 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
547 @args = @{$_[0]};
548 }
549 my( $source, $target ) = $self->_stringify_args( @args );
550 $self->$orig( $source, $target );
551};
552
22222af9 553=head2 reading_witnesses( $reading )
910a0a6d 554
22222af9 555Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 556
22222af9 557=cut
1d310495 558
1d310495 559sub reading_witnesses {
560 my( $self, $reading ) = @_;
561 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 562 # arbitrarily chosen "incoming". Thus, special-case the start node.
563 if( $reading eq $self->start ) {
bed6ce83 564 return map { $_->sigil } grep { $_->is_collated } $self->tradition->witnesses;
96dc90ec 565 }
1d310495 566 my %all_witnesses;
567 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
568 my $wits = $self->sequence->get_edge_attributes( @$e );
569 @all_witnesses{ keys %$wits } = 1;
570 }
c12bb878 571 my $acstr = $self->ac_label;
572 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
573 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
574 }
1d310495 575 return keys %all_witnesses;
910a0a6d 576}
577
4e5a7b2c 578=head1 OUTPUT METHODS
8e1394aa 579
0ecb975c 580=head2 as_svg( \%options )
8e1394aa 581
0068967c 582Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 583See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 584
585=cut
586
587sub as_svg {
0ecb975c 588 my( $self, $opts ) = @_;
bfcbcecb 589 throw( "Need GraphViz installed to output SVG" )
590 unless File::Which::which( 'dot' );
e247aad1 591 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 592 $self->calculate_ranks()
593 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
be3af600 594 my @cmd = qw/dot -Tsvg/;
595 my( $svg, $err );
596 my $dotfile = File::Temp->new();
597 ## USE FOR DEBUGGING
598 # $dotfile->unlink_on_destroy(0);
599 binmode $dotfile, ':utf8';
600 print $dotfile $self->as_dot( $opts );
601 push( @cmd, $dotfile->filename );
602 run( \@cmd, ">", binary(), \$svg );
603 $svg = decode_utf8( $svg );
604 return $svg;
8e1394aa 605}
606
b22576c6 607
0ecb975c 608=head2 as_dot( \%options )
b22576c6 609
0ecb975c 610Returns a string that is the collation graph expressed in dot
611(i.e. GraphViz) format. Options include:
b22576c6 612
0ecb975c 613=over 4
b22576c6 614
0ecb975c 615=item * from
b22576c6 616
0ecb975c 617=item * to
df6d9812 618
0ecb975c 619=item * color_common
620
621=back
df6d9812 622
623=cut
624
625sub as_dot {
0ecb975c 626 my( $self, $opts ) = @_;
627 my $startrank = $opts->{'from'} if $opts;
628 my $endrank = $opts->{'to'} if $opts;
629 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 630 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
631 && $self->end->rank > 100;
6648ee3d 632 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 633
b22576c6 634 # Check the arguments
635 if( $startrank ) {
636 return if $endrank && $startrank > $endrank;
637 return if $startrank > $self->end->rank;
638 }
639 if( defined $endrank ) {
640 return if $endrank < 0;
f1b3b33a 641 $endrank = undef if $endrank == $self->end->rank;
b22576c6 642 }
643
67da8d6c 644 my $graph_name = $self->tradition->name;
645 $graph_name =~ s/[^\w\s]//g;
646 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 647
648 my %graph_attrs = (
649 'rankdir' => 'LR',
650 'bgcolor' => 'none',
651 );
652 my %node_attrs = (
b8990398 653 'fontsize' => 14,
f13b5582 654 'fillcolor' => 'white',
655 'style' => 'filled',
656 'shape' => 'ellipse'
657 );
658 my %edge_attrs = (
659 'arrowhead' => 'open',
660 'color' => '#000000',
661 'fontcolor' => '#000000',
662 );
663
67da8d6c 664 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 665 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
666 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 667
b22576c6 668 # Output substitute start/end readings if necessary
669 if( $startrank ) {
43c94341 670 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 671 }
672 if( $endrank ) {
43c94341 673 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 674 }
b365fbae 675 if( $STRAIGHTENHACK ) {
676 ## HACK part 1
43c94341 677 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
678 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 679 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
680 }
b22576c6 681 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 682 # Sort the readings by rank if we have ranks; this speeds layout.
683 my @all_readings = $self->end->has_rank
684 ? sort { $a->rank <=> $b->rank } $self->readings
685 : $self->readings;
4633f9e4 686 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 687 foreach my $reading ( @all_readings ) {
b22576c6 688 # Only output readings within our rank range.
689 next if $startrank && $reading->rank < $startrank;
690 next if $endrank && $reading->rank > $endrank;
691 $used{$reading->id} = 1;
910a0a6d 692 # Need not output nodes without separate labels
3a2ebbf4 693 next if $reading->id eq $reading->text;
d4b75f44 694 my $rattrs;
30f0df34 695 my $label = $reading->text;
629e27b0 696 $label .= '-' if $reading->join_next;
697 $label = "-$label" if $reading->join_prior;
8f9cab7b 698 $label =~ s/\"/\\\"/g;
d4b75f44 699 $rattrs->{'label'} = $label;
10e4b1ac 700 $rattrs->{'id'} = $reading->id;
0ecb975c 701 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 702 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 703 }
3a2ebbf4 704
30ddc24c 705 # Add the real edges. Need to weight one edge per rank jump, in a
706 # continuous line.
b365fbae 707 # my $weighted = $self->_add_edge_weights;
b22576c6 708 my @edges = $self->paths;
3bdec618 709 my( %substart, %subend );
b22576c6 710 foreach my $edge ( @edges ) {
711 # Do we need to output this edge?
508fd430 712 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 713 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 714 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 715
b22576c6 716 # Account for the rank gap if necessary
30ddc24c 717 my $rank0 = $self->reading( $edge->[0] )->rank
718 if $self->reading( $edge->[0] )->has_rank;
719 my $rank1 = $self->reading( $edge->[1] )->rank
720 if $self->reading( $edge->[1] )->has_rank;
721 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
722 $variables->{'minlen'} = $rank1 - $rank0;
723 }
724
725 # Add the calculated edge weights
b365fbae 726 # if( exists $weighted->{$edge->[0]}
e247aad1 727 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
728 # # $variables->{'color'} = 'red';
729 # $variables->{'weight'} = 3.0;
730 # }
30ddc24c 731
508fd430 732 # EXPERIMENTAL: make edge width reflect no. of witnesses
733 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
734 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
735
f13b5582 736 my $varopts = _dot_attr_string( $variables );
737 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
738 $edge->[0], $edge->[1], $varopts );
3bdec618 739 } elsif( $used{$edge->[0]} ) {
96ba0418 740 $subend{$edge->[0]} = $edge->[1];
3bdec618 741 } elsif( $used{$edge->[1]} ) {
96ba0418 742 $substart{$edge->[1]} = $edge->[0];
b22576c6 743 }
df6d9812 744 }
bed6ce83 745
746 # If we are asked to, add relationship links
747 if( exists $opts->{show_relations} ) {
748 my $filter = $opts->{show_relations}; # can be 'transposition' or 'all'
749 if( $filter eq 'transposition' ) {
750 $filter =~ qr/^transposition$/;
751 }
752 foreach my $redge ( $self->relationships ) {
753 if( $used{$redge->[0]} && $used{$redge->[1]} ) {
754 if( $filter ne 'all' ) {
755 my $rel = $self->get_relationship( $redge );
756 next unless $rel->type =~ /$filter/;
757 my $variables = {
758 arrowhead => 'none',
759 color => '#FFA14F',
760 constraint => 'false',
761 label => uc( substr( $rel->type, 0, 4 ) ),
762 penwidth => '3',
763 };
764 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
765 $redge->[0], $redge->[1], _dot_attr_string( $variables ) );
766 }
767 }
768 }
769 }
770
3bdec618 771 # Add substitute start and end edges if necessary
772 foreach my $node ( keys %substart ) {
96ba0418 773 my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
f13b5582 774 my $variables = { %edge_attrs, 'label' => $witstr };
96ba0418 775 my $nrdg = $self->reading( $node );
776 if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
777 # Substart is actually one lower than $startrank
778 $variables->{'minlen'} = $nrdg->rank - ( $startrank - 1 );
779 }
f13b5582 780 my $varopts = _dot_attr_string( $variables );
96ba0418 781 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
3bdec618 782 }
783 foreach my $node ( keys %subend ) {
96ba0418 784 my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
f13b5582 785 my $variables = { %edge_attrs, 'label' => $witstr };
786 my $varopts = _dot_attr_string( $variables );
96ba0418 787 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
3bdec618 788 }
b365fbae 789 # HACK part 2
790 if( $STRAIGHTENHACK ) {
43c94341 791 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
792 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 793 }
30ddc24c 794
df6d9812 795 $dot .= "}\n";
796 return $dot;
797}
798
f13b5582 799sub _dot_attr_string {
800 my( $hash ) = @_;
801 my @attrs;
802 foreach my $k ( sort keys %$hash ) {
803 my $v = $hash->{$k};
804 push( @attrs, $k.'="'.$v.'"' );
805 }
806 return( '[ ' . join( ', ', @attrs ) . ' ]' );
807}
808
30ddc24c 809sub _add_edge_weights {
810 my $self = shift;
811 # Walk the graph from START to END, choosing the successor node with
812 # the largest number of witness paths each time.
813 my $weighted = {};
814 my $curr = $self->start->id;
008fc8a6 815 my $ranked = $self->end->has_rank;
30ddc24c 816 while( $curr ne $self->end->id ) {
008fc8a6 817 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 818 my @succ = sort { $self->path_witnesses( $curr, $a )
819 <=> $self->path_witnesses( $curr, $b ) }
820 $self->sequence->successors( $curr );
821 my $next = pop @succ;
008fc8a6 822 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 823 # Try to avoid lacunae in the weighted path.
008fc8a6 824 while( @succ &&
825 ( $self->reading( $next )->is_lacuna ||
826 $nextrank - $rank > 1 ) ){
30ddc24c 827 $next = pop @succ;
828 }
829 $weighted->{$curr} = $next;
830 $curr = $next;
831 }
832 return $weighted;
833}
834
027d819c 835=head2 path_witnesses( $edge )
836
837Returns the list of sigils whose witnesses are associated with the given edge.
838The edge can be passed as either an array or an arrayref of ( $source, $target ).
839
840=cut
841
3a2ebbf4 842sub path_witnesses {
843 my( $self, @edge ) = @_;
844 # If edge is an arrayref, cope.
845 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
846 my $e = shift @edge;
847 @edge = @$e;
848 }
849 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 850 return @wits;
3a2ebbf4 851}
852
7f9f05e8 853# Helper function. Make a display label for the given witnesses, showing a.c.
854# witnesses only where the main witness is not also in the list.
027d819c 855sub _path_display_label {
508fd430 856 my $self = shift;
7f9f05e8 857 my %wits;
858 map { $wits{$_} = 1 } @_;
859
860 # If an a.c. wit is listed, remove it if the main wit is also listed.
861 # Otherwise keep it for explicit listing.
862 my $aclabel = $self->ac_label;
863 my @disp_ac;
864 foreach my $w ( sort keys %wits ) {
865 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
866 if( exists $wits{$1} ) {
867 delete $wits{$w};
868 } else {
869 push( @disp_ac, $w );
870 }
871 }
872 }
873
874 # See if we are in a majority situation.
8f9cab7b 875 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1ff82d4f 876 $maj = $maj > 5 ? $maj : 5;
7f9f05e8 877 if( scalar keys %wits > $maj ) {
878 unshift( @disp_ac, 'majority' );
879 return join( ', ', @disp_ac );
8f9cab7b 880 } else {
7f9f05e8 881 return join( ', ', sort keys %wits );
8f9cab7b 882 }
883}
1dd07bda 884
bf6e338d 885=head2 readings_at_rank( $rank )
1dd07bda 886
bf6e338d 887Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 888
889=cut
890
bf6e338d 891sub readings_at_rank {
1dd07bda 892 my( $self, $rank ) = @_;
bf6e338d 893 my $table = $self->alignment_table;
894 # Table rank is real rank - 1.
895 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
896 my %readings;
897 foreach my $e ( @elements ) {
898 next unless ref( $e ) eq 'HASH';
899 next unless exists $e->{'t'};
900 $readings{$e->{'t'}->id} = $e->{'t'};
901 }
902 return values %readings;
1dd07bda 903}
8f9cab7b 904
4e5a7b2c 905=head2 as_graphml
8e1394aa 906
4e5a7b2c 907Returns a GraphML representation of the collation. The GraphML will contain
908two graphs. The first expresses the attributes of the readings and the witness
909paths that link them; the second expresses the relationships that link the
910readings. This is the native transfer format for a tradition.
8e1394aa 911
56eefa04 912=begin testing
913
914use Text::Tradition;
951ddfe8 915use TryCatch;
56eefa04 916
917my $READINGS = 311;
918my $PATHS = 361;
919
920my $datafile = 't/data/florilegium_tei_ps.xml';
921my $tradition = Text::Tradition->new( 'input' => 'TEI',
922 'name' => 'test0',
923 'file' => $datafile,
924 'linear' => 1 );
925
926ok( $tradition, "Got a tradition object" );
927is( scalar $tradition->witnesses, 13, "Found all witnesses" );
928ok( $tradition->collation, "Tradition has a collation" );
929
930my $c = $tradition->collation;
931is( scalar $c->readings, $READINGS, "Collation has all readings" );
932is( scalar $c->paths, $PATHS, "Collation has all paths" );
933is( scalar $c->relationships, 0, "Collation has all relationships" );
934
935# Add a few relationships
936$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
937$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
938$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
939
940# Now write it to GraphML and parse it again.
941
942my $graphml = $c->as_graphml;
943my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
944is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
945is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
946is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
947
9fef629b 948# Now add a stemma, write to GraphML, and look at the output.
951ddfe8 949SKIP: {
37bf09f4 950 skip "Analysis module not present", 3 unless $tradition->can( 'add_stemma' );
951ddfe8 951 my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
952 is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
953 is( $tradition->stemmata, 1, "Tradition now has the stemma" );
954 $graphml = $c->as_graphml;
955 like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
956}
2a812726 957
56eefa04 958=end testing
959
8e1394aa 960=cut
961
a445ce40 962## TODO MOVE this to Tradition.pm and modularize it better
8e1394aa 963sub as_graphml {
a30ca502 964 my( $self, $options ) = @_;
3d14b48e 965 $self->calculate_ranks unless $self->_graphcalc_done;
966
a30ca502 967 my $start = $options->{'from'}
968 ? $self->reading( $options->{'from'} ) : $self->start;
969 my $end = $options->{'to'}
970 ? $self->reading( $options->{'to'} ) : $self->end;
971 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
972 throw( 'Start node must be before end node' );
973 }
974 # The readings need to be ranked for this to work.
975 $start = $self->start unless $start->has_rank;
976 $end = $self->end unless $end->has_rank;
414cc046 977 my $rankoffset = 0;
978 unless( $start eq $self->start ) {
979 $rankoffset = $start->rank - 1;
980 }
a30ca502 981 my %use_readings;
982
8e1394aa 983 # Some namespaces
984 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
985 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
986 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 987 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 988
989 # Create the document and root node
428bcf0b 990 require XML::LibXML;
8e1394aa 991 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
992 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
993 $graphml->setDocumentElement( $root );
994 $root->setNamespace( $xsi_ns, 'xsi', 0 );
995 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 996
997 # List of attribute types to save on our objects and their corresponding
998 # GraphML types
999 my %save_types = (
1000 'Str' => 'string',
1001 'Int' => 'int',
1002 'Bool' => 'boolean',
10e4b1ac 1003 'ReadingID' => 'string',
bbd064a9 1004 'RelationshipType' => 'string',
1005 'RelationshipScope' => 'string',
1006 );
1007
bbd064a9 1008 # Add the data keys for the graph. Include an extra key 'version' for the
1009 # GraphML output version.
e309421a 1010 my %graph_data_keys;
1011 my $gdi = 0;
bbd064a9 1012 my %graph_attributes = ( 'version' => 'string' );
1013 # Graph attributes include those of Tradition and those of Collation.
1014 my %gattr_from;
1015 my $tmeta = $self->tradition->meta;
1016 my $cmeta = $self->meta;
1017 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
1018 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
1019 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
1020 next if $attr->name =~ /^_/;
bbd064a9 1021 next unless $save_types{$attr->type_constraint->name};
1022 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1023 }
9fef629b 1024 # Extra custom keys for complex objects that should be saved in some form.
1025 # The subroutine should return a string, or undef/empty.
951ddfe8 1026 if( $tmeta->has_method('stemmata') ) {
1027 $graph_attributes{'stemmata'} = sub {
1028 my @stemstrs;
1029 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1030 $self->tradition->stemmata;
1031 join( "\n", @stemstrs );
1032 };
1033 }
1034
8943ff68 1035 if( $tmeta->has_method('user') ) {
1036 $graph_attributes{'user'} = sub {
1037 $self->tradition->user ? $self->tradition->user->id : undef
1038 };
1039 }
bbd064a9 1040
1041 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 1042 $graph_data_keys{$datum} = 'dg'.$gdi++;
1043 my $key = $root->addNewChild( $graphml_ns, 'key' );
9fef629b 1044 my $dtype = ref( $graph_attributes{$datum} ) ? 'string'
1045 : $graph_attributes{$datum};
e309421a 1046 $key->setAttribute( 'attr.name', $datum );
9fef629b 1047 $key->setAttribute( 'attr.type', $dtype );
e309421a 1048 $key->setAttribute( 'for', 'graph' );
1049 $key->setAttribute( 'id', $graph_data_keys{$datum} );
1050 }
f6066bac 1051
bbd064a9 1052 # Add the data keys for reading nodes
1053 my %reading_attributes;
1054 my $rmeta = Text::Tradition::Collation::Reading->meta;
1055 foreach my $attr( $rmeta->get_all_attributes ) {
1056 next if $attr->name =~ /^_/;
bbd064a9 1057 next unless $save_types{$attr->type_constraint->name};
1058 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1059 }
a445ce40 1060 if( $self->start->does('Text::Tradition::Morphology' ) ) {
1061 # Extra custom key for the reading morphology
1062 $reading_attributes{'lexemes'} = 'string';
1063 }
7cd9f181 1064
ef9d481f 1065 my %node_data_keys;
1066 my $ndi = 0;
bbd064a9 1067 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1068 $node_data_keys{$datum} = 'dn'.$ndi++;
1069 my $key = $root->addNewChild( $graphml_ns, 'key' );
1070 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1071 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1072 $key->setAttribute( 'for', 'node' );
1073 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1074 }
1075
bbd064a9 1076 # Add the data keys for edges, that is, paths and relationships. Path
1077 # data does not come from a Moose class so is here manually.
ef9d481f 1078 my $edi = 0;
1079 my %edge_data_keys;
bbd064a9 1080 my %edge_attributes = (
3a2ebbf4 1081 witness => 'string', # ID/label for a path
3a2ebbf4 1082 extra => 'boolean', # Path key
3a2ebbf4 1083 );
bbd064a9 1084 my @path_attributes = keys %edge_attributes; # track our manual additions
1085 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1086 foreach my $attr( $pmeta->get_all_attributes ) {
1087 next if $attr->name =~ /^_/;
bbd064a9 1088 next unless $save_types{$attr->type_constraint->name};
1089 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1090 }
1091 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1092 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1093 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1094 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1095 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1096 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1097 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1098 }
3a2ebbf4 1099
cc31ebaa 1100 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1101 my $xmlidname = $self->tradition->name;
1102 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1103 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1104 $xmlidname = '_'.$xmlidname;
1105 }
2c669bca 1106 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1107 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1108 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1109 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1110 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1111 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1112 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1113 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1114
2a812726 1115 # Tradition/collation attribute data
bbd064a9 1116 foreach my $datum ( keys %graph_attributes ) {
1117 my $value;
1118 if( $datum eq 'version' ) {
2a812726 1119 $value = '3.2';
9fef629b 1120 } elsif( ref( $graph_attributes{$datum} ) ) {
1121 my $sub = $graph_attributes{$datum};
1122 $value = &$sub();
bbd064a9 1123 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1124 $value = $self->tradition->$datum;
1125 } else {
1126 $value = $self->$datum;
1127 }
2c669bca 1128 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1129 }
8e1394aa 1130
1131 my $node_ctr = 0;
1132 my %node_hash;
22222af9 1133 # Add our readings to the graph
3a2ebbf4 1134 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1135 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1136 ( $n->rank < $start->rank || $n->rank > $end->rank );
1137 $use_readings{$n->id} = 1;
2c669bca 1138 # Add to the main graph
1139 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1140 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1141 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1142 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1143 foreach my $d ( keys %reading_attributes ) {
255875b8 1144 my $nval = $n->$d;
7cd9f181 1145 # Custom serialization
1146 if( $d eq 'lexemes' ) {
1147 # If nval is a true value, we have lexemes so we need to
1148 # serialize them. Otherwise set nval to undef so that the
1149 # key is excluded from this reading.
1150 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1151 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1152 $nval = undef;
7cd9f181 1153 }
cc31ebaa 1154 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1155 # Adjust the ranks within the subgraph.
cc31ebaa 1156 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1157 : $nval - $rankoffset;
414cc046 1158 }
255875b8 1159 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1160 if defined $nval;
1161 }
b15511bf 1162 }
1163
2c669bca 1164 # Add the path edges to the sequence graph
df6d9812 1165 my $edge_ctr = 0;
3a2ebbf4 1166 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1167 # We add an edge in the graphml for every witness in $e.
a30ca502 1168 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1169 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1170 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1171 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1172 # Skip any path from start to end; that witness is not in the subgraph.
1173 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1174 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1175 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1176 $node_hash{ $e->[0] },
1177 $node_hash{ $e->[1] } );
2c669bca 1178 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1179 $edge_el->setAttribute( 'source', $from );
1180 $edge_el->setAttribute( 'target', $to );
1181 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1182
1183 # It's a witness path, so add the witness
1184 my $base = $wit;
1185 my $key = $edge_data_keys{'witness'};
1186 # Is this an ante-corr witness?
1187 my $aclabel = $self->ac_label;
1188 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1189 # Keep the base witness
1190 $base = $1;
1191 # ...and record that this is an 'extra' reading path
1192 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1193 }
1194 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1195 }
1196 }
1197
cc31ebaa 1198 # Report the actual number of nodes and edges that went in
1199 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1200 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1201
22222af9 1202 # Add the relationship graph to the XML
bbd064a9 1203 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1204 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1205 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1206
94c00c71 1207 # Save and return the thing
1208 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1209 return $result;
df6d9812 1210}
1211
b15511bf 1212sub _add_graphml_data {
1213 my( $el, $key, $value ) = @_;
b15511bf 1214 return unless defined $value;
c9bf3dbf 1215 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1216 $data_el->setAttribute( 'key', $key );
1217 $data_el->appendText( $value );
8e1394aa 1218}
1219
4e5a7b2c 1220=head2 as_csv
910a0a6d 1221
1222Returns a CSV alignment table representation of the collation graph, one
2c669bca 1223row per witness (or witness uncorrected.)
910a0a6d 1224
1225=cut
1226
1227sub as_csv {
3a2ebbf4 1228 my( $self ) = @_;
1dd07bda 1229 my $table = $self->alignment_table;
82fa4d57 1230 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1231 my @result;
2c669bca 1232 # Make the header row
1233 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1234 push( @result, decode_utf8( $csv->string ) );
1235 # Make the rest of the rows
1236 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1237 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1238 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1239 $csv->combine( @row );
910a0a6d 1240 push( @result, decode_utf8( $csv->string ) );
1241 }
3a2ebbf4 1242 return join( "\n", @result );
910a0a6d 1243}
1244
1dd07bda 1245=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1246
566f4595 1247Return a reference to an alignment table, in a slightly enhanced CollateX
1248format which looks like this:
1249
1250 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1251 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1252 { witness => "SIG2",
4e5a7b2c 1253 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1254 ... ],
1255 length => TEXTLEN };
1256
1257If $use_refs is set to 1, the reading object is returned in the table
1258instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1259
1260If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1261keys have a true hash value will be included.
2c669bca 1262
1263=cut
9f3ba6f7 1264
1dd07bda 1265sub alignment_table {
1266 my( $self ) = @_;
c1915ab9 1267 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1268 return $self->cached_table if $self->has_cached_table;
1269
0ecb975c 1270 # Make sure we can do this
1271 throw( "Need a linear graph in order to make an alignment table" )
1272 unless $self->linear;
1273 $self->calculate_ranks unless $self->end->has_rank;
1274
2c669bca 1275 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1276 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1277 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
6771a1b1 1278 # say STDERR "Making witness row(s) for " . $wit->sigil;
1f7aa795 1279 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1280 my @row = _make_witness_row( \@wit_path, \@all_pos );
bed6ce83 1281 my $witobj = { 'witness' => $wit->sigil, 'tokens' => \@row };
1282 $witobj->{'identifier'} = $wit->identifier if $wit->identifier;
1283 push( @{$table->{'alignment'}}, $witobj );
1f7aa795 1284 if( $wit->is_layered ) {
1285 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1286 $wit->sigil.$self->ac_label );
1dd07bda 1287 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
bed6ce83 1288 my $witacobj = { 'witness' => $wit->sigil.$self->ac_label,
1289 'tokens' => \@ac_row };
1290 $witacobj->{'identifier'} = $wit->identifier if $wit->identifier;
1291 push( @{$table->{'alignment'}}, $witacobj );
910a0a6d 1292 }
1293 }
1dd07bda 1294 $self->cached_table( $table );
1295 return $table;
910a0a6d 1296}
1297
1298sub _make_witness_row {
1dd07bda 1299 my( $path, $positions ) = @_;
910a0a6d 1300 my %char_hash;
1301 map { $char_hash{$_} = undef } @$positions;
2c669bca 1302 my $debug = 0;
910a0a6d 1303 foreach my $rdg ( @$path ) {
6771a1b1 1304 say STDERR "rank " . $rdg->rank if $debug;
1305 # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
1dd07bda 1306 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1307 }
1308 my @row = map { $char_hash{$_} } @$positions;
eca16057 1309 # Fill in lacuna markers for undef spots in the row
1310 my $last_el = shift @row;
1311 my @filled_row = ( $last_el );
1312 foreach my $el ( @row ) {
0e476982 1313 # If we are using node reference, make the lacuna node appear many times
1314 # in the table. If not, use the lacuna tag.
1dd07bda 1315 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1316 $el = $last_el;
eca16057 1317 }
1318 push( @filled_row, $el );
1319 $last_el = $el;
1320 }
1321 return @filled_row;
910a0a6d 1322}
1323
4e5a7b2c 1324=head1 NAVIGATION METHODS
910a0a6d 1325
4e5a7b2c 1326=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1327
1328Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1329with $last, for the witness given in $sigil. If a $backup sigil is
1330specified (e.g. when walking a layered witness), it will be used wherever
1331no $sigil path exists. If there is a base text reading, that will be
1332used wherever no path exists for $sigil or $backup.
e2902068 1333
1334=cut
1335
910a0a6d 1336# TODO Think about returning some lazy-eval iterator.
b0b4421a 1337# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1338
e2902068 1339sub reading_sequence {
861c3e27 1340 my( $self, $start, $end, $witness ) = @_;
e2902068 1341
930ff666 1342 $witness = $self->baselabel unless $witness;
e2902068 1343 my @readings = ( $start );
1344 my %seen;
1345 my $n = $start;
3a2ebbf4 1346 while( $n && $n->id ne $end->id ) {
1347 if( exists( $seen{$n->id} ) ) {
63778331 1348 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1349 }
3a2ebbf4 1350 $seen{$n->id} = 1;
910a0a6d 1351
861c3e27 1352 my $next = $self->next_reading( $n, $witness );
44771cf2 1353 unless( $next ) {
63778331 1354 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1355 }
910a0a6d 1356 push( @readings, $next );
1357 $n = $next;
e2902068 1358 }
1359 # Check that the last reading is our end reading.
1360 my $last = $readings[$#readings];
63778331 1361 throw( "Last reading found from " . $start->text .
1362 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1363 unless $last->id eq $end->id;
e2902068 1364
1365 return @readings;
1366}
1367
4e5a7b2c 1368=head2 next_reading( $reading, $sigil );
8e1394aa 1369
4a8828f0 1370Returns the reading that follows the given reading along the given witness
930ff666 1371path.
8e1394aa 1372
1373=cut
1374
4a8828f0 1375sub next_reading {
e2902068 1376 # Return the successor via the corresponding path.
8e1394aa 1377 my $self = shift;
3a2ebbf4 1378 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1379 return undef unless $answer;
3a2ebbf4 1380 return $self->reading( $answer );
8e1394aa 1381}
1382
4e5a7b2c 1383=head2 prior_reading( $reading, $sigil )
8e1394aa 1384
4a8828f0 1385Returns the reading that precedes the given reading along the given witness
930ff666 1386path.
8e1394aa 1387
1388=cut
1389
4a8828f0 1390sub prior_reading {
e2902068 1391 # Return the predecessor via the corresponding path.
8e1394aa 1392 my $self = shift;
3a2ebbf4 1393 my $answer = $self->_find_linked_reading( 'prior', @_ );
1394 return $self->reading( $answer );
8e1394aa 1395}
1396
4a8828f0 1397sub _find_linked_reading {
861c3e27 1398 my( $self, $direction, $node, $path ) = @_;
1399
1400 # Get a backup if we are dealing with a layered witness
1401 my $alt_path;
1402 my $aclabel = $self->ac_label;
1403 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1404 $alt_path = $1;
1405 }
1406
e2902068 1407 my @linked_paths = $direction eq 'next'
3a2ebbf4 1408 ? $self->sequence->edges_from( $node )
1409 : $self->sequence->edges_to( $node );
e2902068 1410 return undef unless scalar( @linked_paths );
8e1394aa 1411
e2902068 1412 # We have to find the linked path that contains all of the
1413 # witnesses supplied in $path.
1414 my( @path_wits, @alt_path_wits );
4e5a7b2c 1415 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1416 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1417 my $base_le;
1418 my $alt_le;
1419 foreach my $le ( @linked_paths ) {
3a2ebbf4 1420 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1421 $base_le = $le;
910a0a6d 1422 }
508fd430 1423 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1424 if( _is_within( \@path_wits, \@le_wits ) ) {
1425 # This is the right path.
1426 return $direction eq 'next' ? $le->[1] : $le->[0];
1427 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1428 $alt_le = $le;
1429 }
8e1394aa 1430 }
e2902068 1431 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1432 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1433 if $alt_le;
e2902068 1434
1435 # Got this far? Return the base path if it exists.
3a2ebbf4 1436 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1437 if $base_le;
e2902068 1438
1439 # Got this far? We have no appropriate path.
2c669bca 1440 warn "Could not find $direction node from " . $node->id
910a0a6d 1441 . " along path $path";
8e1394aa 1442 return undef;
1443}
1444
4a8828f0 1445# Some set logic.
1446sub _is_within {
1447 my( $set1, $set2 ) = @_;
7854e12e 1448 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1449 foreach my $el ( @$set1 ) {
910a0a6d 1450 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1451 }
1452 return $ret;
1453}
1454
4e5a7b2c 1455# Return the string that joins together a list of witnesses for
1456# display on a single path.
1457sub _witnesses_of_label {
1458 my( $self, $label ) = @_;
1459 my $regex = $self->wit_list_separator;
1460 my @answer = split( /\Q$regex\E/, $label );
1461 return @answer;
b0b4421a 1462}
1463
d4b75f44 1464=head2 common_readings
1465
1466Returns the list of common readings in the graph (i.e. those readings that are
1467shared by all non-lacunose witnesses.)
1468
1469=cut
1470
1471sub common_readings {
1472 my $self = shift;
1473 my @common = grep { $_->is_common } $self->readings;
1474 return @common;
1475}
1476
fae52efd 1477=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1478
1479Returns the text of a witness (plus its backup, if we are using a layer)
1480as stored in the collation. The text is returned as a string, where the
1481individual readings are joined with spaces and the meta-readings (e.g.
1482lacunae) are omitted. Optional specification of $start and $end allows
1483the generation of a subset of the witness text.
4e5a7b2c 1484
b0b4421a 1485=cut
1486
1487sub path_text {
861c3e27 1488 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1489 $start = $self->start unless $start;
1490 $end = $self->end unless $end;
861c3e27 1491 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1492 my $pathtext = '';
1493 my $last;
1494 foreach my $r ( @path ) {
6ad2ce78 1495 unless ( $r->join_prior || !$last || $last->join_next ) {
1496 $pathtext .= ' ';
1497 }
1498 $pathtext .= $r->text;
629e27b0 1499 $last = $r;
1500 }
1501 return $pathtext;
b0b4421a 1502}
4e5a7b2c 1503
1504=head1 INITIALIZATION METHODS
1505
1506These are mostly for use by parsers.
1507
1508=head2 make_witness_path( $witness )
1509
1510Link the array of readings contained in $witness->path (and in
1511$witness->uncorrected_path if it exists) into collation paths.
1512Clear out the arrays when finished.
de51424a 1513
4e5a7b2c 1514=head2 make_witness_paths
1515
1516Call make_witness_path for all witnesses in the tradition.
1517
1518=cut
930ff666 1519
7e450e44 1520# For use when a collation is constructed from a base text and an apparatus.
1521# We have the sequences of readings and just need to add path edges.
1f7aa795 1522# When we are done, clear out the witness path attributes, as they are no
1523# longer needed.
1524# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1525
6a222840 1526sub make_witness_paths {
1527 my( $self ) = @_;
910a0a6d 1528 foreach my $wit ( $self->tradition->witnesses ) {
6771a1b1 1529 # say STDERR "Making path for " . $wit->sigil;
910a0a6d 1530 $self->make_witness_path( $wit );
7854e12e 1531 }
7854e12e 1532}
1533
6a222840 1534sub make_witness_path {
7854e12e 1535 my( $self, $wit ) = @_;
1536 my @chain = @{$wit->path};
15d2d3df 1537 my $sig = $wit->sigil;
fae52efd 1538 # Add start and end if necessary
1539 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1540 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1541 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1542 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1543 }
1f7aa795 1544 if( $wit->is_layered ) {
d9e873d0 1545 @chain = @{$wit->uncorrected_path};
fae52efd 1546 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1547 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1548 foreach my $idx( 0 .. $#chain-1 ) {
1549 my $source = $chain[$idx];
1550 my $target = $chain[$idx+1];
1551 $self->add_path( $source, $target, $sig.$self->ac_label )
1552 unless $self->has_path( $source, $target, $sig );
1553 }
15d2d3df 1554 }
1f7aa795 1555 $wit->clear_path;
1556 $wit->clear_uncorrected_path;
e2902068 1557}
1558
4e5a7b2c 1559=head2 calculate_ranks
1560
1561Calculate the reading ranks (that is, their aligned positions relative
1562to each other) for the graph. This can only be called on linear collations.
1563
b365fbae 1564=begin testing
1565
1566use Text::Tradition;
1567
1568my $cxfile = 't/data/Collatex-16.xml';
1569my $t = Text::Tradition->new(
1570 'name' => 'inline',
1571 'input' => 'CollateX',
1572 'file' => $cxfile,
1573 );
1574my $c = $t->collation;
1575
1576# Make an svg
bfcbcecb 1577my $table = $c->alignment_table;
1578ok( $c->has_cached_table, "Alignment table was cached" );
1579is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1580$c->calculate_ranks;
bfcbcecb 1581is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1582$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1583isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1584
1585=end testing
1586
4e5a7b2c 1587=cut
1588
910a0a6d 1589sub calculate_ranks {
1590 my $self = shift;
b365fbae 1591 # Save the existing ranks, in case we need to invalidate the cached SVG.
1592 my %existing_ranks;
ac4d7ac2 1593 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1594
1595 # Do the rankings based on the relationship equivalence graph, starting
1596 # with the start node.
56772e8c 1597 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1598
910a0a6d 1599 # Transfer our rankings from the topological graph to the real one.
1600 foreach my $r ( $self->readings ) {
cecbe56d 1601 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 1602 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1603 } else {
63778331 1604 # Die. Find the last rank we calculated.
359944f7 1605 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1606 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1607 $self->readings;
1608 my $last = pop @all_defined;
1609 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1610 }
de51424a 1611 }
bfcbcecb 1612 # Do we need to invalidate the cached data?
be3af600 1613 if( $self->has_cached_table ) {
b365fbae 1614 foreach my $r ( $self->readings ) {
7c293912 1615 next if defined( $existing_ranks{$r} )
1616 && $existing_ranks{$r} == $r->rank;
c1915ab9 1617 # Something has changed, so clear the cache
bfcbcecb 1618 $self->_clear_cache;
c1915ab9 1619 # ...and recalculate the common readings.
1620 $self->calculate_common_readings();
b365fbae 1621 last;
1622 }
1623 }
c1915ab9 1624 # The graph calculation information is now up to date.
1625 $self->_graphcalc_done(1);
8e1394aa 1626}
3a1f2523 1627
c1915ab9 1628sub _clear_cache {
1629 my $self = shift;
c1915ab9 1630 $self->wipe_table if $self->has_cached_table;
1631}
1632
1633
4e5a7b2c 1634=head2 flatten_ranks
1635
1636A convenience method for parsing collation data. Searches the graph for readings
1637with the same text at the same rank, and merges any that are found.
1638
1639=cut
1640
0e476982 1641sub flatten_ranks {
1642 my $self = shift;
1643 my %unique_rank_rdg;
bf6e338d 1644 my $changed;
0e476982 1645 foreach my $rdg ( $self->readings ) {
1646 next unless $rdg->has_rank;
1647 my $key = $rdg->rank . "||" . $rdg->text;
1648 if( exists $unique_rank_rdg{$key} ) {
07e6765f 1649 # Make sure they don't have different grammatical forms
1650 my $ur = $unique_rank_rdg{$key};
a445ce40 1651 if( $rdg->is_identical( $ur ) ) {
1652 # Combine!
1653 #say STDERR "Combining readings at same rank: $key";
1654 $changed = 1;
1655 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1656 # TODO see if this now makes a common point.
07e6765f 1657 }
0e476982 1658 } else {
1659 $unique_rank_rdg{$key} = $rdg;
1660 }
1661 }
bf6e338d 1662 # If we merged readings, the ranks are still fine but the alignment
1663 # table is wrong. Wipe it.
1664 $self->wipe_table() if $changed;
0e476982 1665}
4633f9e4 1666
1667
d4b75f44 1668=head2 calculate_common_readings
1669
1670Goes through the graph identifying the readings that appear in every witness
1671(apart from those with lacunae at that spot.) Marks them as common and returns
1672the list.
1673
1674=begin testing
1675
1676use Text::Tradition;
1677
1678my $cxfile = 't/data/Collatex-16.xml';
1679my $t = Text::Tradition->new(
1680 'name' => 'inline',
1681 'input' => 'CollateX',
1682 'file' => $cxfile,
1683 );
1684my $c = $t->collation;
1685
1686my @common = $c->calculate_common_readings();
1687is( scalar @common, 8, "Found correct number of common readings" );
1688my @marked = sort $c->common_readings();
1689is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1690my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1691is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1692
1693=end testing
1694
1695=cut
1696
1697sub calculate_common_readings {
1698 my $self = shift;
1699 my @common;
c1915ab9 1700 map { $_->is_common( 0 ) } $self->readings;
1701 # Implicitly calls calculate_ranks
1dd07bda 1702 my $table = $self->alignment_table;
d4b75f44 1703 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1704 my @row = map { $_->{'tokens'}->[$idx]
1705 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1706 @{$table->{'alignment'}};
d4b75f44 1707 my %hash;
1708 foreach my $r ( @row ) {
1709 if( $r ) {
1710 $hash{$r->id} = $r unless $r->is_meta;
1711 } else {
1712 $hash{'UNDEF'} = $r;
1713 }
1714 }
1715 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1716 my( $r ) = values %hash;
1717 $r->is_common( 1 );
1718 push( @common, $r );
1719 }
1720 }
1721 return @common;
1722}
1723
861c3e27 1724=head2 text_from_paths
1725
1726Calculate the text array for all witnesses from the path, for later consistency
1727checking. Only to be used if there is no non-graph-based way to know the
1728original texts.
1729
1730=cut
1731
1732sub text_from_paths {
1733 my $self = shift;
1734 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1735 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1736 my @text;
1737 foreach my $r ( @readings ) {
1738 next if $r->is_meta;
1739 push( @text, $r->text );
1740 }
861c3e27 1741 $wit->text( \@text );
1742 if( $wit->is_layered ) {
5164a6f0 1743 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1744 $wit->sigil.$self->ac_label );
1745 my @uctext;
1746 foreach my $r ( @ucrdgs ) {
1747 next if $r->is_meta;
1748 push( @uctext, $r->text );
1749 }
1750 $wit->layertext( \@uctext );
861c3e27 1751 }
1752 }
1753}
0e476982 1754
4e5a7b2c 1755=head1 UTILITY FUNCTIONS
1756
1757=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1758
4e5a7b2c 1759Find the last reading that occurs in sequence before both the given readings.
414cc046 1760At the very least this should be $self->start.
4e5a7b2c 1761
1762=head2 common_successor( $reading_a, $reading_b )
1763
1764Find the first reading that occurs in sequence after both the given readings.
414cc046 1765At the very least this should be $self->end.
4e5a7b2c 1766
22222af9 1767=begin testing
1768
1769use Text::Tradition;
1770
1771my $cxfile = 't/data/Collatex-16.xml';
1772my $t = Text::Tradition->new(
1773 'name' => 'inline',
1774 'input' => 'CollateX',
1775 'file' => $cxfile,
1776 );
1777my $c = $t->collation;
1778
679f17e1 1779is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1780 'n20', "Found correct common predecessor" );
679f17e1 1781is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1782 '__END__', "Found correct common successor" );
22222af9 1783
4e5a7b2c 1784is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1785 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1786is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1787 '__END__', "Found correct common successor for readings on same path" );
22222af9 1788
1789=end testing
1790
1791=cut
1792
1793## Return the closest reading that is a predecessor of both the given readings.
1794sub common_predecessor {
1795 my $self = shift;
4e5a7b2c 1796 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1797 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1798}
1799
1800sub common_successor {
1801 my $self = shift;
4e5a7b2c 1802 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1803 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1804}
1805
414cc046 1806
1807# TODO think about how to do this without ranks...
027d819c 1808sub _common_in_path {
22222af9 1809 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1810 my $iter = $self->end->rank;
22222af9 1811 my @candidates;
414cc046 1812 my @last_r1 = ( $r1 );
1813 my @last_r2 = ( $r2 );
1814 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1815 my %all_seen;
6771a1b1 1816 # say STDERR "Finding common $dir for $r1, $r2";
22222af9 1817 while( !@candidates ) {
414cc046 1818 last unless $iter--; # Avoid looping infinitely
1819 # Iterate separately down the graph from r1 and r2
1820 my( @new_lc1, @new_lc2 );
1821 foreach my $lc ( @last_r1 ) {
1822 foreach my $p ( $lc->$dir ) {
1823 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
6771a1b1 1824 # say STDERR "Path candidate $p from $lc";
414cc046 1825 push( @candidates, $p );
002e3600 1826 } elsif( !$all_seen{$p->id} ) {
414cc046 1827 $all_seen{$p->id} = 'r1';
1828 push( @new_lc1, $p );
1829 }
1830 }
1831 }
1832 foreach my $lc ( @last_r2 ) {
22222af9 1833 foreach my $p ( $lc->$dir ) {
414cc046 1834 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
6771a1b1 1835 # say STDERR "Path candidate $p from $lc";
22222af9 1836 push( @candidates, $p );
002e3600 1837 } elsif( !$all_seen{$p->id} ) {
414cc046 1838 $all_seen{$p->id} = 'r2';
1839 push( @new_lc2, $p );
22222af9 1840 }
1841 }
1842 }
414cc046 1843 @last_r1 = @new_lc1;
1844 @last_r2 = @new_lc2;
22222af9 1845 }
1846 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1847 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1848}
1849
63778331 1850sub throw {
1851 Text::Tradition::Error->throw(
1852 'ident' => 'Collation error',
1853 'message' => $_[0],
1854 );
1855}
1856
dd3b58b0 1857no Moose;
1858__PACKAGE__->meta->make_immutable;
e867486f 1859
a445ce40 1860=head1 BUGS/TODO
1861
1862=over
1863
1864=item * Rework XML serialization in a more modular way
1865
1866=back
1867
027d819c 1868=head1 LICENSE
e867486f 1869
027d819c 1870This package is free software and is provided "as is" without express
1871or implied warranty. You can redistribute it and/or modify it under
1872the same terms as Perl itself.
e867486f 1873
027d819c 1874=head1 AUTHOR
e867486f 1875
027d819c 1876Tara L Andrews E<lt>aurum@cpan.orgE<gt>