abandon 'collated' relationship removal
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
910a0a6d 8use Text::CSV_XS;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
df6d9812 12use XML::LibXML;
a344ffcf 13use XML::LibXML::XPathContext;
dd3b58b0 14use Moose;
15
3a2ebbf4 16has 'sequence' => (
d047cd52 17 is => 'ro',
3a2ebbf4 18 isa => 'Graph',
19 default => sub { Graph->new() },
d047cd52 20 handles => {
3a2ebbf4 21 paths => 'edges',
d047cd52 22 },
d047cd52 23 );
3a2ebbf4 24
25has 'relations' => (
26 is => 'ro',
22222af9 27 isa => 'Text::Tradition::Collation::RelationshipStore',
28 handles => {
29 relationships => 'relationships',
30 related_readings => 'related_readings',
ee801e17 31 del_relationship => 'del_relationship',
22222af9 32 },
33 writer => '_set_relations',
3a2ebbf4 34 );
dd3b58b0 35
3a2ebbf4 36has 'tradition' => (
37 is => 'ro',
d047cd52 38 isa => 'Text::Tradition',
8d9a1cd8 39 weak_ref => 1,
d047cd52 40 );
dd3b58b0 41
3a2ebbf4 42has 'readings' => (
43 isa => 'HashRef[Text::Tradition::Collation::Reading]',
44 traits => ['Hash'],
45 handles => {
46 reading => 'get',
47 _add_reading => 'set',
48 del_reading => 'delete',
49 has_reading => 'exists',
50 readings => 'values',
51 },
52 default => sub { {} },
53 );
910a0a6d 54
4a8828f0 55has 'wit_list_separator' => (
7854e12e 56 is => 'rw',
57 isa => 'Str',
58 default => ', ',
59 );
60
61has 'baselabel' => (
62 is => 'rw',
63 isa => 'Str',
64 default => 'base text',
65 );
4a8828f0 66
15d2d3df 67has 'linear' => (
68 is => 'rw',
69 isa => 'Bool',
70 default => 1,
71 );
c84275ff 72
ef9d481f 73has 'ac_label' => (
74 is => 'rw',
75 isa => 'Str',
76 default => ' (a.c.)',
77 );
3a2ebbf4 78
79has 'start' => (
80 is => 'ro',
81 isa => 'Text::Tradition::Collation::Reading',
82 writer => '_set_start',
83 weak_ref => 1,
84 );
85
86has 'end' => (
87 is => 'ro',
88 isa => 'Text::Tradition::Collation::Reading',
89 writer => '_set_end',
90 weak_ref => 1,
91 );
b365fbae 92
93has 'cached_svg' => (
94 is => 'rw',
95 isa => 'Str',
96 predicate => 'has_cached_svg',
97 clearer => 'wipe_svg',
98 );
1dd07bda 99
100has 'cached_table' => (
101 is => 'rw',
102 isa => 'HashRef',
103 predicate => 'has_cached_table',
104 clearer => 'wipe_table',
105 );
c1915ab9 106
107has '_graphcalc_done' => (
108 is => 'rw',
109 isa => 'Bool',
110 default => undef,
111 );
1f563ac3 112
4e5a7b2c 113=head1 NAME
114
115Text::Tradition::Collation - a software model for a text collation
116
117=head1 SYNOPSIS
118
119 use Text::Tradition;
120 my $t = Text::Tradition->new(
121 'name' => 'this is a text',
122 'input' => 'TEI',
123 'file' => '/path/to/tei_parallel_seg_file.xml' );
124
125 my $c = $t->collation;
126 my @readings = $c->readings;
127 my @paths = $c->paths;
128 my @relationships = $c->relationships;
129
130 my $svg_variant_graph = $t->collation->as_svg();
131
132=head1 DESCRIPTION
133
134Text::Tradition is a library for representation and analysis of collated
135texts, particularly medieval ones. The Collation is the central feature of
136a Tradition, where the text, its sequence of readings, and its relationships
137between readings are actually kept.
138
139=head1 CONSTRUCTOR
140
141=head2 new
142
143The constructor. Takes a hash or hashref of the following arguments:
144
145=over
146
147=item * tradition - The Text::Tradition object to which the collation
148belongs. Required.
149
150=item * linear - Whether the collation should be linear; that is, whether
151transposed readings should be treated as two linked readings rather than one,
152and therefore whether the collation graph is acyclic. Defaults to true.
153
4e5a7b2c 154=item * baselabel - The default label for the path taken by a base text
155(if any). Defaults to 'base text'.
156
157=item * wit_list_separator - The string to join a list of witnesses for
158purposes of making labels in display graphs. Defaults to ', '.
159
160=item * ac_label - The extra label to tack onto a witness sigil when
161representing another layer of path for the given witness - that is, when
162a text has more than one possible reading due to scribal corrections or
163the like. Defaults to ' (a.c.)'.
164
165=back
166
167=head1 ACCESSORS
168
169=head2 tradition
170
171=head2 linear
172
4e5a7b2c 173=head2 wit_list_separator
174
175=head2 baselabel
176
177=head2 ac_label
178
179Simple accessors for collation attributes.
180
181=head2 start
182
183The meta-reading at the start of every witness path.
184
185=head2 end
186
187The meta-reading at the end of every witness path.
188
189=head2 readings
190
191Returns all Reading objects in the graph.
192
193=head2 reading( $id )
194
195Returns the Reading object corresponding to the given ID.
196
197=head2 add_reading( $reading_args )
198
199Adds a new reading object to the collation.
200See L<Text::Tradition::Collation::Reading> for the available arguments.
201
202=head2 del_reading( $object_or_id )
203
204Removes the given reading from the collation, implicitly removing its
205paths and relationships.
206
207=head2 merge_readings( $main, $second )
208
209Merges the $second reading into the $main one.
210The arguments may be either readings or reading IDs.
211
212=head2 has_reading( $id )
213
214Predicate to see whether a given reading ID is in the graph.
215
216=head2 reading_witnesses( $object_or_id )
217
218Returns a list of sigils whose witnesses contain the reading.
219
220=head2 paths
221
222Returns all reading paths within the document - that is, all edges in the
223collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
224
225=head2 add_path( $source, $target, $sigil )
226
227Links the given readings in the collation in sequence, under the given witness
228sigil. The readings may be specified by object or ID.
229
230=head2 del_path( $source, $target, $sigil )
231
232Links the given readings in the collation in sequence, under the given witness
233sigil. The readings may be specified by object or ID.
234
235=head2 has_path( $source, $target );
236
237Returns true if the two readings are linked in sequence in any witness.
238The readings may be specified by object or ID.
239
240=head2 relationships
241
242Returns all Relationship objects in the collation.
243
244=head2 add_relationship( $reading, $other_reading, $options )
245
246Adds a new relationship of the type given in $options between the two readings,
247which may be specified by object or ID. Returns a value of ( $status, @vectors)
248where $status is true on success, and @vectors is a list of relationship edges
249that were ultimately added.
250See L<Text::Tradition::Collation::Relationship> for the available options.
251
252=cut
dd3b58b0 253
d047cd52 254sub BUILD {
3a2ebbf4 255 my $self = shift;
22222af9 256 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 257 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
258 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 259}
784877d9 260
3a2ebbf4 261### Reading construct/destruct functions
262
263sub add_reading {
264 my( $self, $reading ) = @_;
265 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
266 my %args = %$reading;
267 $reading = Text::Tradition::Collation::Reading->new(
268 'collation' => $self,
269 %args );
270 }
271 # First check to see if a reading with this ID exists.
272 if( $self->reading( $reading->id ) ) {
63778331 273 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 274 }
c1915ab9 275 $self->_graphcalc_done(0);
3a2ebbf4 276 $self->_add_reading( $reading->id => $reading );
277 # Once the reading has been added, put it in both graphs.
278 $self->sequence->add_vertex( $reading->id );
22222af9 279 $self->relations->add_reading( $reading->id );
3a2ebbf4 280 return $reading;
eca16057 281};
282
3a2ebbf4 283around del_reading => sub {
284 my $orig = shift;
285 my $self = shift;
286 my $arg = shift;
287
288 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
289 $arg = $arg->id;
290 }
3a2ebbf4 291 # Remove the reading from the graphs.
c1915ab9 292 $self->_graphcalc_done(0);
3a2ebbf4 293 $self->sequence->delete_vertex( $arg );
22222af9 294 $self->relations->delete_reading( $arg );
3a2ebbf4 295
296 # Carry on.
297 $self->$orig( $arg );
298};
7854e12e 299
3a2ebbf4 300# merge_readings( $main, $to_be_deleted );
7854e12e 301
3a2ebbf4 302sub merge_readings {
303 my $self = shift;
304
305 # We only need the IDs for adding paths to the graph, not the reading
306 # objects themselves.
49d4f2ac 307 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 308 $self->_graphcalc_done(0);
3a2ebbf4 309
310 # The kept reading should inherit the paths and the relationships
311 # of the deleted reading.
312 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
313 my @vector = ( $kept );
314 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
315 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 316 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 317 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
318 $self->sequence->add_edge( @vector );
319 my $fwits = $self->sequence->get_edge_attributes( @vector );
320 @wits{keys %$fwits} = values %$fwits;
321 $self->sequence->set_edge_attributes( @vector, \%wits );
322 }
22222af9 323 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 324
325 # Do the deletion deed.
49d4f2ac 326 if( $combine_char ) {
327 my $kept_obj = $self->reading( $kept );
328 my $new_text = join( $combine_char, $kept_obj->text,
329 $self->reading( $deleted )->text );
330 $kept_obj->alter_text( $new_text );
331 }
3a2ebbf4 332 $self->del_reading( $deleted );
333}
7854e12e 334
3265b0ce 335
3a2ebbf4 336# Helper function for manipulating the graph.
337sub _stringify_args {
338 my( $self, $first, $second, $arg ) = @_;
339 $first = $first->id
340 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
341 $second = $second->id
342 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
343 return( $first, $second, $arg );
344}
df6d9812 345
4e5a7b2c 346# Helper function for manipulating the graph.
347sub _objectify_args {
348 my( $self, $first, $second, $arg ) = @_;
349 $first = $self->reading( $first )
350 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
351 $second = $self->reading( $second )
352 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
353 return( $first, $second, $arg );
354}
3a2ebbf4 355### Path logic
356
357sub add_path {
358 my $self = shift;
359
360 # We only need the IDs for adding paths to the graph, not the reading
361 # objects themselves.
362 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
363
c1915ab9 364 $self->_graphcalc_done(0);
3a2ebbf4 365 # Connect the readings
366 $self->sequence->add_edge( $source, $target );
367 # Note the witness in question
368 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 369};
370
3a2ebbf4 371sub del_path {
372 my $self = shift;
49d4f2ac 373 my @args;
374 if( ref( $_[0] ) eq 'ARRAY' ) {
375 my $e = shift @_;
376 @args = ( @$e, @_ );
377 } else {
378 @args = @_;
379 }
3a2ebbf4 380
381 # We only need the IDs for adding paths to the graph, not the reading
382 # objects themselves.
49d4f2ac 383 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 384
c1915ab9 385 $self->_graphcalc_done(0);
3a2ebbf4 386 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 387 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 388 }
389 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
390 $self->sequence->delete_edge( $source, $target );
391 }
784877d9 392}
393
3a2ebbf4 394
15d2d3df 395# Extra graph-alike utility
396sub has_path {
3a2ebbf4 397 my $self = shift;
398 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
399 return undef unless $self->sequence->has_edge( $source, $target );
400 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 401}
402
4e5a7b2c 403=head2 clear_witness( @sigil_list )
3a2ebbf4 404
4e5a7b2c 405Clear the given witnesses out of the collation entirely, removing references
406to them in paths, and removing readings that belong only to them. Should only
407be called via $tradition->del_witness.
3a2ebbf4 408
409=cut
410
4e5a7b2c 411sub clear_witness {
412 my( $self, @sigils ) = @_;
413
c1915ab9 414 $self->_graphcalc_done(0);
4e5a7b2c 415 # Clear the witness(es) out of the paths
416 foreach my $e ( $self->paths ) {
417 foreach my $sig ( @sigils ) {
418 $self->del_path( $e, $sig );
419 }
420 }
421
422 # Clear out the newly unused readings
423 foreach my $r ( $self->readings ) {
424 unless( $self->reading_witnesses( $r ) ) {
425 $self->del_reading( $r );
426 }
427 }
428}
3a2ebbf4 429
430sub add_relationship {
431 my $self = shift;
22222af9 432 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 433 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 434 $self->reading( $source ), $target, $self->reading( $target ), $opts );
c1915ab9 435 $self->_graphcalc_done(0);
63778331 436 return @vectors;
22222af9 437}
ef9d481f 438
22222af9 439=head2 reading_witnesses( $reading )
910a0a6d 440
22222af9 441Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 442
22222af9 443=cut
1d310495 444
1d310495 445sub reading_witnesses {
446 my( $self, $reading ) = @_;
447 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 448 # arbitrarily chosen "incoming". Thus, special-case the start node.
449 if( $reading eq $self->start ) {
450 return map { $_->sigil } $self->tradition->witnesses;
451 }
1d310495 452 my %all_witnesses;
453 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
454 my $wits = $self->sequence->get_edge_attributes( @$e );
455 @all_witnesses{ keys %$wits } = 1;
456 }
c12bb878 457 my $acstr = $self->ac_label;
458 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
459 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
460 }
1d310495 461 return keys %all_witnesses;
910a0a6d 462}
463
4e5a7b2c 464=head1 OUTPUT METHODS
8e1394aa 465
0ecb975c 466=head2 as_svg( \%options )
8e1394aa 467
0068967c 468Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 469See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 470
471=cut
472
473sub as_svg {
0ecb975c 474 my( $self, $opts ) = @_;
bfcbcecb 475 throw( "Need GraphViz installed to output SVG" )
476 unless File::Which::which( 'dot' );
e247aad1 477 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
c1915ab9 478 $self->calculate_ranks() unless $self->_graphcalc_done;
e247aad1 479 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 480 my @cmd = qw/dot -Tsvg/;
481 my( $svg, $err );
482 my $dotfile = File::Temp->new();
483 ## USE FOR DEBUGGING
484 # $dotfile->unlink_on_destroy(0);
485 binmode $dotfile, ':utf8';
486 print $dotfile $self->as_dot( $opts );
487 push( @cmd, $dotfile->filename );
488 run( \@cmd, ">", binary(), \$svg );
e247aad1 489 $svg = decode_utf8( $svg );
490 $self->cached_svg( $svg ) unless $want_subgraph;
491 return $svg;
492 } else {
493 return $self->cached_svg;
b365fbae 494 }
8e1394aa 495}
496
b22576c6 497
0ecb975c 498=head2 as_dot( \%options )
b22576c6 499
0ecb975c 500Returns a string that is the collation graph expressed in dot
501(i.e. GraphViz) format. Options include:
b22576c6 502
0ecb975c 503=over 4
b22576c6 504
0ecb975c 505=item * from
b22576c6 506
0ecb975c 507=item * to
df6d9812 508
0ecb975c 509=item * color_common
510
511=back
df6d9812 512
513=cut
514
515sub as_dot {
0ecb975c 516 my( $self, $opts ) = @_;
517 my $startrank = $opts->{'from'} if $opts;
518 my $endrank = $opts->{'to'} if $opts;
519 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 520 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
521 && $self->end->rank > 100;
522
b22576c6 523 # Check the arguments
524 if( $startrank ) {
525 return if $endrank && $startrank > $endrank;
526 return if $startrank > $self->end->rank;
527 }
528 if( defined $endrank ) {
529 return if $endrank < 0;
f1b3b33a 530 $endrank = undef if $endrank == $self->end->rank;
b22576c6 531 }
532
67da8d6c 533 my $graph_name = $self->tradition->name;
534 $graph_name =~ s/[^\w\s]//g;
535 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 536
537 my %graph_attrs = (
538 'rankdir' => 'LR',
539 'bgcolor' => 'none',
540 );
541 my %node_attrs = (
b8990398 542 'fontsize' => 14,
f13b5582 543 'fillcolor' => 'white',
544 'style' => 'filled',
545 'shape' => 'ellipse'
546 );
547 my %edge_attrs = (
548 'arrowhead' => 'open',
549 'color' => '#000000',
550 'fontcolor' => '#000000',
551 );
552
67da8d6c 553 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 554 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
555 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 556
b22576c6 557 # Output substitute start/end readings if necessary
558 if( $startrank ) {
559 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
560 }
561 if( $endrank ) {
562 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
563 }
b365fbae 564 if( $STRAIGHTENHACK ) {
565 ## HACK part 1
566 $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";
567 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
568 }
b22576c6 569 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 570 # Sort the readings by rank if we have ranks; this speeds layout.
571 my @all_readings = $self->end->has_rank
572 ? sort { $a->rank <=> $b->rank } $self->readings
573 : $self->readings;
4633f9e4 574 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 575 foreach my $reading ( @all_readings ) {
b22576c6 576 # Only output readings within our rank range.
577 next if $startrank && $reading->rank < $startrank;
578 next if $endrank && $reading->rank > $endrank;
579 $used{$reading->id} = 1;
910a0a6d 580 # Need not output nodes without separate labels
3a2ebbf4 581 next if $reading->id eq $reading->text;
d4b75f44 582 my $rattrs;
30f0df34 583 my $label = $reading->text;
8f9cab7b 584 $label =~ s/\"/\\\"/g;
d4b75f44 585 $rattrs->{'label'} = $label;
0ecb975c 586 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 587 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 588 }
3a2ebbf4 589
30ddc24c 590 # Add the real edges. Need to weight one edge per rank jump, in a
591 # continuous line.
b365fbae 592 # my $weighted = $self->_add_edge_weights;
b22576c6 593 my @edges = $self->paths;
3bdec618 594 my( %substart, %subend );
b22576c6 595 foreach my $edge ( @edges ) {
596 # Do we need to output this edge?
508fd430 597 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 598 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 599 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 600
b22576c6 601 # Account for the rank gap if necessary
30ddc24c 602 my $rank0 = $self->reading( $edge->[0] )->rank
603 if $self->reading( $edge->[0] )->has_rank;
604 my $rank1 = $self->reading( $edge->[1] )->rank
605 if $self->reading( $edge->[1] )->has_rank;
606 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
607 $variables->{'minlen'} = $rank1 - $rank0;
608 }
609
610 # Add the calculated edge weights
b365fbae 611 # if( exists $weighted->{$edge->[0]}
e247aad1 612 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
613 # # $variables->{'color'} = 'red';
614 # $variables->{'weight'} = 3.0;
615 # }
30ddc24c 616
508fd430 617 # EXPERIMENTAL: make edge width reflect no. of witnesses
618 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
619 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
620
f13b5582 621 my $varopts = _dot_attr_string( $variables );
622 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
623 $edge->[0], $edge->[1], $varopts );
3bdec618 624 } elsif( $used{$edge->[0]} ) {
625 $subend{$edge->[0]} = 1;
626 } elsif( $used{$edge->[1]} ) {
627 $substart{$edge->[1]} = 1;
b22576c6 628 }
df6d9812 629 }
3bdec618 630 # Add substitute start and end edges if necessary
631 foreach my $node ( keys %substart ) {
027d819c 632 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 633 my $variables = { %edge_attrs, 'label' => $witstr };
634 my $varopts = _dot_attr_string( $variables );
635 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 636 }
637 foreach my $node ( keys %subend ) {
027d819c 638 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 639 my $variables = { %edge_attrs, 'label' => $witstr };
640 my $varopts = _dot_attr_string( $variables );
641 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 642 }
b365fbae 643 # HACK part 2
644 if( $STRAIGHTENHACK ) {
645 $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
646 }
30ddc24c 647
df6d9812 648 $dot .= "}\n";
649 return $dot;
650}
651
f13b5582 652sub _dot_attr_string {
653 my( $hash ) = @_;
654 my @attrs;
655 foreach my $k ( sort keys %$hash ) {
656 my $v = $hash->{$k};
657 push( @attrs, $k.'="'.$v.'"' );
658 }
659 return( '[ ' . join( ', ', @attrs ) . ' ]' );
660}
661
30ddc24c 662sub _add_edge_weights {
663 my $self = shift;
664 # Walk the graph from START to END, choosing the successor node with
665 # the largest number of witness paths each time.
666 my $weighted = {};
667 my $curr = $self->start->id;
008fc8a6 668 my $ranked = $self->end->has_rank;
30ddc24c 669 while( $curr ne $self->end->id ) {
008fc8a6 670 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 671 my @succ = sort { $self->path_witnesses( $curr, $a )
672 <=> $self->path_witnesses( $curr, $b ) }
673 $self->sequence->successors( $curr );
674 my $next = pop @succ;
008fc8a6 675 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 676 # Try to avoid lacunae in the weighted path.
008fc8a6 677 while( @succ &&
678 ( $self->reading( $next )->is_lacuna ||
679 $nextrank - $rank > 1 ) ){
30ddc24c 680 $next = pop @succ;
681 }
682 $weighted->{$curr} = $next;
683 $curr = $next;
684 }
685 return $weighted;
686}
687
027d819c 688=head2 path_witnesses( $edge )
689
690Returns the list of sigils whose witnesses are associated with the given edge.
691The edge can be passed as either an array or an arrayref of ( $source, $target ).
692
693=cut
694
3a2ebbf4 695sub path_witnesses {
696 my( $self, @edge ) = @_;
697 # If edge is an arrayref, cope.
698 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
699 my $e = shift @edge;
700 @edge = @$e;
701 }
702 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 703 return @wits;
3a2ebbf4 704}
705
027d819c 706sub _path_display_label {
508fd430 707 my $self = shift;
708 my @wits = sort @_;
8f9cab7b 709 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
710 if( scalar @wits > $maj ) {
f13b5582 711 # TODO break out a.c. wits
8f9cab7b 712 return 'majority';
713 } else {
714 return join( ', ', @wits );
715 }
716}
1dd07bda 717
718=head2 witnesses_at_rank
719
720Returns a list of witnesses that are not lacunose, for a given rank.
721
722=cut
723
724sub witnesses_at_rank {
725 my( $self, $rank ) = @_;
726}
8f9cab7b 727
4e5a7b2c 728=head2 as_graphml
8e1394aa 729
4e5a7b2c 730Returns a GraphML representation of the collation. The GraphML will contain
731two graphs. The first expresses the attributes of the readings and the witness
732paths that link them; the second expresses the relationships that link the
733readings. This is the native transfer format for a tradition.
8e1394aa 734
56eefa04 735=begin testing
736
737use Text::Tradition;
738
739my $READINGS = 311;
740my $PATHS = 361;
741
742my $datafile = 't/data/florilegium_tei_ps.xml';
743my $tradition = Text::Tradition->new( 'input' => 'TEI',
744 'name' => 'test0',
745 'file' => $datafile,
746 'linear' => 1 );
747
748ok( $tradition, "Got a tradition object" );
749is( scalar $tradition->witnesses, 13, "Found all witnesses" );
750ok( $tradition->collation, "Tradition has a collation" );
751
752my $c = $tradition->collation;
753is( scalar $c->readings, $READINGS, "Collation has all readings" );
754is( scalar $c->paths, $PATHS, "Collation has all paths" );
755is( scalar $c->relationships, 0, "Collation has all relationships" );
756
757# Add a few relationships
758$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
759$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
760$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
761
762# Now write it to GraphML and parse it again.
763
764my $graphml = $c->as_graphml;
765my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
766is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
767is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
768is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
769
770=end testing
771
8e1394aa 772=cut
773
774sub as_graphml {
3a2ebbf4 775 my( $self ) = @_;
3d14b48e 776 $self->calculate_ranks unless $self->_graphcalc_done;
777
8e1394aa 778 # Some namespaces
779 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
780 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
781 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 782 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 783
784 # Create the document and root node
785 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
786 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
787 $graphml->setDocumentElement( $root );
788 $root->setNamespace( $xsi_ns, 'xsi', 0 );
789 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
790
e309421a 791 # Add the data keys for the graph
792 my %graph_data_keys;
793 my $gdi = 0;
1d310495 794 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 795 foreach my $datum ( @graph_attributes ) {
796 $graph_data_keys{$datum} = 'dg'.$gdi++;
797 my $key = $root->addNewChild( $graphml_ns, 'key' );
798 $key->setAttribute( 'attr.name', $datum );
799 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
800 $key->setAttribute( 'for', 'graph' );
801 $key->setAttribute( 'id', $graph_data_keys{$datum} );
802 }
f6066bac 803
8e1394aa 804 # Add the data keys for nodes
ef9d481f 805 my %node_data_keys;
806 my $ndi = 0;
3a2ebbf4 807 my %node_data = (
808 id => 'string',
255875b8 809 text => 'string',
3a2ebbf4 810 rank => 'string',
811 is_start => 'boolean',
812 is_end => 'boolean',
813 is_lacuna => 'boolean',
814 );
815 foreach my $datum ( keys %node_data ) {
910a0a6d 816 $node_data_keys{$datum} = 'dn'.$ndi++;
817 my $key = $root->addNewChild( $graphml_ns, 'key' );
818 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 819 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 820 $key->setAttribute( 'for', 'node' );
821 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 822 }
823
df6d9812 824 # Add the data keys for edges, i.e. witnesses
ef9d481f 825 my $edi = 0;
826 my %edge_data_keys;
3a2ebbf4 827 my %edge_data = (
f523c7a8 828 class => 'string', # Class, deprecated soon
3a2ebbf4 829 witness => 'string', # ID/label for a path
830 relationship => 'string', # ID/label for a relationship
831 extra => 'boolean', # Path key
c84275ff 832 scope => 'string', # Relationship key
3d14b48e 833 annotation => 'string', # Relationship key
3a2ebbf4 834 non_correctable => 'boolean', # Relationship key
835 non_independent => 'boolean', # Relationship key
836 );
837 foreach my $datum ( keys %edge_data ) {
838 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 839 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 840 $key->setAttribute( 'attr.name', $datum );
841 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 842 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 843 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 844 }
3a2ebbf4 845
22222af9 846 # Add the collation graph itself
2c669bca 847 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
848 $sgraph->setAttribute( 'edgedefault', 'directed' );
849 $sgraph->setAttribute( 'id', $self->tradition->name );
850 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
851 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
852 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
853 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
854 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 855
e309421a 856 # Collation attribute data
857 foreach my $datum ( @graph_attributes ) {
2c669bca 858 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
859 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 860 }
8e1394aa 861
862 my $node_ctr = 0;
863 my %node_hash;
22222af9 864 # Add our readings to the graph
3a2ebbf4 865 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 866 # Add to the main graph
867 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 868 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 869 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 870 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 871 foreach my $d ( keys %node_data ) {
872 my $nval = $n->$d;
873 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
874 if defined $nval;
875 }
b15511bf 876 }
877
2c669bca 878 # Add the path edges to the sequence graph
df6d9812 879 my $edge_ctr = 0;
3a2ebbf4 880 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
881 # We add an edge in the graphml for every witness in $e.
508fd430 882 foreach my $wit ( sort $self->path_witnesses( $e ) ) {
3a2ebbf4 883 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
884 $node_hash{ $e->[0] },
885 $node_hash{ $e->[1] } );
2c669bca 886 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 887 $edge_el->setAttribute( 'source', $from );
888 $edge_el->setAttribute( 'target', $to );
889 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 890
891 # It's a witness path, so add the witness
892 my $base = $wit;
893 my $key = $edge_data_keys{'witness'};
894 # Is this an ante-corr witness?
895 my $aclabel = $self->ac_label;
896 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
897 # Keep the base witness
898 $base = $1;
899 # ...and record that this is an 'extra' reading path
900 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
901 }
902 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 903 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 904 }
905 }
906
22222af9 907 # Add the relationship graph to the XML
027d819c 908 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
2626f709 909 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 910
94c00c71 911 # Save and return the thing
912 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 913 return $result;
df6d9812 914}
915
b15511bf 916sub _add_graphml_data {
917 my( $el, $key, $value ) = @_;
b15511bf 918 return unless defined $value;
c9bf3dbf 919 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 920 $data_el->setAttribute( 'key', $key );
921 $data_el->appendText( $value );
8e1394aa 922}
923
4e5a7b2c 924=head2 as_csv
910a0a6d 925
926Returns a CSV alignment table representation of the collation graph, one
2c669bca 927row per witness (or witness uncorrected.)
910a0a6d 928
929=cut
930
931sub as_csv {
3a2ebbf4 932 my( $self ) = @_;
1dd07bda 933 my $table = $self->alignment_table;
910a0a6d 934 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
935 my @result;
2c669bca 936 # Make the header row
937 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
938 push( @result, decode_utf8( $csv->string ) );
939 # Make the rest of the rows
940 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 941 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 942 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 943 $csv->combine( @row );
910a0a6d 944 push( @result, decode_utf8( $csv->string ) );
945 }
3a2ebbf4 946 return join( "\n", @result );
910a0a6d 947}
948
1dd07bda 949=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 950
566f4595 951Return a reference to an alignment table, in a slightly enhanced CollateX
952format which looks like this:
953
954 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 955 tokens => [ { t => "TEXT" }, ... ] },
566f4595 956 { witness => "SIG2",
4e5a7b2c 957 tokens => [ { t => "TEXT" }, ... ] },
566f4595 958 ... ],
959 length => TEXTLEN };
960
961If $use_refs is set to 1, the reading object is returned in the table
962instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 963
964If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 965keys have a true hash value will be included.
2c669bca 966
967=cut
9f3ba6f7 968
1dd07bda 969sub alignment_table {
970 my( $self ) = @_;
c1915ab9 971 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 972 return $self->cached_table if $self->has_cached_table;
973
0ecb975c 974 # Make sure we can do this
975 throw( "Need a linear graph in order to make an alignment table" )
976 unless $self->linear;
977 $self->calculate_ranks unless $self->end->has_rank;
978
2c669bca 979 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 980 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 981 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 982 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 983 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 984 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 985 push( @{$table->{'alignment'}},
986 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 987 if( $wit->is_layered ) {
988 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 989 $wit->sigil.$self->ac_label );
1dd07bda 990 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 991 push( @{$table->{'alignment'}},
992 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 993 }
994 }
1dd07bda 995 $self->cached_table( $table );
996 return $table;
910a0a6d 997}
998
999sub _make_witness_row {
1dd07bda 1000 my( $path, $positions ) = @_;
910a0a6d 1001 my %char_hash;
1002 map { $char_hash{$_} = undef } @$positions;
2c669bca 1003 my $debug = 0;
910a0a6d 1004 foreach my $rdg ( @$path ) {
eca16057 1005 my $rtext = $rdg->text;
1006 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1007 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1008 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1009 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1010 }
1011 my @row = map { $char_hash{$_} } @$positions;
eca16057 1012 # Fill in lacuna markers for undef spots in the row
1013 my $last_el = shift @row;
1014 my @filled_row = ( $last_el );
1015 foreach my $el ( @row ) {
0e476982 1016 # If we are using node reference, make the lacuna node appear many times
1017 # in the table. If not, use the lacuna tag.
1dd07bda 1018 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1019 $el = $last_el;
eca16057 1020 }
1021 push( @filled_row, $el );
1022 $last_el = $el;
1023 }
1024 return @filled_row;
910a0a6d 1025}
1026
4e5a7b2c 1027=head1 NAVIGATION METHODS
910a0a6d 1028
4e5a7b2c 1029=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1030
1031Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1032with $last, for the witness given in $sigil. If a $backup sigil is
1033specified (e.g. when walking a layered witness), it will be used wherever
1034no $sigil path exists. If there is a base text reading, that will be
1035used wherever no path exists for $sigil or $backup.
e2902068 1036
1037=cut
1038
910a0a6d 1039# TODO Think about returning some lazy-eval iterator.
b0b4421a 1040# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1041
e2902068 1042sub reading_sequence {
861c3e27 1043 my( $self, $start, $end, $witness ) = @_;
e2902068 1044
930ff666 1045 $witness = $self->baselabel unless $witness;
e2902068 1046 my @readings = ( $start );
1047 my %seen;
1048 my $n = $start;
3a2ebbf4 1049 while( $n && $n->id ne $end->id ) {
1050 if( exists( $seen{$n->id} ) ) {
63778331 1051 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1052 }
3a2ebbf4 1053 $seen{$n->id} = 1;
910a0a6d 1054
861c3e27 1055 my $next = $self->next_reading( $n, $witness );
44771cf2 1056 unless( $next ) {
63778331 1057 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1058 }
910a0a6d 1059 push( @readings, $next );
1060 $n = $next;
e2902068 1061 }
1062 # Check that the last reading is our end reading.
1063 my $last = $readings[$#readings];
63778331 1064 throw( "Last reading found from " . $start->text .
1065 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1066 unless $last->id eq $end->id;
e2902068 1067
1068 return @readings;
1069}
1070
4e5a7b2c 1071=head2 next_reading( $reading, $sigil );
8e1394aa 1072
4a8828f0 1073Returns the reading that follows the given reading along the given witness
930ff666 1074path.
8e1394aa 1075
1076=cut
1077
4a8828f0 1078sub next_reading {
e2902068 1079 # Return the successor via the corresponding path.
8e1394aa 1080 my $self = shift;
3a2ebbf4 1081 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1082 return undef unless $answer;
3a2ebbf4 1083 return $self->reading( $answer );
8e1394aa 1084}
1085
4e5a7b2c 1086=head2 prior_reading( $reading, $sigil )
8e1394aa 1087
4a8828f0 1088Returns the reading that precedes the given reading along the given witness
930ff666 1089path.
8e1394aa 1090
1091=cut
1092
4a8828f0 1093sub prior_reading {
e2902068 1094 # Return the predecessor via the corresponding path.
8e1394aa 1095 my $self = shift;
3a2ebbf4 1096 my $answer = $self->_find_linked_reading( 'prior', @_ );
1097 return $self->reading( $answer );
8e1394aa 1098}
1099
4a8828f0 1100sub _find_linked_reading {
861c3e27 1101 my( $self, $direction, $node, $path ) = @_;
1102
1103 # Get a backup if we are dealing with a layered witness
1104 my $alt_path;
1105 my $aclabel = $self->ac_label;
1106 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1107 $alt_path = $1;
1108 }
1109
e2902068 1110 my @linked_paths = $direction eq 'next'
3a2ebbf4 1111 ? $self->sequence->edges_from( $node )
1112 : $self->sequence->edges_to( $node );
e2902068 1113 return undef unless scalar( @linked_paths );
8e1394aa 1114
e2902068 1115 # We have to find the linked path that contains all of the
1116 # witnesses supplied in $path.
1117 my( @path_wits, @alt_path_wits );
4e5a7b2c 1118 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1119 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1120 my $base_le;
1121 my $alt_le;
1122 foreach my $le ( @linked_paths ) {
3a2ebbf4 1123 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1124 $base_le = $le;
910a0a6d 1125 }
508fd430 1126 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1127 if( _is_within( \@path_wits, \@le_wits ) ) {
1128 # This is the right path.
1129 return $direction eq 'next' ? $le->[1] : $le->[0];
1130 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1131 $alt_le = $le;
1132 }
8e1394aa 1133 }
e2902068 1134 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1135 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1136 if $alt_le;
e2902068 1137
1138 # Got this far? Return the base path if it exists.
3a2ebbf4 1139 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1140 if $base_le;
e2902068 1141
1142 # Got this far? We have no appropriate path.
2c669bca 1143 warn "Could not find $direction node from " . $node->id
910a0a6d 1144 . " along path $path";
8e1394aa 1145 return undef;
1146}
1147
4a8828f0 1148# Some set logic.
1149sub _is_within {
1150 my( $set1, $set2 ) = @_;
7854e12e 1151 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1152 foreach my $el ( @$set1 ) {
910a0a6d 1153 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1154 }
1155 return $ret;
1156}
1157
4e5a7b2c 1158# Return the string that joins together a list of witnesses for
1159# display on a single path.
1160sub _witnesses_of_label {
1161 my( $self, $label ) = @_;
1162 my $regex = $self->wit_list_separator;
1163 my @answer = split( /\Q$regex\E/, $label );
1164 return @answer;
b0b4421a 1165}
1166
d4b75f44 1167=head2 common_readings
1168
1169Returns the list of common readings in the graph (i.e. those readings that are
1170shared by all non-lacunose witnesses.)
1171
1172=cut
1173
1174sub common_readings {
1175 my $self = shift;
1176 my @common = grep { $_->is_common } $self->readings;
1177 return @common;
1178}
1179
b0b4421a 1180=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1181
1182Returns the text of a witness (plus its backup, if we are using a layer)
1183as stored in the collation. The text is returned as a string, where the
1184individual readings are joined with spaces and the meta-readings (e.g.
1185lacunae) are omitted. Optional specification of $start and $end allows
1186the generation of a subset of the witness text.
4e5a7b2c 1187
b0b4421a 1188=cut
1189
1190sub path_text {
861c3e27 1191 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1192 $start = $self->start unless $start;
1193 $end = $self->end unless $end;
861c3e27 1194 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
b0b4421a 1195 return join( ' ', map { $_->text } @path );
1196}
4e5a7b2c 1197
1198=head1 INITIALIZATION METHODS
1199
1200These are mostly for use by parsers.
1201
1202=head2 make_witness_path( $witness )
1203
1204Link the array of readings contained in $witness->path (and in
1205$witness->uncorrected_path if it exists) into collation paths.
1206Clear out the arrays when finished.
de51424a 1207
4e5a7b2c 1208=head2 make_witness_paths
1209
1210Call make_witness_path for all witnesses in the tradition.
1211
1212=cut
930ff666 1213
7e450e44 1214# For use when a collation is constructed from a base text and an apparatus.
1215# We have the sequences of readings and just need to add path edges.
1f7aa795 1216# When we are done, clear out the witness path attributes, as they are no
1217# longer needed.
1218# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1219
6a222840 1220sub make_witness_paths {
1221 my( $self ) = @_;
910a0a6d 1222 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1223 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1224 $self->make_witness_path( $wit );
7854e12e 1225 }
7854e12e 1226}
1227
6a222840 1228sub make_witness_path {
7854e12e 1229 my( $self, $wit ) = @_;
1230 my @chain = @{$wit->path};
15d2d3df 1231 my $sig = $wit->sigil;
7854e12e 1232 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1233 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1234 }
1f7aa795 1235 if( $wit->is_layered ) {
d9e873d0 1236 @chain = @{$wit->uncorrected_path};
1237 foreach my $idx( 0 .. $#chain-1 ) {
1238 my $source = $chain[$idx];
1239 my $target = $chain[$idx+1];
1240 $self->add_path( $source, $target, $sig.$self->ac_label )
1241 unless $self->has_path( $source, $target, $sig );
1242 }
15d2d3df 1243 }
1f7aa795 1244 $wit->clear_path;
1245 $wit->clear_uncorrected_path;
e2902068 1246}
1247
4e5a7b2c 1248=head2 calculate_ranks
1249
1250Calculate the reading ranks (that is, their aligned positions relative
1251to each other) for the graph. This can only be called on linear collations.
1252
b365fbae 1253=begin testing
1254
1255use Text::Tradition;
1256
1257my $cxfile = 't/data/Collatex-16.xml';
1258my $t = Text::Tradition->new(
1259 'name' => 'inline',
1260 'input' => 'CollateX',
1261 'file' => $cxfile,
1262 );
1263my $c = $t->collation;
1264
1265# Make an svg
bfcbcecb 1266my $table = $c->alignment_table;
1267ok( $c->has_cached_table, "Alignment table was cached" );
1268is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1269$c->calculate_ranks;
bfcbcecb 1270is( $c->alignment_table, $table, "Cached table retained with no rank change" );
b365fbae 1271$c->add_relationship( 'n9', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1272isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1273
1274=end testing
1275
4e5a7b2c 1276=cut
1277
910a0a6d 1278sub calculate_ranks {
1279 my $self = shift;
b365fbae 1280 # Save the existing ranks, in case we need to invalidate the cached SVG.
1281 my %existing_ranks;
910a0a6d 1282 # Walk a version of the graph where every node linked by a relationship
1283 # edge is fundamentally the same node, and do a topological ranking on
1284 # the nodes in this graph.
c9bf3dbf 1285 my $topo_graph = Graph->new();
910a0a6d 1286 my %rel_containers;
1287 my $rel_ctr = 0;
1288 # Add the nodes
1289 foreach my $r ( $self->readings ) {
3a2ebbf4 1290 next if exists $rel_containers{$r->id};
910a0a6d 1291 my @rels = $r->related_readings( 'colocated' );
1292 if( @rels ) {
1293 # Make a relationship container.
1294 push( @rels, $r );
c9bf3dbf 1295 my $rn = 'rel_container_' . $rel_ctr++;
1296 $topo_graph->add_vertex( $rn );
910a0a6d 1297 foreach( @rels ) {
3a2ebbf4 1298 $rel_containers{$_->id} = $rn;
910a0a6d 1299 }
1300 } else {
1301 # Add a new node to mirror the old node.
3a2ebbf4 1302 $rel_containers{$r->id} = $r->id;
1303 $topo_graph->add_vertex( $r->id );
910a0a6d 1304 }
4a8828f0 1305 }
3a1f2523 1306
3a2ebbf4 1307 # Add the edges.
910a0a6d 1308 foreach my $r ( $self->readings ) {
b365fbae 1309 $existing_ranks{$r} = $r->rank;
3a2ebbf4 1310 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1311 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1312 $rel_containers{$n} );
4e5a7b2c 1313 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1314 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1315 }
1316 }
1317
1318 # Now do the rankings, starting with the start node.
3a2ebbf4 1319 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1320 my $node_ranks = { $topo_start => 0 };
910a0a6d 1321 my @curr_origin = ( $topo_start );
1322 # A little iterative function.
1323 while( @curr_origin ) {
c9bf3dbf 1324 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1325 }
1326 # Transfer our rankings from the topological graph to the real one.
1327 foreach my $r ( $self->readings ) {
3a2ebbf4 1328 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1329 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1330 } else {
63778331 1331 # Die. Find the last rank we calculated.
1332 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1333 <=> $node_ranks->{$rel_containers{$b->id}} }
1334 $self->readings;
1335 my $last = pop @all_defined;
1336 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1337 }
de51424a 1338 }
bfcbcecb 1339 # Do we need to invalidate the cached data?
1340 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1341 foreach my $r ( $self->readings ) {
1342 next if $existing_ranks{$r} == $r->rank;
c1915ab9 1343 # Something has changed, so clear the cache
bfcbcecb 1344 $self->_clear_cache;
c1915ab9 1345 # ...and recalculate the common readings.
1346 $self->calculate_common_readings();
b365fbae 1347 last;
1348 }
1349 }
c1915ab9 1350 # The graph calculation information is now up to date.
1351 $self->_graphcalc_done(1);
8e1394aa 1352}
3a1f2523 1353
910a0a6d 1354sub _assign_rank {
c9bf3dbf 1355 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1356 # Look at each of the children of @current_nodes. If all the child's
1357 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1358 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1359 # parent gets a rank.
910a0a6d 1360 my @next_nodes;
1361 foreach my $c ( @current_nodes ) {
c9bf3dbf 1362 warn "Current reading $c has no rank!"
1363 unless exists $node_ranks->{$c};
1364 # print STDERR "Looking at child of node $c, rank "
1365 # . $node_ranks->{$c} . "\n";
1366 foreach my $child ( $graph->successors( $c ) ) {
1367 next if exists $node_ranks->{$child};
910a0a6d 1368 my $highest_rank = -1;
1369 my $skip = 0;
c9bf3dbf 1370 foreach my $parent ( $graph->predecessors( $child ) ) {
1371 if( exists $node_ranks->{$parent} ) {
1372 $highest_rank = $node_ranks->{$parent}
1373 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1374 } else {
1375 $skip = 1;
1376 last;
1377 }
1378 }
1379 next if $skip;
c9bf3dbf 1380 my $c_rank = $highest_rank + 1;
1381 # print STDERR "Assigning rank $c_rank to node $child \n";
1382 $node_ranks->{$child} = $c_rank;
910a0a6d 1383 push( @next_nodes, $child );
1384 }
1385 }
1386 return @next_nodes;
4cdd82f1 1387}
910a0a6d 1388
c1915ab9 1389sub _clear_cache {
1390 my $self = shift;
1391 $self->wipe_svg if $self->has_cached_svg;
1392 $self->wipe_table if $self->has_cached_table;
1393}
1394
1395
4e5a7b2c 1396=head2 flatten_ranks
1397
1398A convenience method for parsing collation data. Searches the graph for readings
1399with the same text at the same rank, and merges any that are found.
1400
1401=cut
1402
0e476982 1403sub flatten_ranks {
1404 my $self = shift;
1405 my %unique_rank_rdg;
1406 foreach my $rdg ( $self->readings ) {
1407 next unless $rdg->has_rank;
1408 my $key = $rdg->rank . "||" . $rdg->text;
1409 if( exists $unique_rank_rdg{$key} ) {
1410 # Combine!
56eefa04 1411 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1412 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1413 # TODO see if this now makes a common point.
0e476982 1414 } else {
1415 $unique_rank_rdg{$key} = $rdg;
1416 }
1417 }
1418}
4633f9e4 1419
1420
d4b75f44 1421=head2 calculate_common_readings
1422
1423Goes through the graph identifying the readings that appear in every witness
1424(apart from those with lacunae at that spot.) Marks them as common and returns
1425the list.
1426
1427=begin testing
1428
1429use Text::Tradition;
1430
1431my $cxfile = 't/data/Collatex-16.xml';
1432my $t = Text::Tradition->new(
1433 'name' => 'inline',
1434 'input' => 'CollateX',
1435 'file' => $cxfile,
1436 );
1437my $c = $t->collation;
1438
1439my @common = $c->calculate_common_readings();
1440is( scalar @common, 8, "Found correct number of common readings" );
1441my @marked = sort $c->common_readings();
1442is( scalar @common, 8, "All common readings got marked as such" );
1443my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1444is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1445
1446=end testing
1447
1448=cut
1449
1450sub calculate_common_readings {
1451 my $self = shift;
1452 my @common;
c1915ab9 1453 map { $_->is_common( 0 ) } $self->readings;
1454 # Implicitly calls calculate_ranks
1dd07bda 1455 my $table = $self->alignment_table;
d4b75f44 1456 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1457 my @row = map { $_->{'tokens'}->[$idx]
1458 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1459 @{$table->{'alignment'}};
d4b75f44 1460 my %hash;
1461 foreach my $r ( @row ) {
1462 if( $r ) {
1463 $hash{$r->id} = $r unless $r->is_meta;
1464 } else {
1465 $hash{'UNDEF'} = $r;
1466 }
1467 }
1468 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1469 my( $r ) = values %hash;
1470 $r->is_common( 1 );
1471 push( @common, $r );
1472 }
1473 }
1474 return @common;
1475}
1476
861c3e27 1477=head2 text_from_paths
1478
1479Calculate the text array for all witnesses from the path, for later consistency
1480checking. Only to be used if there is no non-graph-based way to know the
1481original texts.
1482
1483=cut
1484
1485sub text_from_paths {
1486 my $self = shift;
1487 foreach my $wit ( $self->tradition->witnesses ) {
1488 my @text = split( /\s+/,
1489 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1490 $wit->text( \@text );
1491 if( $wit->is_layered ) {
1492 my @uctext = split( /\s+/,
1493 $self->reading_sequence( $self->start, $self->end,
1494 $wit->sigil.$self->ac_label ) );
1495 $wit->text( \@uctext );
1496 }
1497 }
1498}
0e476982 1499
4e5a7b2c 1500=head1 UTILITY FUNCTIONS
1501
1502=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1503
4e5a7b2c 1504Find the last reading that occurs in sequence before both the given readings.
1505
1506=head2 common_successor( $reading_a, $reading_b )
1507
1508Find the first reading that occurs in sequence after both the given readings.
1509
22222af9 1510=begin testing
1511
1512use Text::Tradition;
1513
1514my $cxfile = 't/data/Collatex-16.xml';
1515my $t = Text::Tradition->new(
1516 'name' => 'inline',
1517 'input' => 'CollateX',
1518 'file' => $cxfile,
1519 );
1520my $c = $t->collation;
1521
4e5a7b2c 1522is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1523 'n20', "Found correct common predecessor" );
4e5a7b2c 1524is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1525 '#END#', "Found correct common successor" );
1526
4e5a7b2c 1527is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1528 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1529is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1530 '#END#', "Found correct common successor for readings on same path" );
1531
1532=end testing
1533
1534=cut
1535
1536## Return the closest reading that is a predecessor of both the given readings.
1537sub common_predecessor {
1538 my $self = shift;
4e5a7b2c 1539 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1540 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1541}
1542
1543sub common_successor {
1544 my $self = shift;
4e5a7b2c 1545 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1546 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1547}
1548
027d819c 1549sub _common_in_path {
22222af9 1550 my( $self, $r1, $r2, $dir ) = @_;
1551 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1552 $iter = $self->end->rank - $iter if $dir eq 'successors';
1553 my @candidates;
1554 my @last_checked = ( $r1, $r2 );
1555 my %all_seen;
1556 while( !@candidates ) {
1557 my @new_lc;
1558 foreach my $lc ( @last_checked ) {
1559 foreach my $p ( $lc->$dir ) {
1560 if( $all_seen{$p->id} ) {
1561 push( @candidates, $p );
1562 } else {
1563 $all_seen{$p->id} = 1;
1564 push( @new_lc, $p );
1565 }
1566 }
1567 }
1568 @last_checked = @new_lc;
1569 }
1570 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1571 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1572}
1573
63778331 1574sub throw {
1575 Text::Tradition::Error->throw(
1576 'ident' => 'Collation error',
1577 'message' => $_[0],
1578 );
1579}
1580
dd3b58b0 1581no Moose;
1582__PACKAGE__->meta->make_immutable;
e867486f 1583
027d819c 1584=head1 LICENSE
e867486f 1585
027d819c 1586This package is free software and is provided "as is" without express
1587or implied warranty. You can redistribute it and/or modify it under
1588the same terms as Perl itself.
e867486f 1589
027d819c 1590=head1 AUTHOR
e867486f 1591
027d819c 1592Tara L Andrews E<lt>aurum@cpan.orgE<gt>