restore meta readings to equiv graph
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 277 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
278 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 279}
784877d9 280
3a2ebbf4 281### Reading construct/destruct functions
282
283sub add_reading {
284 my( $self, $reading ) = @_;
285 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286 my %args = %$reading;
6ad2ce78 287 if( $self->tradition->has_language && !exists $args{'language'} ) {
288 $args{'language'} = $self->tradition->language;
289 }
3a2ebbf4 290 $reading = Text::Tradition::Collation::Reading->new(
291 'collation' => $self,
292 %args );
293 }
294 # First check to see if a reading with this ID exists.
295 if( $self->reading( $reading->id ) ) {
63778331 296 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 297 }
c1915ab9 298 $self->_graphcalc_done(0);
3a2ebbf4 299 $self->_add_reading( $reading->id => $reading );
300 # Once the reading has been added, put it in both graphs.
301 $self->sequence->add_vertex( $reading->id );
cecbe56d 302 $self->relations->add_reading( $reading->id );
3a2ebbf4 303 return $reading;
eca16057 304};
305
3a2ebbf4 306around del_reading => sub {
307 my $orig = shift;
308 my $self = shift;
309 my $arg = shift;
310
cecbe56d 311 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
312 $arg = $arg->id;
3a2ebbf4 313 }
3a2ebbf4 314 # Remove the reading from the graphs.
c1915ab9 315 $self->_graphcalc_done(0);
4e483aa5 316 $self->_clear_cache; # Explicitly clear caches to GC the reading
cecbe56d 317 $self->sequence->delete_vertex( $arg );
318 $self->relations->delete_reading( $arg );
3a2ebbf4 319
320 # Carry on.
cecbe56d 321 $self->$orig( $arg );
3a2ebbf4 322};
7854e12e 323
4e483aa5 324=begin testing
325
326use Text::Tradition;
327
328my $cxfile = 't/data/Collatex-16.xml';
329my $t = Text::Tradition->new(
330 'name' => 'inline',
331 'input' => 'CollateX',
332 'file' => $cxfile,
333 );
334my $c = $t->collation;
335
336my $rno = scalar $c->readings;
337# Split n21 for testing purposes
338my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
339my $old_r = $c->reading( 'n21' );
340$old_r->alter_text( 'to' );
341$c->del_path( 'n20', 'n21', 'A' );
342$c->add_path( 'n20', 'n21p0', 'A' );
343$c->add_path( 'n21p0', 'n21', 'A' );
344$c->flatten_ranks();
345ok( $c->reading( 'n21p0' ), "New reading exists" );
346is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
347
679f17e1 348# Combine n3 and n4 ( with his )
4e483aa5 349$c->merge_readings( 'n3', 'n4', 1 );
350ok( !$c->reading('n4'), "Reading n4 is gone" );
351is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
352
679f17e1 353# Collapse n9 and n10 ( rood / root )
354$c->merge_readings( 'n9', 'n10' );
355ok( !$c->reading('n10'), "Reading n10 is gone" );
356is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 357
358# Combine n21 and n21p0
359my $remaining = $c->reading('n21');
360$remaining ||= $c->reading('n22'); # one of these should still exist
361$c->merge_readings( 'n21p0', $remaining, 1 );
362ok( !$c->reading('n21'), "Reading $remaining is gone" );
363is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
364
365=end testing
366
367=cut
7854e12e 368
3a2ebbf4 369sub merge_readings {
370 my $self = shift;
371
56772e8c 372 # Sanity check
373 my( $kept_obj, $del_obj, $combine, $combine_char ) = $self->_objectify_args( @_ );
374 my $mergemeta = $kept_obj->is_meta;
375 throw( "Cannot merge meta and non-meta reading" )
376 unless ( $mergemeta && $del_obj->is_meta )
377 || ( !$mergemeta && !$del_obj->is_meta );
378 if( $mergemeta ) {
379 throw( "Cannot merge with start or end node" )
380 if( $kept_obj eq $self->start || $kept_obj eq $self->end
381 || $del_obj eq $self->start || $del_obj eq $self->end );
382 }
3a2ebbf4 383 # We only need the IDs for adding paths to the graph, not the reading
384 # objects themselves.
56772e8c 385 my $kept = $kept_obj->id;
386 my $deleted = $del_obj->id;
c1915ab9 387 $self->_graphcalc_done(0);
10943ab0 388
3a2ebbf4 389 # The kept reading should inherit the paths and the relationships
390 # of the deleted reading.
391 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
392 my @vector = ( $kept );
393 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
394 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 395 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 396 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
397 $self->sequence->add_edge( @vector );
398 my $fwits = $self->sequence->get_edge_attributes( @vector );
399 @wits{keys %$fwits} = values %$fwits;
400 $self->sequence->set_edge_attributes( @vector, \%wits );
401 }
cecbe56d 402 $self->relations->merge_readings( $kept, $deleted, $combine );
3a2ebbf4 403
404 # Do the deletion deed.
4e483aa5 405 if( $combine ) {
4e483aa5 406 my $joinstr = $combine_char;
407 unless( defined $joinstr ) {
408 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
409 $joinstr = $self->wordsep unless defined $joinstr;
410 }
411 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 412 }
3a2ebbf4 413 $self->del_reading( $deleted );
414}
7854e12e 415
3265b0ce 416
3a2ebbf4 417# Helper function for manipulating the graph.
418sub _stringify_args {
4e483aa5 419 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 420 $first = $first->id
421 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
422 $second = $second->id
423 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 424 return( $first, $second, @args );
3a2ebbf4 425}
df6d9812 426
4e5a7b2c 427# Helper function for manipulating the graph.
428sub _objectify_args {
429 my( $self, $first, $second, $arg ) = @_;
430 $first = $self->reading( $first )
431 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
432 $second = $self->reading( $second )
433 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
434 return( $first, $second, $arg );
435}
3a2ebbf4 436### Path logic
437
438sub add_path {
439 my $self = shift;
440
441 # We only need the IDs for adding paths to the graph, not the reading
442 # objects themselves.
cecbe56d 443 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
3a2ebbf4 444
c1915ab9 445 $self->_graphcalc_done(0);
3a2ebbf4 446 # Connect the readings
359944f7 447 unless( $self->sequence->has_edge( $source, $target ) ) {
448 $self->sequence->add_edge( $source, $target );
449 $self->relations->add_equivalence_edge( $source, $target );
450 }
3a2ebbf4 451 # Note the witness in question
452 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 453}
b15511bf 454
3a2ebbf4 455sub del_path {
456 my $self = shift;
49d4f2ac 457 my @args;
458 if( ref( $_[0] ) eq 'ARRAY' ) {
459 my $e = shift @_;
460 @args = ( @$e, @_ );
461 } else {
462 @args = @_;
463 }
3a2ebbf4 464
465 # We only need the IDs for adding paths to the graph, not the reading
466 # objects themselves.
49d4f2ac 467 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 468
c1915ab9 469 $self->_graphcalc_done(0);
3a2ebbf4 470 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 471 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 472 }
473 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
474 $self->sequence->delete_edge( $source, $target );
359944f7 475 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 476 }
784877d9 477}
478
3a2ebbf4 479
15d2d3df 480# Extra graph-alike utility
481sub has_path {
3a2ebbf4 482 my $self = shift;
483 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
484 return undef unless $self->sequence->has_edge( $source, $target );
485 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 486}
487
4e5a7b2c 488=head2 clear_witness( @sigil_list )
3a2ebbf4 489
4e5a7b2c 490Clear the given witnesses out of the collation entirely, removing references
491to them in paths, and removing readings that belong only to them. Should only
492be called via $tradition->del_witness.
3a2ebbf4 493
494=cut
495
4e5a7b2c 496sub clear_witness {
497 my( $self, @sigils ) = @_;
498
c1915ab9 499 $self->_graphcalc_done(0);
4e5a7b2c 500 # Clear the witness(es) out of the paths
501 foreach my $e ( $self->paths ) {
502 foreach my $sig ( @sigils ) {
503 $self->del_path( $e, $sig );
504 }
505 }
506
507 # Clear out the newly unused readings
508 foreach my $r ( $self->readings ) {
509 unless( $self->reading_witnesses( $r ) ) {
510 $self->del_reading( $r );
511 }
512 }
513}
3a2ebbf4 514
515sub add_relationship {
516 my $self = shift;
22222af9 517 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 518 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 519 $self->_graphcalc_done(0);
63778331 520 return @vectors;
22222af9 521}
ef9d481f 522
ca6e6095 523around qw/ get_relationship del_relationship / => sub {
524 my $orig = shift;
525 my $self = shift;
526 my @args = @_;
527 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
528 @args = @{$_[0]};
529 }
530 my( $source, $target ) = $self->_stringify_args( @args );
531 $self->$orig( $source, $target );
532};
533
22222af9 534=head2 reading_witnesses( $reading )
910a0a6d 535
22222af9 536Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 537
22222af9 538=cut
1d310495 539
1d310495 540sub reading_witnesses {
541 my( $self, $reading ) = @_;
542 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 543 # arbitrarily chosen "incoming". Thus, special-case the start node.
544 if( $reading eq $self->start ) {
545 return map { $_->sigil } $self->tradition->witnesses;
546 }
1d310495 547 my %all_witnesses;
548 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
549 my $wits = $self->sequence->get_edge_attributes( @$e );
550 @all_witnesses{ keys %$wits } = 1;
551 }
c12bb878 552 my $acstr = $self->ac_label;
553 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
554 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
555 }
1d310495 556 return keys %all_witnesses;
910a0a6d 557}
558
4e5a7b2c 559=head1 OUTPUT METHODS
8e1394aa 560
0ecb975c 561=head2 as_svg( \%options )
8e1394aa 562
0068967c 563Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 564See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 565
566=cut
567
568sub as_svg {
0ecb975c 569 my( $self, $opts ) = @_;
bfcbcecb 570 throw( "Need GraphViz installed to output SVG" )
571 unless File::Which::which( 'dot' );
e247aad1 572 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 573 $self->calculate_ranks()
574 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
e247aad1 575 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 576 my @cmd = qw/dot -Tsvg/;
577 my( $svg, $err );
578 my $dotfile = File::Temp->new();
579 ## USE FOR DEBUGGING
580 # $dotfile->unlink_on_destroy(0);
581 binmode $dotfile, ':utf8';
582 print $dotfile $self->as_dot( $opts );
583 push( @cmd, $dotfile->filename );
584 run( \@cmd, ">", binary(), \$svg );
e247aad1 585 $svg = decode_utf8( $svg );
586 $self->cached_svg( $svg ) unless $want_subgraph;
587 return $svg;
588 } else {
589 return $self->cached_svg;
b365fbae 590 }
8e1394aa 591}
592
b22576c6 593
0ecb975c 594=head2 as_dot( \%options )
b22576c6 595
0ecb975c 596Returns a string that is the collation graph expressed in dot
597(i.e. GraphViz) format. Options include:
b22576c6 598
0ecb975c 599=over 4
b22576c6 600
0ecb975c 601=item * from
b22576c6 602
0ecb975c 603=item * to
df6d9812 604
0ecb975c 605=item * color_common
606
607=back
df6d9812 608
609=cut
610
611sub as_dot {
0ecb975c 612 my( $self, $opts ) = @_;
613 my $startrank = $opts->{'from'} if $opts;
614 my $endrank = $opts->{'to'} if $opts;
615 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 616 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
617 && $self->end->rank > 100;
6648ee3d 618 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 619
b22576c6 620 # Check the arguments
621 if( $startrank ) {
622 return if $endrank && $startrank > $endrank;
623 return if $startrank > $self->end->rank;
624 }
625 if( defined $endrank ) {
626 return if $endrank < 0;
f1b3b33a 627 $endrank = undef if $endrank == $self->end->rank;
b22576c6 628 }
629
67da8d6c 630 my $graph_name = $self->tradition->name;
631 $graph_name =~ s/[^\w\s]//g;
632 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 633
634 my %graph_attrs = (
635 'rankdir' => 'LR',
636 'bgcolor' => 'none',
637 );
638 my %node_attrs = (
b8990398 639 'fontsize' => 14,
f13b5582 640 'fillcolor' => 'white',
641 'style' => 'filled',
642 'shape' => 'ellipse'
643 );
644 my %edge_attrs = (
645 'arrowhead' => 'open',
646 'color' => '#000000',
647 'fontcolor' => '#000000',
648 );
649
67da8d6c 650 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 651 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
652 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 653
b22576c6 654 # Output substitute start/end readings if necessary
655 if( $startrank ) {
43c94341 656 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 657 }
658 if( $endrank ) {
43c94341 659 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 660 }
b365fbae 661 if( $STRAIGHTENHACK ) {
662 ## HACK part 1
43c94341 663 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
664 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 665 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
666 }
b22576c6 667 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 668 # Sort the readings by rank if we have ranks; this speeds layout.
669 my @all_readings = $self->end->has_rank
670 ? sort { $a->rank <=> $b->rank } $self->readings
671 : $self->readings;
4633f9e4 672 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 673 foreach my $reading ( @all_readings ) {
b22576c6 674 # Only output readings within our rank range.
675 next if $startrank && $reading->rank < $startrank;
676 next if $endrank && $reading->rank > $endrank;
677 $used{$reading->id} = 1;
910a0a6d 678 # Need not output nodes without separate labels
3a2ebbf4 679 next if $reading->id eq $reading->text;
d4b75f44 680 my $rattrs;
30f0df34 681 my $label = $reading->text;
629e27b0 682 $label .= '-' if $reading->join_next;
683 $label = "-$label" if $reading->join_prior;
8f9cab7b 684 $label =~ s/\"/\\\"/g;
d4b75f44 685 $rattrs->{'label'} = $label;
10e4b1ac 686 $rattrs->{'id'} = $reading->id;
0ecb975c 687 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 688 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 689 }
3a2ebbf4 690
30ddc24c 691 # Add the real edges. Need to weight one edge per rank jump, in a
692 # continuous line.
b365fbae 693 # my $weighted = $self->_add_edge_weights;
b22576c6 694 my @edges = $self->paths;
3bdec618 695 my( %substart, %subend );
b22576c6 696 foreach my $edge ( @edges ) {
697 # Do we need to output this edge?
508fd430 698 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 699 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 700 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 701
b22576c6 702 # Account for the rank gap if necessary
30ddc24c 703 my $rank0 = $self->reading( $edge->[0] )->rank
704 if $self->reading( $edge->[0] )->has_rank;
705 my $rank1 = $self->reading( $edge->[1] )->rank
706 if $self->reading( $edge->[1] )->has_rank;
707 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
708 $variables->{'minlen'} = $rank1 - $rank0;
709 }
710
711 # Add the calculated edge weights
b365fbae 712 # if( exists $weighted->{$edge->[0]}
e247aad1 713 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
714 # # $variables->{'color'} = 'red';
715 # $variables->{'weight'} = 3.0;
716 # }
30ddc24c 717
508fd430 718 # EXPERIMENTAL: make edge width reflect no. of witnesses
719 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
720 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
721
f13b5582 722 my $varopts = _dot_attr_string( $variables );
723 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
724 $edge->[0], $edge->[1], $varopts );
3bdec618 725 } elsif( $used{$edge->[0]} ) {
726 $subend{$edge->[0]} = 1;
727 } elsif( $used{$edge->[1]} ) {
728 $substart{$edge->[1]} = 1;
b22576c6 729 }
df6d9812 730 }
3bdec618 731 # Add substitute start and end edges if necessary
732 foreach my $node ( keys %substart ) {
027d819c 733 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 734 my $variables = { %edge_attrs, 'label' => $witstr };
735 my $varopts = _dot_attr_string( $variables );
43c94341 736 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;";
3bdec618 737 }
738 foreach my $node ( keys %subend ) {
027d819c 739 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 740 my $variables = { %edge_attrs, 'label' => $witstr };
741 my $varopts = _dot_attr_string( $variables );
43c94341 742 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;";
3bdec618 743 }
b365fbae 744 # HACK part 2
745 if( $STRAIGHTENHACK ) {
43c94341 746 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
747 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 748 }
30ddc24c 749
df6d9812 750 $dot .= "}\n";
751 return $dot;
752}
753
f13b5582 754sub _dot_attr_string {
755 my( $hash ) = @_;
756 my @attrs;
757 foreach my $k ( sort keys %$hash ) {
758 my $v = $hash->{$k};
759 push( @attrs, $k.'="'.$v.'"' );
760 }
761 return( '[ ' . join( ', ', @attrs ) . ' ]' );
762}
763
30ddc24c 764sub _add_edge_weights {
765 my $self = shift;
766 # Walk the graph from START to END, choosing the successor node with
767 # the largest number of witness paths each time.
768 my $weighted = {};
769 my $curr = $self->start->id;
008fc8a6 770 my $ranked = $self->end->has_rank;
30ddc24c 771 while( $curr ne $self->end->id ) {
008fc8a6 772 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 773 my @succ = sort { $self->path_witnesses( $curr, $a )
774 <=> $self->path_witnesses( $curr, $b ) }
775 $self->sequence->successors( $curr );
776 my $next = pop @succ;
008fc8a6 777 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 778 # Try to avoid lacunae in the weighted path.
008fc8a6 779 while( @succ &&
780 ( $self->reading( $next )->is_lacuna ||
781 $nextrank - $rank > 1 ) ){
30ddc24c 782 $next = pop @succ;
783 }
784 $weighted->{$curr} = $next;
785 $curr = $next;
786 }
787 return $weighted;
788}
789
027d819c 790=head2 path_witnesses( $edge )
791
792Returns the list of sigils whose witnesses are associated with the given edge.
793The edge can be passed as either an array or an arrayref of ( $source, $target ).
794
795=cut
796
3a2ebbf4 797sub path_witnesses {
798 my( $self, @edge ) = @_;
799 # If edge is an arrayref, cope.
800 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
801 my $e = shift @edge;
802 @edge = @$e;
803 }
804 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 805 return @wits;
3a2ebbf4 806}
807
7f9f05e8 808# Helper function. Make a display label for the given witnesses, showing a.c.
809# witnesses only where the main witness is not also in the list.
027d819c 810sub _path_display_label {
508fd430 811 my $self = shift;
7f9f05e8 812 my %wits;
813 map { $wits{$_} = 1 } @_;
814
815 # If an a.c. wit is listed, remove it if the main wit is also listed.
816 # Otherwise keep it for explicit listing.
817 my $aclabel = $self->ac_label;
818 my @disp_ac;
819 foreach my $w ( sort keys %wits ) {
820 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
821 if( exists $wits{$1} ) {
822 delete $wits{$w};
823 } else {
824 push( @disp_ac, $w );
825 }
826 }
827 }
828
829 # See if we are in a majority situation.
8f9cab7b 830 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1ff82d4f 831 $maj = $maj > 5 ? $maj : 5;
7f9f05e8 832 if( scalar keys %wits > $maj ) {
833 unshift( @disp_ac, 'majority' );
834 return join( ', ', @disp_ac );
8f9cab7b 835 } else {
7f9f05e8 836 return join( ', ', sort keys %wits );
8f9cab7b 837 }
838}
1dd07bda 839
bf6e338d 840=head2 readings_at_rank( $rank )
1dd07bda 841
bf6e338d 842Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 843
844=cut
845
bf6e338d 846sub readings_at_rank {
1dd07bda 847 my( $self, $rank ) = @_;
bf6e338d 848 my $table = $self->alignment_table;
849 # Table rank is real rank - 1.
850 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
851 my %readings;
852 foreach my $e ( @elements ) {
853 next unless ref( $e ) eq 'HASH';
854 next unless exists $e->{'t'};
855 $readings{$e->{'t'}->id} = $e->{'t'};
856 }
857 return values %readings;
1dd07bda 858}
8f9cab7b 859
4e5a7b2c 860=head2 as_graphml
8e1394aa 861
4e5a7b2c 862Returns a GraphML representation of the collation. The GraphML will contain
863two graphs. The first expresses the attributes of the readings and the witness
864paths that link them; the second expresses the relationships that link the
865readings. This is the native transfer format for a tradition.
8e1394aa 866
56eefa04 867=begin testing
868
869use Text::Tradition;
870
871my $READINGS = 311;
872my $PATHS = 361;
873
874my $datafile = 't/data/florilegium_tei_ps.xml';
875my $tradition = Text::Tradition->new( 'input' => 'TEI',
876 'name' => 'test0',
877 'file' => $datafile,
878 'linear' => 1 );
879
880ok( $tradition, "Got a tradition object" );
881is( scalar $tradition->witnesses, 13, "Found all witnesses" );
882ok( $tradition->collation, "Tradition has a collation" );
883
884my $c = $tradition->collation;
885is( scalar $c->readings, $READINGS, "Collation has all readings" );
886is( scalar $c->paths, $PATHS, "Collation has all paths" );
887is( scalar $c->relationships, 0, "Collation has all relationships" );
888
889# Add a few relationships
890$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
891$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
892$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
893
894# Now write it to GraphML and parse it again.
895
896my $graphml = $c->as_graphml;
897my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
898is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
899is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
900is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
901
2a812726 902# Now add a stemma, write to GraphML, and parse again.
903my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
904is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
905is( $tradition->stemmata, 1, "Tradition now has the stemma" );
906$graphml = $c->as_graphml;
907like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
908
56eefa04 909=end testing
910
8e1394aa 911=cut
912
913sub as_graphml {
a30ca502 914 my( $self, $options ) = @_;
3d14b48e 915 $self->calculate_ranks unless $self->_graphcalc_done;
916
a30ca502 917 my $start = $options->{'from'}
918 ? $self->reading( $options->{'from'} ) : $self->start;
919 my $end = $options->{'to'}
920 ? $self->reading( $options->{'to'} ) : $self->end;
921 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
922 throw( 'Start node must be before end node' );
923 }
924 # The readings need to be ranked for this to work.
925 $start = $self->start unless $start->has_rank;
926 $end = $self->end unless $end->has_rank;
414cc046 927 my $rankoffset = 0;
928 unless( $start eq $self->start ) {
929 $rankoffset = $start->rank - 1;
930 }
a30ca502 931 my %use_readings;
932
8e1394aa 933 # Some namespaces
934 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
935 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
936 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 937 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 938
939 # Create the document and root node
940 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
941 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
942 $graphml->setDocumentElement( $root );
943 $root->setNamespace( $xsi_ns, 'xsi', 0 );
944 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 945
946 # List of attribute types to save on our objects and their corresponding
947 # GraphML types
948 my %save_types = (
949 'Str' => 'string',
950 'Int' => 'int',
951 'Bool' => 'boolean',
10e4b1ac 952 'ReadingID' => 'string',
bbd064a9 953 'RelationshipType' => 'string',
954 'RelationshipScope' => 'string',
955 );
956
957 # List of attribute names *not* to save on our objects.
958 # We will also not save any attribute beginning with _.
959 my %skipsave;
960 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 961
bbd064a9 962 # Add the data keys for the graph. Include an extra key 'version' for the
963 # GraphML output version.
e309421a 964 my %graph_data_keys;
965 my $gdi = 0;
bbd064a9 966 my %graph_attributes = ( 'version' => 'string' );
967 # Graph attributes include those of Tradition and those of Collation.
968 my %gattr_from;
969 my $tmeta = $self->tradition->meta;
970 my $cmeta = $self->meta;
971 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
972 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
973 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
974 next if $attr->name =~ /^_/;
975 next if $skipsave{$attr->name};
976 next unless $save_types{$attr->type_constraint->name};
977 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
978 }
2a812726 979 # Extra custom key for the tradition stemma(ta)
980 $graph_attributes{'stemmata'} = 'string';
bbd064a9 981
982 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 983 $graph_data_keys{$datum} = 'dg'.$gdi++;
984 my $key = $root->addNewChild( $graphml_ns, 'key' );
985 $key->setAttribute( 'attr.name', $datum );
bbd064a9 986 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 987 $key->setAttribute( 'for', 'graph' );
988 $key->setAttribute( 'id', $graph_data_keys{$datum} );
989 }
f6066bac 990
bbd064a9 991 # Add the data keys for reading nodes
992 my %reading_attributes;
993 my $rmeta = Text::Tradition::Collation::Reading->meta;
994 foreach my $attr( $rmeta->get_all_attributes ) {
995 next if $attr->name =~ /^_/;
996 next if $skipsave{$attr->name};
997 next unless $save_types{$attr->type_constraint->name};
998 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
999 }
7cd9f181 1000 # Extra custom key for the reading morphology
1001 $reading_attributes{'lexemes'} = 'string';
1002
ef9d481f 1003 my %node_data_keys;
1004 my $ndi = 0;
bbd064a9 1005 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 1006 $node_data_keys{$datum} = 'dn'.$ndi++;
1007 my $key = $root->addNewChild( $graphml_ns, 'key' );
1008 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1009 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1010 $key->setAttribute( 'for', 'node' );
1011 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1012 }
1013
bbd064a9 1014 # Add the data keys for edges, that is, paths and relationships. Path
1015 # data does not come from a Moose class so is here manually.
ef9d481f 1016 my $edi = 0;
1017 my %edge_data_keys;
bbd064a9 1018 my %edge_attributes = (
3a2ebbf4 1019 witness => 'string', # ID/label for a path
3a2ebbf4 1020 extra => 'boolean', # Path key
3a2ebbf4 1021 );
bbd064a9 1022 my @path_attributes = keys %edge_attributes; # track our manual additions
1023 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1024 foreach my $attr( $pmeta->get_all_attributes ) {
1025 next if $attr->name =~ /^_/;
1026 next if $skipsave{$attr->name};
1027 next unless $save_types{$attr->type_constraint->name};
1028 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1029 }
1030 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1031 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1032 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1033 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1034 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1035 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1036 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1037 }
3a2ebbf4 1038
cc31ebaa 1039 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1040 my $xmlidname = $self->tradition->name;
1041 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1042 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1043 $xmlidname = '_'.$xmlidname;
1044 }
2c669bca 1045 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1046 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1047 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1048 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1049 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1050 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1051 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1052 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1053
2a812726 1054 # Tradition/collation attribute data
bbd064a9 1055 foreach my $datum ( keys %graph_attributes ) {
1056 my $value;
1057 if( $datum eq 'version' ) {
2a812726 1058 $value = '3.2';
1059 } elsif( $datum eq 'stemmata' ) {
1060 my @stemstrs;
1061 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1062 $self->tradition->stemmata;
1063 $value = join( "\n", @stemstrs );
bbd064a9 1064 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1065 $value = $self->tradition->$datum;
1066 } else {
1067 $value = $self->$datum;
1068 }
2c669bca 1069 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1070 }
8e1394aa 1071
1072 my $node_ctr = 0;
1073 my %node_hash;
22222af9 1074 # Add our readings to the graph
3a2ebbf4 1075 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1076 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1077 ( $n->rank < $start->rank || $n->rank > $end->rank );
1078 $use_readings{$n->id} = 1;
2c669bca 1079 # Add to the main graph
1080 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1081 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1082 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1083 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1084 foreach my $d ( keys %reading_attributes ) {
255875b8 1085 my $nval = $n->$d;
7cd9f181 1086 # Custom serialization
1087 if( $d eq 'lexemes' ) {
1088 # If nval is a true value, we have lexemes so we need to
1089 # serialize them. Otherwise set nval to undef so that the
1090 # key is excluded from this reading.
1091 $nval = $nval ? $n->_serialize_lexemes : undef;
18c64d55 1092 } elsif( $d eq 'normal_form' && $n->normal_form eq $n->text ) {
1093 $nval = undef;
7cd9f181 1094 }
cc31ebaa 1095 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1096 # Adjust the ranks within the subgraph.
cc31ebaa 1097 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1098 : $nval - $rankoffset;
414cc046 1099 }
255875b8 1100 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1101 if defined $nval;
1102 }
b15511bf 1103 }
1104
2c669bca 1105 # Add the path edges to the sequence graph
df6d9812 1106 my $edge_ctr = 0;
3a2ebbf4 1107 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1108 # We add an edge in the graphml for every witness in $e.
a30ca502 1109 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1110 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1111 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1112 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1113 # Skip any path from start to end; that witness is not in the subgraph.
1114 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1115 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1116 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1117 $node_hash{ $e->[0] },
1118 $node_hash{ $e->[1] } );
2c669bca 1119 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1120 $edge_el->setAttribute( 'source', $from );
1121 $edge_el->setAttribute( 'target', $to );
1122 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1123
1124 # It's a witness path, so add the witness
1125 my $base = $wit;
1126 my $key = $edge_data_keys{'witness'};
1127 # Is this an ante-corr witness?
1128 my $aclabel = $self->ac_label;
1129 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1130 # Keep the base witness
1131 $base = $1;
1132 # ...and record that this is an 'extra' reading path
1133 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1134 }
1135 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1136 }
1137 }
1138
cc31ebaa 1139 # Report the actual number of nodes and edges that went in
1140 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1141 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1142
22222af9 1143 # Add the relationship graph to the XML
bbd064a9 1144 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1145 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1146 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1147
94c00c71 1148 # Save and return the thing
1149 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1150 return $result;
df6d9812 1151}
1152
b15511bf 1153sub _add_graphml_data {
1154 my( $el, $key, $value ) = @_;
b15511bf 1155 return unless defined $value;
c9bf3dbf 1156 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1157 $data_el->setAttribute( 'key', $key );
1158 $data_el->appendText( $value );
8e1394aa 1159}
1160
4e5a7b2c 1161=head2 as_csv
910a0a6d 1162
1163Returns a CSV alignment table representation of the collation graph, one
2c669bca 1164row per witness (or witness uncorrected.)
910a0a6d 1165
1166=cut
1167
1168sub as_csv {
3a2ebbf4 1169 my( $self ) = @_;
1dd07bda 1170 my $table = $self->alignment_table;
82fa4d57 1171 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1172 my @result;
2c669bca 1173 # Make the header row
1174 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1175 push( @result, decode_utf8( $csv->string ) );
1176 # Make the rest of the rows
1177 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1178 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1179 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1180 $csv->combine( @row );
910a0a6d 1181 push( @result, decode_utf8( $csv->string ) );
1182 }
3a2ebbf4 1183 return join( "\n", @result );
910a0a6d 1184}
1185
1dd07bda 1186=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1187
566f4595 1188Return a reference to an alignment table, in a slightly enhanced CollateX
1189format which looks like this:
1190
1191 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1192 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1193 { witness => "SIG2",
4e5a7b2c 1194 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1195 ... ],
1196 length => TEXTLEN };
1197
1198If $use_refs is set to 1, the reading object is returned in the table
1199instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1200
1201If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1202keys have a true hash value will be included.
2c669bca 1203
1204=cut
9f3ba6f7 1205
1dd07bda 1206sub alignment_table {
1207 my( $self ) = @_;
c1915ab9 1208 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1209 return $self->cached_table if $self->has_cached_table;
1210
0ecb975c 1211 # Make sure we can do this
1212 throw( "Need a linear graph in order to make an alignment table" )
1213 unless $self->linear;
1214 $self->calculate_ranks unless $self->end->has_rank;
1215
2c669bca 1216 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1217 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1218 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1219 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1220 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1221 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1222 push( @{$table->{'alignment'}},
1223 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1224 if( $wit->is_layered ) {
1225 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1226 $wit->sigil.$self->ac_label );
1dd07bda 1227 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1228 push( @{$table->{'alignment'}},
1229 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1230 }
1231 }
1dd07bda 1232 $self->cached_table( $table );
1233 return $table;
910a0a6d 1234}
1235
1236sub _make_witness_row {
1dd07bda 1237 my( $path, $positions ) = @_;
910a0a6d 1238 my %char_hash;
1239 map { $char_hash{$_} = undef } @$positions;
2c669bca 1240 my $debug = 0;
910a0a6d 1241 foreach my $rdg ( @$path ) {
eca16057 1242 my $rtext = $rdg->text;
1243 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1244 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1245 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1246 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1247 }
1248 my @row = map { $char_hash{$_} } @$positions;
eca16057 1249 # Fill in lacuna markers for undef spots in the row
1250 my $last_el = shift @row;
1251 my @filled_row = ( $last_el );
1252 foreach my $el ( @row ) {
0e476982 1253 # If we are using node reference, make the lacuna node appear many times
1254 # in the table. If not, use the lacuna tag.
1dd07bda 1255 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1256 $el = $last_el;
eca16057 1257 }
1258 push( @filled_row, $el );
1259 $last_el = $el;
1260 }
1261 return @filled_row;
910a0a6d 1262}
1263
4e5a7b2c 1264=head1 NAVIGATION METHODS
910a0a6d 1265
4e5a7b2c 1266=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1267
1268Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1269with $last, for the witness given in $sigil. If a $backup sigil is
1270specified (e.g. when walking a layered witness), it will be used wherever
1271no $sigil path exists. If there is a base text reading, that will be
1272used wherever no path exists for $sigil or $backup.
e2902068 1273
1274=cut
1275
910a0a6d 1276# TODO Think about returning some lazy-eval iterator.
b0b4421a 1277# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1278
e2902068 1279sub reading_sequence {
861c3e27 1280 my( $self, $start, $end, $witness ) = @_;
e2902068 1281
930ff666 1282 $witness = $self->baselabel unless $witness;
e2902068 1283 my @readings = ( $start );
1284 my %seen;
1285 my $n = $start;
3a2ebbf4 1286 while( $n && $n->id ne $end->id ) {
1287 if( exists( $seen{$n->id} ) ) {
63778331 1288 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1289 }
3a2ebbf4 1290 $seen{$n->id} = 1;
910a0a6d 1291
861c3e27 1292 my $next = $self->next_reading( $n, $witness );
44771cf2 1293 unless( $next ) {
63778331 1294 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1295 }
910a0a6d 1296 push( @readings, $next );
1297 $n = $next;
e2902068 1298 }
1299 # Check that the last reading is our end reading.
1300 my $last = $readings[$#readings];
63778331 1301 throw( "Last reading found from " . $start->text .
1302 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1303 unless $last->id eq $end->id;
e2902068 1304
1305 return @readings;
1306}
1307
4e5a7b2c 1308=head2 next_reading( $reading, $sigil );
8e1394aa 1309
4a8828f0 1310Returns the reading that follows the given reading along the given witness
930ff666 1311path.
8e1394aa 1312
1313=cut
1314
4a8828f0 1315sub next_reading {
e2902068 1316 # Return the successor via the corresponding path.
8e1394aa 1317 my $self = shift;
3a2ebbf4 1318 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1319 return undef unless $answer;
3a2ebbf4 1320 return $self->reading( $answer );
8e1394aa 1321}
1322
4e5a7b2c 1323=head2 prior_reading( $reading, $sigil )
8e1394aa 1324
4a8828f0 1325Returns the reading that precedes the given reading along the given witness
930ff666 1326path.
8e1394aa 1327
1328=cut
1329
4a8828f0 1330sub prior_reading {
e2902068 1331 # Return the predecessor via the corresponding path.
8e1394aa 1332 my $self = shift;
3a2ebbf4 1333 my $answer = $self->_find_linked_reading( 'prior', @_ );
1334 return $self->reading( $answer );
8e1394aa 1335}
1336
4a8828f0 1337sub _find_linked_reading {
861c3e27 1338 my( $self, $direction, $node, $path ) = @_;
1339
1340 # Get a backup if we are dealing with a layered witness
1341 my $alt_path;
1342 my $aclabel = $self->ac_label;
1343 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1344 $alt_path = $1;
1345 }
1346
e2902068 1347 my @linked_paths = $direction eq 'next'
3a2ebbf4 1348 ? $self->sequence->edges_from( $node )
1349 : $self->sequence->edges_to( $node );
e2902068 1350 return undef unless scalar( @linked_paths );
8e1394aa 1351
e2902068 1352 # We have to find the linked path that contains all of the
1353 # witnesses supplied in $path.
1354 my( @path_wits, @alt_path_wits );
4e5a7b2c 1355 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1356 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1357 my $base_le;
1358 my $alt_le;
1359 foreach my $le ( @linked_paths ) {
3a2ebbf4 1360 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1361 $base_le = $le;
910a0a6d 1362 }
508fd430 1363 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1364 if( _is_within( \@path_wits, \@le_wits ) ) {
1365 # This is the right path.
1366 return $direction eq 'next' ? $le->[1] : $le->[0];
1367 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1368 $alt_le = $le;
1369 }
8e1394aa 1370 }
e2902068 1371 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1372 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1373 if $alt_le;
e2902068 1374
1375 # Got this far? Return the base path if it exists.
3a2ebbf4 1376 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1377 if $base_le;
e2902068 1378
1379 # Got this far? We have no appropriate path.
2c669bca 1380 warn "Could not find $direction node from " . $node->id
910a0a6d 1381 . " along path $path";
8e1394aa 1382 return undef;
1383}
1384
4a8828f0 1385# Some set logic.
1386sub _is_within {
1387 my( $set1, $set2 ) = @_;
7854e12e 1388 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1389 foreach my $el ( @$set1 ) {
910a0a6d 1390 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1391 }
1392 return $ret;
1393}
1394
4e5a7b2c 1395# Return the string that joins together a list of witnesses for
1396# display on a single path.
1397sub _witnesses_of_label {
1398 my( $self, $label ) = @_;
1399 my $regex = $self->wit_list_separator;
1400 my @answer = split( /\Q$regex\E/, $label );
1401 return @answer;
b0b4421a 1402}
1403
d4b75f44 1404=head2 common_readings
1405
1406Returns the list of common readings in the graph (i.e. those readings that are
1407shared by all non-lacunose witnesses.)
1408
1409=cut
1410
1411sub common_readings {
1412 my $self = shift;
1413 my @common = grep { $_->is_common } $self->readings;
1414 return @common;
1415}
1416
fae52efd 1417=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1418
1419Returns the text of a witness (plus its backup, if we are using a layer)
1420as stored in the collation. The text is returned as a string, where the
1421individual readings are joined with spaces and the meta-readings (e.g.
1422lacunae) are omitted. Optional specification of $start and $end allows
1423the generation of a subset of the witness text.
4e5a7b2c 1424
b0b4421a 1425=cut
1426
1427sub path_text {
861c3e27 1428 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1429 $start = $self->start unless $start;
1430 $end = $self->end unless $end;
861c3e27 1431 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1432 my $pathtext = '';
1433 my $last;
1434 foreach my $r ( @path ) {
6ad2ce78 1435 unless ( $r->join_prior || !$last || $last->join_next ) {
1436 $pathtext .= ' ';
1437 }
1438 $pathtext .= $r->text;
629e27b0 1439 $last = $r;
1440 }
1441 return $pathtext;
b0b4421a 1442}
4e5a7b2c 1443
1444=head1 INITIALIZATION METHODS
1445
1446These are mostly for use by parsers.
1447
1448=head2 make_witness_path( $witness )
1449
1450Link the array of readings contained in $witness->path (and in
1451$witness->uncorrected_path if it exists) into collation paths.
1452Clear out the arrays when finished.
de51424a 1453
4e5a7b2c 1454=head2 make_witness_paths
1455
1456Call make_witness_path for all witnesses in the tradition.
1457
1458=cut
930ff666 1459
7e450e44 1460# For use when a collation is constructed from a base text and an apparatus.
1461# We have the sequences of readings and just need to add path edges.
1f7aa795 1462# When we are done, clear out the witness path attributes, as they are no
1463# longer needed.
1464# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1465
6a222840 1466sub make_witness_paths {
1467 my( $self ) = @_;
910a0a6d 1468 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1469 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1470 $self->make_witness_path( $wit );
7854e12e 1471 }
7854e12e 1472}
1473
6a222840 1474sub make_witness_path {
7854e12e 1475 my( $self, $wit ) = @_;
1476 my @chain = @{$wit->path};
15d2d3df 1477 my $sig = $wit->sigil;
fae52efd 1478 # Add start and end if necessary
1479 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1480 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1481 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1482 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1483 }
1f7aa795 1484 if( $wit->is_layered ) {
d9e873d0 1485 @chain = @{$wit->uncorrected_path};
fae52efd 1486 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1487 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1488 foreach my $idx( 0 .. $#chain-1 ) {
1489 my $source = $chain[$idx];
1490 my $target = $chain[$idx+1];
1491 $self->add_path( $source, $target, $sig.$self->ac_label )
1492 unless $self->has_path( $source, $target, $sig );
1493 }
15d2d3df 1494 }
1f7aa795 1495 $wit->clear_path;
1496 $wit->clear_uncorrected_path;
e2902068 1497}
1498
4e5a7b2c 1499=head2 calculate_ranks
1500
1501Calculate the reading ranks (that is, their aligned positions relative
1502to each other) for the graph. This can only be called on linear collations.
1503
b365fbae 1504=begin testing
1505
1506use Text::Tradition;
1507
1508my $cxfile = 't/data/Collatex-16.xml';
1509my $t = Text::Tradition->new(
1510 'name' => 'inline',
1511 'input' => 'CollateX',
1512 'file' => $cxfile,
1513 );
1514my $c = $t->collation;
1515
1516# Make an svg
bfcbcecb 1517my $table = $c->alignment_table;
1518ok( $c->has_cached_table, "Alignment table was cached" );
1519is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1520$c->calculate_ranks;
bfcbcecb 1521is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1522$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1523isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1524
1525=end testing
1526
4e5a7b2c 1527=cut
1528
910a0a6d 1529sub calculate_ranks {
1530 my $self = shift;
b365fbae 1531 # Save the existing ranks, in case we need to invalidate the cached SVG.
1532 my %existing_ranks;
ac4d7ac2 1533 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1534
1535 # Do the rankings based on the relationship equivalence graph, starting
1536 # with the start node.
56772e8c 1537 my ( $node_ranks, $rank_nodes ) = $self->relations->equivalence_ranks();
1538
910a0a6d 1539 # Transfer our rankings from the topological graph to the real one.
1540 foreach my $r ( $self->readings ) {
cecbe56d 1541 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
359944f7 1542 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1543 } else {
63778331 1544 # Die. Find the last rank we calculated.
359944f7 1545 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1546 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1547 $self->readings;
1548 my $last = pop @all_defined;
1549 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1550 }
de51424a 1551 }
bfcbcecb 1552 # Do we need to invalidate the cached data?
1553 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1554 foreach my $r ( $self->readings ) {
7c293912 1555 next if defined( $existing_ranks{$r} )
1556 && $existing_ranks{$r} == $r->rank;
c1915ab9 1557 # Something has changed, so clear the cache
bfcbcecb 1558 $self->_clear_cache;
c1915ab9 1559 # ...and recalculate the common readings.
1560 $self->calculate_common_readings();
b365fbae 1561 last;
1562 }
1563 }
c1915ab9 1564 # The graph calculation information is now up to date.
1565 $self->_graphcalc_done(1);
8e1394aa 1566}
3a1f2523 1567
c1915ab9 1568sub _clear_cache {
1569 my $self = shift;
1570 $self->wipe_svg if $self->has_cached_svg;
1571 $self->wipe_table if $self->has_cached_table;
1572}
1573
1574
4e5a7b2c 1575=head2 flatten_ranks
1576
1577A convenience method for parsing collation data. Searches the graph for readings
1578with the same text at the same rank, and merges any that are found.
1579
1580=cut
1581
0e476982 1582sub flatten_ranks {
1583 my $self = shift;
1584 my %unique_rank_rdg;
bf6e338d 1585 my $changed;
0e476982 1586 foreach my $rdg ( $self->readings ) {
1587 next unless $rdg->has_rank;
1588 my $key = $rdg->rank . "||" . $rdg->text;
1589 if( exists $unique_rank_rdg{$key} ) {
07e6765f 1590 # Make sure they don't have different grammatical forms
1591 my $ur = $unique_rank_rdg{$key};
1592 if( $rdg->disambiguated && $ur->disambiguated ) {
1593 my $rform = join( '//', map { $_->form->to_string } $rdg->lexemes );
1594 my $uform = join( '//', map { $_->form->to_string } $ur->lexemes );
1595 next unless $rform eq $uform;
1596 } elsif( $rdg->disambiguated xor $ur->disambiguated ) {
1597 next;
1598 }
0e476982 1599 # Combine!
cecbe56d 1600 #print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1601 $changed = 1;
0e476982 1602 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1603 # TODO see if this now makes a common point.
0e476982 1604 } else {
1605 $unique_rank_rdg{$key} = $rdg;
1606 }
1607 }
bf6e338d 1608 # If we merged readings, the ranks are still fine but the alignment
1609 # table is wrong. Wipe it.
1610 $self->wipe_table() if $changed;
0e476982 1611}
4633f9e4 1612
1613
d4b75f44 1614=head2 calculate_common_readings
1615
1616Goes through the graph identifying the readings that appear in every witness
1617(apart from those with lacunae at that spot.) Marks them as common and returns
1618the list.
1619
1620=begin testing
1621
1622use Text::Tradition;
1623
1624my $cxfile = 't/data/Collatex-16.xml';
1625my $t = Text::Tradition->new(
1626 'name' => 'inline',
1627 'input' => 'CollateX',
1628 'file' => $cxfile,
1629 );
1630my $c = $t->collation;
1631
1632my @common = $c->calculate_common_readings();
1633is( scalar @common, 8, "Found correct number of common readings" );
1634my @marked = sort $c->common_readings();
1635is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1636my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1637is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1638
1639=end testing
1640
1641=cut
1642
1643sub calculate_common_readings {
1644 my $self = shift;
1645 my @common;
c1915ab9 1646 map { $_->is_common( 0 ) } $self->readings;
1647 # Implicitly calls calculate_ranks
1dd07bda 1648 my $table = $self->alignment_table;
d4b75f44 1649 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1650 my @row = map { $_->{'tokens'}->[$idx]
1651 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1652 @{$table->{'alignment'}};
d4b75f44 1653 my %hash;
1654 foreach my $r ( @row ) {
1655 if( $r ) {
1656 $hash{$r->id} = $r unless $r->is_meta;
1657 } else {
1658 $hash{'UNDEF'} = $r;
1659 }
1660 }
1661 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1662 my( $r ) = values %hash;
1663 $r->is_common( 1 );
1664 push( @common, $r );
1665 }
1666 }
1667 return @common;
1668}
1669
861c3e27 1670=head2 text_from_paths
1671
1672Calculate the text array for all witnesses from the path, for later consistency
1673checking. Only to be used if there is no non-graph-based way to know the
1674original texts.
1675
1676=cut
1677
1678sub text_from_paths {
1679 my $self = shift;
1680 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1681 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1682 my @text;
1683 foreach my $r ( @readings ) {
1684 next if $r->is_meta;
1685 push( @text, $r->text );
1686 }
861c3e27 1687 $wit->text( \@text );
1688 if( $wit->is_layered ) {
5164a6f0 1689 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1690 $wit->sigil.$self->ac_label );
1691 my @uctext;
1692 foreach my $r ( @ucrdgs ) {
1693 next if $r->is_meta;
1694 push( @uctext, $r->text );
1695 }
1696 $wit->layertext( \@uctext );
861c3e27 1697 }
1698 }
1699}
0e476982 1700
4e5a7b2c 1701=head1 UTILITY FUNCTIONS
1702
1703=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1704
4e5a7b2c 1705Find the last reading that occurs in sequence before both the given readings.
414cc046 1706At the very least this should be $self->start.
4e5a7b2c 1707
1708=head2 common_successor( $reading_a, $reading_b )
1709
1710Find the first reading that occurs in sequence after both the given readings.
414cc046 1711At the very least this should be $self->end.
4e5a7b2c 1712
22222af9 1713=begin testing
1714
1715use Text::Tradition;
1716
1717my $cxfile = 't/data/Collatex-16.xml';
1718my $t = Text::Tradition->new(
1719 'name' => 'inline',
1720 'input' => 'CollateX',
1721 'file' => $cxfile,
1722 );
1723my $c = $t->collation;
1724
679f17e1 1725is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1726 'n20', "Found correct common predecessor" );
679f17e1 1727is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1728 '__END__', "Found correct common successor" );
22222af9 1729
4e5a7b2c 1730is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1731 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1732is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1733 '__END__', "Found correct common successor for readings on same path" );
22222af9 1734
1735=end testing
1736
1737=cut
1738
1739## Return the closest reading that is a predecessor of both the given readings.
1740sub common_predecessor {
1741 my $self = shift;
4e5a7b2c 1742 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1743 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1744}
1745
1746sub common_successor {
1747 my $self = shift;
4e5a7b2c 1748 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1749 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1750}
1751
414cc046 1752
1753# TODO think about how to do this without ranks...
027d819c 1754sub _common_in_path {
22222af9 1755 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1756 my $iter = $self->end->rank;
22222af9 1757 my @candidates;
414cc046 1758 my @last_r1 = ( $r1 );
1759 my @last_r2 = ( $r2 );
1760 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1761 my %all_seen;
414cc046 1762 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1763 while( !@candidates ) {
414cc046 1764 last unless $iter--; # Avoid looping infinitely
1765 # Iterate separately down the graph from r1 and r2
1766 my( @new_lc1, @new_lc2 );
1767 foreach my $lc ( @last_r1 ) {
1768 foreach my $p ( $lc->$dir ) {
1769 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1770 # print STDERR "Path candidate $p from $lc\n";
1771 push( @candidates, $p );
002e3600 1772 } elsif( !$all_seen{$p->id} ) {
414cc046 1773 $all_seen{$p->id} = 'r1';
1774 push( @new_lc1, $p );
1775 }
1776 }
1777 }
1778 foreach my $lc ( @last_r2 ) {
22222af9 1779 foreach my $p ( $lc->$dir ) {
414cc046 1780 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1781 # print STDERR "Path candidate $p from $lc\n";
22222af9 1782 push( @candidates, $p );
002e3600 1783 } elsif( !$all_seen{$p->id} ) {
414cc046 1784 $all_seen{$p->id} = 'r2';
1785 push( @new_lc2, $p );
22222af9 1786 }
1787 }
1788 }
414cc046 1789 @last_r1 = @new_lc1;
1790 @last_r2 = @new_lc2;
22222af9 1791 }
1792 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1793 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1794}
1795
63778331 1796sub throw {
1797 Text::Tradition::Error->throw(
1798 'ident' => 'Collation error',
1799 'message' => $_[0],
1800 );
1801}
1802
dd3b58b0 1803no Moose;
1804__PACKAGE__->meta->make_immutable;
e867486f 1805
027d819c 1806=head1 LICENSE
e867486f 1807
027d819c 1808This package is free software and is provided "as is" without express
1809or implied warranty. You can redistribute it and/or modify it under
1810the same terms as Perl itself.
e867486f 1811
027d819c 1812=head1 AUTHOR
e867486f 1813
027d819c 1814Tara L Andrews E<lt>aurum@cpan.orgE<gt>