allow creation and SVG rendering of nonlinear graphs
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
bfcbcecb 5use File::Which;
c9bf3dbf 6use Graph;
8e1394aa 7use IPC::Run qw( run binary );
82fa4d57 8use Text::CSV;
b15511bf 9use Text::Tradition::Collation::Reading;
22222af9 10use Text::Tradition::Collation::RelationshipStore;
63778331 11use Text::Tradition::Error;
cc31ebaa 12use XML::Easy::Syntax qw( $xml10_namestartchar_rx $xml10_namechar_rx );
df6d9812 13use XML::LibXML;
a344ffcf 14use XML::LibXML::XPathContext;
dd3b58b0 15use Moose;
16
3a2ebbf4 17has 'sequence' => (
d047cd52 18 is => 'ro',
3a2ebbf4 19 isa => 'Graph',
20 default => sub { Graph->new() },
d047cd52 21 handles => {
3a2ebbf4 22 paths => 'edges',
d047cd52 23 },
d047cd52 24 );
3a2ebbf4 25
26has 'relations' => (
27 is => 'ro',
22222af9 28 isa => 'Text::Tradition::Collation::RelationshipStore',
29 handles => {
30 relationships => 'relationships',
31 related_readings => 'related_readings',
202ccb18 32 get_relationship => 'get_relationship',
ee801e17 33 del_relationship => 'del_relationship',
359944f7 34 equivalence => 'equivalence',
35 equivalence_graph => 'equivalence_graph',
22222af9 36 },
37 writer => '_set_relations',
3a2ebbf4 38 );
dd3b58b0 39
3a2ebbf4 40has 'tradition' => (
41 is => 'ro',
d047cd52 42 isa => 'Text::Tradition',
8cfd99c4 43 writer => '_set_tradition',
8d9a1cd8 44 weak_ref => 1,
d047cd52 45 );
dd3b58b0 46
3a2ebbf4 47has 'readings' => (
48 isa => 'HashRef[Text::Tradition::Collation::Reading]',
49 traits => ['Hash'],
50 handles => {
51 reading => 'get',
52 _add_reading => 'set',
53 del_reading => 'delete',
54 has_reading => 'exists',
55 readings => 'values',
56 },
57 default => sub { {} },
58 );
910a0a6d 59
4a8828f0 60has 'wit_list_separator' => (
7854e12e 61 is => 'rw',
62 isa => 'Str',
63 default => ', ',
64 );
65
66has 'baselabel' => (
67 is => 'rw',
68 isa => 'Str',
69 default => 'base text',
70 );
4a8828f0 71
15d2d3df 72has 'linear' => (
73 is => 'rw',
74 isa => 'Bool',
75 default => 1,
76 );
c84275ff 77
ef9d481f 78has 'ac_label' => (
79 is => 'rw',
80 isa => 'Str',
81 default => ' (a.c.)',
82 );
3a2ebbf4 83
4e483aa5 84has 'wordsep' => (
85 is => 'rw',
86 isa => 'Str',
87 default => ' ',
88 );
89
3a2ebbf4 90has 'start' => (
91 is => 'ro',
92 isa => 'Text::Tradition::Collation::Reading',
93 writer => '_set_start',
94 weak_ref => 1,
95 );
96
97has 'end' => (
98 is => 'ro',
99 isa => 'Text::Tradition::Collation::Reading',
100 writer => '_set_end',
101 weak_ref => 1,
102 );
b365fbae 103
104has 'cached_svg' => (
105 is => 'rw',
106 isa => 'Str',
107 predicate => 'has_cached_svg',
108 clearer => 'wipe_svg',
109 );
1dd07bda 110
111has 'cached_table' => (
112 is => 'rw',
113 isa => 'HashRef',
114 predicate => 'has_cached_table',
115 clearer => 'wipe_table',
116 );
c1915ab9 117
118has '_graphcalc_done' => (
119 is => 'rw',
120 isa => 'Bool',
121 default => undef,
122 );
1f563ac3 123
4e5a7b2c 124=head1 NAME
125
126Text::Tradition::Collation - a software model for a text collation
127
128=head1 SYNOPSIS
129
130 use Text::Tradition;
131 my $t = Text::Tradition->new(
132 'name' => 'this is a text',
133 'input' => 'TEI',
134 'file' => '/path/to/tei_parallel_seg_file.xml' );
135
136 my $c = $t->collation;
137 my @readings = $c->readings;
138 my @paths = $c->paths;
139 my @relationships = $c->relationships;
140
141 my $svg_variant_graph = $t->collation->as_svg();
142
143=head1 DESCRIPTION
144
145Text::Tradition is a library for representation and analysis of collated
146texts, particularly medieval ones. The Collation is the central feature of
147a Tradition, where the text, its sequence of readings, and its relationships
148between readings are actually kept.
149
150=head1 CONSTRUCTOR
151
152=head2 new
153
154The constructor. Takes a hash or hashref of the following arguments:
155
156=over
157
158=item * tradition - The Text::Tradition object to which the collation
159belongs. Required.
160
161=item * linear - Whether the collation should be linear; that is, whether
162transposed readings should be treated as two linked readings rather than one,
163and therefore whether the collation graph is acyclic. Defaults to true.
164
4e5a7b2c 165=item * baselabel - The default label for the path taken by a base text
166(if any). Defaults to 'base text'.
167
168=item * wit_list_separator - The string to join a list of witnesses for
169purposes of making labels in display graphs. Defaults to ', '.
170
171=item * ac_label - The extra label to tack onto a witness sigil when
172representing another layer of path for the given witness - that is, when
173a text has more than one possible reading due to scribal corrections or
174the like. Defaults to ' (a.c.)'.
175
4e483aa5 176=item * wordsep - The string used to separate words in the original text.
177Defaults to ' '.
178
4e5a7b2c 179=back
180
181=head1 ACCESSORS
182
183=head2 tradition
184
185=head2 linear
186
4e5a7b2c 187=head2 wit_list_separator
188
189=head2 baselabel
190
191=head2 ac_label
192
4e483aa5 193=head2 wordsep
194
4e5a7b2c 195Simple accessors for collation attributes.
196
197=head2 start
198
199The meta-reading at the start of every witness path.
200
201=head2 end
202
203The meta-reading at the end of every witness path.
204
205=head2 readings
206
207Returns all Reading objects in the graph.
208
209=head2 reading( $id )
210
211Returns the Reading object corresponding to the given ID.
212
213=head2 add_reading( $reading_args )
214
215Adds a new reading object to the collation.
216See L<Text::Tradition::Collation::Reading> for the available arguments.
217
218=head2 del_reading( $object_or_id )
219
220Removes the given reading from the collation, implicitly removing its
221paths and relationships.
222
4e483aa5 223=head2 merge_readings( $main, $second, $concatenate, $with_str )
224
225Merges the $second reading into the $main one. If $concatenate is true, then
226the merged node will carry the text of both readings, concatenated with either
227$with_str (if specified) or a sensible default (the empty string if the
228appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
4e5a7b2c 229
4e483aa5 230The first two arguments may be either readings or reading IDs.
4e5a7b2c 231
232=head2 has_reading( $id )
233
234Predicate to see whether a given reading ID is in the graph.
235
236=head2 reading_witnesses( $object_or_id )
237
238Returns a list of sigils whose witnesses contain the reading.
239
240=head2 paths
241
242Returns all reading paths within the document - that is, all edges in the
243collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
244
245=head2 add_path( $source, $target, $sigil )
246
247Links the given readings in the collation in sequence, under the given witness
248sigil. The readings may be specified by object or ID.
249
250=head2 del_path( $source, $target, $sigil )
251
252Links the given readings in the collation in sequence, under the given witness
253sigil. The readings may be specified by object or ID.
254
255=head2 has_path( $source, $target );
256
257Returns true if the two readings are linked in sequence in any witness.
258The readings may be specified by object or ID.
259
260=head2 relationships
261
262Returns all Relationship objects in the collation.
263
264=head2 add_relationship( $reading, $other_reading, $options )
265
266Adds a new relationship of the type given in $options between the two readings,
267which may be specified by object or ID. Returns a value of ( $status, @vectors)
268where $status is true on success, and @vectors is a list of relationship edges
269that were ultimately added.
270See L<Text::Tradition::Collation::Relationship> for the available options.
271
272=cut
dd3b58b0 273
d047cd52 274sub BUILD {
3a2ebbf4 275 my $self = shift;
22222af9 276 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 277 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
278 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 279}
784877d9 280
3a2ebbf4 281### Reading construct/destruct functions
282
283sub add_reading {
284 my( $self, $reading ) = @_;
285 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286 my %args = %$reading;
6ad2ce78 287 if( $self->tradition->has_language && !exists $args{'language'} ) {
288 $args{'language'} = $self->tradition->language;
289 }
3a2ebbf4 290 $reading = Text::Tradition::Collation::Reading->new(
291 'collation' => $self,
292 %args );
293 }
294 # First check to see if a reading with this ID exists.
295 if( $self->reading( $reading->id ) ) {
63778331 296 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 297 }
c1915ab9 298 $self->_graphcalc_done(0);
3a2ebbf4 299 $self->_add_reading( $reading->id => $reading );
300 # Once the reading has been added, put it in both graphs.
301 $self->sequence->add_vertex( $reading->id );
22222af9 302 $self->relations->add_reading( $reading->id );
3a2ebbf4 303 return $reading;
eca16057 304};
305
3a2ebbf4 306around del_reading => sub {
307 my $orig = shift;
308 my $self = shift;
309 my $arg = shift;
310
311 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
312 $arg = $arg->id;
313 }
3a2ebbf4 314 # Remove the reading from the graphs.
c1915ab9 315 $self->_graphcalc_done(0);
4e483aa5 316 $self->_clear_cache; # Explicitly clear caches to GC the reading
3a2ebbf4 317 $self->sequence->delete_vertex( $arg );
22222af9 318 $self->relations->delete_reading( $arg );
3a2ebbf4 319
320 # Carry on.
321 $self->$orig( $arg );
322};
7854e12e 323
4e483aa5 324=begin testing
325
326use Text::Tradition;
327
328my $cxfile = 't/data/Collatex-16.xml';
329my $t = Text::Tradition->new(
330 'name' => 'inline',
331 'input' => 'CollateX',
332 'file' => $cxfile,
333 );
334my $c = $t->collation;
335
336my $rno = scalar $c->readings;
337# Split n21 for testing purposes
338my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
339my $old_r = $c->reading( 'n21' );
340$old_r->alter_text( 'to' );
341$c->del_path( 'n20', 'n21', 'A' );
342$c->add_path( 'n20', 'n21p0', 'A' );
343$c->add_path( 'n21p0', 'n21', 'A' );
344$c->flatten_ranks();
345ok( $c->reading( 'n21p0' ), "New reading exists" );
346is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
347
679f17e1 348# Combine n3 and n4 ( with his )
4e483aa5 349$c->merge_readings( 'n3', 'n4', 1 );
350ok( !$c->reading('n4'), "Reading n4 is gone" );
351is( $c->reading('n3')->text, 'with his', "Reading n3 has both words" );
352
679f17e1 353# Collapse n9 and n10 ( rood / root )
354$c->merge_readings( 'n9', 'n10' );
355ok( !$c->reading('n10'), "Reading n10 is gone" );
356is( $c->reading('n9')->text, 'rood', "Reading n9 has an unchanged word" );
4e483aa5 357
358# Combine n21 and n21p0
359my $remaining = $c->reading('n21');
360$remaining ||= $c->reading('n22'); # one of these should still exist
361$c->merge_readings( 'n21p0', $remaining, 1 );
362ok( !$c->reading('n21'), "Reading $remaining is gone" );
363is( $c->reading('n21p0')->text, 'unto', "Reading n21p0 merged correctly" );
364
365=end testing
366
367=cut
7854e12e 368
3a2ebbf4 369sub merge_readings {
370 my $self = shift;
371
372 # We only need the IDs for adding paths to the graph, not the reading
373 # objects themselves.
4e483aa5 374 my( $kept, $deleted, $combine, $combine_char ) = $self->_stringify_args( @_ );
c1915ab9 375 $self->_graphcalc_done(0);
3a2ebbf4 376
377 # The kept reading should inherit the paths and the relationships
378 # of the deleted reading.
379 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
380 my @vector = ( $kept );
381 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
382 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 383 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 384 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
385 $self->sequence->add_edge( @vector );
386 my $fwits = $self->sequence->get_edge_attributes( @vector );
387 @wits{keys %$fwits} = values %$fwits;
388 $self->sequence->set_edge_attributes( @vector, \%wits );
389 }
22222af9 390 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 391
392 # Do the deletion deed.
4e483aa5 393 if( $combine ) {
49d4f2ac 394 my $kept_obj = $self->reading( $kept );
4e483aa5 395 my $del_obj = $self->reading( $deleted );
396 my $joinstr = $combine_char;
397 unless( defined $joinstr ) {
398 $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
399 $joinstr = $self->wordsep unless defined $joinstr;
400 }
401 $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
49d4f2ac 402 }
3a2ebbf4 403 $self->del_reading( $deleted );
404}
7854e12e 405
3265b0ce 406
3a2ebbf4 407# Helper function for manipulating the graph.
408sub _stringify_args {
4e483aa5 409 my( $self, $first, $second, @args ) = @_;
3a2ebbf4 410 $first = $first->id
411 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
412 $second = $second->id
413 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
4e483aa5 414 return( $first, $second, @args );
3a2ebbf4 415}
df6d9812 416
4e5a7b2c 417# Helper function for manipulating the graph.
418sub _objectify_args {
419 my( $self, $first, $second, $arg ) = @_;
420 $first = $self->reading( $first )
421 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
422 $second = $self->reading( $second )
423 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
424 return( $first, $second, $arg );
425}
3a2ebbf4 426### Path logic
427
428sub add_path {
429 my $self = shift;
430
431 # We only need the IDs for adding paths to the graph, not the reading
432 # objects themselves.
433 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
434
c1915ab9 435 $self->_graphcalc_done(0);
3a2ebbf4 436 # Connect the readings
359944f7 437 unless( $self->sequence->has_edge( $source, $target ) ) {
438 $self->sequence->add_edge( $source, $target );
439 $self->relations->add_equivalence_edge( $source, $target );
440 }
3a2ebbf4 441 # Note the witness in question
442 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
359944f7 443}
b15511bf 444
3a2ebbf4 445sub del_path {
446 my $self = shift;
49d4f2ac 447 my @args;
448 if( ref( $_[0] ) eq 'ARRAY' ) {
449 my $e = shift @_;
450 @args = ( @$e, @_ );
451 } else {
452 @args = @_;
453 }
3a2ebbf4 454
455 # We only need the IDs for adding paths to the graph, not the reading
456 # objects themselves.
49d4f2ac 457 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 458
c1915ab9 459 $self->_graphcalc_done(0);
3a2ebbf4 460 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 461 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 462 }
463 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
464 $self->sequence->delete_edge( $source, $target );
359944f7 465 $self->relations->delete_equivalence_edge( $source, $target );
3a2ebbf4 466 }
784877d9 467}
468
3a2ebbf4 469
15d2d3df 470# Extra graph-alike utility
471sub has_path {
3a2ebbf4 472 my $self = shift;
473 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
474 return undef unless $self->sequence->has_edge( $source, $target );
475 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 476}
477
4e5a7b2c 478=head2 clear_witness( @sigil_list )
3a2ebbf4 479
4e5a7b2c 480Clear the given witnesses out of the collation entirely, removing references
481to them in paths, and removing readings that belong only to them. Should only
482be called via $tradition->del_witness.
3a2ebbf4 483
484=cut
485
4e5a7b2c 486sub clear_witness {
487 my( $self, @sigils ) = @_;
488
c1915ab9 489 $self->_graphcalc_done(0);
4e5a7b2c 490 # Clear the witness(es) out of the paths
491 foreach my $e ( $self->paths ) {
492 foreach my $sig ( @sigils ) {
493 $self->del_path( $e, $sig );
494 }
495 }
496
497 # Clear out the newly unused readings
498 foreach my $r ( $self->readings ) {
499 unless( $self->reading_witnesses( $r ) ) {
500 $self->del_reading( $r );
501 }
502 }
503}
3a2ebbf4 504
505sub add_relationship {
506 my $self = shift;
22222af9 507 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
414cc046 508 my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
c1915ab9 509 $self->_graphcalc_done(0);
63778331 510 return @vectors;
22222af9 511}
ef9d481f 512
ca6e6095 513around qw/ get_relationship del_relationship / => sub {
514 my $orig = shift;
515 my $self = shift;
516 my @args = @_;
517 if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
518 @args = @{$_[0]};
519 }
520 my( $source, $target ) = $self->_stringify_args( @args );
521 $self->$orig( $source, $target );
522};
523
22222af9 524=head2 reading_witnesses( $reading )
910a0a6d 525
22222af9 526Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 527
22222af9 528=cut
1d310495 529
1d310495 530sub reading_witnesses {
531 my( $self, $reading ) = @_;
532 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 533 # arbitrarily chosen "incoming". Thus, special-case the start node.
534 if( $reading eq $self->start ) {
535 return map { $_->sigil } $self->tradition->witnesses;
536 }
1d310495 537 my %all_witnesses;
538 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
539 my $wits = $self->sequence->get_edge_attributes( @$e );
540 @all_witnesses{ keys %$wits } = 1;
541 }
c12bb878 542 my $acstr = $self->ac_label;
543 foreach my $acwit ( grep { $_ =~ s/^(.*)\Q$acstr\E$/$1/ } keys %all_witnesses ) {
544 delete $all_witnesses{$acwit.$acstr} if exists $all_witnesses{$acwit};
545 }
1d310495 546 return keys %all_witnesses;
910a0a6d 547}
548
4e5a7b2c 549=head1 OUTPUT METHODS
8e1394aa 550
0ecb975c 551=head2 as_svg( \%options )
8e1394aa 552
0068967c 553Returns an SVG string that represents the graph, via as_dot and graphviz.
bfcbcecb 554See as_dot for a list of options. Must have GraphViz (dot) installed to run.
8e1394aa 555
556=cut
557
558sub as_svg {
0ecb975c 559 my( $self, $opts ) = @_;
bfcbcecb 560 throw( "Need GraphViz installed to output SVG" )
561 unless File::Which::which( 'dot' );
e247aad1 562 my $want_subgraph = exists $opts->{'from'} || exists $opts->{'to'};
1ff82d4f 563 $self->calculate_ranks()
564 unless( $self->_graphcalc_done || $opts->{'nocalc'} || !$self->linear );
e247aad1 565 if( !$self->has_cached_svg || $opts->{'recalc'} || $want_subgraph ) {
b365fbae 566 my @cmd = qw/dot -Tsvg/;
567 my( $svg, $err );
568 my $dotfile = File::Temp->new();
569 ## USE FOR DEBUGGING
570 # $dotfile->unlink_on_destroy(0);
571 binmode $dotfile, ':utf8';
572 print $dotfile $self->as_dot( $opts );
573 push( @cmd, $dotfile->filename );
574 run( \@cmd, ">", binary(), \$svg );
e247aad1 575 $svg = decode_utf8( $svg );
576 $self->cached_svg( $svg ) unless $want_subgraph;
577 return $svg;
578 } else {
579 return $self->cached_svg;
b365fbae 580 }
8e1394aa 581}
582
b22576c6 583
0ecb975c 584=head2 as_dot( \%options )
b22576c6 585
0ecb975c 586Returns a string that is the collation graph expressed in dot
587(i.e. GraphViz) format. Options include:
b22576c6 588
0ecb975c 589=over 4
b22576c6 590
0ecb975c 591=item * from
b22576c6 592
0ecb975c 593=item * to
df6d9812 594
0ecb975c 595=item * color_common
596
597=back
df6d9812 598
599=cut
600
601sub as_dot {
0ecb975c 602 my( $self, $opts ) = @_;
603 my $startrank = $opts->{'from'} if $opts;
604 my $endrank = $opts->{'to'} if $opts;
605 my $color_common = $opts->{'color_common'} if $opts;
b365fbae 606 my $STRAIGHTENHACK = !$startrank && !$endrank && $self->end->rank
607 && $self->end->rank > 100;
6648ee3d 608 $STRAIGHTENHACK = 1 if $opts->{'straight'}; # even for subgraphs or small graphs
b365fbae 609
b22576c6 610 # Check the arguments
611 if( $startrank ) {
612 return if $endrank && $startrank > $endrank;
613 return if $startrank > $self->end->rank;
614 }
615 if( defined $endrank ) {
616 return if $endrank < 0;
f1b3b33a 617 $endrank = undef if $endrank == $self->end->rank;
b22576c6 618 }
619
67da8d6c 620 my $graph_name = $self->tradition->name;
621 $graph_name =~ s/[^\w\s]//g;
622 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 623
624 my %graph_attrs = (
625 'rankdir' => 'LR',
626 'bgcolor' => 'none',
627 );
628 my %node_attrs = (
b8990398 629 'fontsize' => 14,
f13b5582 630 'fillcolor' => 'white',
631 'style' => 'filled',
632 'shape' => 'ellipse'
633 );
634 my %edge_attrs = (
635 'arrowhead' => 'open',
636 'color' => '#000000',
637 'fontcolor' => '#000000',
638 );
639
67da8d6c 640 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 641 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
642 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 643
b22576c6 644 # Output substitute start/end readings if necessary
645 if( $startrank ) {
43c94341 646 $dot .= "\t\"__SUBSTART__\" [ label=\"...\",id=\"__START__\" ];\n";
b22576c6 647 }
648 if( $endrank ) {
43c94341 649 $dot .= "\t\"__SUBEND__\" [ label=\"...\",id=\"__END__\" ];\n";
b22576c6 650 }
b365fbae 651 if( $STRAIGHTENHACK ) {
652 ## HACK part 1
43c94341 653 my $startlabel = $startrank ? '__SUBSTART__' : '__START__';
654 $dot .= "\tsubgraph { rank=same \"$startlabel\" \"#SILENT#\" }\n";
b365fbae 655 $dot .= "\t\"#SILENT#\" [ shape=diamond,color=white,penwidth=0,label=\"\" ];"
656 }
b22576c6 657 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 658 # Sort the readings by rank if we have ranks; this speeds layout.
659 my @all_readings = $self->end->has_rank
660 ? sort { $a->rank <=> $b->rank } $self->readings
661 : $self->readings;
4633f9e4 662 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 663 foreach my $reading ( @all_readings ) {
b22576c6 664 # Only output readings within our rank range.
665 next if $startrank && $reading->rank < $startrank;
666 next if $endrank && $reading->rank > $endrank;
667 $used{$reading->id} = 1;
910a0a6d 668 # Need not output nodes without separate labels
3a2ebbf4 669 next if $reading->id eq $reading->text;
d4b75f44 670 my $rattrs;
30f0df34 671 my $label = $reading->text;
629e27b0 672 $label .= '-' if $reading->join_next;
673 $label = "-$label" if $reading->join_prior;
8f9cab7b 674 $label =~ s/\"/\\\"/g;
d4b75f44 675 $rattrs->{'label'} = $label;
10e4b1ac 676 $rattrs->{'id'} = $reading->id;
0ecb975c 677 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 678 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 679 }
3a2ebbf4 680
30ddc24c 681 # Add the real edges. Need to weight one edge per rank jump, in a
682 # continuous line.
b365fbae 683 # my $weighted = $self->_add_edge_weights;
b22576c6 684 my @edges = $self->paths;
3bdec618 685 my( %substart, %subend );
b22576c6 686 foreach my $edge ( @edges ) {
687 # Do we need to output this edge?
508fd430 688 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 689 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 690 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 691
b22576c6 692 # Account for the rank gap if necessary
30ddc24c 693 my $rank0 = $self->reading( $edge->[0] )->rank
694 if $self->reading( $edge->[0] )->has_rank;
695 my $rank1 = $self->reading( $edge->[1] )->rank
696 if $self->reading( $edge->[1] )->has_rank;
697 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
698 $variables->{'minlen'} = $rank1 - $rank0;
699 }
700
701 # Add the calculated edge weights
b365fbae 702 # if( exists $weighted->{$edge->[0]}
e247aad1 703 # && $weighted->{$edge->[0]} eq $edge->[1] ) {
704 # # $variables->{'color'} = 'red';
705 # $variables->{'weight'} = 3.0;
706 # }
30ddc24c 707
508fd430 708 # EXPERIMENTAL: make edge width reflect no. of witnesses
709 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
710 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
711
f13b5582 712 my $varopts = _dot_attr_string( $variables );
713 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
714 $edge->[0], $edge->[1], $varopts );
3bdec618 715 } elsif( $used{$edge->[0]} ) {
716 $subend{$edge->[0]} = 1;
717 } elsif( $used{$edge->[1]} ) {
718 $substart{$edge->[1]} = 1;
b22576c6 719 }
df6d9812 720 }
3bdec618 721 # Add substitute start and end edges if necessary
722 foreach my $node ( keys %substart ) {
027d819c 723 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 724 my $variables = { %edge_attrs, 'label' => $witstr };
725 my $varopts = _dot_attr_string( $variables );
43c94341 726 $dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;";
3bdec618 727 }
728 foreach my $node ( keys %subend ) {
027d819c 729 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 730 my $variables = { %edge_attrs, 'label' => $witstr };
731 my $varopts = _dot_attr_string( $variables );
43c94341 732 $dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;";
3bdec618 733 }
b365fbae 734 # HACK part 2
735 if( $STRAIGHTENHACK ) {
43c94341 736 my $endlabel = $endrank ? '__SUBEND__' : '__END__';
737 $dot .= "\t\"$endlabel\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
b365fbae 738 }
30ddc24c 739
df6d9812 740 $dot .= "}\n";
741 return $dot;
742}
743
f13b5582 744sub _dot_attr_string {
745 my( $hash ) = @_;
746 my @attrs;
747 foreach my $k ( sort keys %$hash ) {
748 my $v = $hash->{$k};
749 push( @attrs, $k.'="'.$v.'"' );
750 }
751 return( '[ ' . join( ', ', @attrs ) . ' ]' );
752}
753
30ddc24c 754sub _add_edge_weights {
755 my $self = shift;
756 # Walk the graph from START to END, choosing the successor node with
757 # the largest number of witness paths each time.
758 my $weighted = {};
759 my $curr = $self->start->id;
008fc8a6 760 my $ranked = $self->end->has_rank;
30ddc24c 761 while( $curr ne $self->end->id ) {
008fc8a6 762 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 763 my @succ = sort { $self->path_witnesses( $curr, $a )
764 <=> $self->path_witnesses( $curr, $b ) }
765 $self->sequence->successors( $curr );
766 my $next = pop @succ;
008fc8a6 767 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 768 # Try to avoid lacunae in the weighted path.
008fc8a6 769 while( @succ &&
770 ( $self->reading( $next )->is_lacuna ||
771 $nextrank - $rank > 1 ) ){
30ddc24c 772 $next = pop @succ;
773 }
774 $weighted->{$curr} = $next;
775 $curr = $next;
776 }
777 return $weighted;
778}
779
027d819c 780=head2 path_witnesses( $edge )
781
782Returns the list of sigils whose witnesses are associated with the given edge.
783The edge can be passed as either an array or an arrayref of ( $source, $target ).
784
785=cut
786
3a2ebbf4 787sub path_witnesses {
788 my( $self, @edge ) = @_;
789 # If edge is an arrayref, cope.
790 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
791 my $e = shift @edge;
792 @edge = @$e;
793 }
794 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 795 return @wits;
3a2ebbf4 796}
797
7f9f05e8 798# Helper function. Make a display label for the given witnesses, showing a.c.
799# witnesses only where the main witness is not also in the list.
027d819c 800sub _path_display_label {
508fd430 801 my $self = shift;
7f9f05e8 802 my %wits;
803 map { $wits{$_} = 1 } @_;
804
805 # If an a.c. wit is listed, remove it if the main wit is also listed.
806 # Otherwise keep it for explicit listing.
807 my $aclabel = $self->ac_label;
808 my @disp_ac;
809 foreach my $w ( sort keys %wits ) {
810 if( $w =~ /^(.*)\Q$aclabel\E$/ ) {
811 if( exists $wits{$1} ) {
812 delete $wits{$w};
813 } else {
814 push( @disp_ac, $w );
815 }
816 }
817 }
818
819 # See if we are in a majority situation.
8f9cab7b 820 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
1ff82d4f 821 $maj = $maj > 5 ? $maj : 5;
7f9f05e8 822 if( scalar keys %wits > $maj ) {
823 unshift( @disp_ac, 'majority' );
824 return join( ', ', @disp_ac );
8f9cab7b 825 } else {
7f9f05e8 826 return join( ', ', sort keys %wits );
8f9cab7b 827 }
828}
1dd07bda 829
bf6e338d 830=head2 readings_at_rank( $rank )
1dd07bda 831
bf6e338d 832Returns a list of readings at a given rank, taken from the alignment table.
1dd07bda 833
834=cut
835
bf6e338d 836sub readings_at_rank {
1dd07bda 837 my( $self, $rank ) = @_;
bf6e338d 838 my $table = $self->alignment_table;
839 # Table rank is real rank - 1.
840 my @elements = map { $_->{'tokens'}->[$rank-1] } @{$table->{'alignment'}};
841 my %readings;
842 foreach my $e ( @elements ) {
843 next unless ref( $e ) eq 'HASH';
844 next unless exists $e->{'t'};
845 $readings{$e->{'t'}->id} = $e->{'t'};
846 }
847 return values %readings;
1dd07bda 848}
8f9cab7b 849
4e5a7b2c 850=head2 as_graphml
8e1394aa 851
4e5a7b2c 852Returns a GraphML representation of the collation. The GraphML will contain
853two graphs. The first expresses the attributes of the readings and the witness
854paths that link them; the second expresses the relationships that link the
855readings. This is the native transfer format for a tradition.
8e1394aa 856
56eefa04 857=begin testing
858
859use Text::Tradition;
860
861my $READINGS = 311;
862my $PATHS = 361;
863
864my $datafile = 't/data/florilegium_tei_ps.xml';
865my $tradition = Text::Tradition->new( 'input' => 'TEI',
866 'name' => 'test0',
867 'file' => $datafile,
868 'linear' => 1 );
869
870ok( $tradition, "Got a tradition object" );
871is( scalar $tradition->witnesses, 13, "Found all witnesses" );
872ok( $tradition->collation, "Tradition has a collation" );
873
874my $c = $tradition->collation;
875is( scalar $c->readings, $READINGS, "Collation has all readings" );
876is( scalar $c->paths, $PATHS, "Collation has all paths" );
877is( scalar $c->relationships, 0, "Collation has all relationships" );
878
879# Add a few relationships
880$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
881$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
882$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
883
884# Now write it to GraphML and parse it again.
885
886my $graphml = $c->as_graphml;
887my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
888is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
889is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
890is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
891
2a812726 892# Now add a stemma, write to GraphML, and parse again.
893my $stemma = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
894is( ref( $stemma ), 'Text::Tradition::Stemma', "Parsed dotfile into stemma" );
895is( $tradition->stemmata, 1, "Tradition now has the stemma" );
896$graphml = $c->as_graphml;
897like( $graphml, qr/digraph/, "Digraph declaration exists in GraphML" );
898
56eefa04 899=end testing
900
8e1394aa 901=cut
902
903sub as_graphml {
a30ca502 904 my( $self, $options ) = @_;
3d14b48e 905 $self->calculate_ranks unless $self->_graphcalc_done;
906
a30ca502 907 my $start = $options->{'from'}
908 ? $self->reading( $options->{'from'} ) : $self->start;
909 my $end = $options->{'to'}
910 ? $self->reading( $options->{'to'} ) : $self->end;
911 if( $start->has_rank && $end->has_rank && $end->rank < $start->rank ) {
912 throw( 'Start node must be before end node' );
913 }
914 # The readings need to be ranked for this to work.
915 $start = $self->start unless $start->has_rank;
916 $end = $self->end unless $end->has_rank;
414cc046 917 my $rankoffset = 0;
918 unless( $start eq $self->start ) {
919 $rankoffset = $start->rank - 1;
920 }
a30ca502 921 my %use_readings;
922
8e1394aa 923 # Some namespaces
924 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
925 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
926 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 927 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 928
929 # Create the document and root node
930 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
931 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
932 $graphml->setDocumentElement( $root );
933 $root->setNamespace( $xsi_ns, 'xsi', 0 );
934 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
bbd064a9 935
936 # List of attribute types to save on our objects and their corresponding
937 # GraphML types
938 my %save_types = (
939 'Str' => 'string',
940 'Int' => 'int',
941 'Bool' => 'boolean',
10e4b1ac 942 'ReadingID' => 'string',
bbd064a9 943 'RelationshipType' => 'string',
944 'RelationshipScope' => 'string',
945 );
946
947 # List of attribute names *not* to save on our objects.
948 # We will also not save any attribute beginning with _.
949 my %skipsave;
950 map { $skipsave{$_} = 1 } qw/ cached_svg /;
8e1394aa 951
bbd064a9 952 # Add the data keys for the graph. Include an extra key 'version' for the
953 # GraphML output version.
e309421a 954 my %graph_data_keys;
955 my $gdi = 0;
bbd064a9 956 my %graph_attributes = ( 'version' => 'string' );
957 # Graph attributes include those of Tradition and those of Collation.
958 my %gattr_from;
959 my $tmeta = $self->tradition->meta;
960 my $cmeta = $self->meta;
961 map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
962 map { $gattr_from{$_->name} = 'Collation' } $cmeta->get_all_attributes;
963 foreach my $attr ( ( $tmeta->get_all_attributes, $cmeta->get_all_attributes ) ) {
964 next if $attr->name =~ /^_/;
965 next if $skipsave{$attr->name};
966 next unless $save_types{$attr->type_constraint->name};
967 $graph_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
968 }
2a812726 969 # Extra custom key for the tradition stemma(ta)
970 $graph_attributes{'stemmata'} = 'string';
bbd064a9 971
972 foreach my $datum ( sort keys %graph_attributes ) {
e309421a 973 $graph_data_keys{$datum} = 'dg'.$gdi++;
974 my $key = $root->addNewChild( $graphml_ns, 'key' );
975 $key->setAttribute( 'attr.name', $datum );
bbd064a9 976 $key->setAttribute( 'attr.type', $graph_attributes{$datum} );
e309421a 977 $key->setAttribute( 'for', 'graph' );
978 $key->setAttribute( 'id', $graph_data_keys{$datum} );
979 }
f6066bac 980
bbd064a9 981 # Add the data keys for reading nodes
982 my %reading_attributes;
983 my $rmeta = Text::Tradition::Collation::Reading->meta;
984 foreach my $attr( $rmeta->get_all_attributes ) {
985 next if $attr->name =~ /^_/;
986 next if $skipsave{$attr->name};
987 next unless $save_types{$attr->type_constraint->name};
988 $reading_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
989 }
7cd9f181 990 # Extra custom key for the reading morphology
991 $reading_attributes{'lexemes'} = 'string';
992
ef9d481f 993 my %node_data_keys;
994 my $ndi = 0;
bbd064a9 995 foreach my $datum ( sort keys %reading_attributes ) {
910a0a6d 996 $node_data_keys{$datum} = 'dn'.$ndi++;
997 my $key = $root->addNewChild( $graphml_ns, 'key' );
998 $key->setAttribute( 'attr.name', $datum );
bbd064a9 999 $key->setAttribute( 'attr.type', $reading_attributes{$datum} );
910a0a6d 1000 $key->setAttribute( 'for', 'node' );
1001 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 1002 }
1003
bbd064a9 1004 # Add the data keys for edges, that is, paths and relationships. Path
1005 # data does not come from a Moose class so is here manually.
ef9d481f 1006 my $edi = 0;
1007 my %edge_data_keys;
bbd064a9 1008 my %edge_attributes = (
3a2ebbf4 1009 witness => 'string', # ID/label for a path
3a2ebbf4 1010 extra => 'boolean', # Path key
3a2ebbf4 1011 );
bbd064a9 1012 my @path_attributes = keys %edge_attributes; # track our manual additions
1013 my $pmeta = Text::Tradition::Collation::Relationship->meta;
1014 foreach my $attr( $pmeta->get_all_attributes ) {
1015 next if $attr->name =~ /^_/;
1016 next if $skipsave{$attr->name};
1017 next unless $save_types{$attr->type_constraint->name};
1018 $edge_attributes{$attr->name} = $save_types{$attr->type_constraint->name};
1019 }
1020 foreach my $datum ( sort keys %edge_attributes ) {
3a2ebbf4 1021 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 1022 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 1023 $key->setAttribute( 'attr.name', $datum );
bbd064a9 1024 $key->setAttribute( 'attr.type', $edge_attributes{$datum} );
910a0a6d 1025 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 1026 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 1027 }
3a2ebbf4 1028
cc31ebaa 1029 # Add the collation graph itself. First, sanitize the name to a valid XML ID.
1030 my $xmlidname = $self->tradition->name;
1031 $xmlidname =~ s/(?!$xml10_namechar_rx)./_/g;
1032 if( $xmlidname !~ /^$xml10_namestartchar_rx/ ) {
1033 $xmlidname = '_'.$xmlidname;
1034 }
2c669bca 1035 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
1036 $sgraph->setAttribute( 'edgedefault', 'directed' );
cc31ebaa 1037 $sgraph->setAttribute( 'id', $xmlidname );
2c669bca 1038 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1039 $sgraph->setAttribute( 'parse.edges', 0 ); # fill in later
2c669bca 1040 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1041 $sgraph->setAttribute( 'parse.nodes', 0 ); # fill in later
2c669bca 1042 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 1043
2a812726 1044 # Tradition/collation attribute data
bbd064a9 1045 foreach my $datum ( keys %graph_attributes ) {
1046 my $value;
1047 if( $datum eq 'version' ) {
2a812726 1048 $value = '3.2';
1049 } elsif( $datum eq 'stemmata' ) {
1050 my @stemstrs;
1051 map { push( @stemstrs, $_->editable( {linesep => ''} ) ) }
1052 $self->tradition->stemmata;
1053 $value = join( "\n", @stemstrs );
bbd064a9 1054 } elsif( $gattr_from{$datum} eq 'Tradition' ) {
1055 $value = $self->tradition->$datum;
1056 } else {
1057 $value = $self->$datum;
1058 }
2c669bca 1059 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 1060 }
8e1394aa 1061
1062 my $node_ctr = 0;
1063 my %node_hash;
22222af9 1064 # Add our readings to the graph
3a2ebbf4 1065 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
a30ca502 1066 next if $n->has_rank && $n ne $self->start && $n ne $self->end &&
1067 ( $n->rank < $start->rank || $n->rank > $end->rank );
1068 $use_readings{$n->id} = 1;
2c669bca 1069 # Add to the main graph
1070 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 1071 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 1072 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 1073 $node_el->setAttribute( 'id', $node_xmlid );
bbd064a9 1074 foreach my $d ( keys %reading_attributes ) {
255875b8 1075 my $nval = $n->$d;
7cd9f181 1076 # Custom serialization
1077 if( $d eq 'lexemes' ) {
1078 # If nval is a true value, we have lexemes so we need to
1079 # serialize them. Otherwise set nval to undef so that the
1080 # key is excluded from this reading.
1081 $nval = $nval ? $n->_serialize_lexemes : undef;
1082 }
cc31ebaa 1083 if( $rankoffset && $d eq 'rank' && $n ne $self->start ) {
414cc046 1084 # Adjust the ranks within the subgraph.
cc31ebaa 1085 $nval = $n eq $self->end ? $end->rank - $rankoffset + 1
1086 : $nval - $rankoffset;
414cc046 1087 }
255875b8 1088 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
1089 if defined $nval;
1090 }
b15511bf 1091 }
1092
2c669bca 1093 # Add the path edges to the sequence graph
df6d9812 1094 my $edge_ctr = 0;
3a2ebbf4 1095 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
1096 # We add an edge in the graphml for every witness in $e.
a30ca502 1097 next unless( $use_readings{$e->[0]} || $use_readings{$e->[1]} );
1098 my @edge_wits = sort $self->path_witnesses( $e );
cc31ebaa 1099 $e->[0] = $self->start->id unless $use_readings{$e->[0]};
1100 $e->[1] = $self->end->id unless $use_readings{$e->[1]};
1101 # Skip any path from start to end; that witness is not in the subgraph.
1102 next if ( $e->[0] eq $self->start->id && $e->[1] eq $self->end->id );
a30ca502 1103 foreach my $wit ( @edge_wits ) {
3a2ebbf4 1104 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
1105 $node_hash{ $e->[0] },
1106 $node_hash{ $e->[1] } );
2c669bca 1107 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 1108 $edge_el->setAttribute( 'source', $from );
1109 $edge_el->setAttribute( 'target', $to );
1110 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 1111
1112 # It's a witness path, so add the witness
1113 my $base = $wit;
1114 my $key = $edge_data_keys{'witness'};
1115 # Is this an ante-corr witness?
1116 my $aclabel = $self->ac_label;
1117 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
1118 # Keep the base witness
1119 $base = $1;
1120 # ...and record that this is an 'extra' reading path
1121 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
1122 }
1123 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
1124 }
1125 }
1126
cc31ebaa 1127 # Report the actual number of nodes and edges that went in
1128 $sgraph->setAttribute( 'parse.edges', $edge_ctr );
1129 $sgraph->setAttribute( 'parse.nodes', $node_ctr );
1130
22222af9 1131 # Add the relationship graph to the XML
bbd064a9 1132 map { delete $edge_data_keys{$_} } @path_attributes;
826d8773 1133 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
1134 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 1135
94c00c71 1136 # Save and return the thing
1137 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 1138 return $result;
df6d9812 1139}
1140
b15511bf 1141sub _add_graphml_data {
1142 my( $el, $key, $value ) = @_;
b15511bf 1143 return unless defined $value;
c9bf3dbf 1144 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 1145 $data_el->setAttribute( 'key', $key );
1146 $data_el->appendText( $value );
8e1394aa 1147}
1148
4e5a7b2c 1149=head2 as_csv
910a0a6d 1150
1151Returns a CSV alignment table representation of the collation graph, one
2c669bca 1152row per witness (or witness uncorrected.)
910a0a6d 1153
1154=cut
1155
1156sub as_csv {
3a2ebbf4 1157 my( $self ) = @_;
1dd07bda 1158 my $table = $self->alignment_table;
82fa4d57 1159 my $csv = Text::CSV->new( { binary => 1, quote_null => 0 } );
910a0a6d 1160 my @result;
2c669bca 1161 # Make the header row
1162 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
1163 push( @result, decode_utf8( $csv->string ) );
1164 # Make the rest of the rows
1165 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 1166 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
1dd07bda 1167 my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
2c669bca 1168 $csv->combine( @row );
910a0a6d 1169 push( @result, decode_utf8( $csv->string ) );
1170 }
3a2ebbf4 1171 return join( "\n", @result );
910a0a6d 1172}
1173
1dd07bda 1174=head2 alignment_table( $use_refs, $include_witnesses )
2c669bca 1175
566f4595 1176Return a reference to an alignment table, in a slightly enhanced CollateX
1177format which looks like this:
1178
1179 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 1180 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1181 { witness => "SIG2",
4e5a7b2c 1182 tokens => [ { t => "TEXT" }, ... ] },
566f4595 1183 ... ],
1184 length => TEXTLEN };
1185
1186If $use_refs is set to 1, the reading object is returned in the table
1187instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 1188
1189If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 1190keys have a true hash value will be included.
2c669bca 1191
1192=cut
9f3ba6f7 1193
1dd07bda 1194sub alignment_table {
1195 my( $self ) = @_;
c1915ab9 1196 $self->calculate_ranks() unless $self->_graphcalc_done;
1dd07bda 1197 return $self->cached_table if $self->has_cached_table;
1198
0ecb975c 1199 # Make sure we can do this
1200 throw( "Need a linear graph in order to make an alignment table" )
1201 unless $self->linear;
1202 $self->calculate_ranks unless $self->end->has_rank;
1203
2c669bca 1204 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 1205 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 1206 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
eca16057 1207 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 1208 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1dd07bda 1209 my @row = _make_witness_row( \@wit_path, \@all_pos );
2c669bca 1210 push( @{$table->{'alignment'}},
1211 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 1212 if( $wit->is_layered ) {
1213 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 1214 $wit->sigil.$self->ac_label );
1dd07bda 1215 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos );
2c669bca 1216 push( @{$table->{'alignment'}},
1217 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 1218 }
1219 }
1dd07bda 1220 $self->cached_table( $table );
1221 return $table;
910a0a6d 1222}
1223
1224sub _make_witness_row {
1dd07bda 1225 my( $path, $positions ) = @_;
910a0a6d 1226 my %char_hash;
1227 map { $char_hash{$_} = undef } @$positions;
2c669bca 1228 my $debug = 0;
910a0a6d 1229 foreach my $rdg ( @$path ) {
eca16057 1230 my $rtext = $rdg->text;
1231 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 1232 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 1233 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
1dd07bda 1234 $char_hash{$rdg->rank} = { 't' => $rdg };
910a0a6d 1235 }
1236 my @row = map { $char_hash{$_} } @$positions;
eca16057 1237 # Fill in lacuna markers for undef spots in the row
1238 my $last_el = shift @row;
1239 my @filled_row = ( $last_el );
1240 foreach my $el ( @row ) {
0e476982 1241 # If we are using node reference, make the lacuna node appear many times
1242 # in the table. If not, use the lacuna tag.
1dd07bda 1243 if( $last_el && $last_el->{'t'}->is_lacuna && !defined $el ) {
1244 $el = $last_el;
eca16057 1245 }
1246 push( @filled_row, $el );
1247 $last_el = $el;
1248 }
1249 return @filled_row;
910a0a6d 1250}
1251
4e5a7b2c 1252=head1 NAVIGATION METHODS
910a0a6d 1253
4e5a7b2c 1254=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 1255
1256Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 1257with $last, for the witness given in $sigil. If a $backup sigil is
1258specified (e.g. when walking a layered witness), it will be used wherever
1259no $sigil path exists. If there is a base text reading, that will be
1260used wherever no path exists for $sigil or $backup.
e2902068 1261
1262=cut
1263
910a0a6d 1264# TODO Think about returning some lazy-eval iterator.
b0b4421a 1265# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1266
e2902068 1267sub reading_sequence {
861c3e27 1268 my( $self, $start, $end, $witness ) = @_;
e2902068 1269
930ff666 1270 $witness = $self->baselabel unless $witness;
e2902068 1271 my @readings = ( $start );
1272 my %seen;
1273 my $n = $start;
3a2ebbf4 1274 while( $n && $n->id ne $end->id ) {
1275 if( exists( $seen{$n->id} ) ) {
63778331 1276 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1277 }
3a2ebbf4 1278 $seen{$n->id} = 1;
910a0a6d 1279
861c3e27 1280 my $next = $self->next_reading( $n, $witness );
44771cf2 1281 unless( $next ) {
63778331 1282 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1283 }
910a0a6d 1284 push( @readings, $next );
1285 $n = $next;
e2902068 1286 }
1287 # Check that the last reading is our end reading.
1288 my $last = $readings[$#readings];
63778331 1289 throw( "Last reading found from " . $start->text .
1290 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1291 unless $last->id eq $end->id;
e2902068 1292
1293 return @readings;
1294}
1295
4e5a7b2c 1296=head2 next_reading( $reading, $sigil );
8e1394aa 1297
4a8828f0 1298Returns the reading that follows the given reading along the given witness
930ff666 1299path.
8e1394aa 1300
1301=cut
1302
4a8828f0 1303sub next_reading {
e2902068 1304 # Return the successor via the corresponding path.
8e1394aa 1305 my $self = shift;
3a2ebbf4 1306 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1307 return undef unless $answer;
3a2ebbf4 1308 return $self->reading( $answer );
8e1394aa 1309}
1310
4e5a7b2c 1311=head2 prior_reading( $reading, $sigil )
8e1394aa 1312
4a8828f0 1313Returns the reading that precedes the given reading along the given witness
930ff666 1314path.
8e1394aa 1315
1316=cut
1317
4a8828f0 1318sub prior_reading {
e2902068 1319 # Return the predecessor via the corresponding path.
8e1394aa 1320 my $self = shift;
3a2ebbf4 1321 my $answer = $self->_find_linked_reading( 'prior', @_ );
1322 return $self->reading( $answer );
8e1394aa 1323}
1324
4a8828f0 1325sub _find_linked_reading {
861c3e27 1326 my( $self, $direction, $node, $path ) = @_;
1327
1328 # Get a backup if we are dealing with a layered witness
1329 my $alt_path;
1330 my $aclabel = $self->ac_label;
1331 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1332 $alt_path = $1;
1333 }
1334
e2902068 1335 my @linked_paths = $direction eq 'next'
3a2ebbf4 1336 ? $self->sequence->edges_from( $node )
1337 : $self->sequence->edges_to( $node );
e2902068 1338 return undef unless scalar( @linked_paths );
8e1394aa 1339
e2902068 1340 # We have to find the linked path that contains all of the
1341 # witnesses supplied in $path.
1342 my( @path_wits, @alt_path_wits );
4e5a7b2c 1343 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1344 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1345 my $base_le;
1346 my $alt_le;
1347 foreach my $le ( @linked_paths ) {
3a2ebbf4 1348 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1349 $base_le = $le;
910a0a6d 1350 }
508fd430 1351 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1352 if( _is_within( \@path_wits, \@le_wits ) ) {
1353 # This is the right path.
1354 return $direction eq 'next' ? $le->[1] : $le->[0];
1355 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1356 $alt_le = $le;
1357 }
8e1394aa 1358 }
e2902068 1359 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1360 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1361 if $alt_le;
e2902068 1362
1363 # Got this far? Return the base path if it exists.
3a2ebbf4 1364 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1365 if $base_le;
e2902068 1366
1367 # Got this far? We have no appropriate path.
2c669bca 1368 warn "Could not find $direction node from " . $node->id
910a0a6d 1369 . " along path $path";
8e1394aa 1370 return undef;
1371}
1372
4a8828f0 1373# Some set logic.
1374sub _is_within {
1375 my( $set1, $set2 ) = @_;
7854e12e 1376 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1377 foreach my $el ( @$set1 ) {
910a0a6d 1378 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1379 }
1380 return $ret;
1381}
1382
4e5a7b2c 1383# Return the string that joins together a list of witnesses for
1384# display on a single path.
1385sub _witnesses_of_label {
1386 my( $self, $label ) = @_;
1387 my $regex = $self->wit_list_separator;
1388 my @answer = split( /\Q$regex\E/, $label );
1389 return @answer;
b0b4421a 1390}
1391
d4b75f44 1392=head2 common_readings
1393
1394Returns the list of common readings in the graph (i.e. those readings that are
1395shared by all non-lacunose witnesses.)
1396
1397=cut
1398
1399sub common_readings {
1400 my $self = shift;
1401 my @common = grep { $_->is_common } $self->readings;
1402 return @common;
1403}
1404
fae52efd 1405=head2 path_text( $sigil, [, $start, $end ] )
b0b4421a 1406
1407Returns the text of a witness (plus its backup, if we are using a layer)
1408as stored in the collation. The text is returned as a string, where the
1409individual readings are joined with spaces and the meta-readings (e.g.
1410lacunae) are omitted. Optional specification of $start and $end allows
1411the generation of a subset of the witness text.
4e5a7b2c 1412
b0b4421a 1413=cut
1414
1415sub path_text {
861c3e27 1416 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1417 $start = $self->start unless $start;
1418 $end = $self->end unless $end;
861c3e27 1419 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
629e27b0 1420 my $pathtext = '';
1421 my $last;
1422 foreach my $r ( @path ) {
6ad2ce78 1423 unless ( $r->join_prior || !$last || $last->join_next ) {
1424 $pathtext .= ' ';
1425 }
1426 $pathtext .= $r->text;
629e27b0 1427 $last = $r;
1428 }
1429 return $pathtext;
b0b4421a 1430}
4e5a7b2c 1431
1432=head1 INITIALIZATION METHODS
1433
1434These are mostly for use by parsers.
1435
1436=head2 make_witness_path( $witness )
1437
1438Link the array of readings contained in $witness->path (and in
1439$witness->uncorrected_path if it exists) into collation paths.
1440Clear out the arrays when finished.
de51424a 1441
4e5a7b2c 1442=head2 make_witness_paths
1443
1444Call make_witness_path for all witnesses in the tradition.
1445
1446=cut
930ff666 1447
7e450e44 1448# For use when a collation is constructed from a base text and an apparatus.
1449# We have the sequences of readings and just need to add path edges.
1f7aa795 1450# When we are done, clear out the witness path attributes, as they are no
1451# longer needed.
1452# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1453
6a222840 1454sub make_witness_paths {
1455 my( $self ) = @_;
910a0a6d 1456 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1457 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1458 $self->make_witness_path( $wit );
7854e12e 1459 }
7854e12e 1460}
1461
6a222840 1462sub make_witness_path {
7854e12e 1463 my( $self, $wit ) = @_;
1464 my @chain = @{$wit->path};
15d2d3df 1465 my $sig = $wit->sigil;
fae52efd 1466 # Add start and end if necessary
1467 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1468 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
7854e12e 1469 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1470 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1471 }
1f7aa795 1472 if( $wit->is_layered ) {
d9e873d0 1473 @chain = @{$wit->uncorrected_path};
fae52efd 1474 unshift( @chain, $self->start ) unless $chain[0] eq $self->start;
1475 push( @chain, $self->end ) unless $chain[-1] eq $self->end;
d9e873d0 1476 foreach my $idx( 0 .. $#chain-1 ) {
1477 my $source = $chain[$idx];
1478 my $target = $chain[$idx+1];
1479 $self->add_path( $source, $target, $sig.$self->ac_label )
1480 unless $self->has_path( $source, $target, $sig );
1481 }
15d2d3df 1482 }
1f7aa795 1483 $wit->clear_path;
1484 $wit->clear_uncorrected_path;
e2902068 1485}
1486
4e5a7b2c 1487=head2 calculate_ranks
1488
1489Calculate the reading ranks (that is, their aligned positions relative
1490to each other) for the graph. This can only be called on linear collations.
1491
b365fbae 1492=begin testing
1493
1494use Text::Tradition;
1495
1496my $cxfile = 't/data/Collatex-16.xml';
1497my $t = Text::Tradition->new(
1498 'name' => 'inline',
1499 'input' => 'CollateX',
1500 'file' => $cxfile,
1501 );
1502my $c = $t->collation;
1503
1504# Make an svg
bfcbcecb 1505my $table = $c->alignment_table;
1506ok( $c->has_cached_table, "Alignment table was cached" );
1507is( $c->alignment_table, $table, "Cached table returned upon second call" );
b365fbae 1508$c->calculate_ranks;
bfcbcecb 1509is( $c->alignment_table, $table, "Cached table retained with no rank change" );
679f17e1 1510$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
bfcbcecb 1511isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
b365fbae 1512
1513=end testing
1514
4e5a7b2c 1515=cut
1516
910a0a6d 1517sub calculate_ranks {
1518 my $self = shift;
b365fbae 1519 # Save the existing ranks, in case we need to invalidate the cached SVG.
1520 my %existing_ranks;
ac4d7ac2 1521 map { $existing_ranks{$_} = $_->rank } $self->readings;
359944f7 1522
1523 # Do the rankings based on the relationship equivalence graph, starting
1524 # with the start node.
1525 my $topo_start = $self->equivalence( $self->start->id );
c9bf3dbf 1526 my $node_ranks = { $topo_start => 0 };
910a0a6d 1527 my @curr_origin = ( $topo_start );
1528 # A little iterative function.
1529 while( @curr_origin ) {
359944f7 1530 @curr_origin = _assign_rank( $self->equivalence_graph,
1531 $node_ranks, @curr_origin );
910a0a6d 1532 }
1533 # Transfer our rankings from the topological graph to the real one.
1534 foreach my $r ( $self->readings ) {
359944f7 1535 if( defined $node_ranks->{$self->equivalence( $r->id )} ) {
1536 $r->rank( $node_ranks->{$self->equivalence( $r->id )} );
67da8d6c 1537 } else {
63778331 1538 # Die. Find the last rank we calculated.
359944f7 1539 my @all_defined = sort { ( $node_ranks->{$self->equivalence( $a->id )}||-1 )
1540 <=> ( $node_ranks->{$self->equivalence( $b->id )}||-1 ) }
63778331 1541 $self->readings;
1542 my $last = pop @all_defined;
1543 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1544 }
de51424a 1545 }
bfcbcecb 1546 # Do we need to invalidate the cached data?
1547 if( $self->has_cached_svg || $self->has_cached_table ) {
b365fbae 1548 foreach my $r ( $self->readings ) {
7c293912 1549 next if defined( $existing_ranks{$r} )
1550 && $existing_ranks{$r} == $r->rank;
c1915ab9 1551 # Something has changed, so clear the cache
bfcbcecb 1552 $self->_clear_cache;
c1915ab9 1553 # ...and recalculate the common readings.
1554 $self->calculate_common_readings();
b365fbae 1555 last;
1556 }
1557 }
c1915ab9 1558 # The graph calculation information is now up to date.
1559 $self->_graphcalc_done(1);
8e1394aa 1560}
3a1f2523 1561
910a0a6d 1562sub _assign_rank {
c9bf3dbf 1563 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1564 # Look at each of the children of @current_nodes. If all the child's
1565 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1566 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1567 # parent gets a rank.
910a0a6d 1568 my @next_nodes;
1569 foreach my $c ( @current_nodes ) {
c9bf3dbf 1570 warn "Current reading $c has no rank!"
1571 unless exists $node_ranks->{$c};
1572 # print STDERR "Looking at child of node $c, rank "
1573 # . $node_ranks->{$c} . "\n";
1574 foreach my $child ( $graph->successors( $c ) ) {
1575 next if exists $node_ranks->{$child};
910a0a6d 1576 my $highest_rank = -1;
1577 my $skip = 0;
c9bf3dbf 1578 foreach my $parent ( $graph->predecessors( $child ) ) {
1579 if( exists $node_ranks->{$parent} ) {
1580 $highest_rank = $node_ranks->{$parent}
1581 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1582 } else {
1583 $skip = 1;
1584 last;
1585 }
1586 }
1587 next if $skip;
c9bf3dbf 1588 my $c_rank = $highest_rank + 1;
1589 # print STDERR "Assigning rank $c_rank to node $child \n";
1590 $node_ranks->{$child} = $c_rank;
910a0a6d 1591 push( @next_nodes, $child );
1592 }
1593 }
1594 return @next_nodes;
4cdd82f1 1595}
910a0a6d 1596
c1915ab9 1597sub _clear_cache {
1598 my $self = shift;
1599 $self->wipe_svg if $self->has_cached_svg;
1600 $self->wipe_table if $self->has_cached_table;
1601}
1602
1603
4e5a7b2c 1604=head2 flatten_ranks
1605
1606A convenience method for parsing collation data. Searches the graph for readings
1607with the same text at the same rank, and merges any that are found.
1608
1609=cut
1610
0e476982 1611sub flatten_ranks {
1612 my $self = shift;
1613 my %unique_rank_rdg;
bf6e338d 1614 my $changed;
0e476982 1615 foreach my $rdg ( $self->readings ) {
1616 next unless $rdg->has_rank;
1617 my $key = $rdg->rank . "||" . $rdg->text;
1618 if( exists $unique_rank_rdg{$key} ) {
1619 # Combine!
56eefa04 1620 # print STDERR "Combining readings at same rank: $key\n";
bf6e338d 1621 $changed = 1;
0e476982 1622 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1623 # TODO see if this now makes a common point.
0e476982 1624 } else {
1625 $unique_rank_rdg{$key} = $rdg;
1626 }
1627 }
bf6e338d 1628 # If we merged readings, the ranks are still fine but the alignment
1629 # table is wrong. Wipe it.
1630 $self->wipe_table() if $changed;
0e476982 1631}
4633f9e4 1632
1633
d4b75f44 1634=head2 calculate_common_readings
1635
1636Goes through the graph identifying the readings that appear in every witness
1637(apart from those with lacunae at that spot.) Marks them as common and returns
1638the list.
1639
1640=begin testing
1641
1642use Text::Tradition;
1643
1644my $cxfile = 't/data/Collatex-16.xml';
1645my $t = Text::Tradition->new(
1646 'name' => 'inline',
1647 'input' => 'CollateX',
1648 'file' => $cxfile,
1649 );
1650my $c = $t->collation;
1651
1652my @common = $c->calculate_common_readings();
1653is( scalar @common, 8, "Found correct number of common readings" );
1654my @marked = sort $c->common_readings();
1655is( scalar @common, 8, "All common readings got marked as such" );
679f17e1 1656my @expected = qw/ n1 n11 n16 n19 n20 n5 n6 n7 /;
d4b75f44 1657is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1658
1659=end testing
1660
1661=cut
1662
1663sub calculate_common_readings {
1664 my $self = shift;
1665 my @common;
c1915ab9 1666 map { $_->is_common( 0 ) } $self->readings;
1667 # Implicitly calls calculate_ranks
1dd07bda 1668 my $table = $self->alignment_table;
d4b75f44 1669 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
7f52eac8 1670 my @row = map { $_->{'tokens'}->[$idx]
1671 ? $_->{'tokens'}->[$idx]->{'t'} : '' }
1672 @{$table->{'alignment'}};
d4b75f44 1673 my %hash;
1674 foreach my $r ( @row ) {
1675 if( $r ) {
1676 $hash{$r->id} = $r unless $r->is_meta;
1677 } else {
1678 $hash{'UNDEF'} = $r;
1679 }
1680 }
1681 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1682 my( $r ) = values %hash;
1683 $r->is_common( 1 );
1684 push( @common, $r );
1685 }
1686 }
1687 return @common;
1688}
1689
861c3e27 1690=head2 text_from_paths
1691
1692Calculate the text array for all witnesses from the path, for later consistency
1693checking. Only to be used if there is no non-graph-based way to know the
1694original texts.
1695
1696=cut
1697
1698sub text_from_paths {
1699 my $self = shift;
1700 foreach my $wit ( $self->tradition->witnesses ) {
5164a6f0 1701 my @readings = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
1702 my @text;
1703 foreach my $r ( @readings ) {
1704 next if $r->is_meta;
1705 push( @text, $r->text );
1706 }
861c3e27 1707 $wit->text( \@text );
1708 if( $wit->is_layered ) {
5164a6f0 1709 my @ucrdgs = $self->reading_sequence( $self->start, $self->end,
1710 $wit->sigil.$self->ac_label );
1711 my @uctext;
1712 foreach my $r ( @ucrdgs ) {
1713 next if $r->is_meta;
1714 push( @uctext, $r->text );
1715 }
1716 $wit->layertext( \@uctext );
861c3e27 1717 }
1718 }
1719}
0e476982 1720
4e5a7b2c 1721=head1 UTILITY FUNCTIONS
1722
1723=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1724
4e5a7b2c 1725Find the last reading that occurs in sequence before both the given readings.
414cc046 1726At the very least this should be $self->start.
4e5a7b2c 1727
1728=head2 common_successor( $reading_a, $reading_b )
1729
1730Find the first reading that occurs in sequence after both the given readings.
414cc046 1731At the very least this should be $self->end.
4e5a7b2c 1732
22222af9 1733=begin testing
1734
1735use Text::Tradition;
1736
1737my $cxfile = 't/data/Collatex-16.xml';
1738my $t = Text::Tradition->new(
1739 'name' => 'inline',
1740 'input' => 'CollateX',
1741 'file' => $cxfile,
1742 );
1743my $c = $t->collation;
1744
679f17e1 1745is( $c->common_predecessor( 'n24', 'n23' )->id,
22222af9 1746 'n20', "Found correct common predecessor" );
679f17e1 1747is( $c->common_successor( 'n24', 'n23' )->id,
10e4b1ac 1748 '__END__', "Found correct common successor" );
22222af9 1749
4e5a7b2c 1750is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1751 'n16', "Found correct common predecessor for readings on same path" );
679f17e1 1752is( $c->common_successor( 'n21', 'n10' )->id,
10e4b1ac 1753 '__END__', "Found correct common successor for readings on same path" );
22222af9 1754
1755=end testing
1756
1757=cut
1758
1759## Return the closest reading that is a predecessor of both the given readings.
1760sub common_predecessor {
1761 my $self = shift;
4e5a7b2c 1762 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1763 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1764}
1765
1766sub common_successor {
1767 my $self = shift;
4e5a7b2c 1768 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1769 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1770}
1771
414cc046 1772
1773# TODO think about how to do this without ranks...
027d819c 1774sub _common_in_path {
22222af9 1775 my( $self, $r1, $r2, $dir ) = @_;
414cc046 1776 my $iter = $self->end->rank;
22222af9 1777 my @candidates;
414cc046 1778 my @last_r1 = ( $r1 );
1779 my @last_r2 = ( $r2 );
1780 # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
22222af9 1781 my %all_seen;
414cc046 1782 # print STDERR "Finding common $dir for $r1, $r2\n";
22222af9 1783 while( !@candidates ) {
414cc046 1784 last unless $iter--; # Avoid looping infinitely
1785 # Iterate separately down the graph from r1 and r2
1786 my( @new_lc1, @new_lc2 );
1787 foreach my $lc ( @last_r1 ) {
1788 foreach my $p ( $lc->$dir ) {
1789 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
1790 # print STDERR "Path candidate $p from $lc\n";
1791 push( @candidates, $p );
002e3600 1792 } elsif( !$all_seen{$p->id} ) {
414cc046 1793 $all_seen{$p->id} = 'r1';
1794 push( @new_lc1, $p );
1795 }
1796 }
1797 }
1798 foreach my $lc ( @last_r2 ) {
22222af9 1799 foreach my $p ( $lc->$dir ) {
414cc046 1800 if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
1801 # print STDERR "Path candidate $p from $lc\n";
22222af9 1802 push( @candidates, $p );
002e3600 1803 } elsif( !$all_seen{$p->id} ) {
414cc046 1804 $all_seen{$p->id} = 'r2';
1805 push( @new_lc2, $p );
22222af9 1806 }
1807 }
1808 }
414cc046 1809 @last_r1 = @new_lc1;
1810 @last_r2 = @new_lc2;
22222af9 1811 }
1812 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1813 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1814}
1815
63778331 1816sub throw {
1817 Text::Tradition::Error->throw(
1818 'ident' => 'Collation error',
1819 'message' => $_[0],
1820 );
1821}
1822
dd3b58b0 1823no Moose;
1824__PACKAGE__->meta->make_immutable;
e867486f 1825
027d819c 1826=head1 LICENSE
e867486f 1827
027d819c 1828This package is free software and is provided "as is" without express
1829or implied warranty. You can redistribute it and/or modify it under
1830the same terms as Perl itself.
e867486f 1831
027d819c 1832=head1 AUTHOR
e867486f 1833
027d819c 1834Tara L Andrews E<lt>aurum@cpan.orgE<gt>