add relationship deletion functionality
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
c9bf3dbf 5use Graph;
8e1394aa 6use IPC::Run qw( run binary );
910a0a6d 7use Text::CSV_XS;
b15511bf 8use Text::Tradition::Collation::Reading;
22222af9 9use Text::Tradition::Collation::RelationshipStore;
63778331 10use Text::Tradition::Error;
df6d9812 11use XML::LibXML;
a344ffcf 12use XML::LibXML::XPathContext;
dd3b58b0 13use Moose;
14
3a2ebbf4 15has 'sequence' => (
d047cd52 16 is => 'ro',
3a2ebbf4 17 isa => 'Graph',
18 default => sub { Graph->new() },
d047cd52 19 handles => {
3a2ebbf4 20 paths => 'edges',
d047cd52 21 },
d047cd52 22 );
3a2ebbf4 23
24has 'relations' => (
25 is => 'ro',
22222af9 26 isa => 'Text::Tradition::Collation::RelationshipStore',
27 handles => {
28 relationships => 'relationships',
29 related_readings => 'related_readings',
ee801e17 30 del_relationship => 'del_relationship',
22222af9 31 },
32 writer => '_set_relations',
3a2ebbf4 33 );
dd3b58b0 34
3a2ebbf4 35has 'tradition' => (
36 is => 'ro',
d047cd52 37 isa => 'Text::Tradition',
8d9a1cd8 38 weak_ref => 1,
d047cd52 39 );
dd3b58b0 40
3a2ebbf4 41has 'readings' => (
42 isa => 'HashRef[Text::Tradition::Collation::Reading]',
43 traits => ['Hash'],
44 handles => {
45 reading => 'get',
46 _add_reading => 'set',
47 del_reading => 'delete',
48 has_reading => 'exists',
49 readings => 'values',
50 },
51 default => sub { {} },
52 );
910a0a6d 53
4a8828f0 54has 'wit_list_separator' => (
7854e12e 55 is => 'rw',
56 isa => 'Str',
57 default => ', ',
58 );
59
60has 'baselabel' => (
61 is => 'rw',
62 isa => 'Str',
63 default => 'base text',
64 );
4a8828f0 65
15d2d3df 66has 'linear' => (
67 is => 'rw',
68 isa => 'Bool',
69 default => 1,
70 );
c84275ff 71
ef9d481f 72has 'ac_label' => (
73 is => 'rw',
74 isa => 'Str',
75 default => ' (a.c.)',
76 );
3a2ebbf4 77
78has 'start' => (
79 is => 'ro',
80 isa => 'Text::Tradition::Collation::Reading',
81 writer => '_set_start',
82 weak_ref => 1,
83 );
84
85has 'end' => (
86 is => 'ro',
87 isa => 'Text::Tradition::Collation::Reading',
88 writer => '_set_end',
89 weak_ref => 1,
90 );
1f563ac3 91
4e5a7b2c 92=head1 NAME
93
94Text::Tradition::Collation - a software model for a text collation
95
96=head1 SYNOPSIS
97
98 use Text::Tradition;
99 my $t = Text::Tradition->new(
100 'name' => 'this is a text',
101 'input' => 'TEI',
102 'file' => '/path/to/tei_parallel_seg_file.xml' );
103
104 my $c = $t->collation;
105 my @readings = $c->readings;
106 my @paths = $c->paths;
107 my @relationships = $c->relationships;
108
109 my $svg_variant_graph = $t->collation->as_svg();
110
111=head1 DESCRIPTION
112
113Text::Tradition is a library for representation and analysis of collated
114texts, particularly medieval ones. The Collation is the central feature of
115a Tradition, where the text, its sequence of readings, and its relationships
116between readings are actually kept.
117
118=head1 CONSTRUCTOR
119
120=head2 new
121
122The constructor. Takes a hash or hashref of the following arguments:
123
124=over
125
126=item * tradition - The Text::Tradition object to which the collation
127belongs. Required.
128
129=item * linear - Whether the collation should be linear; that is, whether
130transposed readings should be treated as two linked readings rather than one,
131and therefore whether the collation graph is acyclic. Defaults to true.
132
4e5a7b2c 133=item * baselabel - The default label for the path taken by a base text
134(if any). Defaults to 'base text'.
135
136=item * wit_list_separator - The string to join a list of witnesses for
137purposes of making labels in display graphs. Defaults to ', '.
138
139=item * ac_label - The extra label to tack onto a witness sigil when
140representing another layer of path for the given witness - that is, when
141a text has more than one possible reading due to scribal corrections or
142the like. Defaults to ' (a.c.)'.
143
144=back
145
146=head1 ACCESSORS
147
148=head2 tradition
149
150=head2 linear
151
4e5a7b2c 152=head2 wit_list_separator
153
154=head2 baselabel
155
156=head2 ac_label
157
158Simple accessors for collation attributes.
159
160=head2 start
161
162The meta-reading at the start of every witness path.
163
164=head2 end
165
166The meta-reading at the end of every witness path.
167
168=head2 readings
169
170Returns all Reading objects in the graph.
171
172=head2 reading( $id )
173
174Returns the Reading object corresponding to the given ID.
175
176=head2 add_reading( $reading_args )
177
178Adds a new reading object to the collation.
179See L<Text::Tradition::Collation::Reading> for the available arguments.
180
181=head2 del_reading( $object_or_id )
182
183Removes the given reading from the collation, implicitly removing its
184paths and relationships.
185
186=head2 merge_readings( $main, $second )
187
188Merges the $second reading into the $main one.
189The arguments may be either readings or reading IDs.
190
191=head2 has_reading( $id )
192
193Predicate to see whether a given reading ID is in the graph.
194
195=head2 reading_witnesses( $object_or_id )
196
197Returns a list of sigils whose witnesses contain the reading.
198
199=head2 paths
200
201Returns all reading paths within the document - that is, all edges in the
202collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
203
204=head2 add_path( $source, $target, $sigil )
205
206Links the given readings in the collation in sequence, under the given witness
207sigil. The readings may be specified by object or ID.
208
209=head2 del_path( $source, $target, $sigil )
210
211Links the given readings in the collation in sequence, under the given witness
212sigil. The readings may be specified by object or ID.
213
214=head2 has_path( $source, $target );
215
216Returns true if the two readings are linked in sequence in any witness.
217The readings may be specified by object or ID.
218
219=head2 relationships
220
221Returns all Relationship objects in the collation.
222
223=head2 add_relationship( $reading, $other_reading, $options )
224
225Adds a new relationship of the type given in $options between the two readings,
226which may be specified by object or ID. Returns a value of ( $status, @vectors)
227where $status is true on success, and @vectors is a list of relationship edges
228that were ultimately added.
229See L<Text::Tradition::Collation::Relationship> for the available options.
230
231=cut
dd3b58b0 232
d047cd52 233sub BUILD {
3a2ebbf4 234 my $self = shift;
22222af9 235 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 236 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
237 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 238}
784877d9 239
3a2ebbf4 240### Reading construct/destruct functions
241
242sub add_reading {
243 my( $self, $reading ) = @_;
244 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
245 my %args = %$reading;
246 $reading = Text::Tradition::Collation::Reading->new(
247 'collation' => $self,
248 %args );
249 }
250 # First check to see if a reading with this ID exists.
251 if( $self->reading( $reading->id ) ) {
63778331 252 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 253 }
254 $self->_add_reading( $reading->id => $reading );
255 # Once the reading has been added, put it in both graphs.
256 $self->sequence->add_vertex( $reading->id );
22222af9 257 $self->relations->add_reading( $reading->id );
3a2ebbf4 258 return $reading;
eca16057 259};
260
3a2ebbf4 261around del_reading => sub {
262 my $orig = shift;
263 my $self = shift;
264 my $arg = shift;
265
266 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
267 $arg = $arg->id;
268 }
3a2ebbf4 269 # Remove the reading from the graphs.
270 $self->sequence->delete_vertex( $arg );
22222af9 271 $self->relations->delete_reading( $arg );
3a2ebbf4 272
273 # Carry on.
274 $self->$orig( $arg );
275};
7854e12e 276
3a2ebbf4 277# merge_readings( $main, $to_be_deleted );
7854e12e 278
3a2ebbf4 279sub merge_readings {
280 my $self = shift;
281
282 # We only need the IDs for adding paths to the graph, not the reading
283 # objects themselves.
49d4f2ac 284 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
3a2ebbf4 285
286 # The kept reading should inherit the paths and the relationships
287 # of the deleted reading.
288 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
289 my @vector = ( $kept );
290 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
291 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 292 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 293 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
294 $self->sequence->add_edge( @vector );
295 my $fwits = $self->sequence->get_edge_attributes( @vector );
296 @wits{keys %$fwits} = values %$fwits;
297 $self->sequence->set_edge_attributes( @vector, \%wits );
298 }
22222af9 299 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 300
301 # Do the deletion deed.
49d4f2ac 302 if( $combine_char ) {
303 my $kept_obj = $self->reading( $kept );
304 my $new_text = join( $combine_char, $kept_obj->text,
305 $self->reading( $deleted )->text );
306 $kept_obj->alter_text( $new_text );
307 }
3a2ebbf4 308 $self->del_reading( $deleted );
309}
7854e12e 310
3265b0ce 311
3a2ebbf4 312# Helper function for manipulating the graph.
313sub _stringify_args {
314 my( $self, $first, $second, $arg ) = @_;
315 $first = $first->id
316 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
317 $second = $second->id
318 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
319 return( $first, $second, $arg );
320}
df6d9812 321
4e5a7b2c 322# Helper function for manipulating the graph.
323sub _objectify_args {
324 my( $self, $first, $second, $arg ) = @_;
325 $first = $self->reading( $first )
326 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
327 $second = $self->reading( $second )
328 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
329 return( $first, $second, $arg );
330}
3a2ebbf4 331### Path logic
332
333sub add_path {
334 my $self = shift;
335
336 # We only need the IDs for adding paths to the graph, not the reading
337 # objects themselves.
338 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
339
340 # Connect the readings
341 $self->sequence->add_edge( $source, $target );
342 # Note the witness in question
343 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 344};
345
3a2ebbf4 346sub del_path {
347 my $self = shift;
49d4f2ac 348 my @args;
349 if( ref( $_[0] ) eq 'ARRAY' ) {
350 my $e = shift @_;
351 @args = ( @$e, @_ );
352 } else {
353 @args = @_;
354 }
3a2ebbf4 355
356 # We only need the IDs for adding paths to the graph, not the reading
357 # objects themselves.
49d4f2ac 358 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 359
360 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 361 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 362 }
363 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
364 $self->sequence->delete_edge( $source, $target );
365 }
784877d9 366}
367
3a2ebbf4 368
15d2d3df 369# Extra graph-alike utility
370sub has_path {
3a2ebbf4 371 my $self = shift;
372 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
373 return undef unless $self->sequence->has_edge( $source, $target );
374 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 375}
376
4e5a7b2c 377=head2 clear_witness( @sigil_list )
3a2ebbf4 378
4e5a7b2c 379Clear the given witnesses out of the collation entirely, removing references
380to them in paths, and removing readings that belong only to them. Should only
381be called via $tradition->del_witness.
3a2ebbf4 382
383=cut
384
4e5a7b2c 385sub clear_witness {
386 my( $self, @sigils ) = @_;
387
388 # Clear the witness(es) out of the paths
389 foreach my $e ( $self->paths ) {
390 foreach my $sig ( @sigils ) {
391 $self->del_path( $e, $sig );
392 }
393 }
394
395 # Clear out the newly unused readings
396 foreach my $r ( $self->readings ) {
397 unless( $self->reading_witnesses( $r ) ) {
398 $self->del_reading( $r );
399 }
400 }
401}
3a2ebbf4 402
403sub add_relationship {
404 my $self = shift;
22222af9 405 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 406 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 407 $self->reading( $source ), $target, $self->reading( $target ), $opts );
408 # Force a full rank recalculation every time. Yuck.
63778331 409 $self->calculate_ranks() if $self->end->has_rank;
410 return @vectors;
22222af9 411}
ef9d481f 412
22222af9 413=head2 reading_witnesses( $reading )
910a0a6d 414
22222af9 415Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 416
22222af9 417=cut
1d310495 418
1d310495 419sub reading_witnesses {
420 my( $self, $reading ) = @_;
421 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 422 # arbitrarily chosen "incoming". Thus, special-case the start node.
423 if( $reading eq $self->start ) {
424 return map { $_->sigil } $self->tradition->witnesses;
425 }
1d310495 426 my %all_witnesses;
427 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
428 my $wits = $self->sequence->get_edge_attributes( @$e );
429 @all_witnesses{ keys %$wits } = 1;
430 }
431 return keys %all_witnesses;
910a0a6d 432}
433
4e5a7b2c 434=head1 OUTPUT METHODS
8e1394aa 435
0ecb975c 436=head2 as_svg( \%options )
8e1394aa 437
0068967c 438Returns an SVG string that represents the graph, via as_dot and graphviz.
0ecb975c 439See as_dot for a list of options.
8e1394aa 440
441=cut
442
443sub as_svg {
0ecb975c 444 my( $self, $opts ) = @_;
3a2ebbf4 445
8e1394aa 446 my @cmd = qw/dot -Tsvg/;
447 my( $svg, $err );
910a0a6d 448 my $dotfile = File::Temp->new();
a344ffcf 449 ## USE FOR DEBUGGING
eca16057 450 # $dotfile->unlink_on_destroy(0);
910a0a6d 451 binmode $dotfile, ':utf8';
0ecb975c 452 print $dotfile $self->as_dot( $opts );
910a0a6d 453 push( @cmd, $dotfile->filename );
454 run( \@cmd, ">", binary(), \$svg );
364c71b1 455 return decode_utf8( $svg );
8e1394aa 456}
457
b22576c6 458
0ecb975c 459=head2 as_dot( \%options )
b22576c6 460
0ecb975c 461Returns a string that is the collation graph expressed in dot
462(i.e. GraphViz) format. Options include:
b22576c6 463
0ecb975c 464=over 4
b22576c6 465
0ecb975c 466=item * from
b22576c6 467
0ecb975c 468=item * to
df6d9812 469
0ecb975c 470=item * color_common
471
472=back
df6d9812 473
474=cut
475
476sub as_dot {
0ecb975c 477 my( $self, $opts ) = @_;
478 my $startrank = $opts->{'from'} if $opts;
479 my $endrank = $opts->{'to'} if $opts;
480 my $color_common = $opts->{'color_common'} if $opts;
b22576c6 481
482 # Check the arguments
483 if( $startrank ) {
484 return if $endrank && $startrank > $endrank;
485 return if $startrank > $self->end->rank;
486 }
487 if( defined $endrank ) {
488 return if $endrank < 0;
f1b3b33a 489 $endrank = undef if $endrank == $self->end->rank;
b22576c6 490 }
491
df6d9812 492 # TODO consider making some of these things configurable
67da8d6c 493 my $graph_name = $self->tradition->name;
494 $graph_name =~ s/[^\w\s]//g;
495 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 496
497 my %graph_attrs = (
498 'rankdir' => 'LR',
499 'bgcolor' => 'none',
500 );
501 my %node_attrs = (
b8990398 502 'fontsize' => 14,
f13b5582 503 'fillcolor' => 'white',
504 'style' => 'filled',
505 'shape' => 'ellipse'
506 );
507 my %edge_attrs = (
508 'arrowhead' => 'open',
509 'color' => '#000000',
510 'fontcolor' => '#000000',
511 );
512
67da8d6c 513 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 514 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
515 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 516
b22576c6 517 # Output substitute start/end readings if necessary
518 if( $startrank ) {
519 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
520 }
521 if( $endrank ) {
522 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
523 }
30ddc24c 524
b22576c6 525 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 526 # Sort the readings by rank if we have ranks; this speeds layout.
527 my @all_readings = $self->end->has_rank
528 ? sort { $a->rank <=> $b->rank } $self->readings
529 : $self->readings;
530 foreach my $reading ( @all_readings ) {
b22576c6 531 # Only output readings within our rank range.
532 next if $startrank && $reading->rank < $startrank;
533 next if $endrank && $reading->rank > $endrank;
534 $used{$reading->id} = 1;
910a0a6d 535 # Need not output nodes without separate labels
3a2ebbf4 536 next if $reading->id eq $reading->text;
d4b75f44 537 my $rattrs;
30f0df34 538 my $label = $reading->text;
8f9cab7b 539 $label =~ s/\"/\\\"/g;
d4b75f44 540 $rattrs->{'label'} = $label;
0ecb975c 541 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 542 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 543 }
3a2ebbf4 544
30ddc24c 545 # Add the real edges. Need to weight one edge per rank jump, in a
546 # continuous line.
547 my $weighted = $self->_add_edge_weights;
b22576c6 548 my @edges = $self->paths;
3bdec618 549 my( %substart, %subend );
b22576c6 550 foreach my $edge ( @edges ) {
551 # Do we need to output this edge?
508fd430 552 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 553 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 554 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 555
b22576c6 556 # Account for the rank gap if necessary
30ddc24c 557 my $rank0 = $self->reading( $edge->[0] )->rank
558 if $self->reading( $edge->[0] )->has_rank;
559 my $rank1 = $self->reading( $edge->[1] )->rank
560 if $self->reading( $edge->[1] )->has_rank;
561 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
562 $variables->{'minlen'} = $rank1 - $rank0;
563 }
564
565 # Add the calculated edge weights
566 if( exists $weighted->{$edge->[0]}
567 && $weighted->{$edge->[0]} eq $edge->[1] ) {
568 # $variables->{'color'} = 'red';
569 $variables->{'weight'} = 3.0;
b0b4421a 570 }
30ddc24c 571
508fd430 572 # EXPERIMENTAL: make edge width reflect no. of witnesses
573 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
574 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
575
f13b5582 576 my $varopts = _dot_attr_string( $variables );
577 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
578 $edge->[0], $edge->[1], $varopts );
3bdec618 579 } elsif( $used{$edge->[0]} ) {
580 $subend{$edge->[0]} = 1;
581 } elsif( $used{$edge->[1]} ) {
582 $substart{$edge->[1]} = 1;
b22576c6 583 }
df6d9812 584 }
3bdec618 585 # Add substitute start and end edges if necessary
586 foreach my $node ( keys %substart ) {
027d819c 587 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 588 my $variables = { %edge_attrs, 'label' => $witstr };
589 my $varopts = _dot_attr_string( $variables );
590 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 591 }
592 foreach my $node ( keys %subend ) {
027d819c 593 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 594 my $variables = { %edge_attrs, 'label' => $witstr };
595 my $varopts = _dot_attr_string( $variables );
596 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 597 }
30ddc24c 598
df6d9812 599 $dot .= "}\n";
600 return $dot;
601}
602
f13b5582 603sub _dot_attr_string {
604 my( $hash ) = @_;
605 my @attrs;
606 foreach my $k ( sort keys %$hash ) {
607 my $v = $hash->{$k};
608 push( @attrs, $k.'="'.$v.'"' );
609 }
610 return( '[ ' . join( ', ', @attrs ) . ' ]' );
611}
612
30ddc24c 613sub _add_edge_weights {
614 my $self = shift;
615 # Walk the graph from START to END, choosing the successor node with
616 # the largest number of witness paths each time.
617 my $weighted = {};
618 my $curr = $self->start->id;
619 while( $curr ne $self->end->id ) {
620 my @succ = sort { $self->path_witnesses( $curr, $a )
621 <=> $self->path_witnesses( $curr, $b ) }
622 $self->sequence->successors( $curr );
623 my $next = pop @succ;
624 # Try to avoid lacunae in the weighted path.
625 while( $self->reading( $next )->is_lacuna && @succ ) {
626 $next = pop @succ;
627 }
628 $weighted->{$curr} = $next;
629 $curr = $next;
630 }
631 return $weighted;
632}
633
027d819c 634=head2 path_witnesses( $edge )
635
636Returns the list of sigils whose witnesses are associated with the given edge.
637The edge can be passed as either an array or an arrayref of ( $source, $target ).
638
639=cut
640
3a2ebbf4 641sub path_witnesses {
642 my( $self, @edge ) = @_;
643 # If edge is an arrayref, cope.
644 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
645 my $e = shift @edge;
646 @edge = @$e;
647 }
648 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 649 return @wits;
3a2ebbf4 650}
651
027d819c 652sub _path_display_label {
508fd430 653 my $self = shift;
654 my @wits = sort @_;
8f9cab7b 655 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
656 if( scalar @wits > $maj ) {
f13b5582 657 # TODO break out a.c. wits
8f9cab7b 658 return 'majority';
659 } else {
660 return join( ', ', @wits );
661 }
662}
663
664
4e5a7b2c 665=head2 as_graphml
8e1394aa 666
4e5a7b2c 667Returns a GraphML representation of the collation. The GraphML will contain
668two graphs. The first expresses the attributes of the readings and the witness
669paths that link them; the second expresses the relationships that link the
670readings. This is the native transfer format for a tradition.
8e1394aa 671
56eefa04 672=begin testing
673
674use Text::Tradition;
675
676my $READINGS = 311;
677my $PATHS = 361;
678
679my $datafile = 't/data/florilegium_tei_ps.xml';
680my $tradition = Text::Tradition->new( 'input' => 'TEI',
681 'name' => 'test0',
682 'file' => $datafile,
683 'linear' => 1 );
684
685ok( $tradition, "Got a tradition object" );
686is( scalar $tradition->witnesses, 13, "Found all witnesses" );
687ok( $tradition->collation, "Tradition has a collation" );
688
689my $c = $tradition->collation;
690is( scalar $c->readings, $READINGS, "Collation has all readings" );
691is( scalar $c->paths, $PATHS, "Collation has all paths" );
692is( scalar $c->relationships, 0, "Collation has all relationships" );
693
694# Add a few relationships
695$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
696$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
697$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
698
699# Now write it to GraphML and parse it again.
700
701my $graphml = $c->as_graphml;
702my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
703is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
704is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
705is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
706
707=end testing
708
8e1394aa 709=cut
710
711sub as_graphml {
3a2ebbf4 712 my( $self ) = @_;
8e1394aa 713
714 # Some namespaces
715 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
716 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
717 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 718 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 719
720 # Create the document and root node
721 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
722 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
723 $graphml->setDocumentElement( $root );
724 $root->setNamespace( $xsi_ns, 'xsi', 0 );
725 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
726
e309421a 727 # Add the data keys for the graph
728 my %graph_data_keys;
729 my $gdi = 0;
1d310495 730 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 731 foreach my $datum ( @graph_attributes ) {
732 $graph_data_keys{$datum} = 'dg'.$gdi++;
733 my $key = $root->addNewChild( $graphml_ns, 'key' );
734 $key->setAttribute( 'attr.name', $datum );
735 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
736 $key->setAttribute( 'for', 'graph' );
737 $key->setAttribute( 'id', $graph_data_keys{$datum} );
738 }
f6066bac 739
8e1394aa 740 # Add the data keys for nodes
ef9d481f 741 my %node_data_keys;
742 my $ndi = 0;
3a2ebbf4 743 my %node_data = (
744 id => 'string',
255875b8 745 text => 'string',
3a2ebbf4 746 rank => 'string',
747 is_start => 'boolean',
748 is_end => 'boolean',
749 is_lacuna => 'boolean',
750 );
751 foreach my $datum ( keys %node_data ) {
910a0a6d 752 $node_data_keys{$datum} = 'dn'.$ndi++;
753 my $key = $root->addNewChild( $graphml_ns, 'key' );
754 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 755 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 756 $key->setAttribute( 'for', 'node' );
757 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 758 }
759
df6d9812 760 # Add the data keys for edges, i.e. witnesses
ef9d481f 761 my $edi = 0;
762 my %edge_data_keys;
3a2ebbf4 763 my %edge_data = (
f523c7a8 764 class => 'string', # Class, deprecated soon
3a2ebbf4 765 witness => 'string', # ID/label for a path
766 relationship => 'string', # ID/label for a relationship
767 extra => 'boolean', # Path key
c84275ff 768 scope => 'string', # Relationship key
3a2ebbf4 769 non_correctable => 'boolean', # Relationship key
770 non_independent => 'boolean', # Relationship key
771 );
772 foreach my $datum ( keys %edge_data ) {
773 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 774 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 775 $key->setAttribute( 'attr.name', $datum );
776 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 777 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 778 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 779 }
3a2ebbf4 780
22222af9 781 # Add the collation graph itself
2c669bca 782 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
783 $sgraph->setAttribute( 'edgedefault', 'directed' );
784 $sgraph->setAttribute( 'id', $self->tradition->name );
785 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
786 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
787 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
788 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
789 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 790
e309421a 791 # Collation attribute data
792 foreach my $datum ( @graph_attributes ) {
2c669bca 793 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
794 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 795 }
8e1394aa 796
797 my $node_ctr = 0;
798 my %node_hash;
22222af9 799 # Add our readings to the graph
3a2ebbf4 800 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 801 # Add to the main graph
802 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 803 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 804 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 805 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 806 foreach my $d ( keys %node_data ) {
807 my $nval = $n->$d;
808 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
809 if defined $nval;
810 }
b15511bf 811 }
812
2c669bca 813 # Add the path edges to the sequence graph
df6d9812 814 my $edge_ctr = 0;
3a2ebbf4 815 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
816 # We add an edge in the graphml for every witness in $e.
508fd430 817 foreach my $wit ( sort $self->path_witnesses( $e ) ) {
3a2ebbf4 818 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
819 $node_hash{ $e->[0] },
820 $node_hash{ $e->[1] } );
2c669bca 821 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 822 $edge_el->setAttribute( 'source', $from );
823 $edge_el->setAttribute( 'target', $to );
824 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 825
826 # It's a witness path, so add the witness
827 my $base = $wit;
828 my $key = $edge_data_keys{'witness'};
829 # Is this an ante-corr witness?
830 my $aclabel = $self->ac_label;
831 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
832 # Keep the base witness
833 $base = $1;
834 # ...and record that this is an 'extra' reading path
835 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
836 }
837 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 838 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 839 }
840 }
841
22222af9 842 # Add the relationship graph to the XML
027d819c 843 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
2626f709 844 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 845
94c00c71 846 # Save and return the thing
847 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 848 return $result;
df6d9812 849}
850
b15511bf 851sub _add_graphml_data {
852 my( $el, $key, $value ) = @_;
b15511bf 853 return unless defined $value;
c9bf3dbf 854 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 855 $data_el->setAttribute( 'key', $key );
856 $data_el->appendText( $value );
8e1394aa 857}
858
4e5a7b2c 859=head2 as_csv
910a0a6d 860
861Returns a CSV alignment table representation of the collation graph, one
2c669bca 862row per witness (or witness uncorrected.)
910a0a6d 863
864=cut
865
866sub as_csv {
3a2ebbf4 867 my( $self ) = @_;
910a0a6d 868 my $table = $self->make_alignment_table;
869 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
870 my @result;
2c669bca 871 # Make the header row
872 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
873 push( @result, decode_utf8( $csv->string ) );
874 # Make the rest of the rows
875 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 876 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
2c669bca 877 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
878 $csv->combine( @row );
910a0a6d 879 push( @result, decode_utf8( $csv->string ) );
880 }
3a2ebbf4 881 return join( "\n", @result );
910a0a6d 882}
883
4e5a7b2c 884=head2 make_alignment_table( $use_refs, $include_witnesses )
2c669bca 885
566f4595 886Return a reference to an alignment table, in a slightly enhanced CollateX
887format which looks like this:
888
889 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 890 tokens => [ { t => "TEXT" }, ... ] },
566f4595 891 { witness => "SIG2",
4e5a7b2c 892 tokens => [ { t => "TEXT" }, ... ] },
566f4595 893 ... ],
894 length => TEXTLEN };
895
896If $use_refs is set to 1, the reading object is returned in the table
897instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 898
899If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 900keys have a true hash value will be included.
2c669bca 901
902=cut
9f3ba6f7 903
910a0a6d 904sub make_alignment_table {
08e0fb85 905 my( $self, $noderefs, $include ) = @_;
0ecb975c 906 # Make sure we can do this
907 throw( "Need a linear graph in order to make an alignment table" )
908 unless $self->linear;
909 $self->calculate_ranks unless $self->end->has_rank;
910
2c669bca 911 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 912 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 913 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2c669bca 914 if( $include ) {
566f4595 915 next unless $include->{$wit->sigil};
2c669bca 916 }
eca16057 917 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 918 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
919 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
2c669bca 920 push( @{$table->{'alignment'}},
921 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 922 if( $wit->is_layered ) {
923 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 924 $wit->sigil.$self->ac_label );
1f7aa795 925 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
2c669bca 926 push( @{$table->{'alignment'}},
927 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 928 }
929 }
2c669bca 930 return $table;
910a0a6d 931}
932
933sub _make_witness_row {
0e476982 934 my( $path, $positions, $noderefs ) = @_;
910a0a6d 935 my %char_hash;
936 map { $char_hash{$_} = undef } @$positions;
2c669bca 937 my $debug = 0;
910a0a6d 938 foreach my $rdg ( @$path ) {
eca16057 939 my $rtext = $rdg->text;
940 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 941 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 942 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
2c669bca 943 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
944 : { 't' => $rtext };
910a0a6d 945 }
946 my @row = map { $char_hash{$_} } @$positions;
eca16057 947 # Fill in lacuna markers for undef spots in the row
948 my $last_el = shift @row;
949 my @filled_row = ( $last_el );
950 foreach my $el ( @row ) {
0e476982 951 # If we are using node reference, make the lacuna node appear many times
952 # in the table. If not, use the lacuna tag.
953 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
566f4595 954 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
eca16057 955 }
956 push( @filled_row, $el );
957 $last_el = $el;
958 }
959 return @filled_row;
910a0a6d 960}
961
0e476982 962# Tiny utility function to say if a table element is a lacuna
963sub _el_is_lacuna {
964 my $el = shift;
2c669bca 965 return 1 if $el->{'t'} eq '#LACUNA#';
966 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
967 && $el->{'t'}->is_lacuna;
0e476982 968 return 0;
969}
970
910a0a6d 971# Helper to turn the witnesses along columns rather than rows. Assumes
972# equal-sized rows.
973sub _turn_table {
974 my( $table ) = @_;
975 my $result = [];
976 return $result unless scalar @$table;
977 my $nrows = scalar @{$table->[0]};
978 foreach my $idx ( 0 .. $nrows - 1 ) {
979 foreach my $wit ( 0 .. $#{$table} ) {
980 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
981 }
982 }
983 return $result;
984}
985
4e5a7b2c 986=head1 NAVIGATION METHODS
910a0a6d 987
4e5a7b2c 988=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 989
990Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 991with $last, for the witness given in $sigil. If a $backup sigil is
992specified (e.g. when walking a layered witness), it will be used wherever
993no $sigil path exists. If there is a base text reading, that will be
994used wherever no path exists for $sigil or $backup.
e2902068 995
996=cut
997
910a0a6d 998# TODO Think about returning some lazy-eval iterator.
b0b4421a 999# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1000
e2902068 1001sub reading_sequence {
861c3e27 1002 my( $self, $start, $end, $witness ) = @_;
e2902068 1003
930ff666 1004 $witness = $self->baselabel unless $witness;
e2902068 1005 my @readings = ( $start );
1006 my %seen;
1007 my $n = $start;
3a2ebbf4 1008 while( $n && $n->id ne $end->id ) {
1009 if( exists( $seen{$n->id} ) ) {
63778331 1010 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1011 }
3a2ebbf4 1012 $seen{$n->id} = 1;
910a0a6d 1013
861c3e27 1014 my $next = $self->next_reading( $n, $witness );
44771cf2 1015 unless( $next ) {
63778331 1016 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1017 }
910a0a6d 1018 push( @readings, $next );
1019 $n = $next;
e2902068 1020 }
1021 # Check that the last reading is our end reading.
1022 my $last = $readings[$#readings];
63778331 1023 throw( "Last reading found from " . $start->text .
1024 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1025 unless $last->id eq $end->id;
e2902068 1026
1027 return @readings;
1028}
1029
4e5a7b2c 1030=head2 next_reading( $reading, $sigil );
8e1394aa 1031
4a8828f0 1032Returns the reading that follows the given reading along the given witness
930ff666 1033path.
8e1394aa 1034
1035=cut
1036
4a8828f0 1037sub next_reading {
e2902068 1038 # Return the successor via the corresponding path.
8e1394aa 1039 my $self = shift;
3a2ebbf4 1040 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1041 return undef unless $answer;
3a2ebbf4 1042 return $self->reading( $answer );
8e1394aa 1043}
1044
4e5a7b2c 1045=head2 prior_reading( $reading, $sigil )
8e1394aa 1046
4a8828f0 1047Returns the reading that precedes the given reading along the given witness
930ff666 1048path.
8e1394aa 1049
1050=cut
1051
4a8828f0 1052sub prior_reading {
e2902068 1053 # Return the predecessor via the corresponding path.
8e1394aa 1054 my $self = shift;
3a2ebbf4 1055 my $answer = $self->_find_linked_reading( 'prior', @_ );
1056 return $self->reading( $answer );
8e1394aa 1057}
1058
4a8828f0 1059sub _find_linked_reading {
861c3e27 1060 my( $self, $direction, $node, $path ) = @_;
1061
1062 # Get a backup if we are dealing with a layered witness
1063 my $alt_path;
1064 my $aclabel = $self->ac_label;
1065 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1066 $alt_path = $1;
1067 }
1068
e2902068 1069 my @linked_paths = $direction eq 'next'
3a2ebbf4 1070 ? $self->sequence->edges_from( $node )
1071 : $self->sequence->edges_to( $node );
e2902068 1072 return undef unless scalar( @linked_paths );
8e1394aa 1073
e2902068 1074 # We have to find the linked path that contains all of the
1075 # witnesses supplied in $path.
1076 my( @path_wits, @alt_path_wits );
4e5a7b2c 1077 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1078 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1079 my $base_le;
1080 my $alt_le;
1081 foreach my $le ( @linked_paths ) {
3a2ebbf4 1082 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1083 $base_le = $le;
910a0a6d 1084 }
508fd430 1085 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1086 if( _is_within( \@path_wits, \@le_wits ) ) {
1087 # This is the right path.
1088 return $direction eq 'next' ? $le->[1] : $le->[0];
1089 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1090 $alt_le = $le;
1091 }
8e1394aa 1092 }
e2902068 1093 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1094 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1095 if $alt_le;
e2902068 1096
1097 # Got this far? Return the base path if it exists.
3a2ebbf4 1098 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1099 if $base_le;
e2902068 1100
1101 # Got this far? We have no appropriate path.
2c669bca 1102 warn "Could not find $direction node from " . $node->id
910a0a6d 1103 . " along path $path";
8e1394aa 1104 return undef;
1105}
1106
4a8828f0 1107# Some set logic.
1108sub _is_within {
1109 my( $set1, $set2 ) = @_;
7854e12e 1110 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1111 foreach my $el ( @$set1 ) {
910a0a6d 1112 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1113 }
1114 return $ret;
1115}
1116
4e5a7b2c 1117# Return the string that joins together a list of witnesses for
1118# display on a single path.
1119sub _witnesses_of_label {
1120 my( $self, $label ) = @_;
1121 my $regex = $self->wit_list_separator;
1122 my @answer = split( /\Q$regex\E/, $label );
1123 return @answer;
b0b4421a 1124}
1125
d4b75f44 1126=head2 common_readings
1127
1128Returns the list of common readings in the graph (i.e. those readings that are
1129shared by all non-lacunose witnesses.)
1130
1131=cut
1132
1133sub common_readings {
1134 my $self = shift;
1135 my @common = grep { $_->is_common } $self->readings;
1136 return @common;
1137}
1138
b0b4421a 1139=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1140
1141Returns the text of a witness (plus its backup, if we are using a layer)
1142as stored in the collation. The text is returned as a string, where the
1143individual readings are joined with spaces and the meta-readings (e.g.
1144lacunae) are omitted. Optional specification of $start and $end allows
1145the generation of a subset of the witness text.
4e5a7b2c 1146
b0b4421a 1147=cut
1148
1149sub path_text {
861c3e27 1150 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1151 $start = $self->start unless $start;
1152 $end = $self->end unless $end;
861c3e27 1153 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
b0b4421a 1154 return join( ' ', map { $_->text } @path );
1155}
4e5a7b2c 1156
1157=head1 INITIALIZATION METHODS
1158
1159These are mostly for use by parsers.
1160
1161=head2 make_witness_path( $witness )
1162
1163Link the array of readings contained in $witness->path (and in
1164$witness->uncorrected_path if it exists) into collation paths.
1165Clear out the arrays when finished.
de51424a 1166
4e5a7b2c 1167=head2 make_witness_paths
1168
1169Call make_witness_path for all witnesses in the tradition.
1170
1171=cut
930ff666 1172
7e450e44 1173# For use when a collation is constructed from a base text and an apparatus.
1174# We have the sequences of readings and just need to add path edges.
1f7aa795 1175# When we are done, clear out the witness path attributes, as they are no
1176# longer needed.
1177# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1178
6a222840 1179sub make_witness_paths {
1180 my( $self ) = @_;
910a0a6d 1181 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1182 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1183 $self->make_witness_path( $wit );
7854e12e 1184 }
7854e12e 1185}
1186
6a222840 1187sub make_witness_path {
7854e12e 1188 my( $self, $wit ) = @_;
1189 my @chain = @{$wit->path};
15d2d3df 1190 my $sig = $wit->sigil;
7854e12e 1191 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1192 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1193 }
1f7aa795 1194 if( $wit->is_layered ) {
d9e873d0 1195 @chain = @{$wit->uncorrected_path};
1196 foreach my $idx( 0 .. $#chain-1 ) {
1197 my $source = $chain[$idx];
1198 my $target = $chain[$idx+1];
1199 $self->add_path( $source, $target, $sig.$self->ac_label )
1200 unless $self->has_path( $source, $target, $sig );
1201 }
15d2d3df 1202 }
1f7aa795 1203 $wit->clear_path;
1204 $wit->clear_uncorrected_path;
e2902068 1205}
1206
4e5a7b2c 1207=head2 calculate_ranks
1208
1209Calculate the reading ranks (that is, their aligned positions relative
1210to each other) for the graph. This can only be called on linear collations.
1211
1212=cut
1213
910a0a6d 1214sub calculate_ranks {
1215 my $self = shift;
1216 # Walk a version of the graph where every node linked by a relationship
1217 # edge is fundamentally the same node, and do a topological ranking on
1218 # the nodes in this graph.
c9bf3dbf 1219 my $topo_graph = Graph->new();
910a0a6d 1220 my %rel_containers;
1221 my $rel_ctr = 0;
1222 # Add the nodes
1223 foreach my $r ( $self->readings ) {
3a2ebbf4 1224 next if exists $rel_containers{$r->id};
910a0a6d 1225 my @rels = $r->related_readings( 'colocated' );
1226 if( @rels ) {
1227 # Make a relationship container.
1228 push( @rels, $r );
c9bf3dbf 1229 my $rn = 'rel_container_' . $rel_ctr++;
1230 $topo_graph->add_vertex( $rn );
910a0a6d 1231 foreach( @rels ) {
3a2ebbf4 1232 $rel_containers{$_->id} = $rn;
910a0a6d 1233 }
1234 } else {
1235 # Add a new node to mirror the old node.
3a2ebbf4 1236 $rel_containers{$r->id} = $r->id;
1237 $topo_graph->add_vertex( $r->id );
910a0a6d 1238 }
4a8828f0 1239 }
3a1f2523 1240
3a2ebbf4 1241 # Add the edges.
910a0a6d 1242 foreach my $r ( $self->readings ) {
3a2ebbf4 1243 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1244 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1245 $rel_containers{$n} );
4e5a7b2c 1246 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1247 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1248 }
1249 }
1250
1251 # Now do the rankings, starting with the start node.
3a2ebbf4 1252 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1253 my $node_ranks = { $topo_start => 0 };
910a0a6d 1254 my @curr_origin = ( $topo_start );
1255 # A little iterative function.
1256 while( @curr_origin ) {
c9bf3dbf 1257 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1258 }
1259 # Transfer our rankings from the topological graph to the real one.
1260 foreach my $r ( $self->readings ) {
3a2ebbf4 1261 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1262 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1263 } else {
63778331 1264 # Die. Find the last rank we calculated.
1265 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1266 <=> $node_ranks->{$rel_containers{$b->id}} }
1267 $self->readings;
1268 my $last = pop @all_defined;
1269 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1270 }
de51424a 1271 }
8e1394aa 1272}
3a1f2523 1273
910a0a6d 1274sub _assign_rank {
c9bf3dbf 1275 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1276 # Look at each of the children of @current_nodes. If all the child's
1277 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1278 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1279 # parent gets a rank.
910a0a6d 1280 my @next_nodes;
1281 foreach my $c ( @current_nodes ) {
c9bf3dbf 1282 warn "Current reading $c has no rank!"
1283 unless exists $node_ranks->{$c};
1284 # print STDERR "Looking at child of node $c, rank "
1285 # . $node_ranks->{$c} . "\n";
1286 foreach my $child ( $graph->successors( $c ) ) {
1287 next if exists $node_ranks->{$child};
910a0a6d 1288 my $highest_rank = -1;
1289 my $skip = 0;
c9bf3dbf 1290 foreach my $parent ( $graph->predecessors( $child ) ) {
1291 if( exists $node_ranks->{$parent} ) {
1292 $highest_rank = $node_ranks->{$parent}
1293 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1294 } else {
1295 $skip = 1;
1296 last;
1297 }
1298 }
1299 next if $skip;
c9bf3dbf 1300 my $c_rank = $highest_rank + 1;
1301 # print STDERR "Assigning rank $c_rank to node $child \n";
1302 $node_ranks->{$child} = $c_rank;
910a0a6d 1303 push( @next_nodes, $child );
1304 }
1305 }
1306 return @next_nodes;
4cdd82f1 1307}
910a0a6d 1308
4e5a7b2c 1309=head2 flatten_ranks
1310
1311A convenience method for parsing collation data. Searches the graph for readings
1312with the same text at the same rank, and merges any that are found.
1313
1314=cut
1315
0e476982 1316sub flatten_ranks {
1317 my $self = shift;
1318 my %unique_rank_rdg;
1319 foreach my $rdg ( $self->readings ) {
1320 next unless $rdg->has_rank;
1321 my $key = $rdg->rank . "||" . $rdg->text;
1322 if( exists $unique_rank_rdg{$key} ) {
1323 # Combine!
56eefa04 1324 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1325 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1326 # TODO see if this now makes a common point.
0e476982 1327 } else {
1328 $unique_rank_rdg{$key} = $rdg;
1329 }
1330 }
1331}
1332
d4b75f44 1333=head2 calculate_common_readings
1334
1335Goes through the graph identifying the readings that appear in every witness
1336(apart from those with lacunae at that spot.) Marks them as common and returns
1337the list.
1338
1339=begin testing
1340
1341use Text::Tradition;
1342
1343my $cxfile = 't/data/Collatex-16.xml';
1344my $t = Text::Tradition->new(
1345 'name' => 'inline',
1346 'input' => 'CollateX',
1347 'file' => $cxfile,
1348 );
1349my $c = $t->collation;
1350
1351my @common = $c->calculate_common_readings();
1352is( scalar @common, 8, "Found correct number of common readings" );
1353my @marked = sort $c->common_readings();
1354is( scalar @common, 8, "All common readings got marked as such" );
1355my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1356is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1357
1358=end testing
1359
1360=cut
1361
1362sub calculate_common_readings {
1363 my $self = shift;
1364 my @common;
1365 my $table = $self->make_alignment_table( 1 );
1366 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1367 my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
1368 my %hash;
1369 foreach my $r ( @row ) {
1370 if( $r ) {
1371 $hash{$r->id} = $r unless $r->is_meta;
1372 } else {
1373 $hash{'UNDEF'} = $r;
1374 }
1375 }
1376 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1377 my( $r ) = values %hash;
1378 $r->is_common( 1 );
1379 push( @common, $r );
1380 }
1381 }
1382 return @common;
1383}
1384
861c3e27 1385=head2 text_from_paths
1386
1387Calculate the text array for all witnesses from the path, for later consistency
1388checking. Only to be used if there is no non-graph-based way to know the
1389original texts.
1390
1391=cut
1392
1393sub text_from_paths {
1394 my $self = shift;
1395 foreach my $wit ( $self->tradition->witnesses ) {
1396 my @text = split( /\s+/,
1397 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1398 $wit->text( \@text );
1399 if( $wit->is_layered ) {
1400 my @uctext = split( /\s+/,
1401 $self->reading_sequence( $self->start, $self->end,
1402 $wit->sigil.$self->ac_label ) );
1403 $wit->text( \@uctext );
1404 }
1405 }
1406}
0e476982 1407
4e5a7b2c 1408=head1 UTILITY FUNCTIONS
1409
1410=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1411
4e5a7b2c 1412Find the last reading that occurs in sequence before both the given readings.
1413
1414=head2 common_successor( $reading_a, $reading_b )
1415
1416Find the first reading that occurs in sequence after both the given readings.
1417
22222af9 1418=begin testing
1419
1420use Text::Tradition;
1421
1422my $cxfile = 't/data/Collatex-16.xml';
1423my $t = Text::Tradition->new(
1424 'name' => 'inline',
1425 'input' => 'CollateX',
1426 'file' => $cxfile,
1427 );
1428my $c = $t->collation;
1429
4e5a7b2c 1430is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1431 'n20', "Found correct common predecessor" );
4e5a7b2c 1432is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1433 '#END#', "Found correct common successor" );
1434
4e5a7b2c 1435is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1436 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1437is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1438 '#END#', "Found correct common successor for readings on same path" );
1439
1440=end testing
1441
1442=cut
1443
1444## Return the closest reading that is a predecessor of both the given readings.
1445sub common_predecessor {
1446 my $self = shift;
4e5a7b2c 1447 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1448 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1449}
1450
1451sub common_successor {
1452 my $self = shift;
4e5a7b2c 1453 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1454 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1455}
1456
027d819c 1457sub _common_in_path {
22222af9 1458 my( $self, $r1, $r2, $dir ) = @_;
1459 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1460 $iter = $self->end->rank - $iter if $dir eq 'successors';
1461 my @candidates;
1462 my @last_checked = ( $r1, $r2 );
1463 my %all_seen;
1464 while( !@candidates ) {
1465 my @new_lc;
1466 foreach my $lc ( @last_checked ) {
1467 foreach my $p ( $lc->$dir ) {
1468 if( $all_seen{$p->id} ) {
1469 push( @candidates, $p );
1470 } else {
1471 $all_seen{$p->id} = 1;
1472 push( @new_lc, $p );
1473 }
1474 }
1475 }
1476 @last_checked = @new_lc;
1477 }
1478 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1479 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1480}
1481
63778331 1482sub throw {
1483 Text::Tradition::Error->throw(
1484 'ident' => 'Collation error',
1485 'message' => $_[0],
1486 );
1487}
1488
dd3b58b0 1489no Moose;
1490__PACKAGE__->meta->make_immutable;
e867486f 1491
027d819c 1492=head1 LICENSE
e867486f 1493
027d819c 1494This package is free software and is provided "as is" without express
1495or implied warranty. You can redistribute it and/or modify it under
1496the same terms as Perl itself.
e867486f 1497
027d819c 1498=head1 AUTHOR
e867486f 1499
027d819c 1500Tara L Andrews E<lt>aurum@cpan.orgE<gt>