get rid of undefined-value comparison warning
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
c9bf3dbf 5use Graph;
8e1394aa 6use IPC::Run qw( run binary );
910a0a6d 7use Text::CSV_XS;
b15511bf 8use Text::Tradition::Collation::Reading;
22222af9 9use Text::Tradition::Collation::RelationshipStore;
63778331 10use Text::Tradition::Error;
df6d9812 11use XML::LibXML;
a344ffcf 12use XML::LibXML::XPathContext;
dd3b58b0 13use Moose;
14
3a2ebbf4 15has 'sequence' => (
d047cd52 16 is => 'ro',
3a2ebbf4 17 isa => 'Graph',
18 default => sub { Graph->new() },
d047cd52 19 handles => {
3a2ebbf4 20 paths => 'edges',
d047cd52 21 },
d047cd52 22 );
3a2ebbf4 23
24has 'relations' => (
25 is => 'ro',
22222af9 26 isa => 'Text::Tradition::Collation::RelationshipStore',
27 handles => {
28 relationships => 'relationships',
29 related_readings => 'related_readings',
30 },
31 writer => '_set_relations',
3a2ebbf4 32 );
dd3b58b0 33
3a2ebbf4 34has 'tradition' => (
35 is => 'ro',
d047cd52 36 isa => 'Text::Tradition',
8d9a1cd8 37 weak_ref => 1,
d047cd52 38 );
dd3b58b0 39
3a2ebbf4 40has 'readings' => (
41 isa => 'HashRef[Text::Tradition::Collation::Reading]',
42 traits => ['Hash'],
43 handles => {
44 reading => 'get',
45 _add_reading => 'set',
46 del_reading => 'delete',
47 has_reading => 'exists',
48 readings => 'values',
49 },
50 default => sub { {} },
51 );
910a0a6d 52
4a8828f0 53has 'wit_list_separator' => (
7854e12e 54 is => 'rw',
55 isa => 'Str',
56 default => ', ',
57 );
58
59has 'baselabel' => (
60 is => 'rw',
61 isa => 'Str',
62 default => 'base text',
63 );
4a8828f0 64
15d2d3df 65has 'linear' => (
66 is => 'rw',
67 isa => 'Bool',
68 default => 1,
69 );
c84275ff 70
ef9d481f 71has 'ac_label' => (
72 is => 'rw',
73 isa => 'Str',
74 default => ' (a.c.)',
75 );
3a2ebbf4 76
77has 'start' => (
78 is => 'ro',
79 isa => 'Text::Tradition::Collation::Reading',
80 writer => '_set_start',
81 weak_ref => 1,
82 );
83
84has 'end' => (
85 is => 'ro',
86 isa => 'Text::Tradition::Collation::Reading',
87 writer => '_set_end',
88 weak_ref => 1,
89 );
1f563ac3 90
4e5a7b2c 91=head1 NAME
92
93Text::Tradition::Collation - a software model for a text collation
94
95=head1 SYNOPSIS
96
97 use Text::Tradition;
98 my $t = Text::Tradition->new(
99 'name' => 'this is a text',
100 'input' => 'TEI',
101 'file' => '/path/to/tei_parallel_seg_file.xml' );
102
103 my $c = $t->collation;
104 my @readings = $c->readings;
105 my @paths = $c->paths;
106 my @relationships = $c->relationships;
107
108 my $svg_variant_graph = $t->collation->as_svg();
109
110=head1 DESCRIPTION
111
112Text::Tradition is a library for representation and analysis of collated
113texts, particularly medieval ones. The Collation is the central feature of
114a Tradition, where the text, its sequence of readings, and its relationships
115between readings are actually kept.
116
117=head1 CONSTRUCTOR
118
119=head2 new
120
121The constructor. Takes a hash or hashref of the following arguments:
122
123=over
124
125=item * tradition - The Text::Tradition object to which the collation
126belongs. Required.
127
128=item * linear - Whether the collation should be linear; that is, whether
129transposed readings should be treated as two linked readings rather than one,
130and therefore whether the collation graph is acyclic. Defaults to true.
131
4e5a7b2c 132=item * baselabel - The default label for the path taken by a base text
133(if any). Defaults to 'base text'.
134
135=item * wit_list_separator - The string to join a list of witnesses for
136purposes of making labels in display graphs. Defaults to ', '.
137
138=item * ac_label - The extra label to tack onto a witness sigil when
139representing another layer of path for the given witness - that is, when
140a text has more than one possible reading due to scribal corrections or
141the like. Defaults to ' (a.c.)'.
142
143=back
144
145=head1 ACCESSORS
146
147=head2 tradition
148
149=head2 linear
150
4e5a7b2c 151=head2 wit_list_separator
152
153=head2 baselabel
154
155=head2 ac_label
156
157Simple accessors for collation attributes.
158
159=head2 start
160
161The meta-reading at the start of every witness path.
162
163=head2 end
164
165The meta-reading at the end of every witness path.
166
167=head2 readings
168
169Returns all Reading objects in the graph.
170
171=head2 reading( $id )
172
173Returns the Reading object corresponding to the given ID.
174
175=head2 add_reading( $reading_args )
176
177Adds a new reading object to the collation.
178See L<Text::Tradition::Collation::Reading> for the available arguments.
179
180=head2 del_reading( $object_or_id )
181
182Removes the given reading from the collation, implicitly removing its
183paths and relationships.
184
185=head2 merge_readings( $main, $second )
186
187Merges the $second reading into the $main one.
188The arguments may be either readings or reading IDs.
189
190=head2 has_reading( $id )
191
192Predicate to see whether a given reading ID is in the graph.
193
194=head2 reading_witnesses( $object_or_id )
195
196Returns a list of sigils whose witnesses contain the reading.
197
198=head2 paths
199
200Returns all reading paths within the document - that is, all edges in the
201collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
202
203=head2 add_path( $source, $target, $sigil )
204
205Links the given readings in the collation in sequence, under the given witness
206sigil. The readings may be specified by object or ID.
207
208=head2 del_path( $source, $target, $sigil )
209
210Links the given readings in the collation in sequence, under the given witness
211sigil. The readings may be specified by object or ID.
212
213=head2 has_path( $source, $target );
214
215Returns true if the two readings are linked in sequence in any witness.
216The readings may be specified by object or ID.
217
218=head2 relationships
219
220Returns all Relationship objects in the collation.
221
222=head2 add_relationship( $reading, $other_reading, $options )
223
224Adds a new relationship of the type given in $options between the two readings,
225which may be specified by object or ID. Returns a value of ( $status, @vectors)
226where $status is true on success, and @vectors is a list of relationship edges
227that were ultimately added.
228See L<Text::Tradition::Collation::Relationship> for the available options.
229
230=cut
dd3b58b0 231
d047cd52 232sub BUILD {
3a2ebbf4 233 my $self = shift;
22222af9 234 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 235 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
236 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 237}
784877d9 238
3a2ebbf4 239### Reading construct/destruct functions
240
241sub add_reading {
242 my( $self, $reading ) = @_;
243 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
244 my %args = %$reading;
245 $reading = Text::Tradition::Collation::Reading->new(
246 'collation' => $self,
247 %args );
248 }
249 # First check to see if a reading with this ID exists.
250 if( $self->reading( $reading->id ) ) {
63778331 251 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 252 }
253 $self->_add_reading( $reading->id => $reading );
254 # Once the reading has been added, put it in both graphs.
255 $self->sequence->add_vertex( $reading->id );
22222af9 256 $self->relations->add_reading( $reading->id );
3a2ebbf4 257 return $reading;
eca16057 258};
259
3a2ebbf4 260around del_reading => sub {
261 my $orig = shift;
262 my $self = shift;
263 my $arg = shift;
264
265 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
266 $arg = $arg->id;
267 }
3a2ebbf4 268 # Remove the reading from the graphs.
269 $self->sequence->delete_vertex( $arg );
22222af9 270 $self->relations->delete_reading( $arg );
3a2ebbf4 271
272 # Carry on.
273 $self->$orig( $arg );
274};
7854e12e 275
3a2ebbf4 276# merge_readings( $main, $to_be_deleted );
7854e12e 277
3a2ebbf4 278sub merge_readings {
279 my $self = shift;
280
281 # We only need the IDs for adding paths to the graph, not the reading
282 # objects themselves.
49d4f2ac 283 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
3a2ebbf4 284
285 # The kept reading should inherit the paths and the relationships
286 # of the deleted reading.
287 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
288 my @vector = ( $kept );
289 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
290 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 291 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 292 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
293 $self->sequence->add_edge( @vector );
294 my $fwits = $self->sequence->get_edge_attributes( @vector );
295 @wits{keys %$fwits} = values %$fwits;
296 $self->sequence->set_edge_attributes( @vector, \%wits );
297 }
22222af9 298 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 299
300 # Do the deletion deed.
49d4f2ac 301 if( $combine_char ) {
302 my $kept_obj = $self->reading( $kept );
303 my $new_text = join( $combine_char, $kept_obj->text,
304 $self->reading( $deleted )->text );
305 $kept_obj->alter_text( $new_text );
306 }
3a2ebbf4 307 $self->del_reading( $deleted );
308}
7854e12e 309
3265b0ce 310
3a2ebbf4 311# Helper function for manipulating the graph.
312sub _stringify_args {
313 my( $self, $first, $second, $arg ) = @_;
314 $first = $first->id
315 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
316 $second = $second->id
317 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
318 return( $first, $second, $arg );
319}
df6d9812 320
4e5a7b2c 321# Helper function for manipulating the graph.
322sub _objectify_args {
323 my( $self, $first, $second, $arg ) = @_;
324 $first = $self->reading( $first )
325 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
326 $second = $self->reading( $second )
327 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
328 return( $first, $second, $arg );
329}
3a2ebbf4 330### Path logic
331
332sub add_path {
333 my $self = shift;
334
335 # We only need the IDs for adding paths to the graph, not the reading
336 # objects themselves.
337 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
338
339 # Connect the readings
340 $self->sequence->add_edge( $source, $target );
341 # Note the witness in question
342 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 343};
344
3a2ebbf4 345sub del_path {
346 my $self = shift;
49d4f2ac 347 my @args;
348 if( ref( $_[0] ) eq 'ARRAY' ) {
349 my $e = shift @_;
350 @args = ( @$e, @_ );
351 } else {
352 @args = @_;
353 }
3a2ebbf4 354
355 # We only need the IDs for adding paths to the graph, not the reading
356 # objects themselves.
49d4f2ac 357 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 358
359 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 360 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 361 }
362 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
363 $self->sequence->delete_edge( $source, $target );
364 }
784877d9 365}
366
3a2ebbf4 367
15d2d3df 368# Extra graph-alike utility
369sub has_path {
3a2ebbf4 370 my $self = shift;
371 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
372 return undef unless $self->sequence->has_edge( $source, $target );
373 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 374}
375
4e5a7b2c 376=head2 clear_witness( @sigil_list )
3a2ebbf4 377
4e5a7b2c 378Clear the given witnesses out of the collation entirely, removing references
379to them in paths, and removing readings that belong only to them. Should only
380be called via $tradition->del_witness.
3a2ebbf4 381
382=cut
383
4e5a7b2c 384sub clear_witness {
385 my( $self, @sigils ) = @_;
386
387 # Clear the witness(es) out of the paths
388 foreach my $e ( $self->paths ) {
389 foreach my $sig ( @sigils ) {
390 $self->del_path( $e, $sig );
391 }
392 }
393
394 # Clear out the newly unused readings
395 foreach my $r ( $self->readings ) {
396 unless( $self->reading_witnesses( $r ) ) {
397 $self->del_reading( $r );
398 }
399 }
400}
3a2ebbf4 401
402sub add_relationship {
403 my $self = shift;
22222af9 404 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 405 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 406 $self->reading( $source ), $target, $self->reading( $target ), $opts );
407 # Force a full rank recalculation every time. Yuck.
63778331 408 $self->calculate_ranks() if $self->end->has_rank;
409 return @vectors;
22222af9 410}
ef9d481f 411
22222af9 412=head2 reading_witnesses( $reading )
910a0a6d 413
22222af9 414Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 415
22222af9 416=cut
1d310495 417
1d310495 418sub reading_witnesses {
419 my( $self, $reading ) = @_;
420 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 421 # arbitrarily chosen "incoming". Thus, special-case the start node.
422 if( $reading eq $self->start ) {
423 return map { $_->sigil } $self->tradition->witnesses;
424 }
1d310495 425 my %all_witnesses;
426 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
427 my $wits = $self->sequence->get_edge_attributes( @$e );
428 @all_witnesses{ keys %$wits } = 1;
429 }
430 return keys %all_witnesses;
910a0a6d 431}
432
4e5a7b2c 433=head1 OUTPUT METHODS
8e1394aa 434
0ecb975c 435=head2 as_svg( \%options )
8e1394aa 436
0068967c 437Returns an SVG string that represents the graph, via as_dot and graphviz.
0ecb975c 438See as_dot for a list of options.
8e1394aa 439
440=cut
441
442sub as_svg {
0ecb975c 443 my( $self, $opts ) = @_;
3a2ebbf4 444
8e1394aa 445 my @cmd = qw/dot -Tsvg/;
446 my( $svg, $err );
910a0a6d 447 my $dotfile = File::Temp->new();
a344ffcf 448 ## USE FOR DEBUGGING
eca16057 449 # $dotfile->unlink_on_destroy(0);
910a0a6d 450 binmode $dotfile, ':utf8';
0ecb975c 451 print $dotfile $self->as_dot( $opts );
910a0a6d 452 push( @cmd, $dotfile->filename );
453 run( \@cmd, ">", binary(), \$svg );
364c71b1 454 return decode_utf8( $svg );
8e1394aa 455}
456
b22576c6 457
0ecb975c 458=head2 as_dot( \%options )
b22576c6 459
0ecb975c 460Returns a string that is the collation graph expressed in dot
461(i.e. GraphViz) format. Options include:
b22576c6 462
0ecb975c 463=over 4
b22576c6 464
0ecb975c 465=item * from
b22576c6 466
0ecb975c 467=item * to
df6d9812 468
0ecb975c 469=item * color_common
470
471=back
df6d9812 472
473=cut
474
475sub as_dot {
0ecb975c 476 my( $self, $opts ) = @_;
477 my $startrank = $opts->{'from'} if $opts;
478 my $endrank = $opts->{'to'} if $opts;
479 my $color_common = $opts->{'color_common'} if $opts;
b22576c6 480
481 # Check the arguments
482 if( $startrank ) {
483 return if $endrank && $startrank > $endrank;
484 return if $startrank > $self->end->rank;
485 }
486 if( defined $endrank ) {
487 return if $endrank < 0;
f1b3b33a 488 $endrank = undef if $endrank == $self->end->rank;
b22576c6 489 }
490
67da8d6c 491 my $graph_name = $self->tradition->name;
492 $graph_name =~ s/[^\w\s]//g;
493 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 494
495 my %graph_attrs = (
496 'rankdir' => 'LR',
497 'bgcolor' => 'none',
498 );
499 my %node_attrs = (
b8990398 500 'fontsize' => 14,
f13b5582 501 'fillcolor' => 'white',
502 'style' => 'filled',
503 'shape' => 'ellipse'
504 );
505 my %edge_attrs = (
506 'arrowhead' => 'open',
507 'color' => '#000000',
508 'fontcolor' => '#000000',
509 );
510
67da8d6c 511 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 512 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
513 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 514
b22576c6 515 # Output substitute start/end readings if necessary
516 if( $startrank ) {
517 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
518 }
519 if( $endrank ) {
520 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
521 }
30ddc24c 522
b22576c6 523 my %used; # Keep track of the readings that actually appear in the graph
30ddc24c 524 # Sort the readings by rank if we have ranks; this speeds layout.
525 my @all_readings = $self->end->has_rank
526 ? sort { $a->rank <=> $b->rank } $self->readings
527 : $self->readings;
4633f9e4 528 # TODO Refrain from outputting lacuna nodes - just grey out the edges.
30ddc24c 529 foreach my $reading ( @all_readings ) {
b22576c6 530 # Only output readings within our rank range.
531 next if $startrank && $reading->rank < $startrank;
532 next if $endrank && $reading->rank > $endrank;
533 $used{$reading->id} = 1;
910a0a6d 534 # Need not output nodes without separate labels
3a2ebbf4 535 next if $reading->id eq $reading->text;
d4b75f44 536 my $rattrs;
30f0df34 537 my $label = $reading->text;
8f9cab7b 538 $label =~ s/\"/\\\"/g;
d4b75f44 539 $rattrs->{'label'} = $label;
0ecb975c 540 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 541 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 542 }
3a2ebbf4 543
30ddc24c 544 # Add the real edges. Need to weight one edge per rank jump, in a
545 # continuous line.
546 my $weighted = $self->_add_edge_weights;
b22576c6 547 my @edges = $self->paths;
3bdec618 548 my( %substart, %subend );
b22576c6 549 foreach my $edge ( @edges ) {
550 # Do we need to output this edge?
508fd430 551 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
027d819c 552 my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
f13b5582 553 my $variables = { %edge_attrs, 'label' => $label };
30ddc24c 554
b22576c6 555 # Account for the rank gap if necessary
30ddc24c 556 my $rank0 = $self->reading( $edge->[0] )->rank
557 if $self->reading( $edge->[0] )->has_rank;
558 my $rank1 = $self->reading( $edge->[1] )->rank
559 if $self->reading( $edge->[1] )->has_rank;
560 if( defined $rank0 && defined $rank1 && $rank1 - $rank0 > 1 ) {
561 $variables->{'minlen'} = $rank1 - $rank0;
562 }
563
564 # Add the calculated edge weights
565 if( exists $weighted->{$edge->[0]}
566 && $weighted->{$edge->[0]} eq $edge->[1] ) {
567 # $variables->{'color'} = 'red';
568 $variables->{'weight'} = 3.0;
b0b4421a 569 }
30ddc24c 570
508fd430 571 # EXPERIMENTAL: make edge width reflect no. of witnesses
572 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
573 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
574
f13b5582 575 my $varopts = _dot_attr_string( $variables );
576 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
577 $edge->[0], $edge->[1], $varopts );
3bdec618 578 } elsif( $used{$edge->[0]} ) {
579 $subend{$edge->[0]} = 1;
580 } elsif( $used{$edge->[1]} ) {
581 $substart{$edge->[1]} = 1;
b22576c6 582 }
df6d9812 583 }
3bdec618 584 # Add substitute start and end edges if necessary
585 foreach my $node ( keys %substart ) {
027d819c 586 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 587 my $variables = { %edge_attrs, 'label' => $witstr };
588 my $varopts = _dot_attr_string( $variables );
589 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 590 }
591 foreach my $node ( keys %subend ) {
027d819c 592 my $witstr = $self->_path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 593 my $variables = { %edge_attrs, 'label' => $witstr };
594 my $varopts = _dot_attr_string( $variables );
595 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 596 }
30ddc24c 597
df6d9812 598 $dot .= "}\n";
599 return $dot;
600}
601
f13b5582 602sub _dot_attr_string {
603 my( $hash ) = @_;
604 my @attrs;
605 foreach my $k ( sort keys %$hash ) {
606 my $v = $hash->{$k};
607 push( @attrs, $k.'="'.$v.'"' );
608 }
609 return( '[ ' . join( ', ', @attrs ) . ' ]' );
610}
611
30ddc24c 612sub _add_edge_weights {
613 my $self = shift;
614 # Walk the graph from START to END, choosing the successor node with
615 # the largest number of witness paths each time.
616 my $weighted = {};
617 my $curr = $self->start->id;
008fc8a6 618 my $ranked = $self->end->has_rank;
30ddc24c 619 while( $curr ne $self->end->id ) {
008fc8a6 620 my $rank = $ranked ? $self->reading( $curr )->rank : 0;
30ddc24c 621 my @succ = sort { $self->path_witnesses( $curr, $a )
622 <=> $self->path_witnesses( $curr, $b ) }
623 $self->sequence->successors( $curr );
624 my $next = pop @succ;
008fc8a6 625 my $nextrank = $ranked ? $self->reading( $next )->rank : 0;
30ddc24c 626 # Try to avoid lacunae in the weighted path.
008fc8a6 627 while( @succ &&
628 ( $self->reading( $next )->is_lacuna ||
629 $nextrank - $rank > 1 ) ){
30ddc24c 630 $next = pop @succ;
631 }
632 $weighted->{$curr} = $next;
633 $curr = $next;
634 }
635 return $weighted;
636}
637
027d819c 638=head2 path_witnesses( $edge )
639
640Returns the list of sigils whose witnesses are associated with the given edge.
641The edge can be passed as either an array or an arrayref of ( $source, $target ).
642
643=cut
644
3a2ebbf4 645sub path_witnesses {
646 my( $self, @edge ) = @_;
647 # If edge is an arrayref, cope.
648 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
649 my $e = shift @edge;
650 @edge = @$e;
651 }
652 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 653 return @wits;
3a2ebbf4 654}
655
027d819c 656sub _path_display_label {
508fd430 657 my $self = shift;
658 my @wits = sort @_;
8f9cab7b 659 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
660 if( scalar @wits > $maj ) {
f13b5582 661 # TODO break out a.c. wits
8f9cab7b 662 return 'majority';
663 } else {
664 return join( ', ', @wits );
665 }
666}
667
668
4e5a7b2c 669=head2 as_graphml
8e1394aa 670
4e5a7b2c 671Returns a GraphML representation of the collation. The GraphML will contain
672two graphs. The first expresses the attributes of the readings and the witness
673paths that link them; the second expresses the relationships that link the
674readings. This is the native transfer format for a tradition.
8e1394aa 675
56eefa04 676=begin testing
677
678use Text::Tradition;
679
680my $READINGS = 311;
681my $PATHS = 361;
682
683my $datafile = 't/data/florilegium_tei_ps.xml';
684my $tradition = Text::Tradition->new( 'input' => 'TEI',
685 'name' => 'test0',
686 'file' => $datafile,
687 'linear' => 1 );
688
689ok( $tradition, "Got a tradition object" );
690is( scalar $tradition->witnesses, 13, "Found all witnesses" );
691ok( $tradition->collation, "Tradition has a collation" );
692
693my $c = $tradition->collation;
694is( scalar $c->readings, $READINGS, "Collation has all readings" );
695is( scalar $c->paths, $PATHS, "Collation has all paths" );
696is( scalar $c->relationships, 0, "Collation has all relationships" );
697
698# Add a few relationships
699$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
700$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
701$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
702
703# Now write it to GraphML and parse it again.
704
705my $graphml = $c->as_graphml;
706my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
707is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
708is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
709is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
710
711=end testing
712
8e1394aa 713=cut
714
715sub as_graphml {
3a2ebbf4 716 my( $self ) = @_;
8e1394aa 717
718 # Some namespaces
719 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
720 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
721 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 722 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 723
724 # Create the document and root node
725 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
726 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
727 $graphml->setDocumentElement( $root );
728 $root->setNamespace( $xsi_ns, 'xsi', 0 );
729 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
730
e309421a 731 # Add the data keys for the graph
732 my %graph_data_keys;
733 my $gdi = 0;
1d310495 734 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 735 foreach my $datum ( @graph_attributes ) {
736 $graph_data_keys{$datum} = 'dg'.$gdi++;
737 my $key = $root->addNewChild( $graphml_ns, 'key' );
738 $key->setAttribute( 'attr.name', $datum );
739 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
740 $key->setAttribute( 'for', 'graph' );
741 $key->setAttribute( 'id', $graph_data_keys{$datum} );
742 }
f6066bac 743
8e1394aa 744 # Add the data keys for nodes
ef9d481f 745 my %node_data_keys;
746 my $ndi = 0;
3a2ebbf4 747 my %node_data = (
748 id => 'string',
255875b8 749 text => 'string',
3a2ebbf4 750 rank => 'string',
751 is_start => 'boolean',
752 is_end => 'boolean',
753 is_lacuna => 'boolean',
754 );
755 foreach my $datum ( keys %node_data ) {
910a0a6d 756 $node_data_keys{$datum} = 'dn'.$ndi++;
757 my $key = $root->addNewChild( $graphml_ns, 'key' );
758 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 759 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 760 $key->setAttribute( 'for', 'node' );
761 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 762 }
763
df6d9812 764 # Add the data keys for edges, i.e. witnesses
ef9d481f 765 my $edi = 0;
766 my %edge_data_keys;
3a2ebbf4 767 my %edge_data = (
f523c7a8 768 class => 'string', # Class, deprecated soon
3a2ebbf4 769 witness => 'string', # ID/label for a path
770 relationship => 'string', # ID/label for a relationship
771 extra => 'boolean', # Path key
c84275ff 772 scope => 'string', # Relationship key
3a2ebbf4 773 non_correctable => 'boolean', # Relationship key
774 non_independent => 'boolean', # Relationship key
775 );
776 foreach my $datum ( keys %edge_data ) {
777 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 778 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 779 $key->setAttribute( 'attr.name', $datum );
780 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 781 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 782 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 783 }
3a2ebbf4 784
22222af9 785 # Add the collation graph itself
2c669bca 786 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
787 $sgraph->setAttribute( 'edgedefault', 'directed' );
788 $sgraph->setAttribute( 'id', $self->tradition->name );
789 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
790 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
791 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
792 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
793 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 794
e309421a 795 # Collation attribute data
796 foreach my $datum ( @graph_attributes ) {
2c669bca 797 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
798 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 799 }
8e1394aa 800
801 my $node_ctr = 0;
802 my %node_hash;
22222af9 803 # Add our readings to the graph
3a2ebbf4 804 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 805 # Add to the main graph
806 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 807 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 808 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 809 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 810 foreach my $d ( keys %node_data ) {
811 my $nval = $n->$d;
812 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
813 if defined $nval;
814 }
b15511bf 815 }
816
2c669bca 817 # Add the path edges to the sequence graph
df6d9812 818 my $edge_ctr = 0;
3a2ebbf4 819 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
820 # We add an edge in the graphml for every witness in $e.
508fd430 821 foreach my $wit ( sort $self->path_witnesses( $e ) ) {
3a2ebbf4 822 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
823 $node_hash{ $e->[0] },
824 $node_hash{ $e->[1] } );
2c669bca 825 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 826 $edge_el->setAttribute( 'source', $from );
827 $edge_el->setAttribute( 'target', $to );
828 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 829
830 # It's a witness path, so add the witness
831 my $base = $wit;
832 my $key = $edge_data_keys{'witness'};
833 # Is this an ante-corr witness?
834 my $aclabel = $self->ac_label;
835 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
836 # Keep the base witness
837 $base = $1;
838 # ...and record that this is an 'extra' reading path
839 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
840 }
841 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 842 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 843 }
844 }
845
22222af9 846 # Add the relationship graph to the XML
027d819c 847 $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash,
2626f709 848 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 849
94c00c71 850 # Save and return the thing
851 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 852 return $result;
df6d9812 853}
854
b15511bf 855sub _add_graphml_data {
856 my( $el, $key, $value ) = @_;
b15511bf 857 return unless defined $value;
c9bf3dbf 858 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 859 $data_el->setAttribute( 'key', $key );
860 $data_el->appendText( $value );
8e1394aa 861}
862
4e5a7b2c 863=head2 as_csv
910a0a6d 864
865Returns a CSV alignment table representation of the collation graph, one
2c669bca 866row per witness (or witness uncorrected.)
910a0a6d 867
868=cut
869
870sub as_csv {
3a2ebbf4 871 my( $self ) = @_;
910a0a6d 872 my $table = $self->make_alignment_table;
873 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
874 my @result;
2c669bca 875 # Make the header row
876 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
877 push( @result, decode_utf8( $csv->string ) );
878 # Make the rest of the rows
879 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 880 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
2c669bca 881 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
882 $csv->combine( @row );
910a0a6d 883 push( @result, decode_utf8( $csv->string ) );
884 }
3a2ebbf4 885 return join( "\n", @result );
910a0a6d 886}
887
4e5a7b2c 888=head2 make_alignment_table( $use_refs, $include_witnesses )
2c669bca 889
566f4595 890Return a reference to an alignment table, in a slightly enhanced CollateX
891format which looks like this:
892
893 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 894 tokens => [ { t => "TEXT" }, ... ] },
566f4595 895 { witness => "SIG2",
4e5a7b2c 896 tokens => [ { t => "TEXT" }, ... ] },
566f4595 897 ... ],
898 length => TEXTLEN };
899
900If $use_refs is set to 1, the reading object is returned in the table
901instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 902
903If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 904keys have a true hash value will be included.
2c669bca 905
906=cut
9f3ba6f7 907
910a0a6d 908sub make_alignment_table {
08e0fb85 909 my( $self, $noderefs, $include ) = @_;
0ecb975c 910 # Make sure we can do this
911 throw( "Need a linear graph in order to make an alignment table" )
912 unless $self->linear;
913 $self->calculate_ranks unless $self->end->has_rank;
914
2c669bca 915 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 916 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 917 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2c669bca 918 if( $include ) {
566f4595 919 next unless $include->{$wit->sigil};
2c669bca 920 }
eca16057 921 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 922 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
923 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
2c669bca 924 push( @{$table->{'alignment'}},
925 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 926 if( $wit->is_layered ) {
927 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 928 $wit->sigil.$self->ac_label );
1f7aa795 929 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
2c669bca 930 push( @{$table->{'alignment'}},
931 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 932 }
933 }
2c669bca 934 return $table;
910a0a6d 935}
936
937sub _make_witness_row {
0e476982 938 my( $path, $positions, $noderefs ) = @_;
910a0a6d 939 my %char_hash;
940 map { $char_hash{$_} = undef } @$positions;
2c669bca 941 my $debug = 0;
910a0a6d 942 foreach my $rdg ( @$path ) {
eca16057 943 my $rtext = $rdg->text;
944 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 945 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 946 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
2c669bca 947 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
948 : { 't' => $rtext };
910a0a6d 949 }
950 my @row = map { $char_hash{$_} } @$positions;
eca16057 951 # Fill in lacuna markers for undef spots in the row
952 my $last_el = shift @row;
953 my @filled_row = ( $last_el );
954 foreach my $el ( @row ) {
0e476982 955 # If we are using node reference, make the lacuna node appear many times
956 # in the table. If not, use the lacuna tag.
957 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
566f4595 958 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
eca16057 959 }
960 push( @filled_row, $el );
961 $last_el = $el;
962 }
963 return @filled_row;
910a0a6d 964}
965
0e476982 966# Tiny utility function to say if a table element is a lacuna
967sub _el_is_lacuna {
968 my $el = shift;
2c669bca 969 return 1 if $el->{'t'} eq '#LACUNA#';
970 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
971 && $el->{'t'}->is_lacuna;
0e476982 972 return 0;
973}
974
910a0a6d 975# Helper to turn the witnesses along columns rather than rows. Assumes
976# equal-sized rows.
977sub _turn_table {
978 my( $table ) = @_;
979 my $result = [];
980 return $result unless scalar @$table;
981 my $nrows = scalar @{$table->[0]};
982 foreach my $idx ( 0 .. $nrows - 1 ) {
983 foreach my $wit ( 0 .. $#{$table} ) {
984 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
985 }
986 }
987 return $result;
988}
989
4e5a7b2c 990=head1 NAVIGATION METHODS
910a0a6d 991
4e5a7b2c 992=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 993
994Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 995with $last, for the witness given in $sigil. If a $backup sigil is
996specified (e.g. when walking a layered witness), it will be used wherever
997no $sigil path exists. If there is a base text reading, that will be
998used wherever no path exists for $sigil or $backup.
e2902068 999
1000=cut
1001
910a0a6d 1002# TODO Think about returning some lazy-eval iterator.
b0b4421a 1003# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 1004
e2902068 1005sub reading_sequence {
861c3e27 1006 my( $self, $start, $end, $witness ) = @_;
e2902068 1007
930ff666 1008 $witness = $self->baselabel unless $witness;
e2902068 1009 my @readings = ( $start );
1010 my %seen;
1011 my $n = $start;
3a2ebbf4 1012 while( $n && $n->id ne $end->id ) {
1013 if( exists( $seen{$n->id} ) ) {
63778331 1014 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 1015 }
3a2ebbf4 1016 $seen{$n->id} = 1;
910a0a6d 1017
861c3e27 1018 my $next = $self->next_reading( $n, $witness );
44771cf2 1019 unless( $next ) {
63778331 1020 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1021 }
910a0a6d 1022 push( @readings, $next );
1023 $n = $next;
e2902068 1024 }
1025 # Check that the last reading is our end reading.
1026 my $last = $readings[$#readings];
63778331 1027 throw( "Last reading found from " . $start->text .
1028 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1029 unless $last->id eq $end->id;
e2902068 1030
1031 return @readings;
1032}
1033
4e5a7b2c 1034=head2 next_reading( $reading, $sigil );
8e1394aa 1035
4a8828f0 1036Returns the reading that follows the given reading along the given witness
930ff666 1037path.
8e1394aa 1038
1039=cut
1040
4a8828f0 1041sub next_reading {
e2902068 1042 # Return the successor via the corresponding path.
8e1394aa 1043 my $self = shift;
3a2ebbf4 1044 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1045 return undef unless $answer;
3a2ebbf4 1046 return $self->reading( $answer );
8e1394aa 1047}
1048
4e5a7b2c 1049=head2 prior_reading( $reading, $sigil )
8e1394aa 1050
4a8828f0 1051Returns the reading that precedes the given reading along the given witness
930ff666 1052path.
8e1394aa 1053
1054=cut
1055
4a8828f0 1056sub prior_reading {
e2902068 1057 # Return the predecessor via the corresponding path.
8e1394aa 1058 my $self = shift;
3a2ebbf4 1059 my $answer = $self->_find_linked_reading( 'prior', @_ );
1060 return $self->reading( $answer );
8e1394aa 1061}
1062
4a8828f0 1063sub _find_linked_reading {
861c3e27 1064 my( $self, $direction, $node, $path ) = @_;
1065
1066 # Get a backup if we are dealing with a layered witness
1067 my $alt_path;
1068 my $aclabel = $self->ac_label;
1069 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1070 $alt_path = $1;
1071 }
1072
e2902068 1073 my @linked_paths = $direction eq 'next'
3a2ebbf4 1074 ? $self->sequence->edges_from( $node )
1075 : $self->sequence->edges_to( $node );
e2902068 1076 return undef unless scalar( @linked_paths );
8e1394aa 1077
e2902068 1078 # We have to find the linked path that contains all of the
1079 # witnesses supplied in $path.
1080 my( @path_wits, @alt_path_wits );
4e5a7b2c 1081 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1082 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1083 my $base_le;
1084 my $alt_le;
1085 foreach my $le ( @linked_paths ) {
3a2ebbf4 1086 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1087 $base_le = $le;
910a0a6d 1088 }
508fd430 1089 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1090 if( _is_within( \@path_wits, \@le_wits ) ) {
1091 # This is the right path.
1092 return $direction eq 'next' ? $le->[1] : $le->[0];
1093 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1094 $alt_le = $le;
1095 }
8e1394aa 1096 }
e2902068 1097 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1098 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1099 if $alt_le;
e2902068 1100
1101 # Got this far? Return the base path if it exists.
3a2ebbf4 1102 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1103 if $base_le;
e2902068 1104
1105 # Got this far? We have no appropriate path.
2c669bca 1106 warn "Could not find $direction node from " . $node->id
910a0a6d 1107 . " along path $path";
8e1394aa 1108 return undef;
1109}
1110
4a8828f0 1111# Some set logic.
1112sub _is_within {
1113 my( $set1, $set2 ) = @_;
7854e12e 1114 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1115 foreach my $el ( @$set1 ) {
910a0a6d 1116 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1117 }
1118 return $ret;
1119}
1120
4e5a7b2c 1121# Return the string that joins together a list of witnesses for
1122# display on a single path.
1123sub _witnesses_of_label {
1124 my( $self, $label ) = @_;
1125 my $regex = $self->wit_list_separator;
1126 my @answer = split( /\Q$regex\E/, $label );
1127 return @answer;
b0b4421a 1128}
1129
d4b75f44 1130=head2 common_readings
1131
1132Returns the list of common readings in the graph (i.e. those readings that are
1133shared by all non-lacunose witnesses.)
1134
1135=cut
1136
1137sub common_readings {
1138 my $self = shift;
1139 my @common = grep { $_->is_common } $self->readings;
1140 return @common;
1141}
1142
b0b4421a 1143=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1144
1145Returns the text of a witness (plus its backup, if we are using a layer)
1146as stored in the collation. The text is returned as a string, where the
1147individual readings are joined with spaces and the meta-readings (e.g.
1148lacunae) are omitted. Optional specification of $start and $end allows
1149the generation of a subset of the witness text.
4e5a7b2c 1150
b0b4421a 1151=cut
1152
1153sub path_text {
861c3e27 1154 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1155 $start = $self->start unless $start;
1156 $end = $self->end unless $end;
861c3e27 1157 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
b0b4421a 1158 return join( ' ', map { $_->text } @path );
1159}
4e5a7b2c 1160
1161=head1 INITIALIZATION METHODS
1162
1163These are mostly for use by parsers.
1164
1165=head2 make_witness_path( $witness )
1166
1167Link the array of readings contained in $witness->path (and in
1168$witness->uncorrected_path if it exists) into collation paths.
1169Clear out the arrays when finished.
de51424a 1170
4e5a7b2c 1171=head2 make_witness_paths
1172
1173Call make_witness_path for all witnesses in the tradition.
1174
1175=cut
930ff666 1176
7e450e44 1177# For use when a collation is constructed from a base text and an apparatus.
1178# We have the sequences of readings and just need to add path edges.
1f7aa795 1179# When we are done, clear out the witness path attributes, as they are no
1180# longer needed.
1181# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1182
6a222840 1183sub make_witness_paths {
1184 my( $self ) = @_;
910a0a6d 1185 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1186 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1187 $self->make_witness_path( $wit );
7854e12e 1188 }
7854e12e 1189}
1190
6a222840 1191sub make_witness_path {
7854e12e 1192 my( $self, $wit ) = @_;
1193 my @chain = @{$wit->path};
15d2d3df 1194 my $sig = $wit->sigil;
7854e12e 1195 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1196 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1197 }
1f7aa795 1198 if( $wit->is_layered ) {
d9e873d0 1199 @chain = @{$wit->uncorrected_path};
1200 foreach my $idx( 0 .. $#chain-1 ) {
1201 my $source = $chain[$idx];
1202 my $target = $chain[$idx+1];
1203 $self->add_path( $source, $target, $sig.$self->ac_label )
1204 unless $self->has_path( $source, $target, $sig );
1205 }
15d2d3df 1206 }
1f7aa795 1207 $wit->clear_path;
1208 $wit->clear_uncorrected_path;
e2902068 1209}
1210
4e5a7b2c 1211=head2 calculate_ranks
1212
1213Calculate the reading ranks (that is, their aligned positions relative
1214to each other) for the graph. This can only be called on linear collations.
1215
1216=cut
1217
910a0a6d 1218sub calculate_ranks {
1219 my $self = shift;
1220 # Walk a version of the graph where every node linked by a relationship
1221 # edge is fundamentally the same node, and do a topological ranking on
1222 # the nodes in this graph.
c9bf3dbf 1223 my $topo_graph = Graph->new();
910a0a6d 1224 my %rel_containers;
1225 my $rel_ctr = 0;
1226 # Add the nodes
1227 foreach my $r ( $self->readings ) {
3a2ebbf4 1228 next if exists $rel_containers{$r->id};
910a0a6d 1229 my @rels = $r->related_readings( 'colocated' );
1230 if( @rels ) {
1231 # Make a relationship container.
1232 push( @rels, $r );
c9bf3dbf 1233 my $rn = 'rel_container_' . $rel_ctr++;
1234 $topo_graph->add_vertex( $rn );
910a0a6d 1235 foreach( @rels ) {
3a2ebbf4 1236 $rel_containers{$_->id} = $rn;
910a0a6d 1237 }
1238 } else {
1239 # Add a new node to mirror the old node.
3a2ebbf4 1240 $rel_containers{$r->id} = $r->id;
1241 $topo_graph->add_vertex( $r->id );
910a0a6d 1242 }
4a8828f0 1243 }
3a1f2523 1244
3a2ebbf4 1245 # Add the edges.
910a0a6d 1246 foreach my $r ( $self->readings ) {
3a2ebbf4 1247 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1248 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1249 $rel_containers{$n} );
4e5a7b2c 1250 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1251 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1252 }
1253 }
1254
1255 # Now do the rankings, starting with the start node.
3a2ebbf4 1256 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1257 my $node_ranks = { $topo_start => 0 };
910a0a6d 1258 my @curr_origin = ( $topo_start );
1259 # A little iterative function.
1260 while( @curr_origin ) {
c9bf3dbf 1261 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1262 }
1263 # Transfer our rankings from the topological graph to the real one.
1264 foreach my $r ( $self->readings ) {
3a2ebbf4 1265 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1266 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1267 } else {
63778331 1268 # Die. Find the last rank we calculated.
1269 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1270 <=> $node_ranks->{$rel_containers{$b->id}} }
1271 $self->readings;
1272 my $last = pop @all_defined;
1273 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1274 }
de51424a 1275 }
8e1394aa 1276}
3a1f2523 1277
910a0a6d 1278sub _assign_rank {
c9bf3dbf 1279 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1280 # Look at each of the children of @current_nodes. If all the child's
1281 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1282 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1283 # parent gets a rank.
910a0a6d 1284 my @next_nodes;
1285 foreach my $c ( @current_nodes ) {
c9bf3dbf 1286 warn "Current reading $c has no rank!"
1287 unless exists $node_ranks->{$c};
1288 # print STDERR "Looking at child of node $c, rank "
1289 # . $node_ranks->{$c} . "\n";
1290 foreach my $child ( $graph->successors( $c ) ) {
1291 next if exists $node_ranks->{$child};
910a0a6d 1292 my $highest_rank = -1;
1293 my $skip = 0;
c9bf3dbf 1294 foreach my $parent ( $graph->predecessors( $child ) ) {
1295 if( exists $node_ranks->{$parent} ) {
1296 $highest_rank = $node_ranks->{$parent}
1297 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1298 } else {
1299 $skip = 1;
1300 last;
1301 }
1302 }
1303 next if $skip;
c9bf3dbf 1304 my $c_rank = $highest_rank + 1;
1305 # print STDERR "Assigning rank $c_rank to node $child \n";
1306 $node_ranks->{$child} = $c_rank;
910a0a6d 1307 push( @next_nodes, $child );
1308 }
1309 }
1310 return @next_nodes;
4cdd82f1 1311}
910a0a6d 1312
4e5a7b2c 1313=head2 flatten_ranks
1314
1315A convenience method for parsing collation data. Searches the graph for readings
1316with the same text at the same rank, and merges any that are found.
1317
1318=cut
1319
0e476982 1320sub flatten_ranks {
1321 my $self = shift;
1322 my %unique_rank_rdg;
1323 foreach my $rdg ( $self->readings ) {
1324 next unless $rdg->has_rank;
1325 my $key = $rdg->rank . "||" . $rdg->text;
1326 if( exists $unique_rank_rdg{$key} ) {
1327 # Combine!
56eefa04 1328 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1329 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1330 # TODO see if this now makes a common point.
0e476982 1331 } else {
1332 $unique_rank_rdg{$key} = $rdg;
1333 }
1334 }
1335}
1336
4633f9e4 1337=head2 remove_collations
1338
1339Another convenience method for parsing. Removes all 'collation' relationships
1340that were defined in order to get the reading ranks to be correct.
1341
1342=begin testing
1343
1344use Text::Tradition;
1345
1346my $cxfile = 't/data/Collatex-16.xml';
1347my $t = Text::Tradition->new(
1348 'name' => 'inline',
1349 'input' => 'CollateX',
1350 'file' => $cxfile,
1351 );
1352my $c = $t->collation;
1353
1354isnt( $c->reading('n23')->rank, $c->reading('n9')->rank, "Rank skew exists" );
1355$c->add_relationship( 'n23', 'n9', { 'type' => 'collated', 'scope' => 'local' } );
1356is( scalar $c->relationships, 4, "Found all expected relationships" );
1357$c->remove_collations;
1358is( scalar $c->relationships, 3, "Collated relationships now gone" );
1359is( $c->reading('n23')->rank, $c->reading('n9')->rank, "Aligned ranks were preserved" );
1360
1361=end testing
1362
1363=cut
1364
1365sub remove_collations {
1366 my $self = shift;
1367 foreach my $reledge ( $self->relationships ) {
1368 my $relobj = $self->relations->get_relationship( $reledge );
1369 if( $relobj && $relobj->type eq 'collated' ) {
1370 $self->relations->delete_relationship( $reledge );
1371 }
1372 }
1373}
1374
1375
d4b75f44 1376=head2 calculate_common_readings
1377
1378Goes through the graph identifying the readings that appear in every witness
1379(apart from those with lacunae at that spot.) Marks them as common and returns
1380the list.
1381
1382=begin testing
1383
1384use Text::Tradition;
1385
1386my $cxfile = 't/data/Collatex-16.xml';
1387my $t = Text::Tradition->new(
1388 'name' => 'inline',
1389 'input' => 'CollateX',
1390 'file' => $cxfile,
1391 );
1392my $c = $t->collation;
1393
1394my @common = $c->calculate_common_readings();
1395is( scalar @common, 8, "Found correct number of common readings" );
1396my @marked = sort $c->common_readings();
1397is( scalar @common, 8, "All common readings got marked as such" );
1398my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1399is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1400
1401=end testing
1402
1403=cut
1404
1405sub calculate_common_readings {
1406 my $self = shift;
1407 my @common;
1408 my $table = $self->make_alignment_table( 1 );
1409 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1410 my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
1411 my %hash;
1412 foreach my $r ( @row ) {
1413 if( $r ) {
1414 $hash{$r->id} = $r unless $r->is_meta;
1415 } else {
1416 $hash{'UNDEF'} = $r;
1417 }
1418 }
1419 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1420 my( $r ) = values %hash;
1421 $r->is_common( 1 );
1422 push( @common, $r );
1423 }
1424 }
1425 return @common;
1426}
1427
861c3e27 1428=head2 text_from_paths
1429
1430Calculate the text array for all witnesses from the path, for later consistency
1431checking. Only to be used if there is no non-graph-based way to know the
1432original texts.
1433
1434=cut
1435
1436sub text_from_paths {
1437 my $self = shift;
1438 foreach my $wit ( $self->tradition->witnesses ) {
1439 my @text = split( /\s+/,
1440 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1441 $wit->text( \@text );
1442 if( $wit->is_layered ) {
1443 my @uctext = split( /\s+/,
1444 $self->reading_sequence( $self->start, $self->end,
1445 $wit->sigil.$self->ac_label ) );
1446 $wit->text( \@uctext );
1447 }
1448 }
1449}
0e476982 1450
4e5a7b2c 1451=head1 UTILITY FUNCTIONS
1452
1453=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1454
4e5a7b2c 1455Find the last reading that occurs in sequence before both the given readings.
1456
1457=head2 common_successor( $reading_a, $reading_b )
1458
1459Find the first reading that occurs in sequence after both the given readings.
1460
22222af9 1461=begin testing
1462
1463use Text::Tradition;
1464
1465my $cxfile = 't/data/Collatex-16.xml';
1466my $t = Text::Tradition->new(
1467 'name' => 'inline',
1468 'input' => 'CollateX',
1469 'file' => $cxfile,
1470 );
1471my $c = $t->collation;
1472
4e5a7b2c 1473is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1474 'n20', "Found correct common predecessor" );
4e5a7b2c 1475is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1476 '#END#', "Found correct common successor" );
1477
4e5a7b2c 1478is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1479 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1480is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1481 '#END#', "Found correct common successor for readings on same path" );
1482
1483=end testing
1484
1485=cut
1486
1487## Return the closest reading that is a predecessor of both the given readings.
1488sub common_predecessor {
1489 my $self = shift;
4e5a7b2c 1490 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1491 return $self->_common_in_path( $r1, $r2, 'predecessors' );
22222af9 1492}
1493
1494sub common_successor {
1495 my $self = shift;
4e5a7b2c 1496 my( $r1, $r2 ) = $self->_objectify_args( @_ );
027d819c 1497 return $self->_common_in_path( $r1, $r2, 'successors' );
22222af9 1498}
1499
027d819c 1500sub _common_in_path {
22222af9 1501 my( $self, $r1, $r2, $dir ) = @_;
1502 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1503 $iter = $self->end->rank - $iter if $dir eq 'successors';
1504 my @candidates;
1505 my @last_checked = ( $r1, $r2 );
1506 my %all_seen;
1507 while( !@candidates ) {
1508 my @new_lc;
1509 foreach my $lc ( @last_checked ) {
1510 foreach my $p ( $lc->$dir ) {
1511 if( $all_seen{$p->id} ) {
1512 push( @candidates, $p );
1513 } else {
1514 $all_seen{$p->id} = 1;
1515 push( @new_lc, $p );
1516 }
1517 }
1518 }
1519 @last_checked = @new_lc;
1520 }
1521 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1522 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1523}
1524
63778331 1525sub throw {
1526 Text::Tradition::Error->throw(
1527 'ident' => 'Collation error',
1528 'message' => $_[0],
1529 );
1530}
1531
dd3b58b0 1532no Moose;
1533__PACKAGE__->meta->make_immutable;
e867486f 1534
027d819c 1535=head1 LICENSE
e867486f 1536
027d819c 1537This package is free software and is provided "as is" without express
1538or implied warranty. You can redistribute it and/or modify it under
1539the same terms as Perl itself.
e867486f 1540
027d819c 1541=head1 AUTHOR
e867486f 1542
027d819c 1543Tara L Andrews E<lt>aurum@cpan.orgE<gt>