allow specification of options for dot/svg;
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
c9bf3dbf 5use Graph;
8e1394aa 6use IPC::Run qw( run binary );
910a0a6d 7use Text::CSV_XS;
b15511bf 8use Text::Tradition::Collation::Reading;
22222af9 9use Text::Tradition::Collation::RelationshipStore;
63778331 10use Text::Tradition::Error;
df6d9812 11use XML::LibXML;
a344ffcf 12use XML::LibXML::XPathContext;
dd3b58b0 13use Moose;
14
3a2ebbf4 15has 'sequence' => (
d047cd52 16 is => 'ro',
3a2ebbf4 17 isa => 'Graph',
18 default => sub { Graph->new() },
d047cd52 19 handles => {
3a2ebbf4 20 paths => 'edges',
d047cd52 21 },
d047cd52 22 );
3a2ebbf4 23
24has 'relations' => (
25 is => 'ro',
22222af9 26 isa => 'Text::Tradition::Collation::RelationshipStore',
27 handles => {
28 relationships => 'relationships',
29 related_readings => 'related_readings',
30 },
31 writer => '_set_relations',
3a2ebbf4 32 );
dd3b58b0 33
3a2ebbf4 34has 'tradition' => (
35 is => 'ro',
d047cd52 36 isa => 'Text::Tradition',
8d9a1cd8 37 weak_ref => 1,
d047cd52 38 );
dd3b58b0 39
3a2ebbf4 40has 'readings' => (
41 isa => 'HashRef[Text::Tradition::Collation::Reading]',
42 traits => ['Hash'],
43 handles => {
44 reading => 'get',
45 _add_reading => 'set',
46 del_reading => 'delete',
47 has_reading => 'exists',
48 readings => 'values',
49 },
50 default => sub { {} },
51 );
910a0a6d 52
4a8828f0 53has 'wit_list_separator' => (
7854e12e 54 is => 'rw',
55 isa => 'Str',
56 default => ', ',
57 );
58
59has 'baselabel' => (
60 is => 'rw',
61 isa => 'Str',
62 default => 'base text',
63 );
4a8828f0 64
15d2d3df 65has 'linear' => (
66 is => 'rw',
67 isa => 'Bool',
68 default => 1,
69 );
c84275ff 70
ef9d481f 71has 'ac_label' => (
72 is => 'rw',
73 isa => 'Str',
74 default => ' (a.c.)',
75 );
3a2ebbf4 76
77has 'start' => (
78 is => 'ro',
79 isa => 'Text::Tradition::Collation::Reading',
80 writer => '_set_start',
81 weak_ref => 1,
82 );
83
84has 'end' => (
85 is => 'ro',
86 isa => 'Text::Tradition::Collation::Reading',
87 writer => '_set_end',
88 weak_ref => 1,
89 );
1f563ac3 90
4e5a7b2c 91=head1 NAME
92
93Text::Tradition::Collation - a software model for a text collation
94
95=head1 SYNOPSIS
96
97 use Text::Tradition;
98 my $t = Text::Tradition->new(
99 'name' => 'this is a text',
100 'input' => 'TEI',
101 'file' => '/path/to/tei_parallel_seg_file.xml' );
102
103 my $c = $t->collation;
104 my @readings = $c->readings;
105 my @paths = $c->paths;
106 my @relationships = $c->relationships;
107
108 my $svg_variant_graph = $t->collation->as_svg();
109
110=head1 DESCRIPTION
111
112Text::Tradition is a library for representation and analysis of collated
113texts, particularly medieval ones. The Collation is the central feature of
114a Tradition, where the text, its sequence of readings, and its relationships
115between readings are actually kept.
116
117=head1 CONSTRUCTOR
118
119=head2 new
120
121The constructor. Takes a hash or hashref of the following arguments:
122
123=over
124
125=item * tradition - The Text::Tradition object to which the collation
126belongs. Required.
127
128=item * linear - Whether the collation should be linear; that is, whether
129transposed readings should be treated as two linked readings rather than one,
130and therefore whether the collation graph is acyclic. Defaults to true.
131
4e5a7b2c 132=item * baselabel - The default label for the path taken by a base text
133(if any). Defaults to 'base text'.
134
135=item * wit_list_separator - The string to join a list of witnesses for
136purposes of making labels in display graphs. Defaults to ', '.
137
138=item * ac_label - The extra label to tack onto a witness sigil when
139representing another layer of path for the given witness - that is, when
140a text has more than one possible reading due to scribal corrections or
141the like. Defaults to ' (a.c.)'.
142
143=back
144
145=head1 ACCESSORS
146
147=head2 tradition
148
149=head2 linear
150
4e5a7b2c 151=head2 wit_list_separator
152
153=head2 baselabel
154
155=head2 ac_label
156
157Simple accessors for collation attributes.
158
159=head2 start
160
161The meta-reading at the start of every witness path.
162
163=head2 end
164
165The meta-reading at the end of every witness path.
166
167=head2 readings
168
169Returns all Reading objects in the graph.
170
171=head2 reading( $id )
172
173Returns the Reading object corresponding to the given ID.
174
175=head2 add_reading( $reading_args )
176
177Adds a new reading object to the collation.
178See L<Text::Tradition::Collation::Reading> for the available arguments.
179
180=head2 del_reading( $object_or_id )
181
182Removes the given reading from the collation, implicitly removing its
183paths and relationships.
184
185=head2 merge_readings( $main, $second )
186
187Merges the $second reading into the $main one.
188The arguments may be either readings or reading IDs.
189
190=head2 has_reading( $id )
191
192Predicate to see whether a given reading ID is in the graph.
193
194=head2 reading_witnesses( $object_or_id )
195
196Returns a list of sigils whose witnesses contain the reading.
197
198=head2 paths
199
200Returns all reading paths within the document - that is, all edges in the
201collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
202
203=head2 add_path( $source, $target, $sigil )
204
205Links the given readings in the collation in sequence, under the given witness
206sigil. The readings may be specified by object or ID.
207
208=head2 del_path( $source, $target, $sigil )
209
210Links the given readings in the collation in sequence, under the given witness
211sigil. The readings may be specified by object or ID.
212
213=head2 has_path( $source, $target );
214
215Returns true if the two readings are linked in sequence in any witness.
216The readings may be specified by object or ID.
217
218=head2 relationships
219
220Returns all Relationship objects in the collation.
221
222=head2 add_relationship( $reading, $other_reading, $options )
223
224Adds a new relationship of the type given in $options between the two readings,
225which may be specified by object or ID. Returns a value of ( $status, @vectors)
226where $status is true on success, and @vectors is a list of relationship edges
227that were ultimately added.
228See L<Text::Tradition::Collation::Relationship> for the available options.
229
230=cut
dd3b58b0 231
d047cd52 232sub BUILD {
3a2ebbf4 233 my $self = shift;
22222af9 234 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 235 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
236 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 237}
784877d9 238
3a2ebbf4 239### Reading construct/destruct functions
240
241sub add_reading {
242 my( $self, $reading ) = @_;
243 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
244 my %args = %$reading;
245 $reading = Text::Tradition::Collation::Reading->new(
246 'collation' => $self,
247 %args );
248 }
249 # First check to see if a reading with this ID exists.
250 if( $self->reading( $reading->id ) ) {
63778331 251 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 252 }
253 $self->_add_reading( $reading->id => $reading );
254 # Once the reading has been added, put it in both graphs.
255 $self->sequence->add_vertex( $reading->id );
22222af9 256 $self->relations->add_reading( $reading->id );
3a2ebbf4 257 return $reading;
eca16057 258};
259
3a2ebbf4 260around del_reading => sub {
261 my $orig = shift;
262 my $self = shift;
263 my $arg = shift;
264
265 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
266 $arg = $arg->id;
267 }
3a2ebbf4 268 # Remove the reading from the graphs.
269 $self->sequence->delete_vertex( $arg );
22222af9 270 $self->relations->delete_reading( $arg );
3a2ebbf4 271
272 # Carry on.
273 $self->$orig( $arg );
274};
7854e12e 275
3a2ebbf4 276# merge_readings( $main, $to_be_deleted );
7854e12e 277
3a2ebbf4 278sub merge_readings {
279 my $self = shift;
280
281 # We only need the IDs for adding paths to the graph, not the reading
282 # objects themselves.
49d4f2ac 283 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
3a2ebbf4 284
285 # The kept reading should inherit the paths and the relationships
286 # of the deleted reading.
287 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
288 my @vector = ( $kept );
289 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
290 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 291 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 292 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
293 $self->sequence->add_edge( @vector );
294 my $fwits = $self->sequence->get_edge_attributes( @vector );
295 @wits{keys %$fwits} = values %$fwits;
296 $self->sequence->set_edge_attributes( @vector, \%wits );
297 }
22222af9 298 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 299
300 # Do the deletion deed.
49d4f2ac 301 if( $combine_char ) {
302 my $kept_obj = $self->reading( $kept );
303 my $new_text = join( $combine_char, $kept_obj->text,
304 $self->reading( $deleted )->text );
305 $kept_obj->alter_text( $new_text );
306 }
3a2ebbf4 307 $self->del_reading( $deleted );
308}
7854e12e 309
3265b0ce 310
3a2ebbf4 311# Helper function for manipulating the graph.
312sub _stringify_args {
313 my( $self, $first, $second, $arg ) = @_;
314 $first = $first->id
315 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
316 $second = $second->id
317 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
318 return( $first, $second, $arg );
319}
df6d9812 320
4e5a7b2c 321# Helper function for manipulating the graph.
322sub _objectify_args {
323 my( $self, $first, $second, $arg ) = @_;
324 $first = $self->reading( $first )
325 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
326 $second = $self->reading( $second )
327 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
328 return( $first, $second, $arg );
329}
3a2ebbf4 330### Path logic
331
332sub add_path {
333 my $self = shift;
334
335 # We only need the IDs for adding paths to the graph, not the reading
336 # objects themselves.
337 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
338
339 # Connect the readings
340 $self->sequence->add_edge( $source, $target );
341 # Note the witness in question
342 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 343};
344
3a2ebbf4 345sub del_path {
346 my $self = shift;
49d4f2ac 347 my @args;
348 if( ref( $_[0] ) eq 'ARRAY' ) {
349 my $e = shift @_;
350 @args = ( @$e, @_ );
351 } else {
352 @args = @_;
353 }
3a2ebbf4 354
355 # We only need the IDs for adding paths to the graph, not the reading
356 # objects themselves.
49d4f2ac 357 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 358
359 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 360 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 361 }
362 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
363 $self->sequence->delete_edge( $source, $target );
364 }
784877d9 365}
366
3a2ebbf4 367
15d2d3df 368# Extra graph-alike utility
369sub has_path {
3a2ebbf4 370 my $self = shift;
371 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
372 return undef unless $self->sequence->has_edge( $source, $target );
373 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 374}
375
4e5a7b2c 376=head2 clear_witness( @sigil_list )
3a2ebbf4 377
4e5a7b2c 378Clear the given witnesses out of the collation entirely, removing references
379to them in paths, and removing readings that belong only to them. Should only
380be called via $tradition->del_witness.
3a2ebbf4 381
382=cut
383
4e5a7b2c 384sub clear_witness {
385 my( $self, @sigils ) = @_;
386
387 # Clear the witness(es) out of the paths
388 foreach my $e ( $self->paths ) {
389 foreach my $sig ( @sigils ) {
390 $self->del_path( $e, $sig );
391 }
392 }
393
394 # Clear out the newly unused readings
395 foreach my $r ( $self->readings ) {
396 unless( $self->reading_witnesses( $r ) ) {
397 $self->del_reading( $r );
398 }
399 }
400}
3a2ebbf4 401
402sub add_relationship {
403 my $self = shift;
22222af9 404 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 405 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 406 $self->reading( $source ), $target, $self->reading( $target ), $opts );
407 # Force a full rank recalculation every time. Yuck.
63778331 408 $self->calculate_ranks() if $self->end->has_rank;
409 return @vectors;
22222af9 410}
ef9d481f 411
22222af9 412=head2 reading_witnesses( $reading )
910a0a6d 413
22222af9 414Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 415
22222af9 416=cut
1d310495 417
1d310495 418sub reading_witnesses {
419 my( $self, $reading ) = @_;
420 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 421 # arbitrarily chosen "incoming". Thus, special-case the start node.
422 if( $reading eq $self->start ) {
423 return map { $_->sigil } $self->tradition->witnesses;
424 }
1d310495 425 my %all_witnesses;
426 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
427 my $wits = $self->sequence->get_edge_attributes( @$e );
428 @all_witnesses{ keys %$wits } = 1;
429 }
430 return keys %all_witnesses;
910a0a6d 431}
432
4e5a7b2c 433=head1 OUTPUT METHODS
8e1394aa 434
0ecb975c 435=head2 as_svg( \%options )
8e1394aa 436
0068967c 437Returns an SVG string that represents the graph, via as_dot and graphviz.
0ecb975c 438See as_dot for a list of options.
8e1394aa 439
440=cut
441
442sub as_svg {
0ecb975c 443 my( $self, $opts ) = @_;
3a2ebbf4 444
8e1394aa 445 my @cmd = qw/dot -Tsvg/;
446 my( $svg, $err );
910a0a6d 447 my $dotfile = File::Temp->new();
a344ffcf 448 ## USE FOR DEBUGGING
eca16057 449 # $dotfile->unlink_on_destroy(0);
910a0a6d 450 binmode $dotfile, ':utf8';
0ecb975c 451 print $dotfile $self->as_dot( $opts );
910a0a6d 452 push( @cmd, $dotfile->filename );
453 run( \@cmd, ">", binary(), \$svg );
a344ffcf 454 # HACK part 3 - remove silent node+edge
455 my $parser = XML::LibXML->new();
456 my $svgdom = $parser->parse_string( $svg );
457 my $xpc = XML::LibXML::XPathContext->new( $svgdom->documentElement );
458 $xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
459 my @hacknodes = $xpc->findnodes( '//svg:g[contains(child::svg:title, "#SILENT#")]' );
460 foreach my $h ( @hacknodes ) {
461 $h->parentNode->removeChild( $h );
462 }
463 return decode_utf8( $svgdom->toString() );
8e1394aa 464}
465
b22576c6 466
0ecb975c 467=head2 as_dot( \%options )
b22576c6 468
0ecb975c 469Returns a string that is the collation graph expressed in dot
470(i.e. GraphViz) format. Options include:
b22576c6 471
0ecb975c 472=over 4
b22576c6 473
0ecb975c 474=item * from
b22576c6 475
0ecb975c 476=item * to
df6d9812 477
0ecb975c 478=item * color_common
479
480=back
df6d9812 481
482=cut
483
484sub as_dot {
0ecb975c 485 my( $self, $opts ) = @_;
486 my $startrank = $opts->{'from'} if $opts;
487 my $endrank = $opts->{'to'} if $opts;
488 my $color_common = $opts->{'color_common'} if $opts;
b22576c6 489
490 # Check the arguments
491 if( $startrank ) {
492 return if $endrank && $startrank > $endrank;
493 return if $startrank > $self->end->rank;
494 }
495 if( defined $endrank ) {
496 return if $endrank < 0;
f1b3b33a 497 $endrank = undef if $endrank == $self->end->rank;
b22576c6 498 }
499
df6d9812 500 # TODO consider making some of these things configurable
67da8d6c 501 my $graph_name = $self->tradition->name;
502 $graph_name =~ s/[^\w\s]//g;
503 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 504
505 my %graph_attrs = (
506 'rankdir' => 'LR',
507 'bgcolor' => 'none',
508 );
509 my %node_attrs = (
510 'fontsize' => 11,
511 'fillcolor' => 'white',
512 'style' => 'filled',
513 'shape' => 'ellipse'
514 );
515 my %edge_attrs = (
516 'arrowhead' => 'open',
517 'color' => '#000000',
518 'fontcolor' => '#000000',
519 );
520
67da8d6c 521 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 522 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
523 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 524
b22576c6 525 # Output substitute start/end readings if necessary
526 if( $startrank ) {
527 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
528 }
529 if( $endrank ) {
530 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
531 }
a344ffcf 532 if( !$startrank && !$endrank ) {
533 ## HACK part 1
534 $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";
535 $dot .= "\t\"#SILENT#\" [ color=white,penwidth=0,label=\"\" ];"
536 }
b22576c6 537 my %used; # Keep track of the readings that actually appear in the graph
df6d9812 538 foreach my $reading ( $self->readings ) {
b22576c6 539 # Only output readings within our rank range.
540 next if $startrank && $reading->rank < $startrank;
541 next if $endrank && $reading->rank > $endrank;
542 $used{$reading->id} = 1;
910a0a6d 543 # Need not output nodes without separate labels
3a2ebbf4 544 next if $reading->id eq $reading->text;
d4b75f44 545 my $rattrs;
30f0df34 546 my $label = $reading->text;
8f9cab7b 547 $label =~ s/\"/\\\"/g;
d4b75f44 548 $rattrs->{'label'} = $label;
0ecb975c 549 $rattrs->{'fillcolor'} = '#b3f36d' if $reading->is_common && $color_common;
d4b75f44 550 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 551 }
3a2ebbf4 552
b22576c6 553 # Add the real edges
554 my @edges = $self->paths;
3bdec618 555 my( %substart, %subend );
b22576c6 556 foreach my $edge ( @edges ) {
557 # Do we need to output this edge?
508fd430 558 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
f13b5582 559 my $label = $self->path_display_label( $self->path_witnesses( $edge ) );
560 my $variables = { %edge_attrs, 'label' => $label };
b22576c6 561 # Account for the rank gap if necessary
b0b4421a 562 if( $self->reading( $edge->[1] )->has_rank
563 && $self->reading( $edge->[0] )->has_rank
564 && $self->reading( $edge->[1] )->rank
565 - $self->reading( $edge->[0] )->rank > 1 ) {
566 $variables->{'minlen'} = $self->reading( $edge->[1] )->rank
b22576c6 567 - $self->reading( $edge->[0] )->rank;
b0b4421a 568 }
508fd430 569 # EXPERIMENTAL: make edge width reflect no. of witnesses
570 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
571 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
572
f13b5582 573 my $varopts = _dot_attr_string( $variables );
574 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
575 $edge->[0], $edge->[1], $varopts );
3bdec618 576 } elsif( $used{$edge->[0]} ) {
577 $subend{$edge->[0]} = 1;
578 } elsif( $used{$edge->[1]} ) {
579 $substart{$edge->[1]} = 1;
b22576c6 580 }
df6d9812 581 }
3bdec618 582 # Add substitute start and end edges if necessary
583 foreach my $node ( keys %substart ) {
584 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 585 my $variables = { %edge_attrs, 'label' => $witstr };
586 my $varopts = _dot_attr_string( $variables );
587 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 588 }
589 foreach my $node ( keys %subend ) {
590 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 591 my $variables = { %edge_attrs, 'label' => $witstr };
592 my $varopts = _dot_attr_string( $variables );
593 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 594 }
a344ffcf 595 # HACK part 2
596 if( !$startrank && !$endrank ) {
597 $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
598 }
3bdec618 599
df6d9812 600 $dot .= "}\n";
601 return $dot;
602}
603
f13b5582 604sub _dot_attr_string {
605 my( $hash ) = @_;
606 my @attrs;
607 foreach my $k ( sort keys %$hash ) {
608 my $v = $hash->{$k};
609 push( @attrs, $k.'="'.$v.'"' );
610 }
611 return( '[ ' . join( ', ', @attrs ) . ' ]' );
612}
613
3a2ebbf4 614sub path_witnesses {
615 my( $self, @edge ) = @_;
616 # If edge is an arrayref, cope.
617 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
618 my $e = shift @edge;
619 @edge = @$e;
620 }
621 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 622 return @wits;
3a2ebbf4 623}
624
8f9cab7b 625sub path_display_label {
508fd430 626 my $self = shift;
627 my @wits = sort @_;
8f9cab7b 628 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
629 if( scalar @wits > $maj ) {
f13b5582 630 # TODO break out a.c. wits
8f9cab7b 631 return 'majority';
632 } else {
633 return join( ', ', @wits );
634 }
635}
636
637
4e5a7b2c 638=head2 as_graphml
8e1394aa 639
4e5a7b2c 640Returns a GraphML representation of the collation. The GraphML will contain
641two graphs. The first expresses the attributes of the readings and the witness
642paths that link them; the second expresses the relationships that link the
643readings. This is the native transfer format for a tradition.
8e1394aa 644
56eefa04 645=begin testing
646
647use Text::Tradition;
648
649my $READINGS = 311;
650my $PATHS = 361;
651
652my $datafile = 't/data/florilegium_tei_ps.xml';
653my $tradition = Text::Tradition->new( 'input' => 'TEI',
654 'name' => 'test0',
655 'file' => $datafile,
656 'linear' => 1 );
657
658ok( $tradition, "Got a tradition object" );
659is( scalar $tradition->witnesses, 13, "Found all witnesses" );
660ok( $tradition->collation, "Tradition has a collation" );
661
662my $c = $tradition->collation;
663is( scalar $c->readings, $READINGS, "Collation has all readings" );
664is( scalar $c->paths, $PATHS, "Collation has all paths" );
665is( scalar $c->relationships, 0, "Collation has all relationships" );
666
667# Add a few relationships
668$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
669$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
670$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
671
672# Now write it to GraphML and parse it again.
673
674my $graphml = $c->as_graphml;
675my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
676is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
677is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
678is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
679
680=end testing
681
8e1394aa 682=cut
683
684sub as_graphml {
3a2ebbf4 685 my( $self ) = @_;
8e1394aa 686
687 # Some namespaces
688 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
689 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
690 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 691 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 692
693 # Create the document and root node
694 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
695 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
696 $graphml->setDocumentElement( $root );
697 $root->setNamespace( $xsi_ns, 'xsi', 0 );
698 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
699
e309421a 700 # Add the data keys for the graph
701 my %graph_data_keys;
702 my $gdi = 0;
1d310495 703 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 704 foreach my $datum ( @graph_attributes ) {
705 $graph_data_keys{$datum} = 'dg'.$gdi++;
706 my $key = $root->addNewChild( $graphml_ns, 'key' );
707 $key->setAttribute( 'attr.name', $datum );
708 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
709 $key->setAttribute( 'for', 'graph' );
710 $key->setAttribute( 'id', $graph_data_keys{$datum} );
711 }
f6066bac 712
8e1394aa 713 # Add the data keys for nodes
ef9d481f 714 my %node_data_keys;
715 my $ndi = 0;
3a2ebbf4 716 my %node_data = (
717 id => 'string',
255875b8 718 text => 'string',
3a2ebbf4 719 rank => 'string',
720 is_start => 'boolean',
721 is_end => 'boolean',
722 is_lacuna => 'boolean',
723 );
724 foreach my $datum ( keys %node_data ) {
910a0a6d 725 $node_data_keys{$datum} = 'dn'.$ndi++;
726 my $key = $root->addNewChild( $graphml_ns, 'key' );
727 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 728 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 729 $key->setAttribute( 'for', 'node' );
730 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 731 }
732
df6d9812 733 # Add the data keys for edges, i.e. witnesses
ef9d481f 734 my $edi = 0;
735 my %edge_data_keys;
3a2ebbf4 736 my %edge_data = (
f523c7a8 737 class => 'string', # Class, deprecated soon
3a2ebbf4 738 witness => 'string', # ID/label for a path
739 relationship => 'string', # ID/label for a relationship
740 extra => 'boolean', # Path key
c84275ff 741 scope => 'string', # Relationship key
3a2ebbf4 742 non_correctable => 'boolean', # Relationship key
743 non_independent => 'boolean', # Relationship key
744 );
745 foreach my $datum ( keys %edge_data ) {
746 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 747 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 748 $key->setAttribute( 'attr.name', $datum );
749 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 750 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 751 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 752 }
3a2ebbf4 753
22222af9 754 # Add the collation graph itself
2c669bca 755 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
756 $sgraph->setAttribute( 'edgedefault', 'directed' );
757 $sgraph->setAttribute( 'id', $self->tradition->name );
758 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
759 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
760 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
761 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
762 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 763
e309421a 764 # Collation attribute data
765 foreach my $datum ( @graph_attributes ) {
2c669bca 766 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
767 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 768 }
8e1394aa 769
770 my $node_ctr = 0;
771 my %node_hash;
22222af9 772 # Add our readings to the graph
3a2ebbf4 773 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 774 # Add to the main graph
775 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 776 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 777 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 778 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 779 foreach my $d ( keys %node_data ) {
780 my $nval = $n->$d;
781 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
782 if defined $nval;
783 }
b15511bf 784 }
785
2c669bca 786 # Add the path edges to the sequence graph
df6d9812 787 my $edge_ctr = 0;
3a2ebbf4 788 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
789 # We add an edge in the graphml for every witness in $e.
508fd430 790 foreach my $wit ( sort $self->path_witnesses( $e ) ) {
3a2ebbf4 791 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
792 $node_hash{ $e->[0] },
793 $node_hash{ $e->[1] } );
2c669bca 794 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 795 $edge_el->setAttribute( 'source', $from );
796 $edge_el->setAttribute( 'target', $to );
797 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 798
799 # It's a witness path, so add the witness
800 my $base = $wit;
801 my $key = $edge_data_keys{'witness'};
802 # Is this an ante-corr witness?
803 my $aclabel = $self->ac_label;
804 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
805 # Keep the base witness
806 $base = $1;
807 # ...and record that this is an 'extra' reading path
808 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
809 }
810 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 811 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 812 }
813 }
814
22222af9 815 # Add the relationship graph to the XML
2626f709 816 $self->relations->as_graphml( $graphml_ns, $root, \%node_hash,
817 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 818
94c00c71 819 # Save and return the thing
820 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 821 return $result;
df6d9812 822}
823
b15511bf 824sub _add_graphml_data {
825 my( $el, $key, $value ) = @_;
b15511bf 826 return unless defined $value;
c9bf3dbf 827 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 828 $data_el->setAttribute( 'key', $key );
829 $data_el->appendText( $value );
8e1394aa 830}
831
4e5a7b2c 832=head2 as_csv
910a0a6d 833
834Returns a CSV alignment table representation of the collation graph, one
2c669bca 835row per witness (or witness uncorrected.)
910a0a6d 836
837=cut
838
839sub as_csv {
3a2ebbf4 840 my( $self ) = @_;
910a0a6d 841 my $table = $self->make_alignment_table;
842 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
843 my @result;
2c669bca 844 # Make the header row
845 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
846 push( @result, decode_utf8( $csv->string ) );
847 # Make the rest of the rows
848 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 849 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
2c669bca 850 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
851 $csv->combine( @row );
910a0a6d 852 push( @result, decode_utf8( $csv->string ) );
853 }
3a2ebbf4 854 return join( "\n", @result );
910a0a6d 855}
856
4e5a7b2c 857=head2 make_alignment_table( $use_refs, $include_witnesses )
2c669bca 858
566f4595 859Return a reference to an alignment table, in a slightly enhanced CollateX
860format which looks like this:
861
862 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 863 tokens => [ { t => "TEXT" }, ... ] },
566f4595 864 { witness => "SIG2",
4e5a7b2c 865 tokens => [ { t => "TEXT" }, ... ] },
566f4595 866 ... ],
867 length => TEXTLEN };
868
869If $use_refs is set to 1, the reading object is returned in the table
870instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 871
872If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 873keys have a true hash value will be included.
2c669bca 874
875=cut
9f3ba6f7 876
910a0a6d 877sub make_alignment_table {
08e0fb85 878 my( $self, $noderefs, $include ) = @_;
0ecb975c 879 # Make sure we can do this
880 throw( "Need a linear graph in order to make an alignment table" )
881 unless $self->linear;
882 $self->calculate_ranks unless $self->end->has_rank;
883
2c669bca 884 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 885 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 886 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2c669bca 887 if( $include ) {
566f4595 888 next unless $include->{$wit->sigil};
2c669bca 889 }
eca16057 890 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 891 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
892 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
2c669bca 893 push( @{$table->{'alignment'}},
894 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 895 if( $wit->is_layered ) {
896 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 897 $wit->sigil.$self->ac_label );
1f7aa795 898 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
2c669bca 899 push( @{$table->{'alignment'}},
900 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 901 }
902 }
2c669bca 903 return $table;
910a0a6d 904}
905
906sub _make_witness_row {
0e476982 907 my( $path, $positions, $noderefs ) = @_;
910a0a6d 908 my %char_hash;
909 map { $char_hash{$_} = undef } @$positions;
2c669bca 910 my $debug = 0;
910a0a6d 911 foreach my $rdg ( @$path ) {
eca16057 912 my $rtext = $rdg->text;
913 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 914 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 915 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
2c669bca 916 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
917 : { 't' => $rtext };
910a0a6d 918 }
919 my @row = map { $char_hash{$_} } @$positions;
eca16057 920 # Fill in lacuna markers for undef spots in the row
921 my $last_el = shift @row;
922 my @filled_row = ( $last_el );
923 foreach my $el ( @row ) {
0e476982 924 # If we are using node reference, make the lacuna node appear many times
925 # in the table. If not, use the lacuna tag.
926 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
566f4595 927 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
eca16057 928 }
929 push( @filled_row, $el );
930 $last_el = $el;
931 }
932 return @filled_row;
910a0a6d 933}
934
0e476982 935# Tiny utility function to say if a table element is a lacuna
936sub _el_is_lacuna {
937 my $el = shift;
2c669bca 938 return 1 if $el->{'t'} eq '#LACUNA#';
939 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
940 && $el->{'t'}->is_lacuna;
0e476982 941 return 0;
942}
943
910a0a6d 944# Helper to turn the witnesses along columns rather than rows. Assumes
945# equal-sized rows.
946sub _turn_table {
947 my( $table ) = @_;
948 my $result = [];
949 return $result unless scalar @$table;
950 my $nrows = scalar @{$table->[0]};
951 foreach my $idx ( 0 .. $nrows - 1 ) {
952 foreach my $wit ( 0 .. $#{$table} ) {
953 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
954 }
955 }
956 return $result;
957}
958
4e5a7b2c 959=head1 NAVIGATION METHODS
910a0a6d 960
4e5a7b2c 961=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 962
963Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 964with $last, for the witness given in $sigil. If a $backup sigil is
965specified (e.g. when walking a layered witness), it will be used wherever
966no $sigil path exists. If there is a base text reading, that will be
967used wherever no path exists for $sigil or $backup.
e2902068 968
969=cut
970
910a0a6d 971# TODO Think about returning some lazy-eval iterator.
b0b4421a 972# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 973
e2902068 974sub reading_sequence {
861c3e27 975 my( $self, $start, $end, $witness ) = @_;
e2902068 976
930ff666 977 $witness = $self->baselabel unless $witness;
e2902068 978 my @readings = ( $start );
979 my %seen;
980 my $n = $start;
3a2ebbf4 981 while( $n && $n->id ne $end->id ) {
982 if( exists( $seen{$n->id} ) ) {
63778331 983 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 984 }
3a2ebbf4 985 $seen{$n->id} = 1;
910a0a6d 986
861c3e27 987 my $next = $self->next_reading( $n, $witness );
44771cf2 988 unless( $next ) {
63778331 989 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 990 }
910a0a6d 991 push( @readings, $next );
992 $n = $next;
e2902068 993 }
994 # Check that the last reading is our end reading.
995 my $last = $readings[$#readings];
63778331 996 throw( "Last reading found from " . $start->text .
997 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 998 unless $last->id eq $end->id;
e2902068 999
1000 return @readings;
1001}
1002
4e5a7b2c 1003=head2 next_reading( $reading, $sigil );
8e1394aa 1004
4a8828f0 1005Returns the reading that follows the given reading along the given witness
930ff666 1006path.
8e1394aa 1007
1008=cut
1009
4a8828f0 1010sub next_reading {
e2902068 1011 # Return the successor via the corresponding path.
8e1394aa 1012 my $self = shift;
3a2ebbf4 1013 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1014 return undef unless $answer;
3a2ebbf4 1015 return $self->reading( $answer );
8e1394aa 1016}
1017
4e5a7b2c 1018=head2 prior_reading( $reading, $sigil )
8e1394aa 1019
4a8828f0 1020Returns the reading that precedes the given reading along the given witness
930ff666 1021path.
8e1394aa 1022
1023=cut
1024
4a8828f0 1025sub prior_reading {
e2902068 1026 # Return the predecessor via the corresponding path.
8e1394aa 1027 my $self = shift;
3a2ebbf4 1028 my $answer = $self->_find_linked_reading( 'prior', @_ );
1029 return $self->reading( $answer );
8e1394aa 1030}
1031
4a8828f0 1032sub _find_linked_reading {
861c3e27 1033 my( $self, $direction, $node, $path ) = @_;
1034
1035 # Get a backup if we are dealing with a layered witness
1036 my $alt_path;
1037 my $aclabel = $self->ac_label;
1038 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1039 $alt_path = $1;
1040 }
1041
e2902068 1042 my @linked_paths = $direction eq 'next'
3a2ebbf4 1043 ? $self->sequence->edges_from( $node )
1044 : $self->sequence->edges_to( $node );
e2902068 1045 return undef unless scalar( @linked_paths );
8e1394aa 1046
e2902068 1047 # We have to find the linked path that contains all of the
1048 # witnesses supplied in $path.
1049 my( @path_wits, @alt_path_wits );
4e5a7b2c 1050 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1051 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1052 my $base_le;
1053 my $alt_le;
1054 foreach my $le ( @linked_paths ) {
3a2ebbf4 1055 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1056 $base_le = $le;
910a0a6d 1057 }
508fd430 1058 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1059 if( _is_within( \@path_wits, \@le_wits ) ) {
1060 # This is the right path.
1061 return $direction eq 'next' ? $le->[1] : $le->[0];
1062 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1063 $alt_le = $le;
1064 }
8e1394aa 1065 }
e2902068 1066 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1067 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1068 if $alt_le;
e2902068 1069
1070 # Got this far? Return the base path if it exists.
3a2ebbf4 1071 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1072 if $base_le;
e2902068 1073
1074 # Got this far? We have no appropriate path.
2c669bca 1075 warn "Could not find $direction node from " . $node->id
910a0a6d 1076 . " along path $path";
8e1394aa 1077 return undef;
1078}
1079
4a8828f0 1080# Some set logic.
1081sub _is_within {
1082 my( $set1, $set2 ) = @_;
7854e12e 1083 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1084 foreach my $el ( @$set1 ) {
910a0a6d 1085 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1086 }
1087 return $ret;
1088}
1089
4e5a7b2c 1090# Return the string that joins together a list of witnesses for
1091# display on a single path.
1092sub _witnesses_of_label {
1093 my( $self, $label ) = @_;
1094 my $regex = $self->wit_list_separator;
1095 my @answer = split( /\Q$regex\E/, $label );
1096 return @answer;
b0b4421a 1097}
1098
d4b75f44 1099=head2 common_readings
1100
1101Returns the list of common readings in the graph (i.e. those readings that are
1102shared by all non-lacunose witnesses.)
1103
1104=cut
1105
1106sub common_readings {
1107 my $self = shift;
1108 my @common = grep { $_->is_common } $self->readings;
1109 return @common;
1110}
1111
b0b4421a 1112=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1113
1114Returns the text of a witness (plus its backup, if we are using a layer)
1115as stored in the collation. The text is returned as a string, where the
1116individual readings are joined with spaces and the meta-readings (e.g.
1117lacunae) are omitted. Optional specification of $start and $end allows
1118the generation of a subset of the witness text.
4e5a7b2c 1119
b0b4421a 1120=cut
1121
1122sub path_text {
861c3e27 1123 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1124 $start = $self->start unless $start;
1125 $end = $self->end unless $end;
861c3e27 1126 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
b0b4421a 1127 return join( ' ', map { $_->text } @path );
1128}
4e5a7b2c 1129
1130=head1 INITIALIZATION METHODS
1131
1132These are mostly for use by parsers.
1133
1134=head2 make_witness_path( $witness )
1135
1136Link the array of readings contained in $witness->path (and in
1137$witness->uncorrected_path if it exists) into collation paths.
1138Clear out the arrays when finished.
de51424a 1139
4e5a7b2c 1140=head2 make_witness_paths
1141
1142Call make_witness_path for all witnesses in the tradition.
1143
1144=cut
930ff666 1145
7e450e44 1146# For use when a collation is constructed from a base text and an apparatus.
1147# We have the sequences of readings and just need to add path edges.
1f7aa795 1148# When we are done, clear out the witness path attributes, as they are no
1149# longer needed.
1150# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1151
6a222840 1152sub make_witness_paths {
1153 my( $self ) = @_;
910a0a6d 1154 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1155 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1156 $self->make_witness_path( $wit );
7854e12e 1157 }
7854e12e 1158}
1159
6a222840 1160sub make_witness_path {
7854e12e 1161 my( $self, $wit ) = @_;
1162 my @chain = @{$wit->path};
15d2d3df 1163 my $sig = $wit->sigil;
7854e12e 1164 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1165 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1166 }
1f7aa795 1167 if( $wit->is_layered ) {
d9e873d0 1168 @chain = @{$wit->uncorrected_path};
1169 foreach my $idx( 0 .. $#chain-1 ) {
1170 my $source = $chain[$idx];
1171 my $target = $chain[$idx+1];
1172 $self->add_path( $source, $target, $sig.$self->ac_label )
1173 unless $self->has_path( $source, $target, $sig );
1174 }
15d2d3df 1175 }
1f7aa795 1176 $wit->clear_path;
1177 $wit->clear_uncorrected_path;
e2902068 1178}
1179
4e5a7b2c 1180=head2 calculate_ranks
1181
1182Calculate the reading ranks (that is, their aligned positions relative
1183to each other) for the graph. This can only be called on linear collations.
1184
1185=cut
1186
910a0a6d 1187sub calculate_ranks {
1188 my $self = shift;
1189 # Walk a version of the graph where every node linked by a relationship
1190 # edge is fundamentally the same node, and do a topological ranking on
1191 # the nodes in this graph.
c9bf3dbf 1192 my $topo_graph = Graph->new();
910a0a6d 1193 my %rel_containers;
1194 my $rel_ctr = 0;
1195 # Add the nodes
1196 foreach my $r ( $self->readings ) {
3a2ebbf4 1197 next if exists $rel_containers{$r->id};
910a0a6d 1198 my @rels = $r->related_readings( 'colocated' );
1199 if( @rels ) {
1200 # Make a relationship container.
1201 push( @rels, $r );
c9bf3dbf 1202 my $rn = 'rel_container_' . $rel_ctr++;
1203 $topo_graph->add_vertex( $rn );
910a0a6d 1204 foreach( @rels ) {
3a2ebbf4 1205 $rel_containers{$_->id} = $rn;
910a0a6d 1206 }
1207 } else {
1208 # Add a new node to mirror the old node.
3a2ebbf4 1209 $rel_containers{$r->id} = $r->id;
1210 $topo_graph->add_vertex( $r->id );
910a0a6d 1211 }
4a8828f0 1212 }
3a1f2523 1213
3a2ebbf4 1214 # Add the edges.
910a0a6d 1215 foreach my $r ( $self->readings ) {
3a2ebbf4 1216 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1217 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1218 $rel_containers{$n} );
4e5a7b2c 1219 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1220 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1221 }
1222 }
1223
1224 # Now do the rankings, starting with the start node.
3a2ebbf4 1225 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1226 my $node_ranks = { $topo_start => 0 };
910a0a6d 1227 my @curr_origin = ( $topo_start );
1228 # A little iterative function.
1229 while( @curr_origin ) {
c9bf3dbf 1230 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1231 }
1232 # Transfer our rankings from the topological graph to the real one.
1233 foreach my $r ( $self->readings ) {
3a2ebbf4 1234 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1235 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1236 } else {
63778331 1237 # Die. Find the last rank we calculated.
1238 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1239 <=> $node_ranks->{$rel_containers{$b->id}} }
1240 $self->readings;
1241 my $last = pop @all_defined;
1242 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1243 }
de51424a 1244 }
8e1394aa 1245}
3a1f2523 1246
910a0a6d 1247sub _assign_rank {
c9bf3dbf 1248 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1249 # Look at each of the children of @current_nodes. If all the child's
1250 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1251 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1252 # parent gets a rank.
910a0a6d 1253 my @next_nodes;
1254 foreach my $c ( @current_nodes ) {
c9bf3dbf 1255 warn "Current reading $c has no rank!"
1256 unless exists $node_ranks->{$c};
1257 # print STDERR "Looking at child of node $c, rank "
1258 # . $node_ranks->{$c} . "\n";
1259 foreach my $child ( $graph->successors( $c ) ) {
1260 next if exists $node_ranks->{$child};
910a0a6d 1261 my $highest_rank = -1;
1262 my $skip = 0;
c9bf3dbf 1263 foreach my $parent ( $graph->predecessors( $child ) ) {
1264 if( exists $node_ranks->{$parent} ) {
1265 $highest_rank = $node_ranks->{$parent}
1266 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1267 } else {
1268 $skip = 1;
1269 last;
1270 }
1271 }
1272 next if $skip;
c9bf3dbf 1273 my $c_rank = $highest_rank + 1;
1274 # print STDERR "Assigning rank $c_rank to node $child \n";
1275 $node_ranks->{$child} = $c_rank;
910a0a6d 1276 push( @next_nodes, $child );
1277 }
1278 }
1279 return @next_nodes;
4cdd82f1 1280}
910a0a6d 1281
4e5a7b2c 1282=head2 flatten_ranks
1283
1284A convenience method for parsing collation data. Searches the graph for readings
1285with the same text at the same rank, and merges any that are found.
1286
1287=cut
1288
0e476982 1289sub flatten_ranks {
1290 my $self = shift;
1291 my %unique_rank_rdg;
1292 foreach my $rdg ( $self->readings ) {
1293 next unless $rdg->has_rank;
1294 my $key = $rdg->rank . "||" . $rdg->text;
1295 if( exists $unique_rank_rdg{$key} ) {
1296 # Combine!
56eefa04 1297 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1298 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
0ecb975c 1299 # TODO see if this now makes a common point.
0e476982 1300 } else {
1301 $unique_rank_rdg{$key} = $rdg;
1302 }
1303 }
1304}
1305
d4b75f44 1306=head2 calculate_common_readings
1307
1308Goes through the graph identifying the readings that appear in every witness
1309(apart from those with lacunae at that spot.) Marks them as common and returns
1310the list.
1311
1312=begin testing
1313
1314use Text::Tradition;
1315
1316my $cxfile = 't/data/Collatex-16.xml';
1317my $t = Text::Tradition->new(
1318 'name' => 'inline',
1319 'input' => 'CollateX',
1320 'file' => $cxfile,
1321 );
1322my $c = $t->collation;
1323
1324my @common = $c->calculate_common_readings();
1325is( scalar @common, 8, "Found correct number of common readings" );
1326my @marked = sort $c->common_readings();
1327is( scalar @common, 8, "All common readings got marked as such" );
1328my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1329is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1330
1331=end testing
1332
1333=cut
1334
1335sub calculate_common_readings {
1336 my $self = shift;
1337 my @common;
1338 my $table = $self->make_alignment_table( 1 );
1339 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1340 my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
1341 my %hash;
1342 foreach my $r ( @row ) {
1343 if( $r ) {
1344 $hash{$r->id} = $r unless $r->is_meta;
1345 } else {
1346 $hash{'UNDEF'} = $r;
1347 }
1348 }
1349 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1350 my( $r ) = values %hash;
1351 $r->is_common( 1 );
1352 push( @common, $r );
1353 }
1354 }
1355 return @common;
1356}
1357
861c3e27 1358=head2 text_from_paths
1359
1360Calculate the text array for all witnesses from the path, for later consistency
1361checking. Only to be used if there is no non-graph-based way to know the
1362original texts.
1363
1364=cut
1365
1366sub text_from_paths {
1367 my $self = shift;
1368 foreach my $wit ( $self->tradition->witnesses ) {
1369 my @text = split( /\s+/,
1370 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1371 $wit->text( \@text );
1372 if( $wit->is_layered ) {
1373 my @uctext = split( /\s+/,
1374 $self->reading_sequence( $self->start, $self->end,
1375 $wit->sigil.$self->ac_label ) );
1376 $wit->text( \@uctext );
1377 }
1378 }
1379}
0e476982 1380
4e5a7b2c 1381=head1 UTILITY FUNCTIONS
1382
1383=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1384
4e5a7b2c 1385Find the last reading that occurs in sequence before both the given readings.
1386
1387=head2 common_successor( $reading_a, $reading_b )
1388
1389Find the first reading that occurs in sequence after both the given readings.
1390
22222af9 1391=begin testing
1392
1393use Text::Tradition;
1394
1395my $cxfile = 't/data/Collatex-16.xml';
1396my $t = Text::Tradition->new(
1397 'name' => 'inline',
1398 'input' => 'CollateX',
1399 'file' => $cxfile,
1400 );
1401my $c = $t->collation;
1402
4e5a7b2c 1403is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1404 'n20', "Found correct common predecessor" );
4e5a7b2c 1405is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1406 '#END#', "Found correct common successor" );
1407
4e5a7b2c 1408is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1409 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1410is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1411 '#END#', "Found correct common successor for readings on same path" );
1412
1413=end testing
1414
1415=cut
1416
1417## Return the closest reading that is a predecessor of both the given readings.
1418sub common_predecessor {
1419 my $self = shift;
4e5a7b2c 1420 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1421 return $self->common_in_path( $r1, $r2, 'predecessors' );
22222af9 1422}
1423
1424sub common_successor {
1425 my $self = shift;
4e5a7b2c 1426 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1427 return $self->common_in_path( $r1, $r2, 'successors' );
22222af9 1428}
1429
1430sub common_in_path {
1431 my( $self, $r1, $r2, $dir ) = @_;
1432 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1433 $iter = $self->end->rank - $iter if $dir eq 'successors';
1434 my @candidates;
1435 my @last_checked = ( $r1, $r2 );
1436 my %all_seen;
1437 while( !@candidates ) {
1438 my @new_lc;
1439 foreach my $lc ( @last_checked ) {
1440 foreach my $p ( $lc->$dir ) {
1441 if( $all_seen{$p->id} ) {
1442 push( @candidates, $p );
1443 } else {
1444 $all_seen{$p->id} = 1;
1445 push( @new_lc, $p );
1446 }
1447 }
1448 }
1449 @last_checked = @new_lc;
1450 }
1451 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1452 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1453}
1454
63778331 1455sub throw {
1456 Text::Tradition::Error->throw(
1457 'ident' => 'Collation error',
1458 'message' => $_[0],
1459 );
1460}
1461
dd3b58b0 1462no Moose;
1463__PACKAGE__->meta->make_immutable;
e867486f 1464
1465=head1 BUGS / TODO
1466
1467=over
1468
4e5a7b2c 1469=item * Get rid of $backup in reading_sequence
e867486f 1470
1471=back