add horrible hack to fix graph layout direction
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
910a0a6d 3use Encode qw( decode_utf8 );
4use File::Temp;
c9bf3dbf 5use Graph;
8e1394aa 6use IPC::Run qw( run binary );
910a0a6d 7use Text::CSV_XS;
b15511bf 8use Text::Tradition::Collation::Reading;
22222af9 9use Text::Tradition::Collation::RelationshipStore;
63778331 10use Text::Tradition::Error;
df6d9812 11use XML::LibXML;
a344ffcf 12use XML::LibXML::XPathContext;
dd3b58b0 13use Moose;
14
3a2ebbf4 15has 'sequence' => (
d047cd52 16 is => 'ro',
3a2ebbf4 17 isa => 'Graph',
18 default => sub { Graph->new() },
d047cd52 19 handles => {
3a2ebbf4 20 paths => 'edges',
d047cd52 21 },
d047cd52 22 );
3a2ebbf4 23
24has 'relations' => (
25 is => 'ro',
22222af9 26 isa => 'Text::Tradition::Collation::RelationshipStore',
27 handles => {
28 relationships => 'relationships',
29 related_readings => 'related_readings',
30 },
31 writer => '_set_relations',
3a2ebbf4 32 );
dd3b58b0 33
3a2ebbf4 34has 'tradition' => (
35 is => 'ro',
d047cd52 36 isa => 'Text::Tradition',
8d9a1cd8 37 weak_ref => 1,
d047cd52 38 );
dd3b58b0 39
3a2ebbf4 40has 'readings' => (
41 isa => 'HashRef[Text::Tradition::Collation::Reading]',
42 traits => ['Hash'],
43 handles => {
44 reading => 'get',
45 _add_reading => 'set',
46 del_reading => 'delete',
47 has_reading => 'exists',
48 readings => 'values',
49 },
50 default => sub { {} },
51 );
910a0a6d 52
4a8828f0 53has 'wit_list_separator' => (
7854e12e 54 is => 'rw',
55 isa => 'Str',
56 default => ', ',
57 );
58
59has 'baselabel' => (
60 is => 'rw',
61 isa => 'Str',
62 default => 'base text',
63 );
4a8828f0 64
15d2d3df 65has 'linear' => (
66 is => 'rw',
67 isa => 'Bool',
68 default => 1,
69 );
c84275ff 70
ef9d481f 71has 'ac_label' => (
72 is => 'rw',
73 isa => 'Str',
74 default => ' (a.c.)',
75 );
3a2ebbf4 76
77has 'start' => (
78 is => 'ro',
79 isa => 'Text::Tradition::Collation::Reading',
80 writer => '_set_start',
81 weak_ref => 1,
82 );
83
84has 'end' => (
85 is => 'ro',
86 isa => 'Text::Tradition::Collation::Reading',
87 writer => '_set_end',
88 weak_ref => 1,
89 );
1f563ac3 90
4e5a7b2c 91=head1 NAME
92
93Text::Tradition::Collation - a software model for a text collation
94
95=head1 SYNOPSIS
96
97 use Text::Tradition;
98 my $t = Text::Tradition->new(
99 'name' => 'this is a text',
100 'input' => 'TEI',
101 'file' => '/path/to/tei_parallel_seg_file.xml' );
102
103 my $c = $t->collation;
104 my @readings = $c->readings;
105 my @paths = $c->paths;
106 my @relationships = $c->relationships;
107
108 my $svg_variant_graph = $t->collation->as_svg();
109
110=head1 DESCRIPTION
111
112Text::Tradition is a library for representation and analysis of collated
113texts, particularly medieval ones. The Collation is the central feature of
114a Tradition, where the text, its sequence of readings, and its relationships
115between readings are actually kept.
116
117=head1 CONSTRUCTOR
118
119=head2 new
120
121The constructor. Takes a hash or hashref of the following arguments:
122
123=over
124
125=item * tradition - The Text::Tradition object to which the collation
126belongs. Required.
127
128=item * linear - Whether the collation should be linear; that is, whether
129transposed readings should be treated as two linked readings rather than one,
130and therefore whether the collation graph is acyclic. Defaults to true.
131
4e5a7b2c 132=item * baselabel - The default label for the path taken by a base text
133(if any). Defaults to 'base text'.
134
135=item * wit_list_separator - The string to join a list of witnesses for
136purposes of making labels in display graphs. Defaults to ', '.
137
138=item * ac_label - The extra label to tack onto a witness sigil when
139representing another layer of path for the given witness - that is, when
140a text has more than one possible reading due to scribal corrections or
141the like. Defaults to ' (a.c.)'.
142
143=back
144
145=head1 ACCESSORS
146
147=head2 tradition
148
149=head2 linear
150
4e5a7b2c 151=head2 wit_list_separator
152
153=head2 baselabel
154
155=head2 ac_label
156
157Simple accessors for collation attributes.
158
159=head2 start
160
161The meta-reading at the start of every witness path.
162
163=head2 end
164
165The meta-reading at the end of every witness path.
166
167=head2 readings
168
169Returns all Reading objects in the graph.
170
171=head2 reading( $id )
172
173Returns the Reading object corresponding to the given ID.
174
175=head2 add_reading( $reading_args )
176
177Adds a new reading object to the collation.
178See L<Text::Tradition::Collation::Reading> for the available arguments.
179
180=head2 del_reading( $object_or_id )
181
182Removes the given reading from the collation, implicitly removing its
183paths and relationships.
184
185=head2 merge_readings( $main, $second )
186
187Merges the $second reading into the $main one.
188The arguments may be either readings or reading IDs.
189
190=head2 has_reading( $id )
191
192Predicate to see whether a given reading ID is in the graph.
193
194=head2 reading_witnesses( $object_or_id )
195
196Returns a list of sigils whose witnesses contain the reading.
197
198=head2 paths
199
200Returns all reading paths within the document - that is, all edges in the
201collation graph. Each path is an arrayref of [ $source, $target ] reading IDs.
202
203=head2 add_path( $source, $target, $sigil )
204
205Links the given readings in the collation in sequence, under the given witness
206sigil. The readings may be specified by object or ID.
207
208=head2 del_path( $source, $target, $sigil )
209
210Links the given readings in the collation in sequence, under the given witness
211sigil. The readings may be specified by object or ID.
212
213=head2 has_path( $source, $target );
214
215Returns true if the two readings are linked in sequence in any witness.
216The readings may be specified by object or ID.
217
218=head2 relationships
219
220Returns all Relationship objects in the collation.
221
222=head2 add_relationship( $reading, $other_reading, $options )
223
224Adds a new relationship of the type given in $options between the two readings,
225which may be specified by object or ID. Returns a value of ( $status, @vectors)
226where $status is true on success, and @vectors is a list of relationship edges
227that were ultimately added.
228See L<Text::Tradition::Collation::Relationship> for the available options.
229
230=cut
dd3b58b0 231
d047cd52 232sub BUILD {
3a2ebbf4 233 my $self = shift;
22222af9 234 $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
3a2ebbf4 235 $self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
236 $self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
d047cd52 237}
784877d9 238
3a2ebbf4 239### Reading construct/destruct functions
240
241sub add_reading {
242 my( $self, $reading ) = @_;
243 unless( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
244 my %args = %$reading;
245 $reading = Text::Tradition::Collation::Reading->new(
246 'collation' => $self,
247 %args );
248 }
249 # First check to see if a reading with this ID exists.
250 if( $self->reading( $reading->id ) ) {
63778331 251 throw( "Collation already has a reading with id " . $reading->id );
3a2ebbf4 252 }
253 $self->_add_reading( $reading->id => $reading );
254 # Once the reading has been added, put it in both graphs.
255 $self->sequence->add_vertex( $reading->id );
22222af9 256 $self->relations->add_reading( $reading->id );
3a2ebbf4 257 return $reading;
eca16057 258};
259
3a2ebbf4 260around del_reading => sub {
261 my $orig = shift;
262 my $self = shift;
263 my $arg = shift;
264
265 if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
266 $arg = $arg->id;
267 }
3a2ebbf4 268 # Remove the reading from the graphs.
269 $self->sequence->delete_vertex( $arg );
22222af9 270 $self->relations->delete_reading( $arg );
3a2ebbf4 271
272 # Carry on.
273 $self->$orig( $arg );
274};
7854e12e 275
3a2ebbf4 276# merge_readings( $main, $to_be_deleted );
7854e12e 277
3a2ebbf4 278sub merge_readings {
279 my $self = shift;
280
281 # We only need the IDs for adding paths to the graph, not the reading
282 # objects themselves.
49d4f2ac 283 my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
3a2ebbf4 284
285 # The kept reading should inherit the paths and the relationships
286 # of the deleted reading.
287 foreach my $path ( $self->sequence->edges_at( $deleted ) ) {
288 my @vector = ( $kept );
289 push( @vector, $path->[1] ) if $path->[0] eq $deleted;
290 unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
49d4f2ac 291 next if $vector[0] eq $vector[1]; # Don't add a self loop
3a2ebbf4 292 my %wits = %{$self->sequence->get_edge_attributes( @$path )};
293 $self->sequence->add_edge( @vector );
294 my $fwits = $self->sequence->get_edge_attributes( @vector );
295 @wits{keys %$fwits} = values %$fwits;
296 $self->sequence->set_edge_attributes( @vector, \%wits );
297 }
22222af9 298 $self->relations->merge_readings( $kept, $deleted, $combine_char );
3a2ebbf4 299
300 # Do the deletion deed.
49d4f2ac 301 if( $combine_char ) {
302 my $kept_obj = $self->reading( $kept );
303 my $new_text = join( $combine_char, $kept_obj->text,
304 $self->reading( $deleted )->text );
305 $kept_obj->alter_text( $new_text );
306 }
3a2ebbf4 307 $self->del_reading( $deleted );
308}
7854e12e 309
3265b0ce 310
3a2ebbf4 311# Helper function for manipulating the graph.
312sub _stringify_args {
313 my( $self, $first, $second, $arg ) = @_;
314 $first = $first->id
315 if ref( $first ) eq 'Text::Tradition::Collation::Reading';
316 $second = $second->id
317 if ref( $second ) eq 'Text::Tradition::Collation::Reading';
318 return( $first, $second, $arg );
319}
df6d9812 320
4e5a7b2c 321# Helper function for manipulating the graph.
322sub _objectify_args {
323 my( $self, $first, $second, $arg ) = @_;
324 $first = $self->reading( $first )
325 unless ref( $first ) eq 'Text::Tradition::Collation::Reading';
326 $second = $self->reading( $second )
327 unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
328 return( $first, $second, $arg );
329}
3a2ebbf4 330### Path logic
331
332sub add_path {
333 my $self = shift;
334
335 # We only need the IDs for adding paths to the graph, not the reading
336 # objects themselves.
337 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
338
339 # Connect the readings
340 $self->sequence->add_edge( $source, $target );
341 # Note the witness in question
342 $self->sequence->set_edge_attribute( $source, $target, $wit, 1 );
b15511bf 343};
344
3a2ebbf4 345sub del_path {
346 my $self = shift;
49d4f2ac 347 my @args;
348 if( ref( $_[0] ) eq 'ARRAY' ) {
349 my $e = shift @_;
350 @args = ( @$e, @_ );
351 } else {
352 @args = @_;
353 }
3a2ebbf4 354
355 # We only need the IDs for adding paths to the graph, not the reading
356 # objects themselves.
49d4f2ac 357 my( $source, $target, $wit ) = $self->_stringify_args( @args );
3a2ebbf4 358
359 if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
49d4f2ac 360 $self->sequence->delete_edge_attribute( $source, $target, $wit );
3a2ebbf4 361 }
362 unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
363 $self->sequence->delete_edge( $source, $target );
364 }
784877d9 365}
366
3a2ebbf4 367
15d2d3df 368# Extra graph-alike utility
369sub has_path {
3a2ebbf4 370 my $self = shift;
371 my( $source, $target, $wit ) = $self->_stringify_args( @_ );
372 return undef unless $self->sequence->has_edge( $source, $target );
373 return $self->sequence->has_edge_attribute( $source, $target, $wit );
b15511bf 374}
375
4e5a7b2c 376=head2 clear_witness( @sigil_list )
3a2ebbf4 377
4e5a7b2c 378Clear the given witnesses out of the collation entirely, removing references
379to them in paths, and removing readings that belong only to them. Should only
380be called via $tradition->del_witness.
3a2ebbf4 381
382=cut
383
4e5a7b2c 384sub clear_witness {
385 my( $self, @sigils ) = @_;
386
387 # Clear the witness(es) out of the paths
388 foreach my $e ( $self->paths ) {
389 foreach my $sig ( @sigils ) {
390 $self->del_path( $e, $sig );
391 }
392 }
393
394 # Clear out the newly unused readings
395 foreach my $r ( $self->readings ) {
396 unless( $self->reading_witnesses( $r ) ) {
397 $self->del_reading( $r );
398 }
399 }
400}
3a2ebbf4 401
402sub add_relationship {
403 my $self = shift;
22222af9 404 my( $source, $target, $opts ) = $self->_stringify_args( @_ );
63778331 405 my( @vectors ) = $self->relations->add_relationship( $source,
64ae6270 406 $self->reading( $source ), $target, $self->reading( $target ), $opts );
407 # Force a full rank recalculation every time. Yuck.
63778331 408 $self->calculate_ranks() if $self->end->has_rank;
409 return @vectors;
22222af9 410}
ef9d481f 411
22222af9 412=head2 reading_witnesses( $reading )
910a0a6d 413
22222af9 414Return a list of sigils corresponding to the witnesses in which the reading appears.
3265b0ce 415
22222af9 416=cut
1d310495 417
1d310495 418sub reading_witnesses {
419 my( $self, $reading ) = @_;
420 # We need only check either the incoming or the outgoing edges; I have
96dc90ec 421 # arbitrarily chosen "incoming". Thus, special-case the start node.
422 if( $reading eq $self->start ) {
423 return map { $_->sigil } $self->tradition->witnesses;
424 }
1d310495 425 my %all_witnesses;
426 foreach my $e ( $self->sequence->edges_to( $reading ) ) {
427 my $wits = $self->sequence->get_edge_attributes( @$e );
428 @all_witnesses{ keys %$wits } = 1;
429 }
430 return keys %all_witnesses;
910a0a6d 431}
432
4e5a7b2c 433=head1 OUTPUT METHODS
8e1394aa 434
4e5a7b2c 435=head2 as_svg
8e1394aa 436
0068967c 437Returns an SVG string that represents the graph, via as_dot and graphviz.
8e1394aa 438
439=cut
440
441sub as_svg {
3a2ebbf4 442 my( $self ) = @_;
443
8e1394aa 444 my @cmd = qw/dot -Tsvg/;
445 my( $svg, $err );
910a0a6d 446 my $dotfile = File::Temp->new();
a344ffcf 447 ## USE FOR DEBUGGING
eca16057 448 # $dotfile->unlink_on_destroy(0);
910a0a6d 449 binmode $dotfile, ':utf8';
450 print $dotfile $self->as_dot();
451 push( @cmd, $dotfile->filename );
452 run( \@cmd, ">", binary(), \$svg );
a344ffcf 453 # HACK part 3 - remove silent node+edge
454 my $parser = XML::LibXML->new();
455 my $svgdom = $parser->parse_string( $svg );
456 my $xpc = XML::LibXML::XPathContext->new( $svgdom->documentElement );
457 $xpc->registerNs( 'svg', 'http://www.w3.org/2000/svg' );
458 my @hacknodes = $xpc->findnodes( '//svg:g[contains(child::svg:title, "#SILENT#")]' );
459 foreach my $h ( @hacknodes ) {
460 $h->parentNode->removeChild( $h );
461 }
462 return decode_utf8( $svgdom->toString() );
8e1394aa 463}
464
4e5a7b2c 465=head2 svg_subgraph( $from, $to )
b22576c6 466
467Returns an SVG string that represents the portion of the graph given by the
468specified range. The $from and $to variables refer to ranks within the graph.
469
470=cut
471
472sub svg_subgraph {
473 my( $self, $from, $to ) = @_;
474
475 my $dot = $self->as_dot( $from, $to );
476 unless( $dot ) {
63778331 477 throw( "Could not output a graph with range $from - $to" );
b22576c6 478 }
479
480 my @cmd = qw/dot -Tsvg/;
481 my( $svg, $err );
482 my $dotfile = File::Temp->new();
483 ## TODO REMOVE
484 # $dotfile->unlink_on_destroy(0);
485 binmode $dotfile, ':utf8';
486 print $dotfile $dot;
487 push( @cmd, $dotfile->filename );
488 run( \@cmd, ">", binary(), \$svg );
489 $svg = decode_utf8( $svg );
490 return $svg;
491}
492
493
4e5a7b2c 494=head2 as_dot( $from, $to )
df6d9812 495
496Returns a string that is the collation graph expressed in dot
4e5a7b2c 497(i.e. GraphViz) format. If $from or $to is passed, as_dot creates
498a subgraph rather than the entire graph.
df6d9812 499
500=cut
501
502sub as_dot {
b22576c6 503 my( $self, $startrank, $endrank ) = @_;
504
505 # Check the arguments
506 if( $startrank ) {
507 return if $endrank && $startrank > $endrank;
508 return if $startrank > $self->end->rank;
509 }
510 if( defined $endrank ) {
511 return if $endrank < 0;
f1b3b33a 512 $endrank = undef if $endrank == $self->end->rank;
b22576c6 513 }
514
df6d9812 515 # TODO consider making some of these things configurable
67da8d6c 516 my $graph_name = $self->tradition->name;
517 $graph_name =~ s/[^\w\s]//g;
518 $graph_name = join( '_', split( /\s+/, $graph_name ) );
f13b5582 519
520 my %graph_attrs = (
521 'rankdir' => 'LR',
522 'bgcolor' => 'none',
523 );
524 my %node_attrs = (
525 'fontsize' => 11,
526 'fillcolor' => 'white',
527 'style' => 'filled',
528 'shape' => 'ellipse'
529 );
530 my %edge_attrs = (
531 'arrowhead' => 'open',
532 'color' => '#000000',
533 'fontcolor' => '#000000',
534 );
535
67da8d6c 536 my $dot = sprintf( "digraph %s {\n", $graph_name );
f13b5582 537 $dot .= "\tgraph " . _dot_attr_string( \%graph_attrs ) . ";\n";
538 $dot .= "\tnode " . _dot_attr_string( \%node_attrs ) . ";\n";
df6d9812 539
b22576c6 540 # Output substitute start/end readings if necessary
541 if( $startrank ) {
542 $dot .= "\t\"#SUBSTART#\" [ label=\"...\" ];\n";
543 }
544 if( $endrank ) {
545 $dot .= "\t\"#SUBEND#\" [ label=\"...\" ];\n";
546 }
a344ffcf 547 if( !$startrank && !$endrank ) {
548 ## HACK part 1
549 $dot .= "\tsubgraph { rank=same \"#START#\" \"#SILENT#\" }\n";
550 $dot .= "\t\"#SILENT#\" [ color=white,penwidth=0,label=\"\" ];"
551 }
b22576c6 552 my %used; # Keep track of the readings that actually appear in the graph
df6d9812 553 foreach my $reading ( $self->readings ) {
b22576c6 554 # Only output readings within our rank range.
555 next if $startrank && $reading->rank < $startrank;
556 next if $endrank && $reading->rank > $endrank;
557 $used{$reading->id} = 1;
910a0a6d 558 # Need not output nodes without separate labels
3a2ebbf4 559 next if $reading->id eq $reading->text;
d4b75f44 560 my $rattrs;
30f0df34 561 my $label = $reading->text;
8f9cab7b 562 $label =~ s/\"/\\\"/g;
d4b75f44 563 $rattrs->{'label'} = $label;
564 # TODO make this an option?
565 # $rattrs->{'fillcolor'} = 'green' if $reading->is_common;
566 $dot .= sprintf( "\t\"%s\" %s;\n", $reading->id, _dot_attr_string( $rattrs ) );
df6d9812 567 }
3a2ebbf4 568
b22576c6 569 # Add the real edges
570 my @edges = $self->paths;
3bdec618 571 my( %substart, %subend );
b22576c6 572 foreach my $edge ( @edges ) {
573 # Do we need to output this edge?
508fd430 574 if( $used{$edge->[0]} && $used{$edge->[1]} ) {
f13b5582 575 my $label = $self->path_display_label( $self->path_witnesses( $edge ) );
576 my $variables = { %edge_attrs, 'label' => $label };
b22576c6 577 # Account for the rank gap if necessary
b0b4421a 578 if( $self->reading( $edge->[1] )->has_rank
579 && $self->reading( $edge->[0] )->has_rank
580 && $self->reading( $edge->[1] )->rank
581 - $self->reading( $edge->[0] )->rank > 1 ) {
582 $variables->{'minlen'} = $self->reading( $edge->[1] )->rank
b22576c6 583 - $self->reading( $edge->[0] )->rank;
b0b4421a 584 }
508fd430 585 # EXPERIMENTAL: make edge width reflect no. of witnesses
586 my $extrawidth = scalar( $self->path_witnesses( $edge ) ) * 0.2;
587 $variables->{'penwidth'} = $extrawidth + 0.8; # gives 1 for a single wit
588
f13b5582 589 my $varopts = _dot_attr_string( $variables );
590 $dot .= sprintf( "\t\"%s\" -> \"%s\" %s;\n",
591 $edge->[0], $edge->[1], $varopts );
3bdec618 592 } elsif( $used{$edge->[0]} ) {
593 $subend{$edge->[0]} = 1;
594 } elsif( $used{$edge->[1]} ) {
595 $substart{$edge->[1]} = 1;
b22576c6 596 }
df6d9812 597 }
3bdec618 598 # Add substitute start and end edges if necessary
599 foreach my $node ( keys %substart ) {
600 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 601 my $variables = { %edge_attrs, 'label' => $witstr };
602 my $varopts = _dot_attr_string( $variables );
603 $dot .= "\t\"#SUBSTART#\" -> \"$node\" $varopts;";
3bdec618 604 }
605 foreach my $node ( keys %subend ) {
606 my $witstr = $self->path_display_label ( $self->reading_witnesses( $self->reading( $node ) ) );
f13b5582 607 my $variables = { %edge_attrs, 'label' => $witstr };
608 my $varopts = _dot_attr_string( $variables );
609 $dot .= "\t\"$node\" -> \"#SUBEND#\" $varopts;";
3bdec618 610 }
a344ffcf 611 # HACK part 2
612 if( !$startrank && !$endrank ) {
613 $dot .= "\t\"#END#\" -> \"#SILENT#\" [ color=white,penwidth=0 ];\n";
614 }
3bdec618 615
df6d9812 616 $dot .= "}\n";
617 return $dot;
618}
619
f13b5582 620sub _dot_attr_string {
621 my( $hash ) = @_;
622 my @attrs;
623 foreach my $k ( sort keys %$hash ) {
624 my $v = $hash->{$k};
625 push( @attrs, $k.'="'.$v.'"' );
626 }
627 return( '[ ' . join( ', ', @attrs ) . ' ]' );
628}
629
3a2ebbf4 630sub path_witnesses {
631 my( $self, @edge ) = @_;
632 # If edge is an arrayref, cope.
633 if( @edge == 1 && ref( $edge[0] ) eq 'ARRAY' ) {
634 my $e = shift @edge;
635 @edge = @$e;
636 }
637 my @wits = keys %{$self->sequence->get_edge_attributes( @edge )};
508fd430 638 return @wits;
3a2ebbf4 639}
640
8f9cab7b 641sub path_display_label {
508fd430 642 my $self = shift;
643 my @wits = sort @_;
8f9cab7b 644 my $maj = scalar( $self->tradition->witnesses ) * 0.6;
645 if( scalar @wits > $maj ) {
f13b5582 646 # TODO break out a.c. wits
8f9cab7b 647 return 'majority';
648 } else {
649 return join( ', ', @wits );
650 }
651}
652
653
4e5a7b2c 654=head2 as_graphml
8e1394aa 655
4e5a7b2c 656Returns a GraphML representation of the collation. The GraphML will contain
657two graphs. The first expresses the attributes of the readings and the witness
658paths that link them; the second expresses the relationships that link the
659readings. This is the native transfer format for a tradition.
8e1394aa 660
56eefa04 661=begin testing
662
663use Text::Tradition;
664
665my $READINGS = 311;
666my $PATHS = 361;
667
668my $datafile = 't/data/florilegium_tei_ps.xml';
669my $tradition = Text::Tradition->new( 'input' => 'TEI',
670 'name' => 'test0',
671 'file' => $datafile,
672 'linear' => 1 );
673
674ok( $tradition, "Got a tradition object" );
675is( scalar $tradition->witnesses, 13, "Found all witnesses" );
676ok( $tradition->collation, "Tradition has a collation" );
677
678my $c = $tradition->collation;
679is( scalar $c->readings, $READINGS, "Collation has all readings" );
680is( scalar $c->paths, $PATHS, "Collation has all paths" );
681is( scalar $c->relationships, 0, "Collation has all relationships" );
682
683# Add a few relationships
684$c->add_relationship( 'w123', 'w125', { 'type' => 'collated' } );
685$c->add_relationship( 'w193', 'w196', { 'type' => 'collated' } );
686$c->add_relationship( 'w257', 'w262', { 'type' => 'transposition' } );
687
688# Now write it to GraphML and parse it again.
689
690my $graphml = $c->as_graphml;
691my $st = Text::Tradition->new( 'input' => 'Self', 'string' => $graphml );
692is( scalar $st->collation->readings, $READINGS, "Reparsed collation has all readings" );
693is( scalar $st->collation->paths, $PATHS, "Reparsed collation has all paths" );
694is( scalar $st->collation->relationships, 3, "Reparsed collation has new relationships" );
695
696=end testing
697
8e1394aa 698=cut
699
700sub as_graphml {
3a2ebbf4 701 my( $self ) = @_;
8e1394aa 702
703 # Some namespaces
704 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
705 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
706 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
910a0a6d 707 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
8e1394aa 708
709 # Create the document and root node
710 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
711 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
712 $graphml->setDocumentElement( $root );
713 $root->setNamespace( $xsi_ns, 'xsi', 0 );
714 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
715
e309421a 716 # Add the data keys for the graph
717 my %graph_data_keys;
718 my $gdi = 0;
1d310495 719 my @graph_attributes = qw/ version wit_list_separator baselabel linear ac_label /;
e309421a 720 foreach my $datum ( @graph_attributes ) {
721 $graph_data_keys{$datum} = 'dg'.$gdi++;
722 my $key = $root->addNewChild( $graphml_ns, 'key' );
723 $key->setAttribute( 'attr.name', $datum );
724 $key->setAttribute( 'attr.type', $key eq 'linear' ? 'boolean' : 'string' );
725 $key->setAttribute( 'for', 'graph' );
726 $key->setAttribute( 'id', $graph_data_keys{$datum} );
727 }
f6066bac 728
8e1394aa 729 # Add the data keys for nodes
ef9d481f 730 my %node_data_keys;
731 my $ndi = 0;
3a2ebbf4 732 my %node_data = (
733 id => 'string',
255875b8 734 text => 'string',
3a2ebbf4 735 rank => 'string',
736 is_start => 'boolean',
737 is_end => 'boolean',
738 is_lacuna => 'boolean',
739 );
740 foreach my $datum ( keys %node_data ) {
910a0a6d 741 $node_data_keys{$datum} = 'dn'.$ndi++;
742 my $key = $root->addNewChild( $graphml_ns, 'key' );
743 $key->setAttribute( 'attr.name', $datum );
3a2ebbf4 744 $key->setAttribute( 'attr.type', $node_data{$datum} );
910a0a6d 745 $key->setAttribute( 'for', 'node' );
746 $key->setAttribute( 'id', $node_data_keys{$datum} );
8e1394aa 747 }
748
df6d9812 749 # Add the data keys for edges, i.e. witnesses
ef9d481f 750 my $edi = 0;
751 my %edge_data_keys;
3a2ebbf4 752 my %edge_data = (
f523c7a8 753 class => 'string', # Class, deprecated soon
3a2ebbf4 754 witness => 'string', # ID/label for a path
755 relationship => 'string', # ID/label for a relationship
756 extra => 'boolean', # Path key
c84275ff 757 scope => 'string', # Relationship key
3a2ebbf4 758 non_correctable => 'boolean', # Relationship key
759 non_independent => 'boolean', # Relationship key
760 );
761 foreach my $datum ( keys %edge_data ) {
762 $edge_data_keys{$datum} = 'de'.$edi++;
910a0a6d 763 my $key = $root->addNewChild( $graphml_ns, 'key' );
3a2ebbf4 764 $key->setAttribute( 'attr.name', $datum );
765 $key->setAttribute( 'attr.type', $edge_data{$datum} );
910a0a6d 766 $key->setAttribute( 'for', 'edge' );
3a2ebbf4 767 $key->setAttribute( 'id', $edge_data_keys{$datum} );
8e1394aa 768 }
3a2ebbf4 769
22222af9 770 # Add the collation graph itself
2c669bca 771 my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
772 $sgraph->setAttribute( 'edgedefault', 'directed' );
773 $sgraph->setAttribute( 'id', $self->tradition->name );
774 $sgraph->setAttribute( 'parse.edgeids', 'canonical' );
775 $sgraph->setAttribute( 'parse.edges', scalar($self->paths) );
776 $sgraph->setAttribute( 'parse.nodeids', 'canonical' );
777 $sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
778 $sgraph->setAttribute( 'parse.order', 'nodesfirst' );
22222af9 779
e309421a 780 # Collation attribute data
781 foreach my $datum ( @graph_attributes ) {
2c669bca 782 my $value = $datum eq 'version' ? '3.0' : $self->$datum;
783 _add_graphml_data( $sgraph, $graph_data_keys{$datum}, $value );
e309421a 784 }
8e1394aa 785
786 my $node_ctr = 0;
787 my %node_hash;
22222af9 788 # Add our readings to the graph
3a2ebbf4 789 foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
2c669bca 790 # Add to the main graph
791 my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
910a0a6d 792 my $node_xmlid = 'n' . $node_ctr++;
3a2ebbf4 793 $node_hash{ $n->id } = $node_xmlid;
910a0a6d 794 $node_el->setAttribute( 'id', $node_xmlid );
255875b8 795 foreach my $d ( keys %node_data ) {
796 my $nval = $n->$d;
797 _add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
798 if defined $nval;
799 }
b15511bf 800 }
801
2c669bca 802 # Add the path edges to the sequence graph
df6d9812 803 my $edge_ctr = 0;
3a2ebbf4 804 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->sequence->edges() ) {
805 # We add an edge in the graphml for every witness in $e.
508fd430 806 foreach my $wit ( sort $self->path_witnesses( $e ) ) {
3a2ebbf4 807 my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
808 $node_hash{ $e->[0] },
809 $node_hash{ $e->[1] } );
2c669bca 810 my $edge_el = $sgraph->addNewChild( $graphml_ns, 'edge' );
3a2ebbf4 811 $edge_el->setAttribute( 'source', $from );
812 $edge_el->setAttribute( 'target', $to );
813 $edge_el->setAttribute( 'id', $id );
3a2ebbf4 814
815 # It's a witness path, so add the witness
816 my $base = $wit;
817 my $key = $edge_data_keys{'witness'};
818 # Is this an ante-corr witness?
819 my $aclabel = $self->ac_label;
820 if( $wit =~ /^(.*)\Q$aclabel\E$/ ) {
821 # Keep the base witness
822 $base = $1;
823 # ...and record that this is an 'extra' reading path
824 _add_graphml_data( $edge_el, $edge_data_keys{'extra'}, $aclabel );
825 }
826 _add_graphml_data( $edge_el, $edge_data_keys{'witness'}, $base );
f523c7a8 827 _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'path' );
3a2ebbf4 828 }
829 }
830
22222af9 831 # Add the relationship graph to the XML
2626f709 832 $self->relations->as_graphml( $graphml_ns, $root, \%node_hash,
833 $node_data_keys{'id'}, \%edge_data_keys );
8e1394aa 834
94c00c71 835 # Save and return the thing
836 my $result = decode_utf8( $graphml->toString(1) );
94c00c71 837 return $result;
df6d9812 838}
839
b15511bf 840sub _add_graphml_data {
841 my( $el, $key, $value ) = @_;
b15511bf 842 return unless defined $value;
c9bf3dbf 843 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
b15511bf 844 $data_el->setAttribute( 'key', $key );
845 $data_el->appendText( $value );
8e1394aa 846}
847
4e5a7b2c 848=head2 as_csv
910a0a6d 849
850Returns a CSV alignment table representation of the collation graph, one
2c669bca 851row per witness (or witness uncorrected.)
910a0a6d 852
853=cut
854
855sub as_csv {
3a2ebbf4 856 my( $self ) = @_;
910a0a6d 857 my $table = $self->make_alignment_table;
858 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
859 my @result;
2c669bca 860 # Make the header row
861 $csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
862 push( @result, decode_utf8( $csv->string ) );
863 # Make the rest of the rows
864 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
566f4595 865 my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
2c669bca 866 my @row = map { $_ ? $_->{'t'} : $_ } @rowobjs;
867 $csv->combine( @row );
910a0a6d 868 push( @result, decode_utf8( $csv->string ) );
869 }
3a2ebbf4 870 return join( "\n", @result );
910a0a6d 871}
872
4e5a7b2c 873=head2 make_alignment_table( $use_refs, $include_witnesses )
2c669bca 874
566f4595 875Return a reference to an alignment table, in a slightly enhanced CollateX
876format which looks like this:
877
878 $table = { alignment => [ { witness => "SIGIL",
4e5a7b2c 879 tokens => [ { t => "TEXT" }, ... ] },
566f4595 880 { witness => "SIG2",
4e5a7b2c 881 tokens => [ { t => "TEXT" }, ... ] },
566f4595 882 ... ],
883 length => TEXTLEN };
884
885If $use_refs is set to 1, the reading object is returned in the table
886instead of READINGTEXT; if not, the text of the reading is returned.
4e5a7b2c 887
888If $include_witnesses is set to a hashref, only the witnesses whose sigil
566f4595 889keys have a true hash value will be included.
2c669bca 890
891=cut
9f3ba6f7 892
910a0a6d 893sub make_alignment_table {
08e0fb85 894 my( $self, $noderefs, $include ) = @_;
910a0a6d 895 unless( $self->linear ) {
63778331 896 throw( "Need a linear graph in order to make an alignment table" );
910a0a6d 897 }
2c669bca 898 my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
3a2ebbf4 899 my @all_pos = ( 1 .. $self->end->rank - 1 );
68454b71 900 foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
2c669bca 901 if( $include ) {
566f4595 902 next unless $include->{$wit->sigil};
2c669bca 903 }
eca16057 904 # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
1f7aa795 905 my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
906 my @row = _make_witness_row( \@wit_path, \@all_pos, $noderefs );
2c669bca 907 push( @{$table->{'alignment'}},
908 { 'witness' => $wit->sigil, 'tokens' => \@row } );
1f7aa795 909 if( $wit->is_layered ) {
910 my @wit_ac_path = $self->reading_sequence( $self->start, $self->end,
861c3e27 911 $wit->sigil.$self->ac_label );
1f7aa795 912 my @ac_row = _make_witness_row( \@wit_ac_path, \@all_pos, $noderefs );
2c669bca 913 push( @{$table->{'alignment'}},
914 { 'witness' => $wit->sigil.$self->ac_label, 'tokens' => \@ac_row } );
910a0a6d 915 }
916 }
2c669bca 917 return $table;
910a0a6d 918}
919
920sub _make_witness_row {
0e476982 921 my( $path, $positions, $noderefs ) = @_;
910a0a6d 922 my %char_hash;
923 map { $char_hash{$_} = undef } @$positions;
2c669bca 924 my $debug = 0;
910a0a6d 925 foreach my $rdg ( @$path ) {
eca16057 926 my $rtext = $rdg->text;
927 $rtext = '#LACUNA#' if $rdg->is_lacuna;
2c669bca 928 print STDERR "rank " . $rdg->rank . "\n" if $debug;
3a2ebbf4 929 # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
2c669bca 930 $char_hash{$rdg->rank} = $noderefs ? { 't' => $rdg }
931 : { 't' => $rtext };
910a0a6d 932 }
933 my @row = map { $char_hash{$_} } @$positions;
eca16057 934 # Fill in lacuna markers for undef spots in the row
935 my $last_el = shift @row;
936 my @filled_row = ( $last_el );
937 foreach my $el ( @row ) {
0e476982 938 # If we are using node reference, make the lacuna node appear many times
939 # in the table. If not, use the lacuna tag.
940 if( $last_el && _el_is_lacuna( $last_el ) && !defined $el ) {
566f4595 941 $el = $noderefs ? $last_el : { 't' => '#LACUNA#' };
eca16057 942 }
943 push( @filled_row, $el );
944 $last_el = $el;
945 }
946 return @filled_row;
910a0a6d 947}
948
0e476982 949# Tiny utility function to say if a table element is a lacuna
950sub _el_is_lacuna {
951 my $el = shift;
2c669bca 952 return 1 if $el->{'t'} eq '#LACUNA#';
953 return 1 if ref( $el->{'t'} ) eq 'Text::Tradition::Collation::Reading'
954 && $el->{'t'}->is_lacuna;
0e476982 955 return 0;
956}
957
910a0a6d 958# Helper to turn the witnesses along columns rather than rows. Assumes
959# equal-sized rows.
960sub _turn_table {
961 my( $table ) = @_;
962 my $result = [];
963 return $result unless scalar @$table;
964 my $nrows = scalar @{$table->[0]};
965 foreach my $idx ( 0 .. $nrows - 1 ) {
966 foreach my $wit ( 0 .. $#{$table} ) {
967 $result->[$idx]->[$wit] = $table->[$wit]->[$idx];
968 }
969 }
970 return $result;
971}
972
4e5a7b2c 973=head1 NAVIGATION METHODS
910a0a6d 974
4e5a7b2c 975=head2 reading_sequence( $first, $last, $sigil, $backup )
e2902068 976
977Returns the ordered list of readings, starting with $first and ending
4e5a7b2c 978with $last, for the witness given in $sigil. If a $backup sigil is
979specified (e.g. when walking a layered witness), it will be used wherever
980no $sigil path exists. If there is a base text reading, that will be
981used wherever no path exists for $sigil or $backup.
e2902068 982
983=cut
984
910a0a6d 985# TODO Think about returning some lazy-eval iterator.
b0b4421a 986# TODO Get rid of backup; we should know from what witness is whether we need it.
910a0a6d 987
e2902068 988sub reading_sequence {
861c3e27 989 my( $self, $start, $end, $witness ) = @_;
e2902068 990
930ff666 991 $witness = $self->baselabel unless $witness;
e2902068 992 my @readings = ( $start );
993 my %seen;
994 my $n = $start;
3a2ebbf4 995 while( $n && $n->id ne $end->id ) {
996 if( exists( $seen{$n->id} ) ) {
63778331 997 throw( "Detected loop for $witness at " . $n->id );
910a0a6d 998 }
3a2ebbf4 999 $seen{$n->id} = 1;
910a0a6d 1000
861c3e27 1001 my $next = $self->next_reading( $n, $witness );
44771cf2 1002 unless( $next ) {
63778331 1003 throw( "Did not find any path for $witness from reading " . $n->id );
44771cf2 1004 }
910a0a6d 1005 push( @readings, $next );
1006 $n = $next;
e2902068 1007 }
1008 # Check that the last reading is our end reading.
1009 my $last = $readings[$#readings];
63778331 1010 throw( "Last reading found from " . $start->text .
1011 " for witness $witness is not the end!" ) # TODO do we get this far?
3a2ebbf4 1012 unless $last->id eq $end->id;
e2902068 1013
1014 return @readings;
1015}
1016
4e5a7b2c 1017=head2 next_reading( $reading, $sigil );
8e1394aa 1018
4a8828f0 1019Returns the reading that follows the given reading along the given witness
930ff666 1020path.
8e1394aa 1021
1022=cut
1023
4a8828f0 1024sub next_reading {
e2902068 1025 # Return the successor via the corresponding path.
8e1394aa 1026 my $self = shift;
3a2ebbf4 1027 my $answer = $self->_find_linked_reading( 'next', @_ );
2c669bca 1028 return undef unless $answer;
3a2ebbf4 1029 return $self->reading( $answer );
8e1394aa 1030}
1031
4e5a7b2c 1032=head2 prior_reading( $reading, $sigil )
8e1394aa 1033
4a8828f0 1034Returns the reading that precedes the given reading along the given witness
930ff666 1035path.
8e1394aa 1036
1037=cut
1038
4a8828f0 1039sub prior_reading {
e2902068 1040 # Return the predecessor via the corresponding path.
8e1394aa 1041 my $self = shift;
3a2ebbf4 1042 my $answer = $self->_find_linked_reading( 'prior', @_ );
1043 return $self->reading( $answer );
8e1394aa 1044}
1045
4a8828f0 1046sub _find_linked_reading {
861c3e27 1047 my( $self, $direction, $node, $path ) = @_;
1048
1049 # Get a backup if we are dealing with a layered witness
1050 my $alt_path;
1051 my $aclabel = $self->ac_label;
1052 if( $path && $path =~ /^(.*)\Q$aclabel\E$/ ) {
1053 $alt_path = $1;
1054 }
1055
e2902068 1056 my @linked_paths = $direction eq 'next'
3a2ebbf4 1057 ? $self->sequence->edges_from( $node )
1058 : $self->sequence->edges_to( $node );
e2902068 1059 return undef unless scalar( @linked_paths );
8e1394aa 1060
e2902068 1061 # We have to find the linked path that contains all of the
1062 # witnesses supplied in $path.
1063 my( @path_wits, @alt_path_wits );
4e5a7b2c 1064 @path_wits = sort( $self->_witnesses_of_label( $path ) ) if $path;
1065 @alt_path_wits = sort( $self->_witnesses_of_label( $alt_path ) ) if $alt_path;
e2902068 1066 my $base_le;
1067 my $alt_le;
1068 foreach my $le ( @linked_paths ) {
3a2ebbf4 1069 if( $self->sequence->has_edge_attribute( @$le, $self->baselabel ) ) {
910a0a6d 1070 $base_le = $le;
910a0a6d 1071 }
508fd430 1072 my @le_wits = sort $self->path_witnesses( $le );
3a2ebbf4 1073 if( _is_within( \@path_wits, \@le_wits ) ) {
1074 # This is the right path.
1075 return $direction eq 'next' ? $le->[1] : $le->[0];
1076 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
1077 $alt_le = $le;
1078 }
8e1394aa 1079 }
e2902068 1080 # Got this far? Return the alternate path if it exists.
3a2ebbf4 1081 return $direction eq 'next' ? $alt_le->[1] : $alt_le->[0]
910a0a6d 1082 if $alt_le;
e2902068 1083
1084 # Got this far? Return the base path if it exists.
3a2ebbf4 1085 return $direction eq 'next' ? $base_le->[1] : $base_le->[0]
910a0a6d 1086 if $base_le;
e2902068 1087
1088 # Got this far? We have no appropriate path.
2c669bca 1089 warn "Could not find $direction node from " . $node->id
910a0a6d 1090 . " along path $path";
8e1394aa 1091 return undef;
1092}
1093
4a8828f0 1094# Some set logic.
1095sub _is_within {
1096 my( $set1, $set2 ) = @_;
7854e12e 1097 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 1098 foreach my $el ( @$set1 ) {
910a0a6d 1099 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
4a8828f0 1100 }
1101 return $ret;
1102}
1103
4e5a7b2c 1104# Return the string that joins together a list of witnesses for
1105# display on a single path.
1106sub _witnesses_of_label {
1107 my( $self, $label ) = @_;
1108 my $regex = $self->wit_list_separator;
1109 my @answer = split( /\Q$regex\E/, $label );
1110 return @answer;
b0b4421a 1111}
1112
d4b75f44 1113=head2 common_readings
1114
1115Returns the list of common readings in the graph (i.e. those readings that are
1116shared by all non-lacunose witnesses.)
1117
1118=cut
1119
1120sub common_readings {
1121 my $self = shift;
1122 my @common = grep { $_->is_common } $self->readings;
1123 return @common;
1124}
1125
b0b4421a 1126=head2 path_text( $sigil, $mainsigil [, $start, $end ] )
1127
1128Returns the text of a witness (plus its backup, if we are using a layer)
1129as stored in the collation. The text is returned as a string, where the
1130individual readings are joined with spaces and the meta-readings (e.g.
1131lacunae) are omitted. Optional specification of $start and $end allows
1132the generation of a subset of the witness text.
4e5a7b2c 1133
b0b4421a 1134=cut
1135
1136sub path_text {
861c3e27 1137 my( $self, $wit, $start, $end ) = @_;
b0b4421a 1138 $start = $self->start unless $start;
1139 $end = $self->end unless $end;
861c3e27 1140 my @path = grep { !$_->is_meta } $self->reading_sequence( $start, $end, $wit );
b0b4421a 1141 return join( ' ', map { $_->text } @path );
1142}
4e5a7b2c 1143
1144=head1 INITIALIZATION METHODS
1145
1146These are mostly for use by parsers.
1147
1148=head2 make_witness_path( $witness )
1149
1150Link the array of readings contained in $witness->path (and in
1151$witness->uncorrected_path if it exists) into collation paths.
1152Clear out the arrays when finished.
de51424a 1153
4e5a7b2c 1154=head2 make_witness_paths
1155
1156Call make_witness_path for all witnesses in the tradition.
1157
1158=cut
930ff666 1159
7e450e44 1160# For use when a collation is constructed from a base text and an apparatus.
1161# We have the sequences of readings and just need to add path edges.
1f7aa795 1162# When we are done, clear out the witness path attributes, as they are no
1163# longer needed.
1164# TODO Find a way to replace the witness path attributes with encapsulated functions?
e2902068 1165
6a222840 1166sub make_witness_paths {
1167 my( $self ) = @_;
910a0a6d 1168 foreach my $wit ( $self->tradition->witnesses ) {
0068967c 1169 # print STDERR "Making path for " . $wit->sigil . "\n";
910a0a6d 1170 $self->make_witness_path( $wit );
7854e12e 1171 }
7854e12e 1172}
1173
6a222840 1174sub make_witness_path {
7854e12e 1175 my( $self, $wit ) = @_;
1176 my @chain = @{$wit->path};
15d2d3df 1177 my $sig = $wit->sigil;
7854e12e 1178 foreach my $idx ( 0 .. $#chain-1 ) {
910a0a6d 1179 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 1180 }
1f7aa795 1181 if( $wit->is_layered ) {
d9e873d0 1182 @chain = @{$wit->uncorrected_path};
1183 foreach my $idx( 0 .. $#chain-1 ) {
1184 my $source = $chain[$idx];
1185 my $target = $chain[$idx+1];
1186 $self->add_path( $source, $target, $sig.$self->ac_label )
1187 unless $self->has_path( $source, $target, $sig );
1188 }
15d2d3df 1189 }
1f7aa795 1190 $wit->clear_path;
1191 $wit->clear_uncorrected_path;
e2902068 1192}
1193
4e5a7b2c 1194=head2 calculate_ranks
1195
1196Calculate the reading ranks (that is, their aligned positions relative
1197to each other) for the graph. This can only be called on linear collations.
1198
1199=cut
1200
910a0a6d 1201sub calculate_ranks {
1202 my $self = shift;
1203 # Walk a version of the graph where every node linked by a relationship
1204 # edge is fundamentally the same node, and do a topological ranking on
1205 # the nodes in this graph.
c9bf3dbf 1206 my $topo_graph = Graph->new();
910a0a6d 1207 my %rel_containers;
1208 my $rel_ctr = 0;
1209 # Add the nodes
1210 foreach my $r ( $self->readings ) {
3a2ebbf4 1211 next if exists $rel_containers{$r->id};
910a0a6d 1212 my @rels = $r->related_readings( 'colocated' );
1213 if( @rels ) {
1214 # Make a relationship container.
1215 push( @rels, $r );
c9bf3dbf 1216 my $rn = 'rel_container_' . $rel_ctr++;
1217 $topo_graph->add_vertex( $rn );
910a0a6d 1218 foreach( @rels ) {
3a2ebbf4 1219 $rel_containers{$_->id} = $rn;
910a0a6d 1220 }
1221 } else {
1222 # Add a new node to mirror the old node.
3a2ebbf4 1223 $rel_containers{$r->id} = $r->id;
1224 $topo_graph->add_vertex( $r->id );
910a0a6d 1225 }
4a8828f0 1226 }
3a1f2523 1227
3a2ebbf4 1228 # Add the edges.
910a0a6d 1229 foreach my $r ( $self->readings ) {
3a2ebbf4 1230 foreach my $n ( $self->sequence->successors( $r->id ) ) {
1231 my( $tfrom, $tto ) = ( $rel_containers{$r->id},
1232 $rel_containers{$n} );
4e5a7b2c 1233 # $DB::single = 1 unless $tfrom && $tto;
3a2ebbf4 1234 $topo_graph->add_edge( $tfrom, $tto );
910a0a6d 1235 }
1236 }
1237
1238 # Now do the rankings, starting with the start node.
3a2ebbf4 1239 my $topo_start = $rel_containers{$self->start->id};
c9bf3dbf 1240 my $node_ranks = { $topo_start => 0 };
910a0a6d 1241 my @curr_origin = ( $topo_start );
1242 # A little iterative function.
1243 while( @curr_origin ) {
c9bf3dbf 1244 @curr_origin = _assign_rank( $topo_graph, $node_ranks, @curr_origin );
910a0a6d 1245 }
1246 # Transfer our rankings from the topological graph to the real one.
1247 foreach my $r ( $self->readings ) {
3a2ebbf4 1248 if( defined $node_ranks->{$rel_containers{$r->id}} ) {
1249 $r->rank( $node_ranks->{$rel_containers{$r->id}} );
67da8d6c 1250 } else {
63778331 1251 # Die. Find the last rank we calculated.
1252 my @all_defined = sort { $node_ranks->{$rel_containers{$a->id}}
1253 <=> $node_ranks->{$rel_containers{$b->id}} }
1254 $self->readings;
1255 my $last = pop @all_defined;
1256 throw( "Ranks not calculated after $last - do you have a cycle in the graph?" );
67da8d6c 1257 }
de51424a 1258 }
8e1394aa 1259}
3a1f2523 1260
910a0a6d 1261sub _assign_rank {
c9bf3dbf 1262 my( $graph, $node_ranks, @current_nodes ) = @_;
910a0a6d 1263 # Look at each of the children of @current_nodes. If all the child's
1264 # parents have a rank, assign it the highest rank + 1 and add it to
c9bf3dbf 1265 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1266 # parent gets a rank.
910a0a6d 1267 my @next_nodes;
1268 foreach my $c ( @current_nodes ) {
c9bf3dbf 1269 warn "Current reading $c has no rank!"
1270 unless exists $node_ranks->{$c};
1271 # print STDERR "Looking at child of node $c, rank "
1272 # . $node_ranks->{$c} . "\n";
1273 foreach my $child ( $graph->successors( $c ) ) {
1274 next if exists $node_ranks->{$child};
910a0a6d 1275 my $highest_rank = -1;
1276 my $skip = 0;
c9bf3dbf 1277 foreach my $parent ( $graph->predecessors( $child ) ) {
1278 if( exists $node_ranks->{$parent} ) {
1279 $highest_rank = $node_ranks->{$parent}
1280 if $highest_rank <= $node_ranks->{$parent};
910a0a6d 1281 } else {
1282 $skip = 1;
1283 last;
1284 }
1285 }
1286 next if $skip;
c9bf3dbf 1287 my $c_rank = $highest_rank + 1;
1288 # print STDERR "Assigning rank $c_rank to node $child \n";
1289 $node_ranks->{$child} = $c_rank;
910a0a6d 1290 push( @next_nodes, $child );
1291 }
1292 }
1293 return @next_nodes;
4cdd82f1 1294}
910a0a6d 1295
4e5a7b2c 1296=head2 flatten_ranks
1297
1298A convenience method for parsing collation data. Searches the graph for readings
1299with the same text at the same rank, and merges any that are found.
1300
1301=cut
1302
0e476982 1303sub flatten_ranks {
1304 my $self = shift;
1305 my %unique_rank_rdg;
1306 foreach my $rdg ( $self->readings ) {
1307 next unless $rdg->has_rank;
1308 my $key = $rdg->rank . "||" . $rdg->text;
1309 if( exists $unique_rank_rdg{$key} ) {
1310 # Combine!
56eefa04 1311 # print STDERR "Combining readings at same rank: $key\n";
0e476982 1312 $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
1313 } else {
1314 $unique_rank_rdg{$key} = $rdg;
1315 }
1316 }
1317}
1318
d4b75f44 1319=head2 calculate_common_readings
1320
1321Goes through the graph identifying the readings that appear in every witness
1322(apart from those with lacunae at that spot.) Marks them as common and returns
1323the list.
1324
1325=begin testing
1326
1327use Text::Tradition;
1328
1329my $cxfile = 't/data/Collatex-16.xml';
1330my $t = Text::Tradition->new(
1331 'name' => 'inline',
1332 'input' => 'CollateX',
1333 'file' => $cxfile,
1334 );
1335my $c = $t->collation;
1336
1337my @common = $c->calculate_common_readings();
1338is( scalar @common, 8, "Found correct number of common readings" );
1339my @marked = sort $c->common_readings();
1340is( scalar @common, 8, "All common readings got marked as such" );
1341my @expected = qw/ n1 n12 n16 n19 n20 n5 n6 n7 /;
1342is_deeply( \@marked, \@expected, "Found correct list of common readings" );
1343
1344=end testing
1345
1346=cut
1347
1348sub calculate_common_readings {
1349 my $self = shift;
1350 my @common;
1351 my $table = $self->make_alignment_table( 1 );
1352 foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
1353 my @row = map { $_->{'tokens'}->[$idx]->{'t'} } @{$table->{'alignment'}};
1354 my %hash;
1355 foreach my $r ( @row ) {
1356 if( $r ) {
1357 $hash{$r->id} = $r unless $r->is_meta;
1358 } else {
1359 $hash{'UNDEF'} = $r;
1360 }
1361 }
1362 if( keys %hash == 1 && !exists $hash{'UNDEF'} ) {
1363 my( $r ) = values %hash;
1364 $r->is_common( 1 );
1365 push( @common, $r );
1366 }
1367 }
1368 return @common;
1369}
1370
861c3e27 1371=head2 text_from_paths
1372
1373Calculate the text array for all witnesses from the path, for later consistency
1374checking. Only to be used if there is no non-graph-based way to know the
1375original texts.
1376
1377=cut
1378
1379sub text_from_paths {
1380 my $self = shift;
1381 foreach my $wit ( $self->tradition->witnesses ) {
1382 my @text = split( /\s+/,
1383 $self->reading_sequence( $self->start, $self->end, $wit->sigil ) );
1384 $wit->text( \@text );
1385 if( $wit->is_layered ) {
1386 my @uctext = split( /\s+/,
1387 $self->reading_sequence( $self->start, $self->end,
1388 $wit->sigil.$self->ac_label ) );
1389 $wit->text( \@uctext );
1390 }
1391 }
1392}
0e476982 1393
4e5a7b2c 1394=head1 UTILITY FUNCTIONS
1395
1396=head2 common_predecessor( $reading_a, $reading_b )
8e1394aa 1397
4e5a7b2c 1398Find the last reading that occurs in sequence before both the given readings.
1399
1400=head2 common_successor( $reading_a, $reading_b )
1401
1402Find the first reading that occurs in sequence after both the given readings.
1403
22222af9 1404=begin testing
1405
1406use Text::Tradition;
1407
1408my $cxfile = 't/data/Collatex-16.xml';
1409my $t = Text::Tradition->new(
1410 'name' => 'inline',
1411 'input' => 'CollateX',
1412 'file' => $cxfile,
1413 );
1414my $c = $t->collation;
1415
4e5a7b2c 1416is( $c->common_predecessor( 'n9', 'n23' )->id,
22222af9 1417 'n20', "Found correct common predecessor" );
4e5a7b2c 1418is( $c->common_successor( 'n9', 'n23' )->id,
22222af9 1419 '#END#', "Found correct common successor" );
1420
4e5a7b2c 1421is( $c->common_predecessor( 'n19', 'n17' )->id,
22222af9 1422 'n16', "Found correct common predecessor for readings on same path" );
4e5a7b2c 1423is( $c->common_successor( 'n21', 'n26' )->id,
22222af9 1424 '#END#', "Found correct common successor for readings on same path" );
1425
1426=end testing
1427
1428=cut
1429
1430## Return the closest reading that is a predecessor of both the given readings.
1431sub common_predecessor {
1432 my $self = shift;
4e5a7b2c 1433 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1434 return $self->common_in_path( $r1, $r2, 'predecessors' );
22222af9 1435}
1436
1437sub common_successor {
1438 my $self = shift;
4e5a7b2c 1439 my( $r1, $r2 ) = $self->_objectify_args( @_ );
1440 return $self->common_in_path( $r1, $r2, 'successors' );
22222af9 1441}
1442
1443sub common_in_path {
1444 my( $self, $r1, $r2, $dir ) = @_;
1445 my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
1446 $iter = $self->end->rank - $iter if $dir eq 'successors';
1447 my @candidates;
1448 my @last_checked = ( $r1, $r2 );
1449 my %all_seen;
1450 while( !@candidates ) {
1451 my @new_lc;
1452 foreach my $lc ( @last_checked ) {
1453 foreach my $p ( $lc->$dir ) {
1454 if( $all_seen{$p->id} ) {
1455 push( @candidates, $p );
1456 } else {
1457 $all_seen{$p->id} = 1;
1458 push( @new_lc, $p );
1459 }
1460 }
1461 }
1462 @last_checked = @new_lc;
1463 }
1464 my @answer = sort { $a->rank <=> $b->rank } @candidates;
1465 return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
1466}
1467
63778331 1468sub throw {
1469 Text::Tradition::Error->throw(
1470 'ident' => 'Collation error',
1471 'message' => $_[0],
1472 );
1473}
1474
dd3b58b0 1475no Moose;
1476__PACKAGE__->meta->make_immutable;
e867486f 1477
1478=head1 BUGS / TODO
1479
1480=over
1481
4e5a7b2c 1482=item * Get rid of $backup in reading_sequence
e867486f 1483
1484=back