add some debug code for spotting apparatus double entries
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
3use Graph::Easy;
8e1394aa 4use IPC::Run qw( run binary );
8e1394aa 5use Text::Tradition::Collation::Reading;
dd3b58b0 6use Moose;
7
8has 'graph' => (
d047cd52 9 is => 'ro',
10 isa => 'Graph::Easy',
11 handles => {
8e1394aa 12 add_reading => 'add_node',
13 del_reading => 'del_node',
14 add_path => 'add_edge',
15 del_path => 'del_edge',
16 reading => 'node',
17 path => 'edge',
18 readings => 'nodes',
19 paths => 'edges',
d047cd52 20 },
21 default => sub { Graph::Easy->new( undirected => 0 ) },
22 );
784877d9 23
dd3b58b0 24
dd3b58b0 25has 'tradition' => (
8e1394aa 26 is => 'rw',
d047cd52 27 isa => 'Text::Tradition',
28 );
dd3b58b0 29
8e1394aa 30has 'svg' => (
31 is => 'ro',
32 isa => 'Str',
33 writer => '_save_svg',
34 predicate => 'has_svg',
35 );
36
37has 'graphviz' => (
38 is => 'ro',
39 isa => 'Str',
40 writer => '_save_graphviz',
41 predicate => 'has_graphviz',
42 );
43
44has 'graphml' => (
45 is => 'ro',
46 isa => 'XML::LibXML::Document',
47 writer => '_save_graphml',
48 predicate => 'has_graphml',
49 );
50
3a1f2523 51# Keeps track of the lemmas within the collation. At most one lemma
52# per position in the graph.
53has 'lemmata' => (
54 is => 'ro',
55 isa => 'HashRef[Maybe[Str]]',
56 default => sub { {} },
57 );
58
4a8828f0 59has 'wit_list_separator' => (
7854e12e 60 is => 'rw',
61 isa => 'Str',
62 default => ', ',
63 );
64
65has 'baselabel' => (
66 is => 'rw',
67 isa => 'Str',
68 default => 'base text',
69 );
4a8828f0 70
1f563ac3 71has 'collapsed' => (
15d2d3df 72 is => 'rw',
73 isa => 'Bool',
74 );
75
76has 'linear' => (
77 is => 'rw',
78 isa => 'Bool',
79 default => 1,
80 );
1f563ac3 81
82
dd3b58b0 83# The collation can be created two ways:
84# 1. Collate a set of witnesses (with CollateX I guess) and process
85# the results as in 2.
86# 2. Read a pre-prepared collation in one of a variety of formats,
87# and make the graph from that.
88
89# The graph itself will (for now) be immutable, and the positions
90# within the graph will also be immutable. We need to calculate those
91# positions upon graph construction. The equivalences between graph
92# nodes will be mutable, entirely determined by the user (or possibly
93# by some semantic pre-processing provided by the user.) So the
94# constructor should just make an empty equivalences object. The
95# constructor will also need to make the witness objects, if we didn't
96# come through option 1.
97
d047cd52 98sub BUILD {
99 my( $self, $args ) = @_;
8e1394aa 100 $self->graph->use_class('node', 'Text::Tradition::Collation::Reading');
d047cd52 101
4a8828f0 102 # Pass through any graph-specific options.
103 my $shape = exists( $args->{'shape'} ) ? $args->{'shape'} : 'ellipse';
104 $self->graph->set_attribute( 'node', 'shape', $shape );
d047cd52 105}
784877d9 106
7854e12e 107# Wrapper around add_path
108
109around add_path => sub {
110 my $orig = shift;
111 my $self = shift;
112
113 # Make sure there are three arguments
114 unless( @_ == 3 ) {
115 warn "Call add_path with args source, target, witness";
116 return;
117 }
118 # Make sure the proposed path does not yet exist
119 my( $source, $target, $wit ) = @_;
120 $source = $self->reading( $source )
121 unless ref( $source ) eq 'Text::Tradition::Collation::Reading';
122 $target = $self->reading( $target )
123 unless ref( $target ) eq 'Text::Tradition::Collation::Reading';
124 foreach my $path ( $source->edges_to( $target ) ) {
125 if( $path->label eq $wit ) {
126 return;
127 }
128 }
129 # Do the deed
130 $self->$orig( @_ );
131};
132
133# Wrapper around merge_nodes
784877d9 134
135sub merge_readings {
136 my $self = shift;
137 my $first_node = shift;
138 my $second_node = shift;
139 $first_node->merge_from( $second_node );
140 unshift( @_, $first_node, $second_node );
141 return $self->graph->merge_nodes( @_ );
142}
143
15d2d3df 144# Extra graph-alike utility
145sub has_path {
146 my( $self, $source, $target, $label ) = @_;
147 my @paths = $source->edges_to( $target );
148 my @relevant = grep { $_->label eq $label } @paths;
149 return scalar @paths;
150}
151
8e1394aa 152=head2 Output method(s)
153
154=over
155
156=item B<as_svg>
157
158print $graph->as_svg( $recalculate );
159
160Returns an SVG string that represents the graph. Uses GraphViz to do
4a8828f0 161this, because Graph::Easy doesn\'t cope well with long graphs. Unless
8e1394aa 162$recalculate is passed (and is a true value), the method will return a
163cached copy of the SVG after the first call to the method.
164
165=cut
166
167sub as_svg {
168 my( $self, $recalc ) = @_;
169 return $self->svg if $self->has_svg;
170
1f563ac3 171 $self->collapse_graph_edges();
8e1394aa 172 $self->_save_graphviz( $self->graph->as_graphviz() )
173 unless( $self->has_graphviz && !$recalc );
174
175 my @cmd = qw/dot -Tsvg/;
176 my( $svg, $err );
177 my $in = $self->graphviz;
178 run( \@cmd, \$in, ">", binary(), \$svg );
179 $self->{'svg'} = $svg;
1f563ac3 180 $self->expand_graph_edges();
8e1394aa 181 return $svg;
182}
183
184=item B<as_graphml>
185
186print $graph->as_graphml( $recalculate )
187
188Returns a GraphML representation of the collation graph, with
189transposition information and position information. Unless
190$recalculate is passed (and is a true value), the method will return a
191cached copy of the SVG after the first call to the method.
192
193=cut
194
195sub as_graphml {
196 my( $self, $recalc ) = @_;
197 return $self->graphml if $self->has_graphml;
198
199 # Some namespaces
200 my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
201 my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
202 my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
203 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';
204
205 # Create the document and root node
206 my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
207 my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
208 $graphml->setDocumentElement( $root );
209 $root->setNamespace( $xsi_ns, 'xsi', 0 );
210 $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );
211
212 # Add the data keys for nodes
213 my @node_data = ( 'name', 'token', 'identical', 'position' );
214 foreach my $ndi ( 0 .. $#node_data ) {
215 my $key = $root->addNewChild( $graphml_ns, 'key' );
216 $key->setAttribute( 'attr.name', $node_data[$ndi] );
217 $key->setAttribute( 'attr.type', 'string' );
218 $key->setAttribute( 'for', 'node' );
219 $key->setAttribute( 'id', 'd'.$ndi );
220 }
221
222 # Add the data keys for edges
223 my %wit_hash;
224 my $wit_ctr = 0;
1f563ac3 225 foreach my $wit ( @{$self->tradition->witnesses} ) {
8e1394aa 226 my $wit_key = 'w' . $wit_ctr++;
227 $wit_hash{$wit} = $wit_key;
228 my $key = $root->addNewChild( $graphml_ns, 'key' );
229 $key->setAttribute( 'attr.name', $wit );
230 $key->setAttribute( 'attr.type', 'string' );
231 $key->setAttribute( 'for', 'edge' );
232 $key->setAttribute( 'id', $wit_key );
233 }
234
235 # Add the graph, its nodes, and its edges
1f563ac3 236 $self->collapse_graph_edges();
8e1394aa 237 my $graph = $root->addNewChild( $graphml_ns, 'graph' );
238 $graph->setAttribute( 'edgedefault', 'directed' );
239 $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful
240 $graph->setAttribute( 'parse.edgeids', 'canonical' );
241 $graph->setAttribute( 'parse.edges', $self->edges() );
242 $graph->setAttribute( 'parse.nodeids', 'canonical' );
243 $graph->setAttribute( 'parse.nodes', $self->nodes() );
244 $graph->setAttribute( 'parse.order', 'nodesfirst' );
245
246 my $node_ctr = 0;
247 my %node_hash;
248 foreach my $n ( $self->readings ) {
249 my %this_node_data = ();
250 foreach my $ndi ( 0 .. $#node_data ) {
251 my $value;
252 $this_node_data{'d'.$ndi} = $n->name if $node_data[$ndi] eq 'name';
253 $this_node_data{'d'.$ndi} = $n->label
254 if $node_data[$ndi] eq 'token';
255 $this_node_data{'d'.$ndi} = $n->primary->name if $n->has_primary;
256 $this_node_data{'d'.$ndi} =
257 $self->{'positions'}->node_position( $n )
258 if $node_data[$ndi] eq 'position';
259 }
260 my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
261 my $node_xmlid = 'n' . $node_ctr++;
262 $node_hash{ $n->name } = $node_xmlid;
263 $node_el->setAttribute( 'id', $node_xmlid );
264
265 foreach my $dk ( keys %this_node_data ) {
266 my $d_el = $node_el->addNewChild( $graphml_ns, 'data' );
267 $d_el->setAttribute( 'key', $dk );
268 $d_el->appendTextChild( $this_node_data{$dk} );
269 }
270 }
271
1f563ac3 272 foreach my $e ( $self->paths() ) {
8e1394aa 273 my( $name, $from, $to ) = ( $e->name,
274 $node_hash{ $e->from()->name() },
275 $node_hash{ $e->to()->name() } );
276 my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' );
277 $edge_el->setAttribute( 'source', $from );
278 $edge_el->setAttribute( 'target', $to );
279 $edge_el->setAttribute( 'id', $name );
280 # TODO Got to add the witnesses
281 }
282
283 # Return the thing
284 $self->_save_graphml( $graphml );
1f563ac3 285 $self->expand_graph_edges();
8e1394aa 286 return $graphml;
287}
288
1f563ac3 289sub collapse_graph_edges {
290 my $self = shift;
291 # Our collation graph has an edge per witness. This is great for
292 # calculation purposes, but terrible for display. Thus we want to
293 # display only one edge between any two nodes.
294
295 return if $self->collapsed;
296
297 print STDERR "Collapsing path edges in graph...\n";
298
299 # Don't list out every witness if we have more than half to list.
300 my $majority = int( scalar( @{$self->tradition->witnesses} ) / 2 ) + 1;
301 foreach my $node( $self->readings ) {
302 my $newlabels = {};
303 # We will visit each node, so we only look ahead.
304 foreach my $edge ( $node->outgoing() ) {
305 add_hash_entry( $newlabels, $edge->to->name, $edge->name );
306 $self->del_path( $edge );
307 }
308
309 foreach my $newdest ( keys %$newlabels ) {
310 my $label;
311 my @compressed_wits = ();
312 if( @{$newlabels->{$newdest}} < $majority ) {
313 $label = join( ', ', @{$newlabels->{$newdest}} );
314 } else {
315 ## TODO FIX THIS HACK
6a222840 316 my @aclabels;
1f563ac3 317 foreach my $wit ( @{$newlabels->{$newdest}} ) {
6a222840 318 if( $wit =~ /^(.*?)(\s*\(?a\.\s*c\.\)?)$/ ) {
319 push( @aclabels, $wit );
1f563ac3 320 } else {
321 push( @compressed_wits, $wit );
322 }
323 }
6a222840 324 $label = join( ', ', 'majority', @aclabels );
1f563ac3 325 }
326
327 my $newedge =
328 $self->add_path( $node, $self->reading( $newdest ), $label );
329 if( @compressed_wits ) {
330 ## TODO fix this hack too.
331 $newedge->set_attribute( 'class',
332 join( '|', @compressed_wits ) );
333 }
334 }
335 }
336
337 $self->collapsed( 1 );
338}
339
340sub expand_graph_edges {
341 my $self = shift;
342 # Our collation graph has only one edge between any two nodes.
343 # This is great for display, but not so great for analysis.
344 # Expand this so that each witness has its own edge between any
345 # two reading nodes.
346 return unless $self->collapsed;
347
348 print STDERR "Expanding path edges in graph...\n";
349
350 foreach my $edge( $self->paths ) {
351 my $from = $edge->from;
352 my $to = $edge->to;
353 my @wits = split( /, /, $edge->label );
354 if( grep { $_ eq 'majority' } @wits ) {
355 push( @wits, split( /\|/, $edge->get_attribute( 'class' ) ) );
356 }
357 $self->del_path( $edge );
358 foreach ( @wits ) {
359 $self->add_path( $from, $to, $_ );
360 }
361 }
362 $self->collapsed( 0 );
363}
364
8e1394aa 365=back
366
de51424a 367=head2 Navigation methods
368
369=over
370
8e1394aa 371=item B<start>
372
373my $beginning = $collation->start();
374
375Returns the beginning of the collation, a meta-reading with label '#START#'.
376
377=cut
378
379sub start {
4a8828f0 380 # Return the beginning reading of the graph.
8e1394aa 381 my $self = shift;
382 my( $new_start ) = @_;
383 if( $new_start ) {
384 $self->del_reading( '#START#' );
385 $self->graph->rename_node( $new_start, '#START#' );
386 }
387 return $self->reading('#START#');
388}
389
e2902068 390=item B<reading_sequence>
391
392my @readings = $graph->reading_sequence( $first, $last, $path[, $alt_path] );
393
394Returns the ordered list of readings, starting with $first and ending
395with $last, along the given witness path. If no path is specified,
396assume that the path is that of the base text (if any.)
397
398=cut
399
400sub reading_sequence {
401 my( $self, $start, $end, $witness, $backup ) = @_;
402
930ff666 403 $witness = $self->baselabel unless $witness;
e2902068 404 my @readings = ( $start );
405 my %seen;
406 my $n = $start;
930ff666 407 while( $n && $n ne $end ) {
e2902068 408 if( exists( $seen{$n->name()} ) ) {
409 warn "Detected loop at " . $n->name();
410 last;
411 }
412 $seen{$n->name()} = 1;
413
414 my $next = $self->next_reading( $n, $witness, $backup );
415 warn "Did not find any path for $witness from reading " . $n->name
416 unless $next;
417 push( @readings, $next );
418 $n = $next;
419 }
420 # Check that the last reading is our end reading.
421 my $last = $readings[$#readings];
422 warn "Last reading found from " . $start->label() .
423 " for witness $witness is not the end!"
424 unless $last eq $end;
425
426 return @readings;
427}
428
4a8828f0 429=item B<next_reading>
8e1394aa 430
4a8828f0 431my $next_reading = $graph->next_reading( $reading, $witpath );
8e1394aa 432
4a8828f0 433Returns the reading that follows the given reading along the given witness
930ff666 434path.
8e1394aa 435
436=cut
437
4a8828f0 438sub next_reading {
e2902068 439 # Return the successor via the corresponding path.
8e1394aa 440 my $self = shift;
4a8828f0 441 return $self->_find_linked_reading( 'next', @_ );
8e1394aa 442}
443
4a8828f0 444=item B<prior_reading>
8e1394aa 445
4a8828f0 446my $prior_reading = $graph->prior_reading( $reading, $witpath );
8e1394aa 447
4a8828f0 448Returns the reading that precedes the given reading along the given witness
930ff666 449path.
8e1394aa 450
451=cut
452
4a8828f0 453sub prior_reading {
e2902068 454 # Return the predecessor via the corresponding path.
8e1394aa 455 my $self = shift;
4a8828f0 456 return $self->_find_linked_reading( 'prior', @_ );
8e1394aa 457}
458
4a8828f0 459sub _find_linked_reading {
e2902068 460 my( $self, $direction, $node, $path, $alt_path ) = @_;
461 my @linked_paths = $direction eq 'next'
8e1394aa 462 ? $node->outgoing() : $node->incoming();
e2902068 463 return undef unless scalar( @linked_paths );
8e1394aa 464
e2902068 465 # We have to find the linked path that contains all of the
466 # witnesses supplied in $path.
467 my( @path_wits, @alt_path_wits );
468 @path_wits = $self->witnesses_of_label( $path ) if $path;
469 @alt_path_wits = $self->witnesses_of_label( $alt_path ) if $alt_path;
470 my $base_le;
471 my $alt_le;
472 foreach my $le ( @linked_paths ) {
930ff666 473 if( $le->name eq $self->baselabel ) {
e2902068 474 $base_le = $le;
475 } else {
476 my @le_wits = $self->witnesses_of_label( $le->name );
477 if( _is_within( \@path_wits, \@le_wits ) ) {
478 # This is the right path.
479 return $direction eq 'next' ? $le->to() : $le->from();
480 } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) {
481 $alt_le = $le;
482 }
8e1394aa 483 }
484 }
e2902068 485 # Got this far? Return the alternate path if it exists.
486 return $direction eq 'next' ? $alt_le->to() : $alt_le->from()
487 if $alt_le;
488
489 # Got this far? Return the base path if it exists.
490 return $direction eq 'next' ? $base_le->to() : $base_le->from()
491 if $base_le;
492
493 # Got this far? We have no appropriate path.
8e1394aa 494 warn "Could not find $direction node from " . $node->label
e2902068 495 . " along path $path";
8e1394aa 496 return undef;
497}
498
4a8828f0 499# Some set logic.
500sub _is_within {
501 my( $set1, $set2 ) = @_;
7854e12e 502 my $ret = @$set1; # will be 0, i.e. false, if set1 is empty
4a8828f0 503 foreach my $el ( @$set1 ) {
504 $ret = 0 unless grep { /^\Q$el\E$/ } @$set2;
505 }
506 return $ret;
507}
508
de51424a 509
510## INITIALIZATION METHODS - for use by parsers
4a8828f0 511# Walk the paths for each witness in the graph, and return the nodes
e2902068 512# that the graph has in common. If $using_base is true, some
513# different logic is needed.
4a8828f0 514
515sub walk_witness_paths {
516 my( $self, $end ) = @_;
517 # For each witness, walk the path through the graph.
518 # Then we need to find the common nodes.
519 # TODO This method is going to fall down if we have a very gappy
520 # text in the collation.
521 my $paths = {};
3a1f2523 522 my @common_readings;
4a8828f0 523 foreach my $wit ( @{$self->tradition->witnesses} ) {
524 my $curr_reading = $self->start;
e2902068 525 my @wit_path = $self->reading_sequence( $self->start, $end,
526 $wit->sigil );
4a8828f0 527 $wit->path( \@wit_path );
e2902068 528
529 # Detect the common readings.
930ff666 530 @common_readings = _find_common( \@common_readings, \@wit_path );
4a8828f0 531 }
532
533 # Mark all the nodes as either common or not.
3a1f2523 534 foreach my $cn ( @common_readings ) {
e2902068 535 print STDERR "Setting " . $cn->name . " / " . $cn->label
536 . " as common node\n";
4a8828f0 537 $cn->make_common;
538 }
539 foreach my $n ( $self->readings() ) {
540 $n->make_variant unless $n->is_common;
541 }
3a1f2523 542 # Return an array of the common nodes in order.
543 return @common_readings;
4a8828f0 544}
545
930ff666 546sub _find_common {
547 my( $common_readings, $new_path ) = @_;
548 my @cr;
549 if( @$common_readings ) {
550 foreach my $n ( @$new_path ) {
551 push( @cr, $n ) if grep { $_ eq $n } @$common_readings;
552 }
553 } else {
554 push( @cr, @$new_path );
555 }
556 return @cr;
557}
558
559sub _remove_common {
560 my( $common_readings, $divergence ) = @_;
561 my @cr;
562 my %diverged;
563 map { $diverged{$_->name} = 1 } @$divergence;
564 foreach( @$common_readings ) {
565 push( @cr, $_ ) unless $diverged{$_->name};
566 }
567 return @cr;
568}
569
570
e2902068 571# An alternative to walk_witness_paths, for use when a collation is
6a222840 572# constructed from a base text and an apparatus. We have the
573# sequences of readings and just need to add path edges.
e2902068 574
6a222840 575sub make_witness_paths {
576 my( $self ) = @_;
e2902068 577
930ff666 578 my @common_readings;
e2902068 579 foreach my $wit ( @{$self->tradition->witnesses} ) {
15d2d3df 580 print STDERR "Making path for " . $wit->sigil . "\n";
6a222840 581 $self->make_witness_path( $wit );
582 @common_readings = _find_common( \@common_readings, $wit->path );
15d2d3df 583 @common_readings = _find_common( \@common_readings, $wit->uncorrected_path );
7854e12e 584 }
6a222840 585 return @common_readings;
7854e12e 586}
587
6a222840 588sub make_witness_path {
7854e12e 589 my( $self, $wit ) = @_;
590 my @chain = @{$wit->path};
15d2d3df 591 my $sig = $wit->sigil;
7854e12e 592 foreach my $idx ( 0 .. $#chain-1 ) {
6a222840 593 $self->add_path( $chain[$idx], $chain[$idx+1], $sig );
7854e12e 594 }
15d2d3df 595 @chain = @{$wit->uncorrected_path};
596 foreach my $idx( 0 .. $#chain-1 ) {
597 my $source = $chain[$idx];
598 my $target = $chain[$idx+1];
599 $self->add_path( $source, $target, "$sig (a.c.)" )
600 unless $self->has_path( $source, $target, $sig );
601 }
e2902068 602}
603
4a8828f0 604sub common_readings {
605 my $self = shift;
606 my @common = grep { $_->is_common } $self->readings();
de51424a 607 return sort { _cmp_position( $a->position, $b->position ) } @common;
4a8828f0 608}
609
610# Calculate the relative positions of nodes in the graph, if they
611# were not given to us.
612sub calculate_positions {
3a1f2523 613 my( $self, @ordered_common ) = @_;
4a8828f0 614
615 # We have to calculate the position identifiers for each word,
616 # keyed on the common nodes. This will be 'fun'. The end result
617 # is a hash per witness, whose key is the word node and whose
618 # value is its position in the text. Common nodes are always N,1
619 # so have identical positions in each text.
4a8828f0 620
621 my $node_pos = {};
622 foreach my $wit ( @{$self->tradition->witnesses} ) {
930ff666 623 print STDERR "Calculating positions in " . $wit->sigil . "\n";
624 _update_positions_from_path( $wit->path, @ordered_common );
6a222840 625 _update_positions_from_path( $wit->uncorrected_path, @ordered_common )
626 if $wit->has_ante_corr;
930ff666 627 }
628
629 # DEBUG
630 foreach my $r ( $self->readings() ) {
631 print STDERR "Reading " . $r->name . "/" . $r->label . " has no position\n"
632 unless( $r->has_position );
4a8828f0 633 }
3a1f2523 634
635 $self->init_lemmata();
4a8828f0 636}
637
930ff666 638sub _update_positions_from_path {
639 my( $path, @ordered_common ) = @_;
640
641 # First we walk the given path, making a matrix for the witness
642 # that corresponds to its eventual position identifier. Common
643 # nodes always start a new row, and are thus always in the first
644 # column.
930ff666 645 my $wit_matrix = [];
646 my $cn = 0; # We should hit the common readings in order.
647 my $row = [];
648 foreach my $wn ( @{$path} ) {
649 if( $wn eq $ordered_common[$cn] ) {
650 # Set up to look for the next common node, and
651 # start a new row of words.
652 $cn++;
653 push( @$wit_matrix, $row ) if scalar( @$row );
654 $row = [];
655 }
656 push( @$row, $wn );
657 }
658 push( @$wit_matrix, $row ); # Push the last row onto the matrix
659
660 # Now we have a matrix per witness, so that each row in the
661 # matrix begins with a common node, and continues with all the
662 # variant words that appear in the witness. We turn this into
663 # real positions in row,cell format. But we need some
664 # trickery in order to make sure that each node gets assigned
665 # to only one position.
666
667 foreach my $li ( 1..scalar(@$wit_matrix) ) {
668 foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
669 my $reading = $wit_matrix->[$li-1]->[$di-1];
670 my $position = "$li,$di";
6a222840 671
930ff666 672 # If we have seen this node before, we need to compare
673 # its position with what went before.
674 unless( $reading->has_position &&
675 _cmp_position( $position, $reading->position ) < 1 ) {
676 # The new position ID replaces the old one.
677 $reading->position( $position );
678 } # otherwise, the old position needs to stay.
679 }
680 }
681}
682
4a8828f0 683sub _cmp_position {
684 my( $a, $b ) = @_;
de51424a 685 if ( $a && $b ) {
686 my @pos_a = split(/,/, $a );
687 my @pos_b = split(/,/, $b );
688
689 my $big_cmp = $pos_a[0] <=> $pos_b[0];
690 return $big_cmp if $big_cmp;
691 # else
692 return $pos_a[1] <=> $pos_b[1];
693 } elsif ( $b ) { # a is undefined
694 return -1;
695 } elsif ( $a ) { # b is undefined
696 return 1;
697 }
698 return 0; # they are both undefined
8e1394aa 699}
3a1f2523 700
701sub all_positions {
702 my $self = shift;
703 my %positions = ();
704 map { $positions{$_->position} = 1 } $self->readings;
de51424a 705 my @answer = sort { _cmp_position( $a, $b ) } keys( %positions );
706 return @answer;
3a1f2523 707}
708
709sub readings_at_position {
710 my( $self, $pos ) = @_;
711 my @answer = grep { $_->position eq $pos } $self->readings;
712 return @answer;
713}
714
715## Lemmatizer functions
716
717sub init_lemmata {
718 my $self = shift;
719
720 foreach my $position ( $self->all_positions ) {
721 $self->lemmata->{$position} = undef;
722 }
723
724 foreach my $cr ( $self->common_readings ) {
725 $self->lemmata->{$cr->position} = $cr->name;
726 }
727}
728
729=item B<lemma_readings>
730
731my @state = $graph->lemma_readings( @readings_delemmatized );
732
733Takes a list of readings that have just been delemmatized, and returns
734a set of tuples of the form ['reading', 'state'] that indicates what
735changes need to be made to the graph.
736
737=over
738
739=item *
740
741A state of 1 means 'lemmatize this reading'
742
743=item *
744
745A state of 0 means 'delemmatize this reading'
746
747=item *
748
749A state of undef means 'an ellipsis belongs in the text here because
750no decision has been made / an earlier decision was backed out'
751
752=back
753
754=cut
755
756sub lemma_readings {
757 my( $self, @toggled_off_nodes ) = @_;
758
759 # First get the positions of those nodes which have been
760 # toggled off.
761 my $positions_off = {};
762 map { $positions_off->{ $_->position } = $_->name } @toggled_off_nodes;
de51424a 763
3a1f2523 764 # Now for each position, we have to see if a node is on, and we
765 # have to see if a node has been turned off.
766 my @answer;
767 foreach my $pos ( $self->all_positions() ) {
768 # Find the state of this position. If there is an active node,
769 # its name will be the state; otherwise the state will be 0
770 # (nothing at this position) or undef (ellipsis at this position)
771 my $active = $self->lemmata->{$pos};
772
773 # Is there a formerly active node that was toggled off?
774 if( exists( $positions_off->{$pos} ) ) {
775 my $off_node = $positions_off->{$pos};
776 if( $active && $active ne $off_node) {
777 push( @answer, [ $off_node, 0 ], [ $active, 1 ] );
778 } else {
779 push( @answer, [ $off_node, $active ] );
780 }
781
782 # No formerly active node, so we just see if there is a currently
783 # active one.
784 } elsif( $active ) {
785 # Push the active node, whatever it is.
786 push( @answer, [ $active, 1 ] );
787 } else {
788 # Push the state that is there. Arbitrarily use the first node
789 # at that position.
790 my @pos_nodes = $self->readings_at_position( $pos );
de51424a 791 push( @answer, [ $pos_nodes[0]->name, $self->lemmata->{$pos} ] );
3a1f2523 792 }
793 }
794
795 return @answer;
796}
797
de51424a 798=item B<toggle_reading>
799
800my @readings_delemmatized = $graph->toggle_reading( $reading_name );
801
802Takes a reading node name, and either lemmatizes or de-lemmatizes
803it. Returns a list of all readings that are de-lemmatized as a result
804of the toggle.
805
806=cut
807
808sub toggle_reading {
809 my( $self, $rname ) = @_;
810
811 return unless $rname;
812 my $reading = $self->reading( $rname );
813 if( !$reading || $reading->is_common() ) {
814 # Do nothing, it's a common node.
815 return;
816 }
817
818 my $pos = $reading->position;
819 my $old_state = $self->lemmata->{$pos};
820 my @readings_off;
821 if( $old_state && $old_state eq $rname ) {
822 # Turn off the node. We turn on no others by default.
823 push( @readings_off, $reading );
824 } else {
825 # Turn on the node.
826 $self->lemmata->{$pos} = $rname;
827 # Any other 'on' readings in the same position should be off.
828 push( @readings_off, $self->same_position_as( $reading ) );
829 # Any node that is an identical transposed one should be off.
830 push( @readings_off, $reading->identical_readings );
831 }
832 @readings_off = unique_list( @readings_off );
833
834 # Turn off the readings that need to be turned off.
835 my @readings_delemmatized;
836 foreach my $n ( @readings_off ) {
837 my $state = $self->lemmata->{$n->position};
838 if( $state && $state eq $n->name ) {
839 # this reading is still on, so turn it off
840 push( @readings_delemmatized, $n );
841 my $new_state = undef;
842 if( $n eq $reading ) {
843 # This is the reading that was clicked, so if there are no
844 # other readings there, turn off the position. In all other
845 # cases, restore the ellipsis.
846 my @other_n = $self->same_position_as( $n );
847 $new_state = 0 unless @other_n;
848 }
849 $self->lemmata->{$n->position} = $new_state;
850 } elsif( $old_state && $old_state eq $n->name ) {
851 # another reading has already been turned on here
852 push( @readings_delemmatized, $n );
853 } # else some other reading was on anyway, so pass.
854 }
855 return @readings_delemmatized;
856}
857
858sub same_position_as {
859 my( $self, $reading ) = @_;
860 my $pos = $reading->position;
861 my @same = grep { $_ ne $reading } $self->readings_at_position( $reading->position );
862 return @same;
863}
3a1f2523 864
4a8828f0 865# Return the string that joins together a list of witnesses for
866# display on a single path.
867sub path_label {
868 my $self = shift;
869 return join( $self->wit_list_separator, @_ );
870}
871
872sub witnesses_of_label {
de51424a 873 my( $self, $label ) = @_;
4a8828f0 874 my $regex = $self->wit_list_separator;
de51424a 875 my @answer = split( /\Q$regex\E/, $label );
876 return @answer;
4a8828f0 877}
8e1394aa 878
de51424a 879sub unique_list {
880 my( @list ) = @_;
881 my %h;
882 map { $h{$_->name} = $_ } @list;
883 return values( %h );
884}
885
1f563ac3 886sub add_hash_entry {
887 my( $hash, $key, $entry ) = @_;
888 if( exists $hash->{$key} ) {
889 push( @{$hash->{$key}}, $entry );
890 } else {
891 $hash->{$key} = [ $entry ];
892 }
893}
894
dd3b58b0 895no Moose;
896__PACKAGE__->meta->make_immutable;