add expand/collapse path edge functionality
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation.pm
CommitLineData
dd3b58b0 1package Text::Tradition::Collation;
d047cd52 2
3use Graph::Easy;
8e1394aa 4use IPC::Run qw( run binary );
8e1394aa 5use Text::Tradition::Collation::Reading;
dd3b58b0 6use Moose;
7
# The underlying directed graph. Readings are its nodes and witness
# paths are its edges, so the Graph::Easy node/edge methods are
# re-exported here under reading/path names.
has graph => (
    is      => 'ro',
    isa     => 'Graph::Easy',
    handles => {
        add_reading => 'add_node',
        del_reading => 'del_node',
        add_path    => 'add_edge',
        del_path    => 'del_edge',
        reading     => 'node',
        path        => 'edge',
        readings    => 'nodes',
        paths       => 'edges',
    },
    default => sub { Graph::Easy->new( undirected => 0 ) },
);

# The tradition object that this collation belongs to.
has tradition => (
    is  => 'rw',
    isa => 'Text::Tradition',
);

# Cached SVG rendering of the graph; see as_svg.
has svg => (
    is        => 'ro',
    isa       => 'Str',
    writer    => '_save_svg',
    predicate => 'has_svg',
);

# Cached GraphViz source for the graph; see as_svg.
has graphviz => (
    is        => 'ro',
    isa       => 'Str',
    writer    => '_save_graphviz',
    predicate => 'has_graphviz',
);

# Cached GraphML document for the graph; see as_graphml.
has graphml => (
    is        => 'ro',
    isa       => 'XML::LibXML::Document',
    writer    => '_save_graphml',
    predicate => 'has_graphml',
);

# Keeps track of the lemmas within the collation. At most one lemma
# per position in the graph.
has lemmata => (
    is      => 'ro',
    isa     => 'HashRef[Maybe[Str]]',
    default => sub { {} },
);

# Separator used when several witnesses are listed in one path label.
has wit_list_separator => (
    is      => 'rw',
    isa     => 'Str',
    default => ', ',
);

# Label carried by paths that belong to the base text.
has baselabel => (
    is      => 'rw',
    isa     => 'Str',
    default => 'base text',
);

# True while the per-witness paths are merged into one edge per pair
# of readings; see collapse_graph_edges and expand_graph_edges.
has collapsed => (
    is  => 'rw',
    isa => 'Bool',
);
75
76
dd3b58b0 77# The collation can be created two ways:
78# 1. Collate a set of witnesses (with CollateX I guess) and process
79# the results as in 2.
80# 2. Read a pre-prepared collation in one of a variety of formats,
81# and make the graph from that.
82
83# The graph itself will (for now) be immutable, and the positions
84# within the graph will also be immutable. We need to calculate those
85# positions upon graph construction. The equivalences between graph
86# nodes will be mutable, entirely determined by the user (or possibly
87# by some semantic pre-processing provided by the user.) So the
88# constructor should just make an empty equivalences object. The
89# constructor will also need to make the witness objects, if we didn't
90# come through option 1.
91
# Post-construction hook. Tells the graph to create its nodes as
# Text::Tradition::Collation::Reading objects and applies the node
# shape given in the constructor arguments.
sub BUILD {
    my ( $self, $args ) = @_;
    my $graph = $self->graph;
    $graph->use_class( 'node', 'Text::Tradition::Collation::Reading' );

    # Pass through any graph-specific options. Nodes are drawn as
    # ellipses unless a 'shape' argument says otherwise.
    my $shape = 'ellipse';
    $shape = $args->{'shape'} if exists $args->{'shape'};
    $graph->set_attribute( 'node', 'shape', $shape );
}
784877d9 100
7854e12e 101# Wrapper around add_path
102
# Guard around the delegated add_path.
#
# Expects exactly three arguments: source, target and witness label.
# Source and target may be Reading objects or reading names. Returns
# nothing (after a warning) when called with the wrong number of
# arguments, and nothing when a path with the same label already links
# source to target; otherwise returns whatever the wrapped method does.
around add_path => sub {
    my $orig = shift;
    my $self = shift;

    # Make sure there are three arguments
    unless ( @_ == 3 ) {
        warn "Call add_path with args source, target, witness";
        return;
    }
    my ( $source, $target, $wit ) = @_;

    # Resolve names to reading objects for the duplicate check only;
    # the wrapped method still receives the original arguments.
    my $reading_class = 'Text::Tradition::Collation::Reading';
    my $from = ref( $source ) eq $reading_class
        ? $source : $self->reading( $source );
    my $to = ref( $target ) eq $reading_class
        ? $target : $self->reading( $target );

    # Make sure the proposed path does not yet exist. A name that does
    # not resolve to a reading cannot have any paths yet, so the check
    # is skipped instead of calling a method on undef.
    if ( $from && $to ) {
        foreach my $path ( $from->edges_to( $to ) ) {
            my $label = $path->label;
            return if defined $label && $label eq $wit;
        }
    }

    # Do the deed
    return $self->$orig( @_ );
};
126
127# Wrapper around merge_nodes
784877d9 128
# Merge the second reading into the first: first let the reading
# objects combine their own data via merge_from, then merge the two
# graph nodes. Any further arguments are handed on to merge_nodes.
# Returns whatever the graph's merge_nodes returns.
sub merge_readings {
    my ( $self, $kept, $absorbed, @extra ) = @_;
    $kept->merge_from( $absorbed );
    return $self->graph->merge_nodes( $kept, $absorbed, @extra );
}
137
8e1394aa 138=head2 Output method(s)
139
140=over
141
142=item B<as_svg>
143
144print $graph->as_svg( $recalculate );
145
146Returns an SVG string that represents the graph. Uses GraphViz to do
this, because Graph::Easy doesn't cope well with long graphs. Unless
8e1394aa 148$recalculate is passed (and is a true value), the method will return a
149cached copy of the SVG after the first call to the method.
150
151=cut
152
# Render the collation graph as SVG by piping its GraphViz source
# through the external "dot" program.
#
# $recalc - when true, ignore the cached SVG and GraphViz source and
#           regenerate both.
#
# Returns the SVG string, and caches it for later calls.
sub as_svg {
    my ( $self, $recalc ) = @_;

    # Serve the cached rendering unless a recalculation was requested.
    return $self->svg if $self->has_svg && !$recalc;

    # Draw one edge per pair of readings rather than one per witness.
    $self->collapse_graph_edges();
    $self->_save_graphviz( $self->graph->as_graphviz() )
        unless $self->has_graphviz && !$recalc;

    my @cmd = qw/dot -Tsvg/;
    my $svg;
    my $in = $self->graphviz;

    # Restore the per-witness paths even when dot cannot be run, then
    # pass the original error on to the caller.
    my $ran = eval { run( \@cmd, \$in, ">", binary(), \$svg ); 1 };
    my $error = $@;
    $self->expand_graph_edges();
    die $error unless $ran;

    $self->{'svg'} = $svg;
    return $svg;
}
169
170=item B<as_graphml>
171
172print $graph->as_graphml( $recalculate )
173
174Returns a GraphML representation of the collation graph, with
175transposition information and position information. Unless
176$recalculate is passed (and is a true value), the method will return a
cached copy of the GraphML document after the first call to the method.
178
179=cut
180
# Build a GraphML document describing the collation graph.
#
# $recalc - when true, ignore the cached document and rebuild it.
#
# Each reading becomes a <node> carrying <data> children for its name,
# its token (label), the name of its primary reading when it has one,
# and its position when it has one. Each path of the collapsed graph
# becomes an <edge>. Returns the XML::LibXML document and caches it.
sub as_graphml {
    my ( $self, $recalc ) = @_;
    return $self->graphml if $self->has_graphml && !$recalc;

    # Loaded on demand here; a no-op when it has already been loaded.
    require XML::LibXML;

    # Some namespaces
    my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns';
    my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance';
    my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' .
        'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd';

    # Create the document and root node
    my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" );
    my $root = $graphml->createElementNS( $graphml_ns, 'graphml' );
    $graphml->setDocumentElement( $root );
    $root->setNamespace( $xsi_ns, 'xsi', 0 );
    $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema );

    # Add the data keys for nodes
    my @node_data = ( 'name', 'token', 'identical', 'position' );
    foreach my $ndi ( 0 .. $#node_data ) {
        my $key = $root->addNewChild( $graphml_ns, 'key' );
        $key->setAttribute( 'attr.name', $node_data[$ndi] );
        $key->setAttribute( 'attr.type', 'string' );
        $key->setAttribute( 'for', 'node' );
        $key->setAttribute( 'id', 'd'.$ndi );
    }

    # Add the data keys for edges, one per witness, named by sigil
    # rather than by the stringified witness object.
    my %wit_hash;
    my $wit_ctr = 0;
    foreach my $wit ( @{$self->tradition->witnesses} ) {
        my $sigil = $wit->sigil;
        my $wit_key = 'w' . $wit_ctr++;
        $wit_hash{$sigil} = $wit_key;
        my $key = $root->addNewChild( $graphml_ns, 'key' );
        $key->setAttribute( 'attr.name', $sigil );
        $key->setAttribute( 'attr.type', 'string' );
        $key->setAttribute( 'for', 'edge' );
        $key->setAttribute( 'id', $wit_key );
    }

    # Add the graph, its nodes, and its edges
    $self->collapse_graph_edges();
    my @readings = $self->readings;
    my @paths = $self->paths;
    my $graph = $root->addNewChild( $graphml_ns, 'graph' );
    $graph->setAttribute( 'edgedefault', 'directed' );
    $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful
    $graph->setAttribute( 'parse.edgeids', 'canonical' );
    $graph->setAttribute( 'parse.edges', scalar( @paths ) );
    $graph->setAttribute( 'parse.nodeids', 'canonical' );
    $graph->setAttribute( 'parse.nodes', scalar( @readings ) );
    $graph->setAttribute( 'parse.order', 'nodesfirst' );

    my $node_ctr = 0;
    my %node_hash;
    foreach my $n ( @readings ) {
        # Collect only the values this reading actually has, keyed by
        # the id of the matching <key> element.
        my %this_node_data;
        foreach my $ndi ( 0 .. $#node_data ) {
            my $field = $node_data[$ndi];
            my $value;
            if ( $field eq 'name' ) {
                $value = $n->name;
            }
            elsif ( $field eq 'token' ) {
                $value = $n->label;
            }
            elsif ( $field eq 'identical' ) {
                $value = $n->primary->name if $n->has_primary;
            }
            elsif ( $field eq 'position' ) {
                $value = $n->position if $n->has_position;
            }
            $this_node_data{'d'.$ndi} = $value if defined $value;
        }
        my $node_el = $graph->addNewChild( $graphml_ns, 'node' );
        my $node_xmlid = 'n' . $node_ctr++;
        $node_hash{ $n->name } = $node_xmlid;
        $node_el->setAttribute( 'id', $node_xmlid );

        foreach my $dk ( sort keys %this_node_data ) {
            my $d_el = $node_el->addNewChild( $graphml_ns, 'data' );
            $d_el->setAttribute( 'key', $dk );
            # appendText adds character data; appendTextChild would
            # create a child element instead.
            $d_el->appendText( $this_node_data{$dk} );
        }
    }

    foreach my $e ( @paths ) {
        my( $name, $from, $to ) = ( $e->name,
                                    $node_hash{ $e->from()->name() },
                                    $node_hash{ $e->to()->name() } );
        my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' );
        $edge_el->setAttribute( 'source', $from );
        $edge_el->setAttribute( 'target', $to );
        $edge_el->setAttribute( 'id', $name );
        # TODO Got to add the witnesses
    }

    # Return the thing
    $self->_save_graphml( $graphml );
    $self->expand_graph_edges();
    return $graphml;
}
274
# Replace the per-witness paths between each pair of readings with a
# single path whose label lists the witnesses. Does nothing when the
# graph is already marked as collapsed.
sub collapse_graph_edges {
    my $self = shift;
    return if $self->collapsed;

    print STDERR "Collapsing path edges in graph...\n";

    # Matches witness labels that carry a "p. c." suffix.
    my $pc_label = qr/^(.*?)(\s*\(?p\.\s*c\.\)?)$/;

    # Don't list out every witness if we have more than half to list.
    my $majority = int( scalar( @{$self->tradition->witnesses} ) / 2 ) + 1;
    foreach my $node ( $self->readings ) {
        # We will visit each node, so we only look ahead: gather the
        # witness names per destination and drop the original paths.
        my %wits_for;
        foreach my $edge ( $node->outgoing() ) {
            push( @{ $wits_for{ $edge->to->name } }, $edge->name );
            $self->del_path( $edge );
        }

        foreach my $dest ( keys %wits_for ) {
            my @wits = @{ $wits_for{$dest} };
            my( $label, @compressed_wits );
            if ( @wits < $majority ) {
                $label = join( ', ', @wits );
            }
            else {
                ## TODO FIX THIS HACK
                # The "p. c." witnesses stay visible in the label; the
                # others are folded into the word 'majority'.
                my @pclabels;
                foreach my $wit ( @wits ) {
                    if ( $wit =~ $pc_label ) {
                        push( @pclabels, $wit );
                    }
                    else {
                        push( @compressed_wits, $wit );
                    }
                }
                $label = join( ', ', 'majority', @pclabels );
            }

            my $newedge =
                $self->add_path( $node, $self->reading( $dest ), $label );

            ## TODO fix this hack too.
            # The folded witnesses are stored in the 'class' attribute
            # so that expand_graph_edges can restore them.
            $newedge->set_attribute( 'class', join( '|', @compressed_wits ) )
                if @compressed_wits;
        }
    }

    $self->collapsed( 1 );
}
325
# Undo collapse_graph_edges: replace each combined path with one path
# per witness. Does nothing unless the graph is marked as collapsed.
#
# The witnesses come from the path label, split on ', '. The word
# 'majority' in a label is a placeholder, not a witness: it is dropped
# and the witnesses stored in the path's 'class' attribute (joined by
# '|') are used in its place.
sub expand_graph_edges {
    my $self = shift;
    return unless $self->collapsed;

    print STDERR "Expanding path edges in graph...\n";

    foreach my $edge ( $self->paths ) {
        my $from = $edge->from;
        my $to   = $edge->to;
        my @labels = split( /, /, $edge->label );
        my @wits = grep { $_ ne 'majority' } @labels;
        if ( @wits != @labels ) {
            # There is no class attribute when every witness on the
            # path kept its own label.
            my $class = $edge->get_attribute( 'class' );
            push( @wits, split( /\|/, $class ) )
                if defined $class && length $class;
        }
        $self->del_path( $edge );
        foreach my $wit ( @wits ) {
            $self->add_path( $from, $to, $wit );
        }
    }
    $self->collapsed( 0 );
}
350
8e1394aa 351=back
352
de51424a 353=head2 Navigation methods
354
355=over
356
8e1394aa 357=item B<start>
358
359my $beginning = $collation->start();
360
361Returns the beginning of the collation, a meta-reading with label '#START#'.
362
363=cut
364
# Return the beginning reading of the graph, the one named '#START#'.
# When a reading is passed in, the existing '#START#' reading is
# deleted first and the given one is renamed to take its place.
sub start {
    my ( $self, $new_start ) = @_;
    my $start_name = '#START#';
    if ( $new_start ) {
        $self->del_reading( $start_name );
        $self->graph->rename_node( $new_start, $start_name );
    }
    return $self->reading( $start_name );
}
375
e2902068 376=item B<reading_sequence>
377
378my @readings = $graph->reading_sequence( $first, $last, $path[, $alt_path] );
379
380Returns the ordered list of readings, starting with $first and ending
381with $last, along the given witness path. If no path is specified,
382assume that the path is that of the base text (if any.)
383
384=cut
385
# Follow a witness path from $start until $end is reached.
#
# $start, $end - the first and last readings of the sequence
# $witness     - label of the path to follow; defaults to the base
#                label when false
# $backup      - alternative path label, handed on to next_reading
#
# Returns the list of readings visited, beginning with $start. The
# walk stops early, with a warning, when a reading is seen twice or
# when no next reading can be found; the list then ends at the last
# reading reached and never contains an undefined element.
sub reading_sequence {
    my ( $self, $start, $end, $witness, $backup ) = @_;

    $witness = $self->baselabel unless $witness;
    my @readings = ( $start );
    my %seen;
    my $n = $start;
    while ( $n && $n ne $end ) {
        if ( exists( $seen{ $n->name() } ) ) {
            warn "Detected loop at " . $n->name();
            last;
        }
        $seen{ $n->name() } = 1;

        my $next = $self->next_reading( $n, $witness, $backup );
        unless ( $next ) {
            # Stop here rather than appending undef to the sequence.
            warn "Did not find any path for $witness from reading " . $n->name;
            last;
        }
        push( @readings, $next );
        $n = $next;
    }

    # Check that the last reading is our end reading.
    my $last = $readings[-1];
    warn "Last reading found from " . $start->label() .
        " for witness $witness is not the end!"
        unless defined $last && defined $end && $last eq $end;

    return @readings;
}
414
4a8828f0 415=item B<next_reading>
8e1394aa 416
4a8828f0 417my $next_reading = $graph->next_reading( $reading, $witpath );
8e1394aa 418
4a8828f0 419Returns the reading that follows the given reading along the given witness
930ff666 420path.
8e1394aa 421
422=cut
423
# Return the successor of a reading via the corresponding path. All
# arguments after the invocant are handed on to _find_linked_reading.
sub next_reading {
    my ( $self, @args ) = @_;
    return $self->_find_linked_reading( 'next', @args );
}
429
4a8828f0 430=item B<prior_reading>
8e1394aa 431
4a8828f0 432my $prior_reading = $graph->prior_reading( $reading, $witpath );
8e1394aa 433
4a8828f0 434Returns the reading that precedes the given reading along the given witness
930ff666 435path.
8e1394aa 436
437=cut
438
# Return the predecessor of a reading via the corresponding path. All
# arguments after the invocant are handed on to _find_linked_reading.
sub prior_reading {
    my ( $self, @args ) = @_;
    return $self->_find_linked_reading( 'prior', @args );
}
444
# Find the reading linked to $node along a witness path.
#
# $direction - 'next' follows outgoing paths; anything else follows
#              incoming paths
# $node      - the reading to start from
# $path      - label of the wanted path
# $alt_path  - label of a path to fall back on
#
# A linked path whose witnesses include every witness of $path wins
# outright. Failing that, the last path seen that includes every
# witness of $alt_path is used, and failing that a path named with
# the base label. Returns undef when the node has no linked paths, or
# (after a warning) when nothing appropriate is found.
sub _find_linked_reading {
    my ( $self, $direction, $node, $path, $alt_path ) = @_;
    my $forward = $direction eq 'next';
    my @linked_paths = $forward ? $node->outgoing() : $node->incoming();
    return undef unless scalar( @linked_paths );

    # We have to find the linked path that contains all of the
    # witnesses supplied in $path.
    my( @path_wits, @alt_path_wits );
    @path_wits = $self->witnesses_of_label( $path ) if $path;
    @alt_path_wits = $self->witnesses_of_label( $alt_path ) if $alt_path;

    my( $base_le, $alt_le );
    foreach my $le ( @linked_paths ) {
        if ( $le->name eq $self->baselabel ) {
            $base_le = $le;
            next;
        }
        my @le_wits = $self->witnesses_of_label( $le->name );
        if ( _is_within( \@path_wits, \@le_wits ) ) {
            # This is the right path.
            return $forward ? $le->to() : $le->from();
        }
        $alt_le = $le if _is_within( \@alt_path_wits, \@le_wits );
    }

    # Got this far? Use the alternate path if there is one, and the
    # base path otherwise.
    foreach my $fallback ( $alt_le, $base_le ) {
        next unless $fallback;
        return $forward ? $fallback->to() : $fallback->from();
    }

    # Got this far? We have no appropriate path.
    warn "Could not find $direction node from " . $node->label
        . " along path $path";
    return undef;
}
484
4a8828f0 485# Some set logic.
# Subset test. Returns the number of elements in the first list when
# every one of them also occurs in the second list, and 0 otherwise.
# An empty first list therefore yields 0, i.e. false.
sub _is_within {
    my ( $set1, $set2 ) = @_;
    foreach my $el ( @$set1 ) {
        return 0 unless grep { /^\Q$el\E$/ } @$set2;
    }
    return scalar( @$set1 );
}
494
de51424a 495
496## INITIALIZATION METHODS - for use by parsers
4a8828f0 497# Walk the paths for each witness in the graph, and return the nodes
e2902068 498# that the graph has in common. If $using_base is true, some
499# different logic is needed.
4a8828f0 500
# Walk the path of every witness from the start reading to $end,
# store each sequence on its witness, and work out which readings all
# witnesses share.
#
# Every shared reading is marked with make_common, and every other
# reading in the graph with make_variant. Returns the shared readings
# in path order.
#
# TODO This method is going to fall down if we have a very gappy
# text in the collation.
sub walk_witness_paths {
    my ( $self, $end ) = @_;

    my @common_readings;
    foreach my $wit ( @{$self->tradition->witnesses} ) {
        my @wit_path = $self->reading_sequence( $self->start, $end,
                                                $wit->sigil );
        $wit->path( \@wit_path );

        # Detect the common readings.
        @common_readings = _find_common( \@common_readings, \@wit_path );
    }

    # Mark all the nodes as either common or not.
    foreach my $cn ( @common_readings ) {
        print STDERR "Setting " . $cn->name . " / " . $cn->label
            . " as common node\n";
        $cn->make_common;
    }
    foreach my $n ( $self->readings() ) {
        $n->make_variant unless $n->is_common;
    }

    # Return an array of the common nodes in order.
    return @common_readings;
}
531
# Narrow a running list of common readings with one more path. An
# empty running list is treated as "nothing collected yet", so the
# whole new path is returned. Otherwise the result is those elements
# of the new path that also occur in the running list, in path order.
sub _find_common {
    my ( $common_readings, $new_path ) = @_;
    return @$new_path unless @$common_readings;
    return grep {
        my $candidate = $_;
        grep { $_ eq $candidate } @$common_readings;
    } @$new_path;
}
544
# Return the common readings minus any whose name matches the name of
# a reading in the divergence list. Order is preserved.
sub _remove_common {
    my ( $common_readings, $divergence ) = @_;
    my %diverged = map { ( $_->name => 1 ) } @$divergence;
    return grep { !$diverged{ $_->name } } @$common_readings;
}
555
556
e2902068 557# An alternative to walk_witness_paths, for use when a collation is
558# constructed from a base text and an apparatus. Also modifies the
559# collation graph to remove all 'base text' paths and replace them
560# with real witness paths.
561
# Alternative to walk_witness_paths for a collation built from a base
# text plus an apparatus.
#
# For every witness, its reading sequence is stored on the witness and
# real witness paths are added along it. When the witness has a
# post-correction layer, that layer's sequence is compared with the
# original one and a correction is recorded on the witness for each
# stretch where the two differ. Finally all paths labelled with the
# base label are deleted.
#
# Returns the readings found to be common to all witness paths.
sub walk_and_expand_base {
    my ( $self, $end ) = @_;

    my @common_readings;
    foreach my $wit ( @{$self->tradition->witnesses} ) {
        my $sig = $wit->sigil;
        my $post_sig;
        $post_sig = $wit->post_correctione
            if $wit->has_post_correctione;

        my @wit_path = $self->reading_sequence( $self->start, $end, $sig );
        $wit->path( \@wit_path );
        $self->connect_readings_for_witness( $wit );
        @common_readings = _find_common( \@common_readings, \@wit_path );

        # If there is a post-correctio, get its path and compare.
        # Add a correction range for each divergence.
        next unless $post_sig;

        my @corr_wit_path = $self->reading_sequence( $self->start, $end,
                                                     "$sig$post_sig", $sig );

        # Map ante-corr readings to their indices
        my %in_orig;
        my $i = 0;
        $in_orig{ $_->name } = $i++ foreach @wit_path;

        # Look for divergences
        my $diverged = 0;
        my $last_common;
        my @correction;
        foreach my $rdg ( @corr_wit_path ) {
            my $shared = exists( $in_orig{ $rdg->name } );
            if ( $shared && !$diverged ) {
                # We are reading the same here
                $last_common = $in_orig{ $rdg->name };
            }
            elsif ( $shared ) {
                # We have been diverging but are reading the same again.
                # Add the correction to the witness.
                my $offset = defined $last_common ? $last_common + 1 : 0;
                my $length = $in_orig{ $rdg->name } - $offset;
                $wit->add_correction( $offset, $length, @correction );
                $diverged = 0;
                @common_readings =
                    _remove_common( \@common_readings, \@correction );
                @correction = ();
                $last_common = $in_orig{ $rdg->name };
            }
            else {
                # We have started to diverge, or are in the middle of
                # a divergence. Note it.
                $diverged = 1;
                push( @correction, $rdg );
            }
        }

        # Add any divergence that is at the end of the text. It is
        # recorded like the others: the length covers the rest of the
        # original path and the replacement is passed as a list.
        if ( $diverged ) {
            my $offset = defined $last_common ? $last_common + 1 : 0;
            my $length = scalar( @wit_path ) - $offset;
            $wit->add_correction( $offset, $length, @correction );
            @common_readings =
                _remove_common( \@common_readings, \@correction );
        }
    }

    # Remove any 'base text' paths.
    foreach my $path ( $self->paths ) {
        $self->del_path( $path )
            if $path->label eq $self->baselabel;
    }

    return @common_readings;
}
629
# Add a path labelled with the witness's sigil between each pair of
# neighbouring readings in the witness's stored path.
sub connect_readings_for_witness {
    my ( $self, $wit ) = @_;
    my @chain = @{ $wit->path };
    foreach my $next_idx ( 1 .. $#chain ) {
        $self->add_path( $chain[ $next_idx - 1 ], $chain[$next_idx],
                         $wit->sigil );
    }
}
637
# Return the readings that are marked as common, ordered by position.
sub common_readings {
    my $self = shift;
    return sort { _cmp_position( $a->position, $b->position ) }
           grep { $_->is_common } $self->readings();
}
643
644# Calculate the relative positions of nodes in the graph, if they
645# were not given to us.
# Calculate the relative positions of the readings in the graph.
#
# @ordered_common - the common readings, in text order
#
# Positions are assigned by walking each witness's path, and its
# corrected path when it has a post-correction layer. Common readings
# always open a new row, so they get the same position in every
# witness. Afterwards the lemmata are initialised.
sub calculate_positions {
    my ( $self, @ordered_common ) = @_;

    foreach my $wit ( @{$self->tradition->witnesses} ) {
        print STDERR "Calculating positions in " . $wit->sigil . "\n";
        _update_positions_from_path( $wit->path, @ordered_common );
        if ( $wit->has_post_correctione ) {
            _update_positions_from_path( $wit->corrected_path,
                                         @ordered_common );
        }
    }

    # DEBUG
    foreach my $r ( $self->readings() ) {
        next if $r->has_position;
        print STDERR "Reading " . $r->name . "/" . $r->label . " has no position\n";
    }

    $self->init_lemmata();
}
671
# Assign "row,cell" positions to the readings along one path.
#
# $path           - array reference of readings, in order
# @ordered_common - the common readings, in the order they are
#                   expected to occur on the path
#
# A reading keeps the position it already has unless the one computed
# here compares greater under _cmp_position.
sub _update_positions_from_path {
    my ( $path, @ordered_common ) = @_;

    # First we walk the given path, making a matrix for the witness
    # that corresponds to its eventual position identifier. Common
    # nodes always start a new row, and are thus always in the first
    # column.
    my $wit_matrix = [];
    my $cn = 0; # We should hit the common readings in order.
    my $row = [];
    foreach my $wn ( @{$path} ) {
        # Once every common reading has been passed there is nothing
        # left to compare against.
        if ( $cn < @ordered_common && $wn eq $ordered_common[$cn] ) {
            # Set up to look for the next common node, and
            # start a new row of words.
            $cn++;
            push( @$wit_matrix, $row ) if scalar( @$row );
            $row = [];
        }
        push( @$row, $wn );
    }
    push( @$wit_matrix, $row ); # Push the last row onto the matrix

    # Now each row in the matrix begins with a common node, and
    # continues with all the variant words that follow it. We turn
    # this into real positions in row,cell format, counting from 1.
    foreach my $li ( 1 .. scalar( @$wit_matrix ) ) {
        foreach my $di ( 1 .. scalar( @{ $wit_matrix->[ $li - 1 ] } ) ) {
            my $reading = $wit_matrix->[ $li - 1 ]->[ $di - 1 ];
            my $position = "$li,$di";
            # If we have seen this node before, we need to compare
            # its position with what went before.
            unless ( $reading->has_position &&
                     _cmp_position( $position, $reading->position ) < 1 ) {
                # The new position ID replaces the old one.
                $reading->position( $position );
            } # otherwise, the old position needs to stay.
        }
    }
}
716
# Three-way comparison of two "row,cell" position strings: rows are
# compared numerically first, then cells. A false (missing) position
# sorts before a true one, and two missing positions are equal.
sub _cmp_position {
    my ( $left, $right ) = @_;

    unless ( $left && $right ) {
        return -1 if $right;    # only the left one is missing
        return 1  if $left;     # only the right one is missing
        return 0;               # both are missing
    }

    my @left_parts  = split( /,/, $left );
    my @right_parts = split( /,/, $right );
    return ( $left_parts[0] <=> $right_parts[0] )
        || ( $left_parts[1] <=> $right_parts[1] );
}
3a1f2523 734
# Return every distinct position held by a reading in the graph,
# ordered with _cmp_position.
sub all_positions {
    my $self = shift;
    my %seen_position = map { ( $_->position => 1 ) } $self->readings;
    my @ordered = sort { _cmp_position( $a, $b ) } keys( %seen_position );
    return @ordered;
}
742
# Return the readings whose position equals the given position string.
sub readings_at_position {
    my ( $self, $pos ) = @_;
    my @matches;
    foreach my $reading ( $self->readings ) {
        push( @matches, $reading ) if $reading->position eq $pos;
    }
    return @matches;
}
748
749## Lemmatizer functions
750
# Reset the lemma table: every position in the graph starts out with
# no lemma (undef), and then each common reading is entered as the
# lemma of its own position.
sub init_lemmata {
    my $self = shift;
    my $lemmata = $self->lemmata;

    $lemmata->{$_} = undef foreach $self->all_positions;
    $lemmata->{ $_->position } = $_->name foreach $self->common_readings;
}
762
763=item B<lemma_readings>
764
765my @state = $graph->lemma_readings( @readings_delemmatized );
766
767Takes a list of readings that have just been delemmatized, and returns
768a set of tuples of the form ['reading', 'state'] that indicates what
769changes need to be made to the graph.
770
771=over
772
773=item *
774
775A state of 1 means 'lemmatize this reading'
776
777=item *
778
779A state of 0 means 'delemmatize this reading'
780
781=item *
782
783A state of undef means 'an ellipsis belongs in the text here because
784no decision has been made / an earlier decision was backed out'
785
786=back
787
788=cut
789
# Work out, position by position, what needs to change in the display
# after the given readings have been delemmatized. Returns a list of
# [ reading name, state ] pairs, where the state is 1 (lemmatize),
# 0 (delemmatize) or whatever the lemma table holds for a position
# that has no active reading.
sub lemma_readings {
    my ( $self, @toggled_off_nodes ) = @_;

    # First get the positions of those nodes which have been
    # toggled off.
    my %off_at = map { ( $_->position => $_->name ) } @toggled_off_nodes;

    # Now for each position, we have to see if a node is on, and we
    # have to see if a node has been turned off.
    my @answer;
    foreach my $pos ( $self->all_positions() ) {
        # The state of this position: the name of the active node, or
        # else 0 (nothing at this position) or undef (ellipsis here).
        my $active = $self->lemmata->{$pos};

        # Is there a formerly active node that was toggled off?
        if ( exists( $off_at{$pos} ) ) {
            my $off_node = $off_at{$pos};
            if ( $active && $active ne $off_node ) {
                push( @answer, [ $off_node, 0 ], [ $active, 1 ] );
            }
            else {
                push( @answer, [ $off_node, $active ] );
            }
            next;
        }

        # No formerly active node, so we just see if there is a
        # currently active one, and push it, whatever it is.
        if ( $active ) {
            push( @answer, [ $active, 1 ] );
            next;
        }

        # Push the state that is there. Arbitrarily use the first node
        # at that position.
        my @pos_nodes = $self->readings_at_position( $pos );
        push( @answer, [ $pos_nodes[0]->name, $active ] );
    }

    return @answer;
}
831
de51424a 832=item B<toggle_reading>
833
834my @readings_delemmatized = $graph->toggle_reading( $reading_name );
835
836Takes a reading node name, and either lemmatizes or de-lemmatizes
837it. Returns a list of all readings that are de-lemmatized as a result
838of the toggle.
839
840=cut
841
# Lemmatize or de-lemmatize the reading with the given name, and
# return the list of readings that end up de-lemmatized as a result.
# Returns nothing when no name is given, when the name matches no
# reading, or when the reading is a common one.
sub toggle_reading {
    my ( $self, $rname ) = @_;

    return unless $rname;
    my $reading = $self->reading( $rname );

    # Do nothing for an unknown reading or a common node.
    return if !$reading || $reading->is_common();

    my $lemmata = $self->lemmata;
    my $pos = $reading->position;
    my $old_state = $lemmata->{$pos};
    my @readings_off;
    if ( $old_state && $old_state eq $rname ) {
        # Turn off the node. We turn on no others by default.
        @readings_off = ( $reading );
    }
    else {
        # Turn on the node.
        $lemmata->{$pos} = $rname;
        # Any other 'on' readings in the same position should be off,
        # and so should any node that is an identical transposed one.
        @readings_off = ( $self->same_position_as( $reading ),
                          $reading->identical_readings );
    }
    @readings_off = unique_list( @readings_off );

    # Turn off the readings that need to be turned off.
    my @readings_delemmatized;
    foreach my $n ( @readings_off ) {
        my $n_pos = $n->position;
        my $state = $lemmata->{$n_pos};
        if ( $state && $state eq $n->name ) {
            # this reading is still on, so turn it off
            push( @readings_delemmatized, $n );
            my $new_state = undef;
            if ( $n eq $reading ) {
                # This is the reading that was clicked, so if there are no
                # other readings there, turn off the position. In all other
                # cases, restore the ellipsis.
                my @other_n = $self->same_position_as( $n );
                $new_state = 0 unless @other_n;
            }
            $lemmata->{$n_pos} = $new_state;
        }
        elsif ( $old_state && $old_state eq $n->name ) {
            # another reading has already been turned on here
            push( @readings_delemmatized, $n );
        } # else some other reading was on anyway, so pass.
    }
    return @readings_delemmatized;
}
891
# Return the other readings that share the given reading's position.
sub same_position_as {
    my ( $self, $reading ) = @_;
    my @neighbours;
    foreach my $other ( $self->readings_at_position( $reading->position ) ) {
        push( @neighbours, $other ) if $other ne $reading;
    }
    return @neighbours;
}
3a1f2523 898
4a8828f0 899# Return the string that joins together a list of witnesses for
900# display on a single path.
# Build the label for a single path shared by several witnesses by
# joining their names with the collation's witness list separator.
sub path_label {
    my ( $self, @witnesses ) = @_;
    my $separator = $self->wit_list_separator;
    return join( $separator, @witnesses );
}
905
# Split a path label back into its witness names. The collation's
# witness list separator is matched literally, not as a pattern.
sub witnesses_of_label {
    my ( $self, $label ) = @_;
    my $separator = $self->wit_list_separator;
    my @witnesses = split( /\Q$separator\E/, $label );
    return @witnesses;
}
8e1394aa 912
# Reduce a list of objects to one object per distinct name. When a
# name occurs more than once the last object with that name is kept.
# The order of the result is not defined.
sub unique_list {
    my %by_name;
    $by_name{ $_->name } = $_ foreach @_;
    return values( %by_name );
}
919
# Append an entry to the list stored under a key of the given hash
# reference, starting a new list when the key does not exist yet.
sub add_hash_entry {
    my ( $hash, $key, $entry ) = @_;
    return push( @{ $hash->{$key} }, $entry ) if exists $hash->{$key};
    return $hash->{$key} = [ $entry ];
}
928
dd3b58b0 929no Moose;
930__PACKAGE__->meta->make_immutable;