Commit | Line | Data |
dd3b58b0 |
1 | package Text::Tradition::Collation; |
d047cd52 |
2 | |
3 | use Graph::Easy; |
8e1394aa |
4 | use IPC::Run qw( run binary ); |
8e1394aa |
5 | use Text::Tradition::Collation::Reading; |
dd3b58b0 |
6 | use Moose; |
7 | |
8 | has 'graph' => ( |
d047cd52 |
9 | is => 'ro', |
10 | isa => 'Graph::Easy', |
11 | handles => { |
8e1394aa |
12 | add_reading => 'add_node', |
13 | del_reading => 'del_node', |
14 | add_path => 'add_edge', |
15 | del_path => 'del_edge', |
16 | reading => 'node', |
17 | path => 'edge', |
18 | readings => 'nodes', |
19 | paths => 'edges', |
d047cd52 |
20 | }, |
21 | default => sub { Graph::Easy->new( undirected => 0 ) }, |
22 | ); |
784877d9 |
23 | |
dd3b58b0 |
24 | |
dd3b58b0 |
25 | has 'tradition' => ( |
8e1394aa |
26 | is => 'rw', |
d047cd52 |
27 | isa => 'Text::Tradition', |
28 | ); |
dd3b58b0 |
29 | |
8e1394aa |
30 | has 'svg' => ( |
31 | is => 'ro', |
32 | isa => 'Str', |
33 | writer => '_save_svg', |
34 | predicate => 'has_svg', |
35 | ); |
36 | |
37 | has 'graphviz' => ( |
38 | is => 'ro', |
39 | isa => 'Str', |
40 | writer => '_save_graphviz', |
41 | predicate => 'has_graphviz', |
42 | ); |
43 | |
44 | has 'graphml' => ( |
45 | is => 'ro', |
46 | isa => 'XML::LibXML::Document', |
47 | writer => '_save_graphml', |
48 | predicate => 'has_graphml', |
49 | ); |
50 | |
3a1f2523 |
51 | # Keeps track of the lemmas within the collation. At most one lemma |
52 | # per position in the graph. |
53 | has 'lemmata' => ( |
54 | is => 'ro', |
55 | isa => 'HashRef[Maybe[Str]]', |
56 | default => sub { {} }, |
57 | ); |
58 | |
4a8828f0 |
59 | has 'wit_list_separator' => ( |
60 | is => 'rw', |
61 | isa => 'Str', |
62 | default => ', ', |
63 | ); |
64 | |
dd3b58b0 |
65 | # The collation can be created two ways: |
66 | # 1. Collate a set of witnesses (with CollateX I guess) and process |
67 | # the results as in 2. |
68 | # 2. Read a pre-prepared collation in one of a variety of formats, |
69 | # and make the graph from that. |
70 | |
71 | # The graph itself will (for now) be immutable, and the positions |
72 | # within the graph will also be immutable. We need to calculate those |
73 | # positions upon graph construction. The equivalences between graph |
74 | # nodes will be mutable, entirely determined by the user (or possibly |
75 | # by some semantic pre-processing provided by the user.) So the |
76 | # constructor should just make an empty equivalences object. The |
77 | # constructor will also need to make the witness objects, if we didn't |
78 | # come through option 1. |
79 | |
d047cd52 |
80 | sub BUILD { |
81 | my( $self, $args ) = @_; |
8e1394aa |
82 | $self->graph->use_class('node', 'Text::Tradition::Collation::Reading'); |
d047cd52 |
83 | |
4a8828f0 |
84 | # Pass through any graph-specific options. |
85 | my $shape = exists( $args->{'shape'} ) ? $args->{'shape'} : 'ellipse'; |
86 | $self->graph->set_attribute( 'node', 'shape', $shape ); |
d047cd52 |
87 | } |
784877d9 |
88 | |
de51424a |
89 | # Wrappes around merge_nodes |
784877d9 |
90 | |
91 | sub merge_readings { |
92 | my $self = shift; |
93 | my $first_node = shift; |
94 | my $second_node = shift; |
95 | $first_node->merge_from( $second_node ); |
96 | unshift( @_, $first_node, $second_node ); |
97 | return $self->graph->merge_nodes( @_ ); |
98 | } |
99 | |
8e1394aa |
100 | =head2 Output method(s) |
101 | |
102 | =over |
103 | |
104 | =item B<as_svg> |
105 | |
106 | print $graph->as_svg( $recalculate ); |
107 | |
108 | Returns an SVG string that represents the graph. Uses GraphViz to do |
4a8828f0 |
109 | this, because Graph::Easy doesn\'t cope well with long graphs. Unless |
8e1394aa |
110 | $recalculate is passed (and is a true value), the method will return a |
111 | cached copy of the SVG after the first call to the method. |
112 | |
113 | =cut |
114 | |
115 | sub as_svg { |
116 | my( $self, $recalc ) = @_; |
117 | return $self->svg if $self->has_svg; |
118 | |
119 | $self->_save_graphviz( $self->graph->as_graphviz() ) |
120 | unless( $self->has_graphviz && !$recalc ); |
121 | |
122 | my @cmd = qw/dot -Tsvg/; |
123 | my( $svg, $err ); |
124 | my $in = $self->graphviz; |
125 | run( \@cmd, \$in, ">", binary(), \$svg ); |
126 | $self->{'svg'} = $svg; |
127 | return $svg; |
128 | } |
129 | |
130 | =item B<as_graphml> |
131 | |
132 | print $graph->as_graphml( $recalculate ) |
133 | |
134 | Returns a GraphML representation of the collation graph, with |
135 | transposition information and position information. Unless |
136 | $recalculate is passed (and is a true value), the method will return a |
137 | cached copy of the SVG after the first call to the method. |
138 | |
139 | =cut |
140 | |
141 | sub as_graphml { |
142 | my( $self, $recalc ) = @_; |
143 | return $self->graphml if $self->has_graphml; |
144 | |
145 | # Some namespaces |
146 | my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns'; |
147 | my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance'; |
148 | my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' . |
149 | 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd'; |
150 | |
151 | # Create the document and root node |
152 | my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" ); |
153 | my $root = $graphml->createElementNS( $graphml_ns, 'graphml' ); |
154 | $graphml->setDocumentElement( $root ); |
155 | $root->setNamespace( $xsi_ns, 'xsi', 0 ); |
156 | $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema ); |
157 | |
158 | # Add the data keys for nodes |
159 | my @node_data = ( 'name', 'token', 'identical', 'position' ); |
160 | foreach my $ndi ( 0 .. $#node_data ) { |
161 | my $key = $root->addNewChild( $graphml_ns, 'key' ); |
162 | $key->setAttribute( 'attr.name', $node_data[$ndi] ); |
163 | $key->setAttribute( 'attr.type', 'string' ); |
164 | $key->setAttribute( 'for', 'node' ); |
165 | $key->setAttribute( 'id', 'd'.$ndi ); |
166 | } |
167 | |
168 | # Add the data keys for edges |
169 | my %wit_hash; |
170 | my $wit_ctr = 0; |
171 | foreach my $wit ( $self->getWitnessList ) { |
172 | my $wit_key = 'w' . $wit_ctr++; |
173 | $wit_hash{$wit} = $wit_key; |
174 | my $key = $root->addNewChild( $graphml_ns, 'key' ); |
175 | $key->setAttribute( 'attr.name', $wit ); |
176 | $key->setAttribute( 'attr.type', 'string' ); |
177 | $key->setAttribute( 'for', 'edge' ); |
178 | $key->setAttribute( 'id', $wit_key ); |
179 | } |
180 | |
181 | # Add the graph, its nodes, and its edges |
182 | my $graph = $root->addNewChild( $graphml_ns, 'graph' ); |
183 | $graph->setAttribute( 'edgedefault', 'directed' ); |
184 | $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful |
185 | $graph->setAttribute( 'parse.edgeids', 'canonical' ); |
186 | $graph->setAttribute( 'parse.edges', $self->edges() ); |
187 | $graph->setAttribute( 'parse.nodeids', 'canonical' ); |
188 | $graph->setAttribute( 'parse.nodes', $self->nodes() ); |
189 | $graph->setAttribute( 'parse.order', 'nodesfirst' ); |
190 | |
191 | my $node_ctr = 0; |
192 | my %node_hash; |
193 | foreach my $n ( $self->readings ) { |
194 | my %this_node_data = (); |
195 | foreach my $ndi ( 0 .. $#node_data ) { |
196 | my $value; |
197 | $this_node_data{'d'.$ndi} = $n->name if $node_data[$ndi] eq 'name'; |
198 | $this_node_data{'d'.$ndi} = $n->label |
199 | if $node_data[$ndi] eq 'token'; |
200 | $this_node_data{'d'.$ndi} = $n->primary->name if $n->has_primary; |
201 | $this_node_data{'d'.$ndi} = |
202 | $self->{'positions'}->node_position( $n ) |
203 | if $node_data[$ndi] eq 'position'; |
204 | } |
205 | my $node_el = $graph->addNewChild( $graphml_ns, 'node' ); |
206 | my $node_xmlid = 'n' . $node_ctr++; |
207 | $node_hash{ $n->name } = $node_xmlid; |
208 | $node_el->setAttribute( 'id', $node_xmlid ); |
209 | |
210 | foreach my $dk ( keys %this_node_data ) { |
211 | my $d_el = $node_el->addNewChild( $graphml_ns, 'data' ); |
212 | $d_el->setAttribute( 'key', $dk ); |
213 | $d_el->appendTextChild( $this_node_data{$dk} ); |
214 | } |
215 | } |
216 | |
217 | foreach my $e ( $self->edges() ) { |
218 | my( $name, $from, $to ) = ( $e->name, |
219 | $node_hash{ $e->from()->name() }, |
220 | $node_hash{ $e->to()->name() } ); |
221 | my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' ); |
222 | $edge_el->setAttribute( 'source', $from ); |
223 | $edge_el->setAttribute( 'target', $to ); |
224 | $edge_el->setAttribute( 'id', $name ); |
225 | # TODO Got to add the witnesses |
226 | } |
227 | |
228 | # Return the thing |
229 | $self->_save_graphml( $graphml ); |
230 | return $graphml; |
231 | } |
232 | |
233 | =back |
234 | |
de51424a |
235 | =head2 Navigation methods |
236 | |
237 | =over |
238 | |
8e1394aa |
239 | =item B<start> |
240 | |
241 | my $beginning = $collation->start(); |
242 | |
243 | Returns the beginning of the collation, a meta-reading with label '#START#'. |
244 | |
245 | =cut |
246 | |
247 | sub start { |
4a8828f0 |
248 | # Return the beginning reading of the graph. |
8e1394aa |
249 | my $self = shift; |
250 | my( $new_start ) = @_; |
251 | if( $new_start ) { |
252 | $self->del_reading( '#START#' ); |
253 | $self->graph->rename_node( $new_start, '#START#' ); |
254 | } |
255 | return $self->reading('#START#'); |
256 | } |
257 | |
e2902068 |
258 | =item B<reading_sequence> |
259 | |
260 | my @readings = $graph->reading_sequence( $first, $last, $path[, $alt_path] ); |
261 | |
262 | Returns the ordered list of readings, starting with $first and ending |
263 | with $last, along the given witness path. If no path is specified, |
264 | assume that the path is that of the base text (if any.) |
265 | |
266 | =cut |
267 | |
268 | sub reading_sequence { |
269 | my( $self, $start, $end, $witness, $backup ) = @_; |
270 | |
271 | $witness = 'base text' unless $witness; |
272 | my @readings = ( $start ); |
273 | my %seen; |
274 | my $n = $start; |
275 | while( $n ne $end ) { |
276 | if( exists( $seen{$n->name()} ) ) { |
277 | warn "Detected loop at " . $n->name(); |
278 | last; |
279 | } |
280 | $seen{$n->name()} = 1; |
281 | |
282 | my $next = $self->next_reading( $n, $witness, $backup ); |
283 | warn "Did not find any path for $witness from reading " . $n->name |
284 | unless $next; |
285 | push( @readings, $next ); |
286 | $n = $next; |
287 | } |
288 | # Check that the last reading is our end reading. |
289 | my $last = $readings[$#readings]; |
290 | warn "Last reading found from " . $start->label() . |
291 | " for witness $witness is not the end!" |
292 | unless $last eq $end; |
293 | |
294 | return @readings; |
295 | } |
296 | |
4a8828f0 |
297 | =item B<next_reading> |
8e1394aa |
298 | |
4a8828f0 |
299 | my $next_reading = $graph->next_reading( $reading, $witpath ); |
8e1394aa |
300 | |
4a8828f0 |
301 | Returns the reading that follows the given reading along the given witness |
8e1394aa |
302 | path. TODO These are badly named. |
303 | |
304 | =cut |
305 | |
4a8828f0 |
306 | sub next_reading { |
e2902068 |
307 | # Return the successor via the corresponding path. |
8e1394aa |
308 | my $self = shift; |
4a8828f0 |
309 | return $self->_find_linked_reading( 'next', @_ ); |
8e1394aa |
310 | } |
311 | |
4a8828f0 |
312 | =item B<prior_reading> |
8e1394aa |
313 | |
4a8828f0 |
314 | my $prior_reading = $graph->prior_reading( $reading, $witpath ); |
8e1394aa |
315 | |
4a8828f0 |
316 | Returns the reading that precedes the given reading along the given witness |
8e1394aa |
317 | path. TODO These are badly named. |
318 | |
319 | =cut |
320 | |
4a8828f0 |
321 | sub prior_reading { |
e2902068 |
322 | # Return the predecessor via the corresponding path. |
8e1394aa |
323 | my $self = shift; |
4a8828f0 |
324 | return $self->_find_linked_reading( 'prior', @_ ); |
8e1394aa |
325 | } |
326 | |
4a8828f0 |
327 | sub _find_linked_reading { |
e2902068 |
328 | my( $self, $direction, $node, $path, $alt_path ) = @_; |
329 | my @linked_paths = $direction eq 'next' |
8e1394aa |
330 | ? $node->outgoing() : $node->incoming(); |
e2902068 |
331 | return undef unless scalar( @linked_paths ); |
8e1394aa |
332 | |
e2902068 |
333 | # We have to find the linked path that contains all of the |
334 | # witnesses supplied in $path. |
335 | my( @path_wits, @alt_path_wits ); |
336 | @path_wits = $self->witnesses_of_label( $path ) if $path; |
337 | @alt_path_wits = $self->witnesses_of_label( $alt_path ) if $alt_path; |
338 | my $base_le; |
339 | my $alt_le; |
340 | foreach my $le ( @linked_paths ) { |
341 | if( $le->name eq 'base text' ) { |
342 | $base_le = $le; |
343 | } else { |
344 | my @le_wits = $self->witnesses_of_label( $le->name ); |
345 | if( _is_within( \@path_wits, \@le_wits ) ) { |
346 | # This is the right path. |
347 | return $direction eq 'next' ? $le->to() : $le->from(); |
348 | } elsif( _is_within( \@alt_path_wits, \@le_wits ) ) { |
349 | $alt_le = $le; |
350 | } |
8e1394aa |
351 | } |
352 | } |
e2902068 |
353 | # Got this far? Return the alternate path if it exists. |
354 | return $direction eq 'next' ? $alt_le->to() : $alt_le->from() |
355 | if $alt_le; |
356 | |
357 | # Got this far? Return the base path if it exists. |
358 | return $direction eq 'next' ? $base_le->to() : $base_le->from() |
359 | if $base_le; |
360 | |
361 | # Got this far? We have no appropriate path. |
8e1394aa |
362 | warn "Could not find $direction node from " . $node->label |
e2902068 |
363 | . " along path $path"; |
8e1394aa |
364 | return undef; |
365 | } |
366 | |
4a8828f0 |
367 | # Some set logic. |
368 | sub _is_within { |
369 | my( $set1, $set2 ) = @_; |
370 | my $ret = 1; |
371 | foreach my $el ( @$set1 ) { |
372 | $ret = 0 unless grep { /^\Q$el\E$/ } @$set2; |
373 | } |
374 | return $ret; |
375 | } |
376 | |
de51424a |
377 | |
378 | ## INITIALIZATION METHODS - for use by parsers |
4a8828f0 |
379 | # Walk the paths for each witness in the graph, and return the nodes |
e2902068 |
380 | # that the graph has in common. If $using_base is true, some |
381 | # different logic is needed. |
4a8828f0 |
382 | |
383 | sub walk_witness_paths { |
384 | my( $self, $end ) = @_; |
385 | # For each witness, walk the path through the graph. |
386 | # Then we need to find the common nodes. |
387 | # TODO This method is going to fall down if we have a very gappy |
388 | # text in the collation. |
389 | my $paths = {}; |
3a1f2523 |
390 | my @common_readings; |
4a8828f0 |
391 | foreach my $wit ( @{$self->tradition->witnesses} ) { |
392 | my $curr_reading = $self->start; |
e2902068 |
393 | my @wit_path = $self->reading_sequence( $self->start, $end, |
394 | $wit->sigil ); |
4a8828f0 |
395 | $wit->path( \@wit_path ); |
e2902068 |
396 | |
397 | # Detect the common readings. |
3a1f2523 |
398 | if( @common_readings ) { |
4a8828f0 |
399 | my @cn; |
400 | foreach my $n ( @wit_path ) { |
3a1f2523 |
401 | push( @cn, $n ) if grep { $_ eq $n } @common_readings; |
4a8828f0 |
402 | } |
3a1f2523 |
403 | @common_readings = (); |
404 | push( @common_readings, @cn ); |
4a8828f0 |
405 | } else { |
3a1f2523 |
406 | push( @common_readings, @wit_path ); |
4a8828f0 |
407 | } |
408 | } |
409 | |
410 | # Mark all the nodes as either common or not. |
3a1f2523 |
411 | foreach my $cn ( @common_readings ) { |
e2902068 |
412 | print STDERR "Setting " . $cn->name . " / " . $cn->label |
413 | . " as common node\n"; |
4a8828f0 |
414 | $cn->make_common; |
415 | } |
416 | foreach my $n ( $self->readings() ) { |
417 | $n->make_variant unless $n->is_common; |
418 | } |
3a1f2523 |
419 | # Return an array of the common nodes in order. |
420 | return @common_readings; |
4a8828f0 |
421 | } |
422 | |
e2902068 |
423 | # An alternative to walk_witness_paths, for use when a collation is |
424 | # constructed from a base text and an apparatus. Also modifies the |
425 | # collation graph to remove all 'base text' paths and replace them |
426 | # with real witness paths. |
427 | |
428 | sub walk_and_expand_base { |
429 | my( $self, $end ) = @_; |
430 | |
431 | foreach my $wit ( @{$self->tradition->witnesses} ) { |
432 | my $sig = $wit_sigil; |
433 | my $post_sig; |
434 | $post_sig = $wit->post_correctione |
435 | if $wit->has_post_correctione; |
436 | my @wit_path = ( $self->start ); |
437 | my @wit_pc_path; |
438 | my $curr_rdg = $self->start; |
439 | my %seen; |
440 | while( $curr_rdg ne $end ) { |
441 | if( $seen{$curr_reading->name} ) { |
442 | warn "Detected loop in walk_and_expand_base with witness " |
443 | . "$sig on reading " . $curr_reading->name . "\n"; |
444 | last; |
445 | } |
446 | my $next_rdg = $self->next_reading( $curr_reading, $sig ); |
447 | unless( $self->has_explicit_path( $curr_reading, |
448 | $next_reading, $sig ) ) { |
449 | $self->add_path( $curr_reading, $next_reading, $sig ); |
450 | } |
451 | push( @wit_path, $next_reading ); |
452 | $seen{$curr_reading->name} = 1; |
453 | } |
454 | $wit->path( \@wit_path ); |
455 | |
456 | # Now go through this path and look for p.c. divergences. |
457 | # TODO decide how to handle p.c. paths |
458 | # BIG TODO handle case where p.c. follows the base and a.c. doesn't! |
459 | |
460 | |
461 | } |
462 | |
4a8828f0 |
463 | sub common_readings { |
464 | my $self = shift; |
465 | my @common = grep { $_->is_common } $self->readings(); |
de51424a |
466 | return sort { _cmp_position( $a->position, $b->position ) } @common; |
4a8828f0 |
467 | } |
468 | |
469 | # Calculate the relative positions of nodes in the graph, if they |
470 | # were not given to us. |
471 | sub calculate_positions { |
3a1f2523 |
472 | my( $self, @ordered_common ) = @_; |
4a8828f0 |
473 | |
474 | # We have to calculate the position identifiers for each word, |
475 | # keyed on the common nodes. This will be 'fun'. The end result |
476 | # is a hash per witness, whose key is the word node and whose |
477 | # value is its position in the text. Common nodes are always N,1 |
478 | # so have identical positions in each text. |
4a8828f0 |
479 | |
480 | my $node_pos = {}; |
481 | foreach my $wit ( @{$self->tradition->witnesses} ) { |
482 | # First we walk each path, making a matrix for each witness that |
483 | # corresponds to its eventual position identifier. Common nodes |
484 | # always start a new row, and are thus always in the first column. |
485 | |
486 | my $wit_matrix = []; |
487 | my $cn = 0; # We should hit the common readings in order. |
488 | my $row = []; |
489 | foreach my $wn ( @{$wit->path} ) { |
3a1f2523 |
490 | if( $wn eq $ordered_common[$cn] ) { |
4a8828f0 |
491 | # Set up to look for the next common node, and |
492 | # start a new row of words. |
493 | $cn++; |
494 | push( @$wit_matrix, $row ) if scalar( @$row ); |
495 | $row = []; |
496 | } |
497 | push( @$row, $wn ); |
498 | } |
499 | push( @$wit_matrix, $row ); # Push the last row onto the matrix |
500 | |
501 | # Now we have a matrix per witness, so that each row in the |
502 | # matrix begins with a common node, and continues with all the |
503 | # variant words that appear in the witness. We turn this into |
504 | # real positions in row,cell format. But we need some |
505 | # trickery in order to make sure that each node gets assigned |
506 | # to only one position. |
507 | |
508 | foreach my $li ( 1..scalar(@$wit_matrix) ) { |
509 | foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) { |
510 | my $reading = $wit_matrix->[$li-1]->[$di-1]; |
511 | my $position = "$li,$di"; |
512 | # If we have seen this node before, we need to compare |
513 | # its position with what went before. |
514 | unless( $reading->has_position && |
515 | _cmp_position( $position, $reading->position ) < 1 ) { |
516 | # The new position ID replaces the old one. |
517 | $reading->position( $position ); |
518 | } # otherwise, the old position needs to stay. |
519 | } |
520 | } |
521 | } |
3a1f2523 |
522 | |
523 | $self->init_lemmata(); |
4a8828f0 |
524 | } |
525 | |
526 | sub _cmp_position { |
527 | my( $a, $b ) = @_; |
de51424a |
528 | if ( $a && $b ) { |
529 | my @pos_a = split(/,/, $a ); |
530 | my @pos_b = split(/,/, $b ); |
531 | |
532 | my $big_cmp = $pos_a[0] <=> $pos_b[0]; |
533 | return $big_cmp if $big_cmp; |
534 | # else |
535 | return $pos_a[1] <=> $pos_b[1]; |
536 | } elsif ( $b ) { # a is undefined |
537 | return -1; |
538 | } elsif ( $a ) { # b is undefined |
539 | return 1; |
540 | } |
541 | return 0; # they are both undefined |
8e1394aa |
542 | } |
3a1f2523 |
543 | |
544 | sub all_positions { |
545 | my $self = shift; |
546 | my %positions = (); |
547 | map { $positions{$_->position} = 1 } $self->readings; |
de51424a |
548 | my @answer = sort { _cmp_position( $a, $b ) } keys( %positions ); |
549 | return @answer; |
3a1f2523 |
550 | } |
551 | |
552 | sub readings_at_position { |
553 | my( $self, $pos ) = @_; |
554 | my @answer = grep { $_->position eq $pos } $self->readings; |
555 | return @answer; |
556 | } |
557 | |
558 | ## Lemmatizer functions |
559 | |
560 | sub init_lemmata { |
561 | my $self = shift; |
562 | |
563 | foreach my $position ( $self->all_positions ) { |
564 | $self->lemmata->{$position} = undef; |
565 | } |
566 | |
567 | foreach my $cr ( $self->common_readings ) { |
568 | $self->lemmata->{$cr->position} = $cr->name; |
569 | } |
570 | } |
571 | |
572 | =item B<lemma_readings> |
573 | |
574 | my @state = $graph->lemma_readings( @readings_delemmatized ); |
575 | |
576 | Takes a list of readings that have just been delemmatized, and returns |
577 | a set of tuples of the form ['reading', 'state'] that indicates what |
578 | changes need to be made to the graph. |
579 | |
580 | =over |
581 | |
582 | =item * |
583 | |
584 | A state of 1 means 'lemmatize this reading' |
585 | |
586 | =item * |
587 | |
588 | A state of 0 means 'delemmatize this reading' |
589 | |
590 | =item * |
591 | |
592 | A state of undef means 'an ellipsis belongs in the text here because |
593 | no decision has been made / an earlier decision was backed out' |
594 | |
595 | =back |
596 | |
597 | =cut |
598 | |
599 | sub lemma_readings { |
600 | my( $self, @toggled_off_nodes ) = @_; |
601 | |
602 | # First get the positions of those nodes which have been |
603 | # toggled off. |
604 | my $positions_off = {}; |
605 | map { $positions_off->{ $_->position } = $_->name } @toggled_off_nodes; |
de51424a |
606 | |
3a1f2523 |
607 | # Now for each position, we have to see if a node is on, and we |
608 | # have to see if a node has been turned off. |
609 | my @answer; |
610 | foreach my $pos ( $self->all_positions() ) { |
611 | # Find the state of this position. If there is an active node, |
612 | # its name will be the state; otherwise the state will be 0 |
613 | # (nothing at this position) or undef (ellipsis at this position) |
614 | my $active = $self->lemmata->{$pos}; |
615 | |
616 | # Is there a formerly active node that was toggled off? |
617 | if( exists( $positions_off->{$pos} ) ) { |
618 | my $off_node = $positions_off->{$pos}; |
619 | if( $active && $active ne $off_node) { |
620 | push( @answer, [ $off_node, 0 ], [ $active, 1 ] ); |
621 | } else { |
622 | push( @answer, [ $off_node, $active ] ); |
623 | } |
624 | |
625 | # No formerly active node, so we just see if there is a currently |
626 | # active one. |
627 | } elsif( $active ) { |
628 | # Push the active node, whatever it is. |
629 | push( @answer, [ $active, 1 ] ); |
630 | } else { |
631 | # Push the state that is there. Arbitrarily use the first node |
632 | # at that position. |
633 | my @pos_nodes = $self->readings_at_position( $pos ); |
de51424a |
634 | push( @answer, [ $pos_nodes[0]->name, $self->lemmata->{$pos} ] ); |
3a1f2523 |
635 | } |
636 | } |
637 | |
638 | return @answer; |
639 | } |
640 | |
de51424a |
641 | =item B<toggle_reading> |
642 | |
643 | my @readings_delemmatized = $graph->toggle_reading( $reading_name ); |
644 | |
645 | Takes a reading node name, and either lemmatizes or de-lemmatizes |
646 | it. Returns a list of all readings that are de-lemmatized as a result |
647 | of the toggle. |
648 | |
649 | =cut |
650 | |
651 | sub toggle_reading { |
652 | my( $self, $rname ) = @_; |
653 | |
654 | return unless $rname; |
655 | my $reading = $self->reading( $rname ); |
656 | if( !$reading || $reading->is_common() ) { |
657 | # Do nothing, it's a common node. |
658 | return; |
659 | } |
660 | |
661 | my $pos = $reading->position; |
662 | my $old_state = $self->lemmata->{$pos}; |
663 | my @readings_off; |
664 | if( $old_state && $old_state eq $rname ) { |
665 | # Turn off the node. We turn on no others by default. |
666 | push( @readings_off, $reading ); |
667 | } else { |
668 | # Turn on the node. |
669 | $self->lemmata->{$pos} = $rname; |
670 | # Any other 'on' readings in the same position should be off. |
671 | push( @readings_off, $self->same_position_as( $reading ) ); |
672 | # Any node that is an identical transposed one should be off. |
673 | push( @readings_off, $reading->identical_readings ); |
674 | } |
675 | @readings_off = unique_list( @readings_off ); |
676 | |
677 | # Turn off the readings that need to be turned off. |
678 | my @readings_delemmatized; |
679 | foreach my $n ( @readings_off ) { |
680 | my $state = $self->lemmata->{$n->position}; |
681 | if( $state && $state eq $n->name ) { |
682 | # this reading is still on, so turn it off |
683 | push( @readings_delemmatized, $n ); |
684 | my $new_state = undef; |
685 | if( $n eq $reading ) { |
686 | # This is the reading that was clicked, so if there are no |
687 | # other readings there, turn off the position. In all other |
688 | # cases, restore the ellipsis. |
689 | my @other_n = $self->same_position_as( $n ); |
690 | $new_state = 0 unless @other_n; |
691 | } |
692 | $self->lemmata->{$n->position} = $new_state; |
693 | } elsif( $old_state && $old_state eq $n->name ) { |
694 | # another reading has already been turned on here |
695 | push( @readings_delemmatized, $n ); |
696 | } # else some other reading was on anyway, so pass. |
697 | } |
698 | return @readings_delemmatized; |
699 | } |
700 | |
701 | sub same_position_as { |
702 | my( $self, $reading ) = @_; |
703 | my $pos = $reading->position; |
704 | my @same = grep { $_ ne $reading } $self->readings_at_position( $reading->position ); |
705 | return @same; |
706 | } |
3a1f2523 |
707 | |
4a8828f0 |
708 | # Return the string that joins together a list of witnesses for |
709 | # display on a single path. |
710 | sub path_label { |
711 | my $self = shift; |
712 | return join( $self->wit_list_separator, @_ ); |
713 | } |
714 | |
715 | sub witnesses_of_label { |
de51424a |
716 | my( $self, $label ) = @_; |
4a8828f0 |
717 | my $regex = $self->wit_list_separator; |
de51424a |
718 | my @answer = split( /\Q$regex\E/, $label ); |
719 | return @answer; |
4a8828f0 |
720 | } |
8e1394aa |
721 | |
de51424a |
722 | sub unique_list { |
723 | my( @list ) = @_; |
724 | my %h; |
725 | map { $h{$_->name} = $_ } @list; |
726 | return values( %h ); |
727 | } |
728 | |
dd3b58b0 |
729 | no Moose; |
730 | __PACKAGE__->meta->make_immutable; |