Commit | Line | Data |
e58153d6 |
1 | package Text::Tradition::Graph; |
b49c4318 |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Graph::Easy; |
6 | use IPC::Run qw( run binary ); |
7 | use Module::Load; |
e58153d6 |
8 | use Text::Tradition::Graph::Position; |
a25d4374 |
9 | |
10 | =head1 NAME |
11 | |
e58153d6 |
12 | Text::Tradition::Graph |
a25d4374 |
13 | |
14 | =head1 SYNOPSIS |
15 | |
2ceca8c3 |
16 | use Text::Tradition::Graph; |
a25d4374 |
17 | |
2ceca8c3 |
18 | my $text = Text::Tradition::Graph->new( 'GraphML' => '/my/graphml/file.xml' ); |
19 | my $text = Text::Tradition::Graph->new( 'TEI' => '/my/tei/file.xml' ); |
20 | my $text = Text::Tradition::Graph->new( 'CSV' => '/my/csv/file.csv', |
21 | 'base' => '/my/basefile.txt' ); |
22 | my $text = Text::Tradition::Graph->new( 'CTE' => '/my/cte/file.txt', |
23 | 'base' => '/my/basefile.txt' ); |
a25d4374 |
24 | |
2ceca8c3 |
25 | my $svg_string = $text->as_svg(); |
a25d4374 |
26 | |
2ceca8c3 |
27 | my $lemma_nodes = $text->active_nodes(); |
28 | $text->toggle_node( 'some_word' ); |
a25d4374 |
29 | |
30 | =head1 DESCRIPTION |
31 | |
32 | A text tradition is the representation of our knowledge of a text that |
33 | has been passed down via manuscript copies from a time before printing |
34 | presses. Each text has a number of witnesses, that is, manuscripts |
35 | that bear a version of the text. The tradition is the aggregation of |
36 | these witnesses, which is to say, the collation of the text. |
37 | |
38 | This module takes a text collation and represents it as a horizontal |
39 | directed graph, suitable for SVG rendering and for analysis of various |
40 | forms. Since this module was written by a medievalist, it also |
41 | provides a facility for making a critical text reconstruction by |
42 | choosing certain variants to be 'lemma' text - that is, text which |
43 | should be considered the 'standard' reading. |
44 | |
45 | Although the graph is a very good way to render text collation, and is |
46 | visually very easy for a human to interpret, it doesn't have any |
47 | inherent information about which nodes 'go together' - that is, which |
48 | text readings appear in the same place as other readings. This module |
49 | therefore calculates 'positions' on the graph, thus holding some |
50 | information about which readings can and can't be substituted for |
51 | others. |
52 | |
53 | =head1 METHODS |
54 | |
55 | =over 4 |
56 | |
57 | =item B<new> |
58 | |
59 | Constructor. Takes a source collation file from which to construct |
60 | the initial graph. This file can be TEI (parallel segmentation) XML, |
2ceca8c3 |
61 | CSV in a format yet to be documented, GraphML as documented by the |
62 | CollateX tool (L<http://gregor.middell.net/collatex/>), or a Classical |
63 | Text Editor apparatus. For CSV and Classical Text Editor files, the |
64 | user must also supply a base text to which the line numbering in the |
65 | collation file refers. |
66 | |
67 | 20/04/2011 Currently only CSV and GraphML are really supported. |
a25d4374 |
68 | |
69 | =cut |
b49c4318 |
70 | |
71 | sub new { |
72 | my $proto = shift; |
73 | my $class = ref( $proto ) || $proto; |
74 | my %opts = ( 'on_color' => 'yellow', |
75 | 'off_color' => 'white', |
76 | @_ ); |
77 | my $self = {}; |
78 | |
79 | # opts can be: GraphML, base+CSV, base+CTE, TEI. We need |
80 | # something to parse. |
81 | my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %opts ); |
82 | my $format = shift( @formats ); |
83 | unless( $format ) { |
52ce987f |
84 | warn "No data given to create a graph; will initialize an empty one"; |
b49c4318 |
85 | } |
86 | if( $format =~ /^(CSV|CTE)$/ && !exists $opts{'base'} ) { |
87 | warn "Cannot make a graph from $format without a base text"; |
88 | return; |
89 | } |
90 | |
91 | # Make a graph object. |
92 | my $collation_graph = Graph::Easy->new(); |
93 | $collation_graph->set_attribute( 'node', 'shape', 'ellipse' ); |
94 | # Starting point for all texts |
95 | my $last_node = $collation_graph->add_node( '#START#' ); |
96 | |
97 | $self->{'graph'} = $collation_graph; |
98 | bless( $self, $class ); |
99 | |
100 | # Now do the parsing. |
52ce987f |
101 | if( $format ) { |
102 | my @args; |
103 | if( $format =~ /^(CSV|CTE)$/ ) { |
104 | @args = ( 'base' => $opts{'base'}, |
105 | 'data' => $opts{$format}, |
106 | 'format' => $format ); |
107 | $format = 'BaseText'; |
108 | } else { |
109 | @args = ( $opts{ $format } ); |
110 | } |
111 | my $mod = "Text::Tradition::Parser::$format"; |
112 | load( $mod ); |
113 | $mod->can('parse')->( $self, @args ); |
b49c4318 |
114 | } |
b49c4318 |
115 | return $self; |
116 | } |
117 | |
2ceca8c3 |
118 | =item B<make_positions> |
119 | |
120 | $graph->make_positions( $common_nodes, $paths ) |
121 | |
122 | Create an associated Graph::Positions object that records the position |
123 | of each node in the graph. This method call is probably in the wrong |
124 | place and will move. |
125 | |
126 | =cut |
127 | |
128 | sub make_positions { |
129 | my( $self, $common_nodes, $paths ) = @_; |
130 | my $positions = Text::Tradition::Graph::Position->new( $common_nodes, $paths ); |
131 | $self->{'positions'} = $positions; |
132 | } |
133 | |
134 | =back |
135 | |
136 | =head2 Graph::Easy object accessor methods |
137 | |
138 | See the Graph::Easy documentation for descriptions of these functions. |
139 | |
140 | =over |
141 | |
142 | =item B<node> |
143 | |
144 | =cut |
b49c4318 |
145 | |
b49c4318 |
146 | sub node { |
147 | my $self = shift; |
148 | return $self->{'graph'}->node( @_ ); |
149 | } |
150 | |
2ceca8c3 |
151 | =item B<edge> |
152 | |
153 | =cut |
154 | |
b49c4318 |
155 | sub edge { |
156 | my $self = shift; |
157 | return $self->{'graph'}->edge( @_ ); |
158 | } |
159 | |
2ceca8c3 |
160 | =item B<add_node> |
161 | |
162 | =cut |
163 | |
c2d16875 |
164 | # Not only adds the node, but also initializes internal data |
165 | # about the node. |
2ceca8c3 |
166 | |
b49c4318 |
167 | sub add_node { |
168 | my $self = shift; |
c2d16875 |
169 | my $node = $self->{'graph'}->add_node( @_ ); |
170 | $self->{'identical_nodes'}->{ $node->name() } = [ $node->name() ]; |
171 | return $node; |
b49c4318 |
172 | } |
173 | |
2ceca8c3 |
174 | =item B<add_edge> |
175 | |
176 | =cut |
177 | |
b49c4318 |
178 | sub add_edge { |
179 | my $self = shift; |
180 | return $self->{'graph'}->add_edge( @_ ); |
181 | } |
182 | |
2ceca8c3 |
183 | =item B<del_node> |
184 | |
185 | =cut |
186 | |
b49c4318 |
187 | sub del_node { |
188 | my $self = shift; |
c2d16875 |
189 | my $node = $_[0]; |
190 | |
191 | # Delete this node out of any relevant transposition pool. |
192 | if( ref $node eq 'Graph::Easy::Node' ) { |
193 | $node = $node->name(); |
194 | } |
195 | my @ident = $self->identical_nodes( $node ); |
196 | if( @ident ) { |
197 | # Get the pool. |
198 | my $pool = $self->{'identical_nodes'}->{ $ident[0] }; |
199 | foreach my $i ( 0 .. scalar(@$pool)-1 ) { |
200 | if( $pool->[$i] eq $node ) { |
201 | splice( @$pool, $i, 1 ); |
202 | last; |
203 | } |
204 | } |
205 | } |
206 | delete $self->{'identical_nodes'}->{ $node }; |
207 | |
208 | # Now delete the node. |
b49c4318 |
209 | return $self->{'graph'}->del_node( @_ ); |
210 | } |
211 | |
2ceca8c3 |
212 | =item B<del_edge> |
213 | |
214 | =cut |
215 | |
b49c4318 |
216 | sub del_edge { |
217 | my $self = shift; |
218 | return $self->{'graph'}->del_edge( @_ ); |
219 | } |
220 | |
2ceca8c3 |
221 | =item B<nodes> |
222 | |
223 | =cut |
224 | |
b49c4318 |
225 | sub nodes { |
226 | my $self = shift; |
227 | return $self->{'graph'}->nodes( @_ ); |
228 | } |
229 | |
2ceca8c3 |
230 | =item B<edges> |
231 | |
232 | =cut |
233 | |
b49c4318 |
234 | sub edges { |
235 | my $self = shift; |
236 | return $self->{'graph'}->edges( @_ ); |
237 | } |
238 | |
2ceca8c3 |
239 | =item B<merge_nodes> |
240 | |
241 | =cut |
242 | |
b49c4318 |
243 | sub merge_nodes { |
244 | my $self = shift; |
245 | return $self->{'graph'}->merge_nodes( @_ ); |
246 | } |
247 | |
248 | ### Helper methods for navigating the tree |
249 | |
2ceca8c3 |
250 | =back |
251 | |
252 | =head2 Graph navigation methods |
253 | |
254 | =over |
255 | |
256 | =item B<start> |
257 | |
258 | my $node = $graph->start(); |
259 | |
260 | Returns the beginning node of the graph. |
261 | |
262 | =cut |
263 | |
b49c4318 |
264 | sub start { |
265 | # Return the beginning node of the graph. |
266 | my $self = shift; |
267 | my( $new_start ) = @_; |
268 | if( $new_start ) { |
c2d16875 |
269 | # Fix the node transposition data |
270 | delete $self->{'identical_nodes'}->{ $new_start->name() }; |
271 | $self->{'identical_nodes'}->{'#START#'} = [ '#START#' ]; |
b49c4318 |
272 | $self->{'graph'}->rename_node( $new_start, '#START#' ); |
273 | } |
274 | return $self->{'graph'}->node('#START#'); |
275 | } |
276 | |
2ceca8c3 |
277 | =item B<next_word> |
c2d16875 |
278 | |
2ceca8c3 |
279 | my $next_node = $graph->next_word( $node, $path ); |
c2d16875 |
280 | |
2ceca8c3 |
281 | Returns the node that follows the given node along the given witness |
282 | path. TODO These are badly named. |
283 | |
284 | =cut |
b49c4318 |
285 | |
286 | sub next_word { |
287 | # Return the successor via the corresponding edge. |
e49731d7 |
288 | my $self = shift; |
289 | return $self->_find_linked_word( 'next', @_ ); |
b49c4318 |
290 | } |
291 | |
2ceca8c3 |
292 | =item B<prior_word> |
293 | |
294 | my $prior_node = $graph->prior_word( $node, $path ); |
295 | |
296 | Returns the node that precedes the given node along the given witness |
297 | path. TODO These are badly named. |
298 | |
299 | =cut |
300 | |
b49c4318 |
301 | sub prior_word { |
302 | # Return the predecessor via the corresponding edge. |
e49731d7 |
303 | my $self = shift; |
304 | return $self->_find_linked_word( 'prior', @_ ); |
305 | } |
306 | |
307 | sub _find_linked_word { |
308 | my( $self, $direction, $node, $edge ) = @_; |
309 | $edge = 'base text' unless $edge; |
310 | my @linked_edges = $direction eq 'next' |
311 | ? $node->outgoing() : $node->incoming(); |
312 | return undef unless scalar( @linked_edges ); |
b49c4318 |
313 | |
e49731d7 |
314 | # We have to find the linked edge that contains all of the |
315 | # witnesses supplied in $edge. |
316 | my @edge_wits = split( /, /, $edge ); |
317 | foreach my $le ( @linked_edges ) { |
318 | my @le_wits = split( /, /, $le->name() ); |
319 | if( _is_within( \@edge_wits, \@le_wits ) ) { |
320 | # This is the right edge. |
321 | return $direction eq 'next' ? $le->to() : $le->from(); |
322 | } |
b49c4318 |
323 | } |
e49731d7 |
324 | warn "Could not find $direction node from " . $node->label |
325 | . " along edge $edge"; |
b49c4318 |
326 | return undef; |
327 | } |
328 | |
e49731d7 |
329 | # Some set logic. |
330 | sub _is_within { |
331 | my( $set1, $set2 ) = @_; |
332 | my $ret = 1; |
333 | foreach my $el ( @$set1 ) { |
334 | $ret = 0 unless grep { /^\Q$el\E$/ } @$set2; |
335 | } |
336 | return $ret; |
337 | } |
338 | |
2ceca8c3 |
339 | =item B<node_sequence> |
340 | |
341 | my @nodes = $graph->node_sequence( $first, $last, $path ); |
342 | |
343 | Returns the ordered list of nodes, starting with $first and ending |
344 | with $last, along the given witness path. |
345 | |
346 | =cut |
347 | |
b49c4318 |
348 | sub node_sequence { |
52ce987f |
349 | my( $self, $start, $end, $witness, $backup ) = @_; |
b49c4318 |
350 | unless( ref( $start ) eq 'Graph::Easy::Node' |
351 | && ref( $end ) eq 'Graph::Easy::Node' ) { |
352 | warn "Called node_sequence without two nodes!"; |
353 | return (); |
354 | } |
52ce987f |
355 | $witness = 'base text' unless $witness; |
b49c4318 |
356 | my @nodes = ( $start ); |
357 | my %seen; |
358 | my $n = $start; |
359 | while( $n ne $end ) { |
360 | if( exists( $seen{$n->name()} ) ) { |
361 | warn "Detected loop at " . $n->name(); |
362 | last; |
363 | } |
364 | $seen{$n->name()} = 1; |
365 | |
366 | my @edges = $n->outgoing(); |
52ce987f |
367 | my @relevant_edges = grep { my @w = split( /, /, $_->label ); |
368 | grep { /^\Q$witness\E$/ } @w } @edges; |
369 | unless( @relevant_edges ) { |
370 | @relevant_edges = grep { my @w = split( /, /, $_->label ); |
371 | grep { /^\Q$backup\E$/ } @w } @edges |
372 | if $backup; |
373 | } |
374 | unless( @relevant_edges ) { |
375 | @relevant_edges = grep { $_->label() eq 'base text' } @edges; |
376 | } |
377 | |
378 | warn "Did not find an edge for $witness from node " . $n->label |
b49c4318 |
379 | unless scalar @relevant_edges; |
b49c4318 |
380 | my $next = $relevant_edges[0]->to(); |
381 | push( @nodes, $next ); |
382 | $n = $next; |
383 | } |
384 | # Check that the last node is our end node. |
385 | my $last = $nodes[$#nodes]; |
386 | warn "Last node found from " . $start->label() . |
52ce987f |
387 | " for witness $witness is not the end!" |
b49c4318 |
388 | unless $last eq $end; |
389 | |
390 | return @nodes; |
391 | } |
392 | |
2ceca8c3 |
393 | =item B<string_lemma> |
394 | |
395 | my $text = $graph->string_lemma( $first, $last, $path ); |
396 | |
397 | Returns the whitespace-separated text, starting with $first and ending |
398 | with $last, represented in the graph along the given path. |
399 | |
400 | =cut |
401 | |
b49c4318 |
402 | sub string_lemma { |
403 | my( $self, $start, $end, $label ) = @_; |
404 | |
405 | my @nodes = $self->node_sequence( $start, $end, $label ); |
406 | my @words = map { $_->label() } @nodes; |
407 | return join( ' ', @words ); |
408 | } |
409 | |
2ceca8c3 |
410 | =back |
411 | |
412 | =head2 Transposition handling methods |
413 | |
414 | These should really move to their own module. For use when the graph |
415 | has split transposed nodes in order to avoid edges that travel |
416 | backward. |
417 | |
418 | =over |
419 | |
420 | =item B<set_identical_node> |
421 | |
422 | $graph->set_identical_node( $node, $other_node ) |
423 | |
424 | Tell the graph that these two nodes contain the same (transposed) reading. |
425 | |
426 | =cut |
427 | |
428 | sub set_identical_node { |
c557b209 |
429 | my( $self, $node, $main_node ) = @_; |
430 | |
431 | # The identical_nodes hash contains a key per node, and a value |
432 | # that is an arrayref to a list of nodes. Those nodes that are |
433 | # the same (transposed) node should be keys that point to the same |
434 | # arrayref. Each arrayref should contain the name of each node |
435 | # that points to it. So basically here we want to merge the |
436 | # arrays for the two nodes that are now identical. The 'main' |
437 | # node should always be first in the array. |
438 | |
2ceca8c3 |
439 | my $pool = $self->{'identical_nodes'}->{ $node }; |
c557b209 |
440 | my $main_pool = $self->{'identical_nodes'}->{ $main_node }; |
441 | |
2ceca8c3 |
442 | my %poolhash; |
c557b209 |
443 | foreach ( @$main_pool ) { |
444 | # Note which nodes are already in the main pool so that we |
445 | # don't re-add them. |
2ceca8c3 |
446 | $poolhash{$_} = 1; |
447 | } |
2ceca8c3 |
448 | |
c557b209 |
449 | foreach( @$pool ) { |
450 | # Add the remaining nodes to the main pool... |
451 | push( @$main_pool, $_ ) unless $poolhash{$_}; |
452 | } |
453 | # ...and set this node to point to the enlarged pool. |
454 | $self->{'identical_nodes'}->{ $node } = $main_pool; |
2ceca8c3 |
455 | } |
456 | |
c557b209 |
457 | =item B<identical_nodes> |
2ceca8c3 |
458 | |
459 | my @nodes = $graph->identical_nodes( $node ) |
460 | |
461 | Get a list of nodes that contain the same (transposed) reading as the |
462 | given node. |
463 | |
464 | =cut |
465 | |
466 | sub identical_nodes { |
467 | my( $self, $node ) = @_; |
468 | my @others = grep { $_ !~ /^$node$/ } |
469 | @{$self->{'identical_nodes'}->{ $node }}; |
470 | return @others; |
471 | } |
472 | |
473 | =back |
474 | |
475 | =head2 Output method(s) |
476 | |
477 | =over |
478 | |
479 | =item B<as_svg> |
480 | |
481 | print $graph->as_svg( $recalculate ); |
482 | |
483 | Returns an SVG string that represents the graph. Uses GraphViz to do |
484 | this, because Graph::Easy doesn't cope well with long graphs. Unless |
485 | $recalculate is passed (and is a true value), the method will return a |
486 | cached copy of the SVG after the first call to the method. |
487 | |
488 | =cut |
b49c4318 |
489 | |
490 | sub as_svg { |
491 | my( $self, $recalc ) = @_; |
492 | return $self->{'svg'} if( exists $self->{'svg'} && !$recalc ); |
493 | |
494 | $self->{'graphviz'} = $self->{'graph'}->as_graphviz() |
495 | unless( exists $self->{'graphviz'} && !$recalc ); |
496 | |
497 | my @cmd = qw/dot -Tsvg/; |
498 | my( $svg, $err ); |
499 | my $in = $self->{'graphviz'}; |
500 | run( \@cmd, \$in, ">", binary(), \$svg ); |
501 | $self->{'svg'} = $svg; |
502 | return $svg; |
503 | } |
504 | |
c557b209 |
505 | =item B<as_graphml> |
506 | |
507 | print $graph->as_graphml( $recalculate ) |
508 | |
509 | Returns a GraphML representation of the collation graph, with |
510 | transposition information and position information. Unless |
511 | $recalculate is passed (and is a true value), the method will return a |
512 | cached copy of the SVG after the first call to the method. |
513 | |
514 | =cut |
515 | |
516 | sub as_graphml { |
517 | my( $self, $recalc ) = @_; |
518 | return $self->{'graphml'} if( exists $self->{'graphml'} && !$recalc ); |
519 | |
520 | # Some namespaces |
521 | my $graphml_ns = 'http://graphml.graphdrawing.org/xmlns'; |
522 | my $xsi_ns = 'http://www.w3.org/2001/XMLSchema-instance'; |
523 | my $graphml_schema = 'http://graphml.graphdrawing.org/xmlns ' . |
524 | 'http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd'; |
525 | |
526 | # Create the document and root node |
527 | my $graphml = XML::LibXML->createDocument( "1.0", "UTF-8" ); |
528 | my $root = $graphml->createElementNS( $graphml_ns, 'graphml' ); |
529 | $graphml->setDocumentElement( $root ); |
530 | $root->setNamespace( $xsi_ns, 'xsi', 0 ); |
531 | $root->setAttributeNS( $xsi_ns, 'schemaLocation', $graphml_schema ); |
532 | |
533 | # Add the data keys for nodes |
534 | my @node_data = ( 'name', 'token', 'identical', 'position' ); |
535 | foreach my $ndi ( 0 .. $#node_data ) { |
536 | my $key = $root->addNewChild( $graphml_ns, 'key' ); |
537 | $key->setAttribute( 'attr.name', $node_data[$ndi] ); |
538 | $key->setAttribute( 'attr.type', 'string' ); |
539 | $key->setAttribute( 'for', 'node' ); |
540 | $key->setAttribute( 'id', 'd'.$ndi ); |
541 | } |
542 | |
543 | # Add the data keys for edges |
544 | my %wit_hash; |
545 | my $wit_ctr = 0; |
546 | foreach my $wit ( $self->getWitnessList ) { |
547 | my $wit_key = 'w' . $wit_ctr++; |
548 | $wit_hash{$wit} = $wit_key; |
549 | my $key = $root->addNewChild( $graphml_ns, 'key' ); |
550 | $key->setAttribute( 'attr.name', $wit ); |
551 | $key->setAttribute( 'attr.type', 'string' ); |
552 | $key->setAttribute( 'for', 'edge' ); |
553 | $key->setAttribute( 'id', $wit_key ); |
554 | } |
555 | |
556 | # Add the graph, its nodes, and its edges |
557 | my $graph = $root->addNewChild( $graphml_ns, 'graph' ); |
558 | $graph->setAttribute( 'edgedefault', 'directed' ); |
559 | $graph->setAttribute( 'id', 'g0' ); # TODO make this meaningful |
560 | $graph->setAttribute( 'parse.edgeids', 'canonical' ); |
561 | $graph->setAttribute( 'parse.edges', $self->edges() ); |
562 | $graph->setAttribute( 'parse.nodeids', 'canonical' ); |
563 | $graph->setAttribute( 'parse.nodes', $self->nodes() ); |
564 | $graph->setAttribute( 'parse.order', 'nodesfirst' ); |
565 | |
566 | my $node_ctr = 0; |
567 | my %node_hash; |
568 | foreach my $n ( $self->nodes() ) { |
569 | my %this_node_data = (); |
570 | foreach my $ndi ( 0 .. $#node_data ) { |
571 | my $value; |
572 | $this_node_data{'d'.$ndi} = $n->name if $node_data[$ndi] eq 'name'; |
573 | $this_node_data{'d'.$ndi} = $n->label |
574 | if $node_data[$ndi] eq 'token'; |
575 | $this_node_data{'d'.$ndi} = $self->primary_node( $n ) |
576 | if $node_data[$ndi] eq 'name'; |
577 | $this_node_data{'d'.$ndi} = |
578 | $self->{'positions'}->node_position( $n ) |
579 | if $node_data[$ndi] eq 'position'; |
580 | } |
581 | my $node_el = $graph->addNewChild( $graphml_ns, 'node' ); |
582 | my $node_xmlid = 'n' . $node_ctr++; |
583 | $node_hash{ $n->name } = $node_xmlid; |
584 | $node_el->setAttribute( 'id', $node_xmlid ); |
585 | |
586 | foreach my $dk ( keys %this_node_data ) { |
587 | my $d_el = $node_el->addNewChild( $graphml_ns, 'data' ); |
588 | $d_el->setAttribute( 'key', $dk ); |
589 | $d_el->appendTextChild( $this_node_data{$dk} ); |
590 | } |
591 | } |
592 | |
593 | foreach my $e ( $self->edges() ) { |
594 | my( $name, $from, $to ) = ( $e->name, |
595 | $node_hash{ $e->from()->name() }, |
596 | $node_hash{ $e->to()->name() } ); |
597 | my $edge_el = $graph->addNewChild( $graphml_ns, 'edge' ); |
598 | $edge_el->setAttribute( 'source', $from ); |
599 | $edge_el->setAttribute( 'target', $to ); |
600 | $edge_el->setAttribute( 'id', $name ); |
601 | # TODO Got to add the witnesses |
602 | } |
603 | |
604 | # Return the thing |
605 | $self->{'graphml'} = $graphml; |
606 | return $graphml; |
607 | } |
608 | |
2ceca8c3 |
609 | =back |
610 | |
611 | =head2 Lemmatization methods |
612 | |
613 | =over |
614 | |
615 | =item B<init_lemmatizer> |
616 | |
617 | =cut |
b49c4318 |
618 | |
a25d4374 |
619 | sub init_lemmatizer { |
620 | my $self = shift; |
621 | # Initialize the 'lemma' hash, going through all the nodes and seeing |
58a3c424 |
622 | # which ones are common nodes. This should only be run once. |
a25d4374 |
623 | |
58a3c424 |
624 | return if( $self->{'lemmatizer_initialized'} ); |
625 | my @active_names = map { $_->name } grep { $self->is_common( $_ ) } |
626 | $self->nodes(); |
627 | $self->{'positions'}->init_lemmatizer( @active_names ); |
628 | $self->{'lemmatizer_initialized'} = 1; |
a25d4374 |
629 | |
a25d4374 |
630 | } |
631 | |
e49731d7 |
632 | =item B<toggle_node> |
633 | |
634 | my @nodes_turned_off = $graph->toggle_node( $node ); |
635 | |
636 | Takes a node name, and either lemmatizes or de-lemmatizes it. Returns |
637 | a list of all nodes that are de-lemmatized as a result of the toggle. |
638 | |
639 | =cut |
640 | |
641 | sub toggle_node { |
642 | my( $self, $node ) = @_; |
643 | |
644 | # In case this is being called for the first time. |
645 | $self->init_lemmatizer(); |
646 | |
52ce987f |
647 | if( !$node || $self->is_common( $node ) ) { |
e49731d7 |
648 | # Do nothing, it's a common node. |
649 | return; |
650 | } |
651 | |
652 | my $pos = $self->{'positions'}->node_position( $node ); |
653 | my $old_state = $self->{'positions'}->state( $pos ); |
654 | my @nodes_off; |
655 | if( $old_state && $old_state eq $node ) { |
656 | # Turn off the node. We turn on no others by default. |
657 | push( @nodes_off, $node ); |
658 | } else { |
659 | # Turn on the node. |
660 | $self->{'positions'}->set_state( $pos, $node ); |
661 | # Any other 'on' nodes in the same position should be off. |
662 | push( @nodes_off, $self->colocated_nodes( $node ) ); |
663 | # Any node that is an identical transposed one should be off. |
664 | push( @nodes_off, $self->identical_nodes( $node ) ) |
665 | if $self->identical_nodes( $node ); |
666 | } |
667 | @nodes_off = unique_list( @nodes_off ); |
668 | |
669 | # Turn off the nodes that need to be turned off. |
670 | my @nodes_turned_off; |
671 | foreach my $n ( @nodes_off ) { |
672 | my $npos = $self->{'positions'}->node_position( $n ); |
673 | my $state = $self->{'positions'}->state( $npos ); |
674 | if( $state && $state eq $n ) { |
675 | # this node is still on |
676 | push( @nodes_turned_off, $n ); |
677 | my $new_state = undef; |
678 | if( $n eq $node ) { |
679 | # This is the node that was clicked, so if there are no |
680 | # other nodes there, turn off the position. In all other |
681 | # cases, restore the ellipsis. |
682 | my @all_n = $self->{'positions'}->nodes_at_position( $pos ); |
683 | $new_state = 0 if scalar( @all_n ) == 1; |
684 | } |
685 | $self->{'positions'}->set_state( $npos, $new_state ); |
686 | } elsif( $old_state && $old_state eq $n ) { |
687 | # another node has already been turned on here |
688 | push( @nodes_turned_off, $n ); |
689 | } # else some other node was on anyway, so pass. |
690 | } |
691 | return @nodes_turned_off; |
692 | } |
693 | |
694 | =item B<active_nodes> |
695 | |
696 | my @state = $graph->active_nodes( @nodes_turned_off ); |
697 | |
698 | Takes a list of nodes that have just been turned off, and returns a |
699 | set of tuples of the form ['node', 'state'] that indicates what |
700 | changes need to be made to the graph. |
701 | |
702 | =over |
703 | |
704 | =item * |
705 | |
706 | A state of 1 means 'turn on this node' |
707 | |
708 | =item * |
709 | |
710 | A state of 0 means 'turn off this node' |
711 | |
712 | =item * |
713 | |
714 | A state of undef means 'an ellipsis belongs in the text here because |
715 | no decision has been made' |
716 | |
717 | =back |
718 | |
719 | =cut |
720 | |
b49c4318 |
721 | sub active_nodes { |
722 | my( $self, @toggled_off_nodes ) = @_; |
a25d4374 |
723 | |
724 | # In case this is the first run |
725 | $self->init_lemmatizer(); |
726 | # First get the positions of those nodes which have been |
727 | # toggled off. |
b49c4318 |
728 | my $positions_off = {}; |
a25d4374 |
729 | map { $positions_off->{ $self->{'positions'}->node_position( $_ ) } = $_ } |
730 | @toggled_off_nodes; |
731 | |
b49c4318 |
732 | |
733 | # Now for each position, we have to see if a node is on, and we |
734 | # have to see if a node has been turned off. |
735 | my @answer; |
a25d4374 |
736 | foreach my $pos ( $self->{'positions'}->all() ) { |
58a3c424 |
737 | # Find the state of this position. If there is an active node, |
738 | # its name will be the state; otherwise the state will be 0 |
739 | # (nothing at this position) or undef (ellipsis at this position) |
740 | my $active = $self->{'positions'}->state( $pos ); |
a25d4374 |
741 | |
b49c4318 |
742 | # Is there a formerly active node that was toggled off? |
743 | if( exists( $positions_off->{$pos} ) ) { |
a25d4374 |
744 | my $off_node = $positions_off->{$pos}; |
58a3c424 |
745 | if( $active && $active ne $off_node) { |
b49c4318 |
746 | push( @answer, [ $off_node, 0 ], [ $active, 1 ] ); |
b49c4318 |
747 | } else { |
58a3c424 |
748 | push( @answer, [ $off_node, $active ] ); |
b49c4318 |
749 | } |
58a3c424 |
750 | |
b49c4318 |
751 | # No formerly active node, so we just see if there is a currently |
752 | # active one. |
753 | } elsif( $active ) { |
754 | # Push the active node, whatever it is. |
755 | push( @answer, [ $active, 1 ] ); |
756 | } else { |
58a3c424 |
757 | # Push the state that is there. Arbitrarily use the first node |
758 | # at that position. |
759 | my @pos_nodes = $self->{'positions'}->nodes_at_position( $pos ); |
760 | push( @answer, |
761 | [ $pos_nodes[0], $self->{'positions'}->state( $pos ) ] ); |
b49c4318 |
762 | } |
763 | } |
58a3c424 |
764 | |
b49c4318 |
765 | return @answer; |
766 | } |
767 | |
52ce987f |
768 | # A couple of helpers. |
b49c4318 |
769 | |
a25d4374 |
770 | sub is_common { |
771 | my( $self, $node ) = @_; |
772 | $node = $self->_nodeobj( $node ); |
773 | return $node->get_attribute('class') eq 'common'; |
b49c4318 |
774 | } |
775 | |
a25d4374 |
776 | sub _nodeobj { |
777 | my( $self, $node ) = @_; |
778 | unless( ref $node eq 'Graph::Easy::Node' ) { |
779 | $node = $self->node( $node ); |
b49c4318 |
780 | } |
a25d4374 |
781 | return $node; |
b49c4318 |
782 | } |
783 | |
b49c4318 |
784 | sub colocated_nodes { |
a25d4374 |
785 | my $self = shift; |
786 | return $self->{'positions'}->colocated_nodes( @_ ); |
b49c4318 |
787 | } |
788 | |
a25d4374 |
789 | sub text_of_node { |
790 | my( $self, $node_id ) = @_; |
791 | # This is the label of the given node. |
792 | return $self->node( $node_id )->label(); |
b49c4318 |
793 | } |
794 | |
795 | sub text_for_witness { |
796 | my( $self, $wit ) = @_; |
b49c4318 |
797 | |
a25d4374 |
798 | my @nodes = $self->{'positions'}->witness_path( $wit ); |
799 | my @words = map { $self->node( $_ )->label() } @nodes; |
b49c4318 |
800 | return join( ' ', @words ); |
801 | } |
802 | |
a25d4374 |
803 | sub unique_list { |
804 | my( @list ) = @_; |
805 | my %h; |
806 | map { $h{$_} = 1 } @list; |
807 | return keys( %h ); |
b49c4318 |
808 | } |
a25d4374 |
809 | |
2ceca8c3 |
810 | =back |
811 | |
812 | =head1 LICENSE |
813 | |
814 | This package is free software and is provided "as is" without express |
815 | or implied warranty. You can redistribute it and/or modify it under |
816 | the same terms as Perl itself. |
817 | |
818 | =head1 AUTHOR |
819 | |
820 | Tara L Andrews, aurum@cpan.org |
821 | |
822 | =cut |
823 | |
b49c4318 |
824 | 1; |
a25d4374 |
825 | |