Commit | Line | Data |
b49c4318 |
1 | package Traditions::Graph; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Graph::Easy; |
6 | use IPC::Run qw( run binary ); |
7 | use Module::Load; |
8 | |
9 | sub new { |
10 | my $proto = shift; |
11 | my $class = ref( $proto ) || $proto; |
12 | my %opts = ( 'on_color' => 'yellow', |
13 | 'off_color' => 'white', |
14 | @_ ); |
15 | my $self = {}; |
16 | |
17 | # opts can be: GraphML, base+CSV, base+CTE, TEI. We need |
18 | # something to parse. |
19 | my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %opts ); |
20 | my $format = shift( @formats ); |
21 | unless( $format ) { |
22 | warn "No data given to create a graph: need GraphML, CSV, or TEI"; |
23 | return; |
24 | } |
25 | if( $format =~ /^(CSV|CTE)$/ && !exists $opts{'base'} ) { |
26 | warn "Cannot make a graph from $format without a base text"; |
27 | return; |
28 | } |
29 | |
30 | # Make a graph object. |
31 | my $collation_graph = Graph::Easy->new(); |
32 | $collation_graph->set_attribute( 'node', 'shape', 'ellipse' ); |
33 | # Starting point for all texts |
34 | my $last_node = $collation_graph->add_node( '#START#' ); |
35 | |
36 | $self->{'graph'} = $collation_graph; |
37 | bless( $self, $class ); |
38 | |
39 | # Now do the parsing. |
40 | my $mod = "Traditions::Parser::$format"; |
41 | load( $mod ); |
42 | my @args = ( $opts{ $format } ); |
43 | if( $format =~ /^(CSV|CTE)$/ ) { |
44 | push( @args, $opts{'base'} ); |
45 | } |
46 | $mod->can('parse')->( $self, @args ); |
47 | |
48 | return $self; |
49 | } |
50 | |
51 | |
52 | ### Graph::Easy object accessor methods |
53 | sub node { |
54 | my $self = shift; |
55 | return $self->{'graph'}->node( @_ ); |
56 | } |
57 | |
58 | sub edge { |
59 | my $self = shift; |
60 | return $self->{'graph'}->edge( @_ ); |
61 | } |
62 | |
63 | sub add_node { |
64 | my $self = shift; |
65 | return $self->{'graph'}->add_node( @_ ); |
66 | } |
67 | |
68 | sub add_edge { |
69 | my $self = shift; |
70 | return $self->{'graph'}->add_edge( @_ ); |
71 | } |
72 | |
73 | sub del_node { |
74 | my $self = shift; |
75 | return $self->{'graph'}->del_node( @_ ); |
76 | } |
77 | |
78 | sub del_edge { |
79 | my $self = shift; |
80 | return $self->{'graph'}->del_edge( @_ ); |
81 | } |
82 | |
83 | sub nodes { |
84 | my $self = shift; |
85 | return $self->{'graph'}->nodes( @_ ); |
86 | } |
87 | |
88 | sub edges { |
89 | my $self = shift; |
90 | return $self->{'graph'}->edges( @_ ); |
91 | } |
92 | |
93 | sub merge_nodes { |
94 | my $self = shift; |
95 | return $self->{'graph'}->merge_nodes( @_ ); |
96 | } |
97 | |
98 | ### Helper methods for navigating the tree |
99 | |
100 | sub start { |
101 | # Return the beginning node of the graph. |
102 | my $self = shift; |
103 | my( $new_start ) = @_; |
104 | if( $new_start ) { |
105 | $self->{'graph'}->rename_node( $new_start, '#START#' ); |
106 | } |
107 | return $self->{'graph'}->node('#START#'); |
108 | } |
109 | |
110 | sub save_positions { |
111 | my( $self, $positions ) = @_; |
112 | $self->{'positions'} = $positions; |
113 | } |
114 | |
115 | sub next_word { |
116 | # Return the successor via the corresponding edge. |
117 | my( $self, $node, $edge ) = @_; |
118 | $edge = "base text" unless $edge; |
119 | my @next_edges = $node->outgoing(); |
120 | return undef unless scalar( @next_edges ); |
121 | |
122 | foreach my $e ( @next_edges ) { |
123 | next unless $e->label() eq $edge; |
124 | return $e->to(); |
125 | } |
126 | |
127 | warn "Could not find node connected to edge $edge"; |
128 | return undef; |
129 | } |
130 | |
131 | sub prior_word { |
132 | # Return the predecessor via the corresponding edge. |
133 | my( $self, $node, $edge ) = @_; |
134 | $edge = "base text" unless $edge; |
135 | my @prior_edges = $node->incoming(); |
136 | return undef unless scalar( @prior_edges ); |
137 | |
138 | foreach my $e ( @prior_edges ) { |
139 | next unless $e->label() eq $edge; |
140 | return $e->from(); |
141 | } |
142 | |
143 | warn "Could not find node connected from edge $edge"; |
144 | return undef; |
145 | } |
146 | |
147 | sub node_sequence { |
148 | my( $self, $start, $end, $label ) = @_; |
149 | # TODO make label able to follow a single MS |
150 | unless( ref( $start ) eq 'Graph::Easy::Node' |
151 | && ref( $end ) eq 'Graph::Easy::Node' ) { |
152 | warn "Called node_sequence without two nodes!"; |
153 | return (); |
154 | } |
155 | $label = 'base text' unless $label; |
156 | my @nodes = ( $start ); |
157 | my %seen; |
158 | my $n = $start; |
159 | while( $n ne $end ) { |
160 | if( exists( $seen{$n->name()} ) ) { |
161 | warn "Detected loop at " . $n->name(); |
162 | last; |
163 | } |
164 | $seen{$n->name()} = 1; |
165 | |
166 | my @edges = $n->outgoing(); |
167 | my @relevant_edges = grep { $_->label =~ /^$label$/ } @edges; |
168 | warn "Did not find an edge $label from node " . $n->label |
169 | unless scalar @relevant_edges; |
170 | warn "Found more than one edge $label from node " . $n->label |
171 | unless scalar @relevant_edges == 1; |
172 | my $next = $relevant_edges[0]->to(); |
173 | push( @nodes, $next ); |
174 | $n = $next; |
175 | } |
176 | # Check that the last node is our end node. |
177 | my $last = $nodes[$#nodes]; |
178 | warn "Last node found from " . $start->label() . |
179 | " via path $label is not the end!" |
180 | unless $last eq $end; |
181 | |
182 | return @nodes; |
183 | } |
184 | |
185 | sub string_lemma { |
186 | my( $self, $start, $end, $label ) = @_; |
187 | |
188 | my @nodes = $self->node_sequence( $start, $end, $label ); |
189 | my @words = map { $_->label() } @nodes; |
190 | return join( ' ', @words ); |
191 | } |
192 | |
193 | ## Output. We use GraphViz for the layout because it handles large |
194 | ## graphs better than Graph::Easy does natively. |
195 | |
196 | sub as_svg { |
197 | my( $self, $recalc ) = @_; |
198 | return $self->{'svg'} if( exists $self->{'svg'} && !$recalc ); |
199 | |
200 | $self->{'graphviz'} = $self->{'graph'}->as_graphviz() |
201 | unless( exists $self->{'graphviz'} && !$recalc ); |
202 | |
203 | my @cmd = qw/dot -Tsvg/; |
204 | my( $svg, $err ); |
205 | my $in = $self->{'graphviz'}; |
206 | run( \@cmd, \$in, ">", binary(), \$svg ); |
207 | $self->{'svg'} = $svg; |
208 | return $svg; |
209 | } |
210 | |
211 | 1; |
212 | __END__ |
213 | #### EXAMINE BELOW #### |
214 | |
215 | # Returns a list of the nodes that are currently on and the nodes for |
216 | # which an ellipsis needs to stand in. Optionally takes a list of |
217 | # nodes that have just been turned off, to include in the list. |
218 | sub active_nodes { |
219 | my( $self, @toggled_off_nodes ) = @_; |
220 | |
221 | my $all_nodes = {}; |
222 | map { $all_nodes->{ $_ } = $self->_find_position( $_ ) } keys %{$self->{node_state}}; |
223 | my $positions = _invert_hash( $all_nodes ); |
224 | my $positions_off = {}; |
225 | map { $positions_off->{ $all_nodes->{$_} } = $_ } @toggled_off_nodes; |
226 | |
227 | # Now for each position, we have to see if a node is on, and we |
228 | # have to see if a node has been turned off. |
229 | my @answer; |
230 | foreach my $pos ( @{$self->{_all_positions}} ) { |
231 | my $nodes = $positions->{$pos}; |
232 | |
233 | # See if there is an active node for this position. |
234 | my @active_nodes = grep { $self->{node_state}->{$_} == 1 } @$nodes; |
235 | warn "More than one active node at position $pos!" |
236 | unless scalar( @active_nodes ) < 2; |
237 | my $active; |
238 | if( scalar( @active_nodes ) ) { |
239 | $active = $self->node_to_svg( $active_nodes[0] ); |
240 | } |
241 | |
242 | # Is there a formerly active node that was toggled off? |
243 | if( exists( $positions_off->{$pos} ) ) { |
244 | my $off_node = $self->node_to_svg( $positions_off->{$pos} ); |
245 | if( $active ) { |
246 | push( @answer, [ $off_node, 0 ], [ $active, 1 ] ); |
247 | } elsif ( scalar @$nodes == 1 ) { |
248 | # This was the only node at its position. No ellipsis. |
249 | push( @answer, [ $off_node, 0 ] ); |
250 | } else { |
251 | # More than one node at this position, none now active. |
252 | # Restore the ellipsis. |
253 | push( @answer, [ $off_node, undef ] ); |
254 | } |
255 | # No formerly active node, so we just see if there is a currently |
256 | # active one. |
257 | } elsif( $active ) { |
258 | # Push the active node, whatever it is. |
259 | push( @answer, [ $active, 1 ] ); |
260 | } else { |
261 | # There is no change here; we need an ellipsis. Use |
262 | # the first node in the list, arbitrarily. |
263 | push( @answer, [ $self->node_to_svg( $nodes->[0] ), undef ] ); |
264 | } |
265 | } |
266 | |
267 | return @answer; |
268 | } |
269 | |
270 | # Compares two nodes according to their positions in the witness |
271 | # index hash. |
272 | sub _by_position { |
273 | my $self = shift; |
274 | return _cmp_position( $self->_find_position( $a ), |
275 | $self->_find_position( $b ) ); |
276 | } |
277 | |
278 | # Takes two position strings (X,Y) and sorts them. |
279 | sub _cmp_position { |
280 | my @pos_a = split(/,/, $a ); |
281 | my @pos_b = split(/,/, $b ); |
282 | |
283 | my $big_cmp = $pos_a[0] <=> $pos_b[0]; |
284 | return $big_cmp if $big_cmp; |
285 | # else |
286 | return $pos_a[1] <=> $pos_b[1]; |
287 | } |
288 | |
289 | # Finds the position of a node in the witness index hash. Warns if |
290 | # the same node has non-identical positions across witnesses. Quite |
291 | # possibly should not warn. |
292 | sub _find_position { |
293 | my $self = shift; |
294 | my $node = shift; |
295 | |
296 | my $position; |
297 | foreach my $wit ( keys %{$self->{indices}} ) { |
298 | if( exists $self->{indices}->{$wit}->{$node} ) { |
299 | if( $position && $self->{indices}->{$wit}->{$node} ne $position ) { |
300 | warn "Position for node $node varies between witnesses"; |
301 | } |
302 | $position = $self->{indices}->{$wit}->{$node}; |
303 | } |
304 | } |
305 | |
306 | warn "No position found for node $node" unless $position; |
307 | return $position; |
308 | } |
309 | |
310 | sub _invert_hash { |
311 | my ( $hash, $plaintext_keys ) = @_; |
312 | my %new_hash; |
313 | foreach my $key ( keys %$hash ) { |
314 | my $val = $hash->{$key}; |
315 | my $valkey = $val; |
316 | if( $plaintext_keys |
317 | && ref( $val ) ) { |
318 | $valkey = $plaintext_keys->{ scalar( $val ) }; |
319 | warn( "No plaintext value given for $val" ) unless $valkey; |
320 | } |
321 | if( exists ( $new_hash{$valkey} ) ) { |
322 | push( @{$new_hash{$valkey}}, $key ); |
323 | } else { |
324 | $new_hash{$valkey} = [ $key ]; |
325 | } |
326 | } |
327 | return \%new_hash; |
328 | } |
329 | |
330 | |
331 | # Takes a node ID to toggle; returns a list of nodes that are |
332 | # turned OFF as a result. |
333 | sub toggle_node { |
334 | my( $self, $node_id ) = @_; |
335 | $node_id = $self->node_from_svg( $node_id ); |
336 | |
337 | # Is it a common node? If so, we don't want to turn it off. |
338 | # Later we might want to allow it off, but give a warning. |
339 | if( grep { $_ =~ /^$node_id$/ } @{$self->{common_nodes}} ) { |
340 | return (); |
341 | } |
342 | |
343 | my @nodes_off; |
344 | # If we are about to turn on a node... |
345 | if( !$self->{node_state}->{$node_id} ) { |
346 | # Turn on the node. |
347 | $self->{node_state}->{$node_id} = 1; |
348 | # Turn off any other 'on' nodes in the same position. |
349 | push( @nodes_off, $self->colocated_nodes( $node_id ) ); |
350 | # Turn off any node that is an identical transposed one. |
351 | push( @nodes_off, $self->identical_nodes( $node_id ) ) |
352 | if $self->identical_nodes( $node_id ); |
353 | } else { |
354 | push( @nodes_off, $node_id ); |
355 | } |
356 | |
357 | # Turn off the nodes that need to be turned off. |
358 | map { $self->{node_state}->{$_} = 0 } @nodes_off; |
359 | return @nodes_off; |
360 | } |
361 | |
362 | sub node_from_svg { |
363 | my( $self, $node_id ) = @_; |
364 | # TODO: implement this for real. Need a mapping between SVG titles |
365 | # and GraphML IDs, as created in make_graphviz. |
366 | $node_id =~ s/^node_//; |
367 | return $node_id; |
368 | } |
369 | |
370 | sub node_to_svg { |
371 | my( $self, $node_id ) = @_; |
372 | # TODO: implement this for real. Need a mapping between SVG titles |
373 | # and GraphML IDs, as created in make_graphviz. |
374 | $node_id = "node_$node_id"; |
375 | return $node_id; |
376 | } |
377 | |
378 | sub colocated_nodes { |
379 | my( $self, $node ) = @_; |
380 | my @cl; |
381 | |
382 | # Get the position of the stated node. |
383 | my $position; |
384 | foreach my $index ( values %{$self->{indices}} ) { |
385 | if( exists( $index->{$node} ) ) { |
386 | if( $position && $position ne $index->{$node} ) { |
387 | warn "Two ms positions for the same node!"; |
388 | } |
389 | $position = $index->{$node}; |
390 | } |
391 | } |
392 | |
393 | # Now find the other nodes in that position, if any. |
394 | foreach my $index ( values %{$self->{indices}} ) { |
395 | my %location = reverse( %$index ); |
396 | push( @cl, $location{$position} ) |
397 | if( exists $location{$position} |
398 | && $location{$position} ne $node ); |
399 | } |
400 | return @cl; |
401 | } |
402 | |
403 | sub identical_nodes { |
404 | my( $self, $node ) = @_; |
405 | return undef unless exists $self->{transpositions} && |
406 | exists $self->{transpositions}->{$node}; |
407 | return $self->{transpositions}->{$node}; |
408 | } |
409 | |
410 | sub text_for_witness { |
411 | my( $self, $wit ) = @_; |
412 | # Get the witness name |
413 | my %wit_id_for = reverse %{$self->{witnesses}}; |
414 | my $wit_id = $wit_id_for{$wit}; |
415 | unless( $wit_id ) { |
416 | warn "Could not find an ID for witness $wit"; |
417 | return; |
418 | } |
419 | |
420 | my $path = $self->{indices}->{$wit_id}; |
421 | my @nodes = sort { $self->_cmp_position( $path->{$a}, $path->{$b} ) } keys( %$path ); |
422 | my @words = map { $self->text_of_node( $_ ) } @nodes; |
423 | return join( ' ', @words ); |
424 | } |
425 | |
426 | sub text_of_node { |
427 | my( $self, $node_id ) = @_; |
428 | my $xpath = '//g:node[@id="' . $self->node_from_svg( $node_id) . |
429 | '"]/g:data[@key="' . $self->{nodedata}->{token} . '"]/child::text()'; |
430 | return $self->{xpc}->findvalue( $xpath ); |
431 | } |
432 | 1; |