Initial library
[scpubgit/stemmatology.git] / lib / Traditions / Graph.pm
CommitLineData
b49c4318 1package Traditions::Graph;
2
3use strict;
4use warnings;
5use Graph::Easy;
6use IPC::Run qw( run binary );
7use Module::Load;
8
9sub new {
10 my $proto = shift;
11 my $class = ref( $proto ) || $proto;
12 my %opts = ( 'on_color' => 'yellow',
13 'off_color' => 'white',
14 @_ );
15 my $self = {};
16
17 # opts can be: GraphML, base+CSV, base+CTE, TEI. We need
18 # something to parse.
19 my @formats = grep { /^(GraphML|CSV|CTE|TEI)$/ } keys( %opts );
20 my $format = shift( @formats );
21 unless( $format ) {
22 warn "No data given to create a graph: need GraphML, CSV, or TEI";
23 return;
24 }
25 if( $format =~ /^(CSV|CTE)$/ && !exists $opts{'base'} ) {
26 warn "Cannot make a graph from $format without a base text";
27 return;
28 }
29
30 # Make a graph object.
31 my $collation_graph = Graph::Easy->new();
32 $collation_graph->set_attribute( 'node', 'shape', 'ellipse' );
33 # Starting point for all texts
34 my $last_node = $collation_graph->add_node( '#START#' );
35
36 $self->{'graph'} = $collation_graph;
37 bless( $self, $class );
38
39 # Now do the parsing.
40 my $mod = "Traditions::Parser::$format";
41 load( $mod );
42 my @args = ( $opts{ $format } );
43 if( $format =~ /^(CSV|CTE)$/ ) {
44 push( @args, $opts{'base'} );
45 }
46 $mod->can('parse')->( $self, @args );
47
48 return $self;
49}
50
51
52### Graph::Easy object accessor methods
53sub node {
54 my $self = shift;
55 return $self->{'graph'}->node( @_ );
56}
57
58sub edge {
59 my $self = shift;
60 return $self->{'graph'}->edge( @_ );
61}
62
63sub add_node {
64 my $self = shift;
65 return $self->{'graph'}->add_node( @_ );
66}
67
68sub add_edge {
69 my $self = shift;
70 return $self->{'graph'}->add_edge( @_ );
71}
72
73sub del_node {
74 my $self = shift;
75 return $self->{'graph'}->del_node( @_ );
76}
77
78sub del_edge {
79 my $self = shift;
80 return $self->{'graph'}->del_edge( @_ );
81}
82
83sub nodes {
84 my $self = shift;
85 return $self->{'graph'}->nodes( @_ );
86}
87
88sub edges {
89 my $self = shift;
90 return $self->{'graph'}->edges( @_ );
91}
92
93sub merge_nodes {
94 my $self = shift;
95 return $self->{'graph'}->merge_nodes( @_ );
96}
97
98### Helper methods for navigating the tree
99
100sub start {
101 # Return the beginning node of the graph.
102 my $self = shift;
103 my( $new_start ) = @_;
104 if( $new_start ) {
105 $self->{'graph'}->rename_node( $new_start, '#START#' );
106 }
107 return $self->{'graph'}->node('#START#');
108}
109
110sub save_positions {
111 my( $self, $positions ) = @_;
112 $self->{'positions'} = $positions;
113}
114
115sub next_word {
116 # Return the successor via the corresponding edge.
117 my( $self, $node, $edge ) = @_;
118 $edge = "base text" unless $edge;
119 my @next_edges = $node->outgoing();
120 return undef unless scalar( @next_edges );
121
122 foreach my $e ( @next_edges ) {
123 next unless $e->label() eq $edge;
124 return $e->to();
125 }
126
127 warn "Could not find node connected to edge $edge";
128 return undef;
129}
130
131sub prior_word {
132 # Return the predecessor via the corresponding edge.
133 my( $self, $node, $edge ) = @_;
134 $edge = "base text" unless $edge;
135 my @prior_edges = $node->incoming();
136 return undef unless scalar( @prior_edges );
137
138 foreach my $e ( @prior_edges ) {
139 next unless $e->label() eq $edge;
140 return $e->from();
141 }
142
143 warn "Could not find node connected from edge $edge";
144 return undef;
145}
146
147sub node_sequence {
148 my( $self, $start, $end, $label ) = @_;
149 # TODO make label able to follow a single MS
150 unless( ref( $start ) eq 'Graph::Easy::Node'
151 && ref( $end ) eq 'Graph::Easy::Node' ) {
152 warn "Called node_sequence without two nodes!";
153 return ();
154 }
155 $label = 'base text' unless $label;
156 my @nodes = ( $start );
157 my %seen;
158 my $n = $start;
159 while( $n ne $end ) {
160 if( exists( $seen{$n->name()} ) ) {
161 warn "Detected loop at " . $n->name();
162 last;
163 }
164 $seen{$n->name()} = 1;
165
166 my @edges = $n->outgoing();
167 my @relevant_edges = grep { $_->label =~ /^$label$/ } @edges;
168 warn "Did not find an edge $label from node " . $n->label
169 unless scalar @relevant_edges;
170 warn "Found more than one edge $label from node " . $n->label
171 unless scalar @relevant_edges == 1;
172 my $next = $relevant_edges[0]->to();
173 push( @nodes, $next );
174 $n = $next;
175 }
176 # Check that the last node is our end node.
177 my $last = $nodes[$#nodes];
178 warn "Last node found from " . $start->label() .
179 " via path $label is not the end!"
180 unless $last eq $end;
181
182 return @nodes;
183}
184
185sub string_lemma {
186 my( $self, $start, $end, $label ) = @_;
187
188 my @nodes = $self->node_sequence( $start, $end, $label );
189 my @words = map { $_->label() } @nodes;
190 return join( ' ', @words );
191}
192
193## Output. We use GraphViz for the layout because it handles large
194## graphs better than Graph::Easy does natively.
195
196sub as_svg {
197 my( $self, $recalc ) = @_;
198 return $self->{'svg'} if( exists $self->{'svg'} && !$recalc );
199
200 $self->{'graphviz'} = $self->{'graph'}->as_graphviz()
201 unless( exists $self->{'graphviz'} && !$recalc );
202
203 my @cmd = qw/dot -Tsvg/;
204 my( $svg, $err );
205 my $in = $self->{'graphviz'};
206 run( \@cmd, \$in, ">", binary(), \$svg );
207 $self->{'svg'} = $svg;
208 return $svg;
209}
210
2111;
212__END__
213#### EXAMINE BELOW ####
214
215# Returns a list of the nodes that are currently on and the nodes for
216# which an ellipsis needs to stand in. Optionally takes a list of
217# nodes that have just been turned off, to include in the list.
218sub active_nodes {
219 my( $self, @toggled_off_nodes ) = @_;
220
221 my $all_nodes = {};
222 map { $all_nodes->{ $_ } = $self->_find_position( $_ ) } keys %{$self->{node_state}};
223 my $positions = _invert_hash( $all_nodes );
224 my $positions_off = {};
225 map { $positions_off->{ $all_nodes->{$_} } = $_ } @toggled_off_nodes;
226
227 # Now for each position, we have to see if a node is on, and we
228 # have to see if a node has been turned off.
229 my @answer;
230 foreach my $pos ( @{$self->{_all_positions}} ) {
231 my $nodes = $positions->{$pos};
232
233 # See if there is an active node for this position.
234 my @active_nodes = grep { $self->{node_state}->{$_} == 1 } @$nodes;
235 warn "More than one active node at position $pos!"
236 unless scalar( @active_nodes ) < 2;
237 my $active;
238 if( scalar( @active_nodes ) ) {
239 $active = $self->node_to_svg( $active_nodes[0] );
240 }
241
242 # Is there a formerly active node that was toggled off?
243 if( exists( $positions_off->{$pos} ) ) {
244 my $off_node = $self->node_to_svg( $positions_off->{$pos} );
245 if( $active ) {
246 push( @answer, [ $off_node, 0 ], [ $active, 1 ] );
247 } elsif ( scalar @$nodes == 1 ) {
248 # This was the only node at its position. No ellipsis.
249 push( @answer, [ $off_node, 0 ] );
250 } else {
251 # More than one node at this position, none now active.
252 # Restore the ellipsis.
253 push( @answer, [ $off_node, undef ] );
254 }
255 # No formerly active node, so we just see if there is a currently
256 # active one.
257 } elsif( $active ) {
258 # Push the active node, whatever it is.
259 push( @answer, [ $active, 1 ] );
260 } else {
261 # There is no change here; we need an ellipsis. Use
262 # the first node in the list, arbitrarily.
263 push( @answer, [ $self->node_to_svg( $nodes->[0] ), undef ] );
264 }
265 }
266
267 return @answer;
268}
269
270# Compares two nodes according to their positions in the witness
271# index hash.
272sub _by_position {
273 my $self = shift;
274 return _cmp_position( $self->_find_position( $a ),
275 $self->_find_position( $b ) );
276}
277
278# Takes two position strings (X,Y) and sorts them.
279sub _cmp_position {
280 my @pos_a = split(/,/, $a );
281 my @pos_b = split(/,/, $b );
282
283 my $big_cmp = $pos_a[0] <=> $pos_b[0];
284 return $big_cmp if $big_cmp;
285 # else
286 return $pos_a[1] <=> $pos_b[1];
287}
288
289# Finds the position of a node in the witness index hash. Warns if
290# the same node has non-identical positions across witnesses. Quite
291# possibly should not warn.
292sub _find_position {
293 my $self = shift;
294 my $node = shift;
295
296 my $position;
297 foreach my $wit ( keys %{$self->{indices}} ) {
298 if( exists $self->{indices}->{$wit}->{$node} ) {
299 if( $position && $self->{indices}->{$wit}->{$node} ne $position ) {
300 warn "Position for node $node varies between witnesses";
301 }
302 $position = $self->{indices}->{$wit}->{$node};
303 }
304 }
305
306 warn "No position found for node $node" unless $position;
307 return $position;
308}
309
310sub _invert_hash {
311 my ( $hash, $plaintext_keys ) = @_;
312 my %new_hash;
313 foreach my $key ( keys %$hash ) {
314 my $val = $hash->{$key};
315 my $valkey = $val;
316 if( $plaintext_keys
317 && ref( $val ) ) {
318 $valkey = $plaintext_keys->{ scalar( $val ) };
319 warn( "No plaintext value given for $val" ) unless $valkey;
320 }
321 if( exists ( $new_hash{$valkey} ) ) {
322 push( @{$new_hash{$valkey}}, $key );
323 } else {
324 $new_hash{$valkey} = [ $key ];
325 }
326 }
327 return \%new_hash;
328}
329
330
331# Takes a node ID to toggle; returns a list of nodes that are
332# turned OFF as a result.
333sub toggle_node {
334 my( $self, $node_id ) = @_;
335 $node_id = $self->node_from_svg( $node_id );
336
337 # Is it a common node? If so, we don't want to turn it off.
338 # Later we might want to allow it off, but give a warning.
339 if( grep { $_ =~ /^$node_id$/ } @{$self->{common_nodes}} ) {
340 return ();
341 }
342
343 my @nodes_off;
344 # If we are about to turn on a node...
345 if( !$self->{node_state}->{$node_id} ) {
346 # Turn on the node.
347 $self->{node_state}->{$node_id} = 1;
348 # Turn off any other 'on' nodes in the same position.
349 push( @nodes_off, $self->colocated_nodes( $node_id ) );
350 # Turn off any node that is an identical transposed one.
351 push( @nodes_off, $self->identical_nodes( $node_id ) )
352 if $self->identical_nodes( $node_id );
353 } else {
354 push( @nodes_off, $node_id );
355 }
356
357 # Turn off the nodes that need to be turned off.
358 map { $self->{node_state}->{$_} = 0 } @nodes_off;
359 return @nodes_off;
360}
361
362sub node_from_svg {
363 my( $self, $node_id ) = @_;
364 # TODO: implement this for real. Need a mapping between SVG titles
365 # and GraphML IDs, as created in make_graphviz.
366 $node_id =~ s/^node_//;
367 return $node_id;
368}
369
370sub node_to_svg {
371 my( $self, $node_id ) = @_;
372 # TODO: implement this for real. Need a mapping between SVG titles
373 # and GraphML IDs, as created in make_graphviz.
374 $node_id = "node_$node_id";
375 return $node_id;
376}
377
378sub colocated_nodes {
379 my( $self, $node ) = @_;
380 my @cl;
381
382 # Get the position of the stated node.
383 my $position;
384 foreach my $index ( values %{$self->{indices}} ) {
385 if( exists( $index->{$node} ) ) {
386 if( $position && $position ne $index->{$node} ) {
387 warn "Two ms positions for the same node!";
388 }
389 $position = $index->{$node};
390 }
391 }
392
393 # Now find the other nodes in that position, if any.
394 foreach my $index ( values %{$self->{indices}} ) {
395 my %location = reverse( %$index );
396 push( @cl, $location{$position} )
397 if( exists $location{$position}
398 && $location{$position} ne $node );
399 }
400 return @cl;
401}
402
403sub identical_nodes {
404 my( $self, $node ) = @_;
405 return undef unless exists $self->{transpositions} &&
406 exists $self->{transpositions}->{$node};
407 return $self->{transpositions}->{$node};
408}
409
410sub text_for_witness {
411 my( $self, $wit ) = @_;
412 # Get the witness name
413 my %wit_id_for = reverse %{$self->{witnesses}};
414 my $wit_id = $wit_id_for{$wit};
415 unless( $wit_id ) {
416 warn "Could not find an ID for witness $wit";
417 return;
418 }
419
420 my $path = $self->{indices}->{$wit_id};
421 my @nodes = sort { $self->_cmp_position( $path->{$a}, $path->{$b} ) } keys( %$path );
422 my @words = map { $self->text_of_node( $_ ) } @nodes;
423 return join( ' ', @words );
424}
425
426sub text_of_node {
427 my( $self, $node_id ) = @_;
428 my $xpath = '//g:node[@id="' . $self->node_from_svg( $node_id) .
429 '"]/g:data[@key="' . $self->{nodedata}->{token} . '"]/child::text()';
430 return $self->{xpc}->findvalue( $xpath );
431}
4321;