Commit | Line | Data |
0f093b35 |
1 | #!/usr/bin/perl |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use feature 'unicode_strings'; |
6 | use lib '/home/tla/stemmatology/lib'; |
7 | use Encode qw/ decode_utf8 /; |
8 | use Fcntl qw/ :flock /; |
9 | use Gearman::Worker; |
10 | use Graph; |
11 | use Graph::Reader::Dot; |
12 | use Text::Tradition::Directory; |
13 | use Text::Tradition::Stemma; |
14 | use IPC::Run qw/ run /; |
15 | use JSON; |
16 | use TryCatch; |
17 | |
d1dfc40d |
18 | my %VARS = ( |
19 | DBTYPE => 'mysql', |
20 | DBHOST => '127.0.0.1', |
21 | DBPORT => '3006', |
22 | DBNAME => 'idpresult', |
23 | DSN => undef, |
24 | DBUSER => undef, |
25 | DBPASS => undef, |
26 | GEARMAN_SERVER => '127.0.0.1:4730', |
27 | IDPBINPATH => '/usr/local/idp/bin', |
28 | IDPSCRIPTPATH => '/usr/local/idp/script' |
29 | ); |
30 | |
31 | if( -f "/etc/graphcalc.conf" ) { |
32 | # Read the variables in from here. |
33 | open( GCCONF, "/etc/graphcalc.conf" ) |
34 | or die "Could not open configuration file /etc/graphcalc.conf"; |
35 | while(<GCCONF>) { |
36 | chomp; |
37 | s/^\s+//; |
38 | my( $name, $val ) = split( /\s*\=\s*/, $_ ); |
39 | if( exists $VARS{$name} ) { |
40 | $VARS{$name} = $val; |
41 | } |
42 | } |
43 | close GCCONF; |
44 | } |
45 | unless( $VARS{DSN} ) { |
46 | $VARS{DSN} = sprintf( "dbi:%s:dbname=%s;host=%s;port=%s", |
47 | $VARS{DBTYPE}, $VARS{DBNAME}, $VARS{DBHOST}, $VARS{DBPORT} ); |
48 | } |
49 | |
0f093b35 |
50 | my $db = Text::Tradition::Directory->new( |
d1dfc40d |
51 | 'dsn' => $VARS{DSN}, |
52 | 'extra_args' => { 'user' => $VARS{DBUSER}, 'password' => $VARS{DBPASS} } ); |
0f093b35 |
53 | my @idp_programs = qw/ findGroupings findClasses /; |
54 | # there is also findSources but it is redundant for now |
55 | my $witness_map = {}; |
56 | |
57 | my $worker = Gearman::Worker->new(); |
d1dfc40d |
58 | $worker->job_servers( $VARS{GEARMAN_SERVER} ); |
0f093b35 |
59 | $worker->register_function( run_idp => \&run_idp ); |
60 | $worker->work while 1; |
61 | |
62 | # Handle a request to run IDP on a list of Text::Tradition::Analysis::Result |
63 | # object IDs. Need to look these up in the DB, set their status to 'running', |
64 | # convert them to JSON, and send them off to be solved. |
65 | |
66 | sub run_idp { |
67 | my $job = shift; |
68 | print "Beginning IDP run for ID(s) " . $job->arg . "\n"; |
69 | my @problemids = split( /\s*,\s*/, $job->arg ); |
70 | my $scope = $db->new_scope(); |
71 | # Look up each problem ID and sort them into distinct groups by graph. |
72 | my %distinct_graphs; |
73 | my %dgproblems; # lookup table for problem ID -> set mapping |
74 | my $denom = 0; # the total number of problems we will solve |
75 | # Clear out the witness map |
76 | $witness_map = {}; |
77 | foreach my $problem ( @problemids ) { |
78 | my $result = $db->lookup( $problem ); |
79 | if( $result ) { |
80 | # Check to see if it already has an answer |
81 | if( $result->status && $result->status eq 'OK' ) { |
82 | print STDERR "Solution already recorded for Analysis::Result problem $problem\n"; |
83 | next; |
84 | } elsif( $result->status && $result->status eq 'running' ) { |
85 | print STDERR "Already working on Analysis::Result problem $problem\n"; |
86 | next; |
87 | } |
88 | # No? Then add it to our list. |
89 | $denom++; |
90 | $result->status( 'running' ); |
91 | $db->save( $result ); |
92 | $distinct_graphs{$result->graph} = [] |
93 | unless exists $distinct_graphs{$result->graph}; |
94 | push( @{$distinct_graphs{$result->graph}}, [ $result->sets ] ); |
95 | $dgproblems{$result->graph} = [] |
96 | unless exists $dgproblems{$result->graph}; |
97 | push( @{$dgproblems{$result->graph}}, $problem ); |
98 | } else { |
99 | print STDERR "Did not find Analysis::Result with ID $problem; skipping\n"; |
100 | } |
101 | } |
102 | |
103 | my $done = 0; |
104 | # Now for each graph problem set, sanitize it, convert it to JSON, |
105 | # and send it to IDP. |
106 | foreach my $dg ( keys %distinct_graphs ) { |
107 | my $idpdata = { graph => $dg, groupings => $distinct_graphs{$dg} }; |
108 | my $datastr = encode_json( _sanitize_names( $idpdata ) ); |
109 | my %idpanswer; |
110 | foreach my $program ( @idp_programs ) { |
111 | # Got the data, so send it to IDP. |
d1dfc40d |
112 | $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin:'.$VARS{IDPBINPATH}; |
113 | chdir( $VARS{IDPSCRIPTPATH} ); |
0f093b35 |
114 | my @cmd = qw! idp -e !; |
115 | push( @cmd, "exec($program)", 'main.idp' ); |
116 | my( $ret, $err ); |
117 | run( \@cmd, \$datastr, \$ret, \$err ); |
118 | |
119 | if( $err =~ /^Error:/m ) { |
120 | print STDERR "Error running idp: $err\n"; |
121 | return; |
122 | } |
123 | |
124 | # Save the result for the given program |
125 | $idpanswer{$program} = _desanitize_names( decode_json( $ret ) ); |
126 | } |
127 | # Now map the results from IDP back into the database. |
128 | foreach my $idx ( 0 .. $#{$dgproblems{$dg}} ) { |
129 | my $result = $db->lookup( $dgproblems{$dg}->[$idx] ); |
130 | my $genanswer = $idpanswer{'findGroupings'}->[$idx]; |
131 | $result->is_genealogical( $genanswer->[1] ? 1 : 0 ); |
132 | |
133 | # We take the groupings as well as the classes from the |
134 | # findClasses answer, to make sure they match |
135 | my $classanswer = $idpanswer{'findClasses'}->[$idx]; |
136 | foreach my $grouping ( @{$classanswer->[0]} ) { |
137 | $result->record_grouping( $grouping ); |
138 | } |
139 | foreach my $class ( keys %{$classanswer->[1]} ) { |
140 | my $class_members = $classanswer->[1]->{$class}; |
141 | map { $result->set_class( $_, $class ) } @$class_members; |
142 | } |
143 | $result->status('OK'); |
144 | print "Saving new IDP result with ID key " . $result->object_key . "\n"; |
145 | $db->save( $result ); |
146 | } |
147 | |
148 | # Update the job status if we have more than one problem |
149 | if( scalar keys %distinct_graphs > 1 ) { |
150 | $done += scalar @{$dgproblems{$dg}}; |
151 | $job->set_status( $done, $denom ); |
152 | } |
153 | } |
154 | return $done; |
155 | } |
156 | |
157 | sub _sanitize_names { |
158 | my( $element ) = @_; |
159 | my $result; |
160 | if( ref( $element ) eq 'HASH' ) { |
161 | my $safe_hash = {}; |
162 | map { my $k = $_; $safe_hash->{$k} = _sanitize_names( $element->{$k} ) } keys %$element; |
163 | $result = $safe_hash; |
164 | } elsif( ref( $element ) eq 'ARRAY' || ref( $element ) eq 'Set::Scalar' ) { |
165 | $result = []; |
166 | foreach my $n ( @$element ) { |
167 | push( @$result, _sanitize_names( $n ) ); |
168 | } |
169 | } elsif( $element =~ /^digraph/ ) { |
170 | my $dotfh; |
171 | open( $dotfh, '<', \$element ); |
172 | binmode( $dotfh, ':utf8' ); |
173 | my $graph = Graph::Reader::Dot->new()->read_graph( $dotfh ); |
174 | die "Could not parse graph from dot: $element" unless $graph; |
175 | # Make a new graph with safe witness names |
176 | my $cgraph = Graph->new(); |
177 | foreach my $v ( $graph->vertices ) { |
178 | my $nv = _sanitize_names( $v ); |
179 | $cgraph->add_vertex( $nv ); |
180 | $cgraph->set_vertex_attribute( $nv, 'class', |
181 | $graph->get_vertex_attribute( $v, 'class' ) ); |
182 | } |
183 | foreach my $e ( $graph->edges ) { |
184 | my $ne = _sanitize_names( $e ); |
185 | $cgraph->add_edge( @$ne ); |
186 | } |
187 | # Now as well as sanitizing the graph we should prune away |
188 | # hypothetical leaf nodes, to optimize the problem for the solver. |
189 | _prune_hypotheticals( $cgraph ); |
190 | # Write the cloned graph out to a string |
191 | $result = Text::Tradition::Stemma::editable_graph( $cgraph, { linesep => ' ' } ); |
192 | $witness_map->{$result} = $element unless $result eq $element; |
193 | } else { |
194 | $result = _safe_witstr( $element ); |
195 | if( $result ne $element ) { |
196 | # Warn if witness_map conflicts |
197 | warn "Ambiguous transformation $result for $element vs. " |
198 | . $witness_map->{$result} |
199 | if( exists( $witness_map->{$result} ) |
200 | && $witness_map->{$result} ne $element ); |
201 | $witness_map->{$result} = $element; |
202 | } |
203 | } |
204 | return $result; |
205 | } |
206 | |
207 | sub _safe_witstr { |
208 | my $witstr = shift; |
209 | $witstr =~ s/\s+/_/g; |
210 | $witstr =~ s/[^\w\d-]//g; |
211 | return $witstr; |
212 | } |
213 | |
214 | sub _desanitize_names { |
215 | my( $element ) = @_; |
216 | my $result = []; |
217 | if( ref( $element ) eq 'ARRAY' ) { |
218 | foreach my $n ( @$element ) { |
219 | push( @$result, _desanitize_names( $n ) ); |
220 | } |
221 | } elsif( ref( $element ) eq 'HASH' ) { |
222 | my $real_hash = {}; |
223 | map { $real_hash->{$_} = _desanitize_names( $element->{$_} ) } keys %$element; |
224 | $result = $real_hash; |
225 | } elsif( exists $witness_map->{$element} ) { |
226 | $result = $witness_map->{$element} |
227 | } else { |
228 | $result = $element; |
229 | } |
230 | return $result; |
231 | } |
232 | |
233 | sub _prune_hypotheticals { |
234 | my $graph = shift; |
235 | my @orphan_hypotheticals; |
236 | do { |
237 | @orphan_hypotheticals = |
238 | grep { $graph->get_vertex_attribute( $_, 'class' ) |
239 | eq 'hypothetical' } $graph->successorless_vertices; |
240 | $graph->delete_vertices( @orphan_hypotheticals ); |
241 | } while( @orphan_hypotheticals ); |
242 | } |