Commit | Line | Data |
0f093b35 |
1 | #!/usr/bin/perl |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use feature 'unicode_strings'; |
6 | use lib '/home/tla/stemmatology/lib'; |
7 | use Encode qw/ decode_utf8 /; |
8 | use Fcntl qw/ :flock /; |
9 | use Gearman::Worker; |
10 | use Graph; |
11 | use Graph::Reader::Dot; |
12 | use Text::Tradition::Directory; |
13 | use Text::Tradition::Stemma; |
14 | use IPC::Run qw/ run /; |
15 | use JSON; |
16 | use TryCatch; |
17 | |
18 | my $db = Text::Tradition::Directory->new( |
19 | 'dsn' => 'dbi:mysql:dbname=idpresult', |
20 | 'extra_args' => { 'user' => 'stemmaweb', 'password' => 'l@chmann' } ); |
21 | my @idp_programs = qw/ findGroupings findClasses /; |
22 | # there is also findSources but it is redundant for now |
23 | my $witness_map = {}; |
24 | |
25 | my $worker = Gearman::Worker->new(); |
26 | $worker->job_servers('127.0.0.1'); |
27 | $worker->register_function( run_idp => \&run_idp ); |
28 | $worker->work while 1; |
29 | |
30 | # Handle a request to run IDP on a list of Text::Tradition::Analysis::Result |
31 | # object IDs. Need to look these up in the DB, set their status to 'running', |
32 | # convert them to JSON, and send them off to be solved. |
33 | |
34 | sub run_idp { |
35 | my $job = shift; |
36 | print "Beginning IDP run for ID(s) " . $job->arg . "\n"; |
37 | my @problemids = split( /\s*,\s*/, $job->arg ); |
38 | my $scope = $db->new_scope(); |
39 | # Look up each problem ID and sort them into distinct groups by graph. |
40 | my %distinct_graphs; |
41 | my %dgproblems; # lookup table for problem ID -> set mapping |
42 | my $denom = 0; # the total number of problems we will solve |
43 | # Clear out the witness map |
44 | $witness_map = {}; |
45 | foreach my $problem ( @problemids ) { |
46 | my $result = $db->lookup( $problem ); |
47 | if( $result ) { |
48 | # Check to see if it already has an answer |
49 | if( $result->status && $result->status eq 'OK' ) { |
50 | print STDERR "Solution already recorded for Analysis::Result problem $problem\n"; |
51 | next; |
52 | } elsif( $result->status && $result->status eq 'running' ) { |
53 | print STDERR "Already working on Analysis::Result problem $problem\n"; |
54 | next; |
55 | } |
56 | # No? Then add it to our list. |
57 | $denom++; |
58 | $result->status( 'running' ); |
59 | $db->save( $result ); |
60 | $distinct_graphs{$result->graph} = [] |
61 | unless exists $distinct_graphs{$result->graph}; |
62 | push( @{$distinct_graphs{$result->graph}}, [ $result->sets ] ); |
63 | $dgproblems{$result->graph} = [] |
64 | unless exists $dgproblems{$result->graph}; |
65 | push( @{$dgproblems{$result->graph}}, $problem ); |
66 | } else { |
67 | print STDERR "Did not find Analysis::Result with ID $problem; skipping\n"; |
68 | } |
69 | } |
70 | |
71 | my $done = 0; |
72 | # Now for each graph problem set, sanitize it, convert it to JSON, |
73 | # and send it to IDP. |
74 | foreach my $dg ( keys %distinct_graphs ) { |
75 | my $idpdata = { graph => $dg, groupings => $distinct_graphs{$dg} }; |
76 | my $datastr = encode_json( _sanitize_names( $idpdata ) ); |
77 | my %idpanswer; |
78 | foreach my $program ( @idp_programs ) { |
79 | # Got the data, so send it to IDP. |
80 | $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin'; |
81 | chdir('/usr/lib/byzantinist/idp'); |
82 | my @cmd = qw! idp -e !; |
83 | push( @cmd, "exec($program)", 'main.idp' ); |
84 | my( $ret, $err ); |
85 | run( \@cmd, \$datastr, \$ret, \$err ); |
86 | |
87 | if( $err =~ /^Error:/m ) { |
88 | print STDERR "Error running idp: $err\n"; |
89 | return; |
90 | } |
91 | |
92 | # Save the result for the given program |
93 | $idpanswer{$program} = _desanitize_names( decode_json( $ret ) ); |
94 | } |
95 | # Now map the results from IDP back into the database. |
96 | foreach my $idx ( 0 .. $#{$dgproblems{$dg}} ) { |
97 | my $result = $db->lookup( $dgproblems{$dg}->[$idx] ); |
98 | my $genanswer = $idpanswer{'findGroupings'}->[$idx]; |
99 | $result->is_genealogical( $genanswer->[1] ? 1 : 0 ); |
100 | |
101 | # We take the groupings as well as the classes from the |
102 | # findClasses answer, to make sure they match |
103 | my $classanswer = $idpanswer{'findClasses'}->[$idx]; |
104 | foreach my $grouping ( @{$classanswer->[0]} ) { |
105 | $result->record_grouping( $grouping ); |
106 | } |
107 | foreach my $class ( keys %{$classanswer->[1]} ) { |
108 | my $class_members = $classanswer->[1]->{$class}; |
109 | map { $result->set_class( $_, $class ) } @$class_members; |
110 | } |
111 | $result->status('OK'); |
112 | print "Saving new IDP result with ID key " . $result->object_key . "\n"; |
113 | $db->save( $result ); |
114 | } |
115 | |
116 | # Update the job status if we have more than one problem |
117 | if( scalar keys %distinct_graphs > 1 ) { |
118 | $done += scalar @{$dgproblems{$dg}}; |
119 | $job->set_status( $done, $denom ); |
120 | } |
121 | } |
122 | return $done; |
123 | } |
124 | |
125 | sub _sanitize_names { |
126 | my( $element ) = @_; |
127 | my $result; |
128 | if( ref( $element ) eq 'HASH' ) { |
129 | my $safe_hash = {}; |
130 | map { my $k = $_; $safe_hash->{$k} = _sanitize_names( $element->{$k} ) } keys %$element; |
131 | $result = $safe_hash; |
132 | } elsif( ref( $element ) eq 'ARRAY' || ref( $element ) eq 'Set::Scalar' ) { |
133 | $result = []; |
134 | foreach my $n ( @$element ) { |
135 | push( @$result, _sanitize_names( $n ) ); |
136 | } |
137 | } elsif( $element =~ /^digraph/ ) { |
138 | my $dotfh; |
139 | open( $dotfh, '<', \$element ); |
140 | binmode( $dotfh, ':utf8' ); |
141 | my $graph = Graph::Reader::Dot->new()->read_graph( $dotfh ); |
142 | die "Could not parse graph from dot: $element" unless $graph; |
143 | # Make a new graph with safe witness names |
144 | my $cgraph = Graph->new(); |
145 | foreach my $v ( $graph->vertices ) { |
146 | my $nv = _sanitize_names( $v ); |
147 | $cgraph->add_vertex( $nv ); |
148 | $cgraph->set_vertex_attribute( $nv, 'class', |
149 | $graph->get_vertex_attribute( $v, 'class' ) ); |
150 | } |
151 | foreach my $e ( $graph->edges ) { |
152 | my $ne = _sanitize_names( $e ); |
153 | $cgraph->add_edge( @$ne ); |
154 | } |
155 | # Now as well as sanitizing the graph we should prune away |
156 | # hypothetical leaf nodes, to optimize the problem for the solver. |
157 | _prune_hypotheticals( $cgraph ); |
158 | # Write the cloned graph out to a string |
159 | $result = Text::Tradition::Stemma::editable_graph( $cgraph, { linesep => ' ' } ); |
160 | $witness_map->{$result} = $element unless $result eq $element; |
161 | } else { |
162 | $result = _safe_witstr( $element ); |
163 | if( $result ne $element ) { |
164 | # Warn if witness_map conflicts |
165 | warn "Ambiguous transformation $result for $element vs. " |
166 | . $witness_map->{$result} |
167 | if( exists( $witness_map->{$result} ) |
168 | && $witness_map->{$result} ne $element ); |
169 | $witness_map->{$result} = $element; |
170 | } |
171 | } |
172 | return $result; |
173 | } |
174 | |
175 | sub _safe_witstr { |
176 | my $witstr = shift; |
177 | $witstr =~ s/\s+/_/g; |
178 | $witstr =~ s/[^\w\d-]//g; |
179 | return $witstr; |
180 | } |
181 | |
182 | sub _desanitize_names { |
183 | my( $element ) = @_; |
184 | my $result = []; |
185 | if( ref( $element ) eq 'ARRAY' ) { |
186 | foreach my $n ( @$element ) { |
187 | push( @$result, _desanitize_names( $n ) ); |
188 | } |
189 | } elsif( ref( $element ) eq 'HASH' ) { |
190 | my $real_hash = {}; |
191 | map { $real_hash->{$_} = _desanitize_names( $element->{$_} ) } keys %$element; |
192 | $result = $real_hash; |
193 | } elsif( exists $witness_map->{$element} ) { |
194 | $result = $witness_map->{$element} |
195 | } else { |
196 | $result = $element; |
197 | } |
198 | return $result; |
199 | } |
200 | |
201 | sub _prune_hypotheticals { |
202 | my $graph = shift; |
203 | my @orphan_hypotheticals; |
204 | do { |
205 | @orphan_hypotheticals = |
206 | grep { $graph->get_vertex_attribute( $_, 'class' ) |
207 | eq 'hypothetical' } $graph->successorless_vertices; |
208 | $graph->delete_vertices( @orphan_hypotheticals ); |
209 | } while( @orphan_hypotheticals ); |
210 | } |