From: tla Date: Mon, 27 Aug 2012 23:01:29 +0000 (+0200) Subject: server-side scripts necessary for IDP problems to be calculated X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0f093b350359832feb21ddf4895c140e27e655a5;p=scpubgit%2Fstemmatology.git server-side scripts necessary for IDP problems to be calculated --- diff --git a/idp_server/graphcalc.cgi b/idp_server/graphcalc.cgi new file mode 100755 index 0000000..7edf758 --- /dev/null +++ b/idp_server/graphcalc.cgi @@ -0,0 +1,121 @@ +#!/usr/bin/perl -T + +use strict; +use warnings; +use lib '/home/tla/stemmatology/lib'; +use CGI; +use Encode qw/ decode /; +use Gearman::Client; +use JSON; +use Text::Tradition::Directory; +use Text::Tradition::Analysis::Result; +use TryCatch; + +my %status = ( + '400' => '400 Bad Request', + '500' => '500 Internal Server Error', +); + +my $q = CGI->new(\*STDIN); +# check that Content-Type is application/json +my $ctype = $q->content_type; +my $encoding = 'UTF-8'; # default +if( $ctype =~ m!^(\w+/[\w+]+);\s*charset=(.*)$! ) { + ( $ctype, $encoding ) = ( $1, $2 ); +} +error( 400, 'Content type must be application/json' ) + unless $ctype eq 'application/json'; + +# Get the post data, and decode it according to the given character set +my $jsonstr = decode( $encoding, $q->param('POSTDATA') ); +$jsonstr =~ s/\&/\n/g; +# TODO eval this; if it breaks return an error. +my $request; +try { + $request = from_json( $jsonstr ); +} catch( $err ) { + error( 400, "JSON parsing error: $err" ); +} +# Request should be a hash that can be used to instantiate an Analysis::Result, +# or else an array of such hashes. +my @problems; +my $first = ref( $request ) eq 'ARRAY' ? shift @$request : $request; +try { + my $result = Text::Tradition::Analysis::Result->new( $first ); + push( @problems, $result ); +} catch( $err ) { + error( 400, "Argument $first is neither a Result serialization nor an array: $err" ); +} +# Now parse the rest of the result objects +unless( $first eq $request ) { + foreach my $rj ( @$request ) { + try { + my $result = Text::Tradition::Analysis::Result->new( $rj ); + push( @problems, $result ); + } catch( $err ) { + error( 400, "Argument $rj is not a Result serialization: $err" ); + } + } +} + +# For each of the result objects, see if its key exists in the DB. Kick off the +# calculation of any that need to be calculated, but don't wait more than two +# seconds for a result. Return the DB version of each of the objects. +my $dir = Text::Tradition::Directory->new( + 'dsn' => 'dbi:mysql:dbname=idpresult', + 'extra_args' => { 'user' => 'stemmaweb', 'password' => 'l@chmann' } ); +my $scope = $dir->new_scope; +my %results; +my @resultorder; # Keep track of the order in which we should return the results +my @needcalc; +foreach my $p ( @problems ) { + my $key = $p->object_key; + push( @resultorder, $key ); + my $result = $dir->lookup( $key ); + if( $result ) { + $results{$key} = $result; + } else { + push( @needcalc, $p ); + $dir->store( $key => $p ); + } +} + +# Now if any of the results need calculation, dispatch them for the purpose. +if( @needcalc ) { + my $arg = join( ',', map { $_->object_key } @needcalc ); + my $client = Gearman::Client->new; + $client->job_servers( '127.0.0.1' ); + my $task = $client->dispatch_background( run_idp => $arg ); + # See if it finishes quickly + my $wait = 3; + sleep( $wait ); + # Now replace the problems in the results hash with the DB results, + # whether finished or still calculating. + foreach my $p ( @needcalc ) { + # this should NOT fail as we stored it above + $results{$p->object_key} = $dir->lookup( $p->object_key ); + } +} + +# Finally, assemble our answer. +my $answer; +if( $first eq $request ) { + $answer = $results{$resultorder[0]}; +} else { + foreach my $key ( @resultorder ) { + push( @$answer, $results{$key} ); + } +} + + +# Now return the response as UTF-8 encoded JSON. +print $q->header(-type => 'application/json', -charset => 'UTF-8' ); +print JSON->new->allow_blessed->convert_blessed->utf8->encode( $answer ); +exit 0; + +sub error { + my( $code, $msg ) = @_; + print $q->header( -type => 'text/plain', -charset => 'UTF-8', -status => $status{$code} ); + print "$msg\n"; + exit 0; +} diff --git a/idp_server/graphcalc_worker.pl b/idp_server/graphcalc_worker.pl new file mode 100755 index 0000000..320c5cd --- /dev/null +++ b/idp_server/graphcalc_worker.pl @@ -0,0 +1,210 @@ +#!/usr/bin/perl + +use strict; +use warnings; +use feature 'unicode_strings'; +use lib '/home/tla/stemmatology/lib'; +use Encode qw/ decode_utf8 /; +use Fcntl qw/ :flock /; +use Gearman::Worker; +use Graph; +use Graph::Reader::Dot; +use Text::Tradition::Directory; +use Text::Tradition::Stemma; +use IPC::Run qw/ run /; +use JSON; +use TryCatch; + +my $db = Text::Tradition::Directory->new( + 'dsn' => 'dbi:mysql:dbname=idpresult', + 'extra_args' => { 'user' => 'stemmaweb', 'password' => 'l@chmann' } ); +my @idp_programs = qw/ findGroupings findClasses /; +# there is also findSources but it is redundant for now +my $witness_map = {}; + +my $worker = Gearman::Worker->new(); +$worker->job_servers('127.0.0.1'); +$worker->register_function( run_idp => \&run_idp ); +$worker->work while 1; + +# Handle a request to run IDP on a list of Text::Tradition::Analysis::Result +# object IDs. Need to look these up in the DB, set their status to 'running', +# convert them to JSON, and send them off to be solved. + +sub run_idp { + my $job = shift; + print "Beginning IDP run for ID(s) " . $job->arg . "\n"; + my @problemids = split( /\s*,\s*/, $job->arg ); + my $scope = $db->new_scope(); + # Look up each problem ID and sort them into distinct groups by graph. + my %distinct_graphs; + my %dgproblems; # lookup table for problem ID -> set mapping + my $denom = 0; # the total number of problems we will solve + # Clear out the witness map + $witness_map = {}; + foreach my $problem ( @problemids ) { + my $result = $db->lookup( $problem ); + if( $result ) { + # Check to see if it already has an answer + if( $result->status && $result->status eq 'OK' ) { + print STDERR "Solution already recorded for Analysis::Result problem $problem\n"; + next; + } elsif( $result->status && $result->status eq 'running' ) { + print STDERR "Already working on Analysis::Result problem $problem\n"; + next; + } + # No? Then add it to our list. + $denom++; + $result->status( 'running' ); + $db->save( $result ); + $distinct_graphs{$result->graph} = [] + unless exists $distinct_graphs{$result->graph}; + push( @{$distinct_graphs{$result->graph}}, [ $result->sets ] ); + $dgproblems{$result->graph} = [] + unless exists $dgproblems{$result->graph}; + push( @{$dgproblems{$result->graph}}, $problem ); + } else { + print STDERR "Did not find Analysis::Result with ID $problem; skipping\n"; + } + } + + my $done = 0; + # Now for each graph problem set, sanitize it, convert it to JSON, + # and send it to IDP. + foreach my $dg ( keys %distinct_graphs ) { + my $idpdata = { graph => $dg, groupings => $distinct_graphs{$dg} }; + my $datastr = encode_json( _sanitize_names( $idpdata ) ); + my %idpanswer; + foreach my $program ( @idp_programs ) { + # Got the data, so send it to IDP. + $ENV{'PATH'} = '/bin:/usr/bin:/usr/local/bin'; + chdir('/usr/lib/byzantinist/idp'); + my @cmd = qw! idp -e !; + push( @cmd, "exec($program)", 'main.idp' ); + my( $ret, $err ); + run( \@cmd, \$datastr, \$ret, \$err ); + + if( $err =~ /^Error:/m ) { + print STDERR "Error running idp: $err\n"; + return; + } + + # Save the result for the given program + $idpanswer{$program} = _desanitize_names( decode_json( $ret ) ); + } + # Now map the results from IDP back into the database. + foreach my $idx ( 0 .. $#{$dgproblems{$dg}} ) { + my $result = $db->lookup( $dgproblems{$dg}->[$idx] ); + my $genanswer = $idpanswer{'findGroupings'}->[$idx]; + $result->is_genealogical( $genanswer->[1] ? 1 : 0 ); + + # We take the groupings as well as the classes from the + # findClasses answer, to make sure they match + my $classanswer = $idpanswer{'findClasses'}->[$idx]; + foreach my $grouping ( @{$classanswer->[0]} ) { + $result->record_grouping( $grouping ); + } + foreach my $class ( keys %{$classanswer->[1]} ) { + my $class_members = $classanswer->[1]->{$class}; + map { $result->set_class( $_, $class ) } @$class_members; + } + $result->status('OK'); + print "Saving new IDP result with ID key " . $result->object_key . "\n"; + $db->save( $result ); + } + + # Update the job status if we have more than one problem + if( scalar keys %distinct_graphs > 1 ) { + $done += scalar @{$dgproblems{$dg}}; + $job->set_status( $done, $denom ); + } + } + return $done; +} + +sub _sanitize_names { + my( $element ) = @_; + my $result; + if( ref( $element ) eq 'HASH' ) { + my $safe_hash = {}; + map { my $k = $_; $safe_hash->{$k} = _sanitize_names( $element->{$k} ) } keys %$element; + $result = $safe_hash; + } elsif( ref( $element ) eq 'ARRAY' || ref( $element ) eq 'Set::Scalar' ) { + $result = []; + foreach my $n ( @$element ) { + push( @$result, _sanitize_names( $n ) ); + } + } elsif( $element =~ /^digraph/ ) { + my $dotfh; + open( $dotfh, '<', \$element ); + binmode( $dotfh, ':utf8' ); + my $graph = Graph::Reader::Dot->new()->read_graph( $dotfh ); + die "Could not parse graph from dot: $element" unless $graph; + # Make a new graph with safe witness names + my $cgraph = Graph->new(); + foreach my $v ( $graph->vertices ) { + my $nv = _sanitize_names( $v ); + $cgraph->add_vertex( $nv ); + $cgraph->set_vertex_attribute( $nv, 'class', + $graph->get_vertex_attribute( $v, 'class' ) ); + } + foreach my $e ( $graph->edges ) { + my $ne = _sanitize_names( $e ); + $cgraph->add_edge( @$ne ); + } + # Now as well as sanitizing the graph we should prune away + # hypothetical leaf nodes, to optimize the problem for the solver. + _prune_hypotheticals( $cgraph ); + # Write the cloned graph out to a string + $result = Text::Tradition::Stemma::editable_graph( $cgraph, { linesep => ' ' } ); + $witness_map->{$result} = $element unless $result eq $element; + } else { + $result = _safe_witstr( $element ); + if( $result ne $element ) { + # Warn if witness_map conflicts + warn "Ambiguous transformation $result for $element vs. " + . $witness_map->{$result} + if( exists( $witness_map->{$result} ) + && $witness_map->{$result} ne $element ); + $witness_map->{$result} = $element; + } + } + return $result; +} + +sub _safe_witstr { + my $witstr = shift; + $witstr =~ s/\s+/_/g; + $witstr =~ s/[^\w\d-]//g; + return $witstr; +} + +sub _desanitize_names { + my( $element ) = @_; + my $result = []; + if( ref( $element ) eq 'ARRAY' ) { + foreach my $n ( @$element ) { + push( @$result, _desanitize_names( $n ) ); + } + } elsif( ref( $element ) eq 'HASH' ) { + my $real_hash = {}; + map { $real_hash->{$_} = _desanitize_names( $element->{$_} ) } keys %$element; + $result = $real_hash; + } elsif( exists $witness_map->{$element} ) { + $result = $witness_map->{$element} + } else { + $result = $element; + } + return $result; +} + +sub _prune_hypotheticals { + my $graph = shift; + my @orphan_hypotheticals; + do { + @orphan_hypotheticals = + grep { $graph->get_vertex_attribute( $_, 'class' ) + eq 'hypothetical' } $graph->successorless_vertices; + $graph->delete_vertices( @orphan_hypotheticals ); + } while( @orphan_hypotheticals ); +}