use strict;
use warnings;
use Benchmark;
+use Encode qw/ encode_utf8 /;
use Exporter 'import';
+use JSON qw/ encode_json decode_json /;
+use LWP::UserAgent;
use Text::Tradition;
use Text::Tradition::Stemma;
=head1 SUBROUTINES
-=head2 run_analysis( $tradition, $stemma_id, @merge_relationship_types )
+=head2 run_analysis( $tradition, %opts )
-Runs the analysis described in analyze_variant_location on every location
-in the collation of the given tradition, against the stemma specified in
-$stemma_id. If $stemma_id is not specified, it defaults to 0 (referencing
-the first stemma saved for the tradition.)
+Runs the analysis described in analyze_variant_location on every location in the
+collation of the given tradition, with the given options. These include:
-The optional @merge_relationship_types contains a list of relationship types
-to treat as equivalent for the analysis.
+=over 4
+
+=item * stemma_id - Specify which of the tradition's stemmata to use. Default
+is 0 (i.e. the first).
+
+=item * ranks - Specify a list of location ranks to analyze; exclude the rest.
+
+=item * merge_types - Specify a list of relationship types, where related readings
+should be treated as identical for the purposes of analysis.
+
+=back
=begin testing
my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
+my %expected_genealogical = (
+ 1 => '',
+ 2 => 1,
+ 3 => '',
+ 5 => '',
+ 7 => '',
+ 8 => '',
+ 10 => '',
+ 13 => 1,
+ 33 => '',
+ 34 => '',
+ 37 => '',
+ 60 => '',
+ 81 => 1,
+ 84 => '',
+ 87 => '',
+ 101 => '',
+ 102 => '',
+ 122 => 1,
+ 157 => '',
+ 166 => 1,
+ 169 => 1,
+ 200 => 1,
+ 216 => 1,
+ 217 => 1,
+ 219 => 1,
+ 241 => 1,
+ 242 => 1,
+ 243 => 1,
+);
+
my $data = run_analysis( $tradition );
-# TODO Check genealogical count
-is( $data->{'genealogical_count'}, 13, "Got right genealogical count" );
+foreach my $row ( @{$data->{'variants'}} ) {
+ is( $row->{'genealogical'}, $expected_genealogical{$row->{'id'}},
+ "Got correct genealogical flag for row " . $row->{'id'} );
+}
is( $data->{'conflict_count'}, 16, "Got right conflict count" );
is( $data->{'variant_count'}, 28, "Got right total variant number" );
=cut
sub run_analysis {
- my( $tradition, $stemma_id, @collapse ) = @_;
- $stemma_id = 0 unless $stemma_id;
-
- # Run the variant analysis on every rank in the graph that doesn't
- # have a common reading. Return the results.
- my @variants; # holds results from analyze_variant_location
- my $genealogical; # counter of 'genealogical' variants
- my $conflicts; # counter of conflicting readings
+ my( $tradition, %opts ) = @_;
+ my $c = $tradition->collation;
+
+ my $stemma_id = $opts{'stemma_id'} || 0;
+ my @ranks = @{$opts{'ranks'}} if ref( $opts{'ranks'} ) eq 'ARRAY';
+ my @collapse = @{$opts{'merge_types'}} if ref( $opts{'merge_types'} ) eq 'ARRAY';
+
+ # Get the stemma
+ my $stemma = $tradition->stemma( $stemma_id );
+ # Figure out which witnesses we are working with
+ my @lacunose = $stemma->hypotheticals;
+ push( @lacunose, _symmdiff( [ $stemma->witnesses ],
+ [ map { $_->sigil } $tradition->witnesses ] ) );
+
+ # Find and mark 'common' ranks for exclusion, unless they were
+ # explicitly specified.
+ unless( @ranks ) {
+ my %common_rank;
+ foreach my $rdg ( $tradition->collation->common_readings ) {
+ $common_rank{$rdg->rank} = 1;
+ }
+ @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
+ }
- # Find and mark 'common' ranks for exclusion.
- my %common_rank;
- foreach my $rdg ( $tradition->collation->common_readings ) {
- $common_rank{$rdg->rank} = 1;
+ # Group the variants to send to the solver
+ my @groups;
+ foreach my $rank ( @ranks ) {
+ push( @groups, group_variants( $tradition, $rank, \@lacunose, \@collapse ) );
}
- foreach my $rank ( 1 .. $tradition->collation->end->rank-1 ) {
- next if $common_rank{$rank};
- my $variant_row = analyze_variant_location(
- $tradition, $rank, $stemma_id, @collapse );
- next unless $variant_row;
- push( @variants, $variant_row );
- $genealogical++ if $variant_row->{'genealogical'};
- $conflicts += grep { $_->{'conflict'} } @{$variant_row->{'readings'}};
+ # Parse the answer
+ my $answer = solve_variants( $stemma->editable( ' ' ), @groups );
+
+ # Do further analysis on the answer
+ foreach my $idx ( 0 .. $#ranks ) {
+ my $location = $answer->{'variants'}->[$idx];
+ # Add the rank back in
+ $location->{'id'} = $ranks[$idx];
+ # Run the extra analysis we need.
+ # For each reading we need missing, conflict, reading_parents,
+ # independent_occurrence, followed, not_followed, follow_unknown
+ analyze_location( $tradition, $stemma->graph, $location );
}
- return {
- 'variants' => \@variants,
- 'variant_count' => scalar @variants, # TODO redundant
- 'conflict_count' => $conflicts,
- 'genealogical_count' => $genealogical,
- };
+ return $answer;
}
=head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
my %grouped_readings;
foreach my $rdg ( sort { $b->witnesses <=> $a->witnesses } values %readings_at_rank ) {
# Skip readings that have been collapsed into others.
- next if exists $grouped_readings{$rdg->text} && !$grouped_readings{$rdg->text};
+ next if exists $grouped_readings{$rdg->id} && !$grouped_readings{$rdg->id};
my @wits = $rdg->witnesses;
if( $collapse ) {
my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
foreach my $other ( $rdg->related_readings( $filter ) ) {
push( @wits, $other->witnesses );
- $grouped_readings{$other->text} = 0;
+ $grouped_readings{$other->id} = 0;
}
}
- $grouped_readings{$rdg->text} = \@wits;
+ $grouped_readings{$rdg->id} = \@wits;
}
$grouped_readings{'(omitted)'} = \@gap_wits if @gap_wits;
# Get rid of our collapsed readings
return \%grouped_readings;
}
+=head2 solve_variants( $graph, @groups )
+
+Sends the set of groups to the external graph solver service and returns
+a cleaned-up answer, adding the rank IDs back where they belong.
+
+The JSON has the form
+ { "graph": [ stemmagraph DOT string without newlines ],
+ "groupings": [ array of arrays of groups, one per rank ] }
+
+The answer has the form
+ { "variants" => [ array of variant location structures ],
+ "variant_count" => total,
+ "conflict_count" => number of conflicts detected,
+ "genealogical_count" => number of solutions found }
+
+=cut
+
+sub solve_variants {
+ my( $graph, @groups ) = @_;
+
+ # Make the json with stemma + groups
+ my $jsonstruct = { 'graph' => $graph, 'groupings' => [] };
+ foreach my $ghash ( @groups ) {
+ my @grouping;
+ foreach my $k ( sort keys %$ghash ) {
+ push( @grouping, $ghash->{$k} );
+ }
+ push( @{$jsonstruct->{'groupings'}}, \@grouping );
+ }
+ my $json = encode_json( $jsonstruct );
+
+ # Send it off and get the result
+ my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
+ my $ua = LWP::UserAgent->new();
+ my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
+ 'Content' => $json );
+
+ my $answer;
+ if( $resp->is_success ) {
+ $answer = decode_json( $resp->content );
+ } else {
+ # Either throw an error or fall back to the old method.
+ die "Solver returned " . $resp->status_line . " / " . $resp->content;
+ }
+
+ # Fold the result back into what we know about the groups.
+ my $variants = [];
+ my $genealogical = 0;
+ foreach my $idx ( 0 .. $#groups ) {
+ my( $calc_groups, $result ) = @{$answer->[$idx]};
+ $genealogical++ if $result;
+ my $input_group = $groups[$idx];
+ foreach my $k ( sort keys %$input_group ) {
+ my $cg = shift @$calc_groups;
+ $input_group->{$k} = $cg;
+ }
+ my $vstruct = {
+ 'genealogical' => $result,
+ 'readings' => [],
+ }
+ foreach my $k ( keys %$input_group ) {
+ push( @{$vstruct->{'readings'}},
+ { 'readingid' => $k, 'group' => $dg } );
+ }
+ push( @$variants, $vstruct );
+ }
+
+ return { 'variants' => $variants,
+ 'variant_count' => scalar @$variants,
+ 'genealogical_count' => $genealogical };
+}
+
=head2 analyze_variant_location( $tradition, $rank, $stemma_id, @merge_relationship_types )
Runs an analysis of the given tradition, at the location given in $rank,
{ 'id' => $rank,
'genealogical' => boolean,
- 'readings => [ { text => $reading_text,
+ 'readings => [ { readingid => $reading_id,
group => [ witnesses ],
conflict => [ conflicting ],
missing => [ excluded ] }, ... ]
my %reading_roots;
my $variant_row = { 'id' => $rank, 'readings' => [] };
# Mark each ms as in its own group, first.
+ $DB::single = 1 if $rank == 81;
foreach my $g ( @$groups ) {
my $gst = wit_stringify( $g );
map { $contig->{$_} = $gst } @$g;
# Now for each unmarked node in the graph, initialize an array
# for possible group memberships. We will use this later to
# resolve potential conflicts.
- $DB::single = 1 if $rank == 636;
map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
my $gst = wit_stringify( $g ); # This is the group name
}
} else {
# Dispense with the trivial case of one reading.
- @group_roots = @$g;
- _prune_subtree( $part, @group_roots, $contig );
+ my $wit = pop @$g;
+ @group_roots = ( $wit );
+ foreach my $v ( $part->vertices ) {
+ $part->delete_vertex( $v ) unless $v eq $wit;
+ }
}
}
# Start to write the reading, and save the group subgraph.
- my $reading = { 'text' => $group_readings->{$gst},
+ my $reading = { 'readingid' => $group_readings->{$gst},
'missing' => wit_stringify( \@lacunose ),
'group' => $gst }; # This will change if we find no conflict
# Save the relevant subgraph.
$rdghash->{'independent_occurrence'} = scalar @roots;
$rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
# Find the parent readings, if any, of this reading.
- my @rdgparents;
+ my %rdgparents;
foreach my $wit ( @roots ) {
- # Look in the main stemma to find this witness's parent(s), and look
- # up the reading that the parent holds.
- foreach my $wparent( $graph->predecessors( $wit ) ) {
- my $pgroup = $contig->{$wparent};
- if( $pgroup ) {
- push( @rdgparents, $group_readings->{$pgroup} );
- }
- }
+ # Look in the main stemma to find this witness's extant or known-reading
+ # immediate ancestor(s), and look up the reading that each ancestor olds.
+ my @check = $graph->predecessors( $wit );
+ while( @check ) {
+ my @next;
+ foreach my $wparent( @check ) {
+ my $pgroup = $contig->{$wparent};
+ if( $pgroup ) {
+ $rdgparents{$group_readings->{$pgroup}} = 1;
+ } else {
+ push( @next, $graph->predecessors( $wparent ) );
+ }
+ }
+ @check = @next;
+ }
}
- $rdghash->{'reading_parents'} = \@rdgparents;
+ $rdghash->{'reading_parents'} = [ keys %rdgparents ];
# Find the number of times this reading was altered, and the number of
# times we're not sure.
# Now write the group and conflict information into the respective rows.
foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
- $rdghash->{'conflict'} = $conflict->{$rdghash->{'text'}};
+ $rdghash->{'conflict'} = $conflict->{$rdghash->{'readingid'}};
my @members = grep { $contig->{$_} eq $rdghash->{'group'} } keys %$contig;
$rdghash->{'group'} = wit_stringify( \@members );
}