works now apart from the perl solver fallback
[scpubgit/stemmatology.git] / lib / Text / Tradition / Analysis.pm
CommitLineData
d71100ed 1package Text::Tradition::Analysis;
2
3use strict;
4use warnings;
e4386ba9 5use Benchmark;
88a6bac5 6use Encode qw/ encode_utf8 /;
d1348d38 7use Exporter 'import';
88a6bac5 8use JSON qw/ encode_json decode_json /;
9use LWP::UserAgent;
d71100ed 10use Text::Tradition;
11use Text::Tradition::Stemma;
12
d1348d38 13use vars qw/ @EXPORT_OK /;
a2cf85dd 14@EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
d1348d38 15
7f52eac8 16=head1 NAME
17
18Text::Tradition::Analysis - functions for stemma analysis of a tradition
19
20=head1 SYNOPSIS
21
22 use Text::Tradition;
23 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
24 my $t = Text::Tradition->new(
25 'name' => 'this is a text',
26 'input' => 'TEI',
27 'file' => '/path/to/tei_parallel_seg_file.xml' );
28 $t->add_stemma( 'dotfile' => $stemmafile );
29
30 my $variant_data = run_analysis( $tradition );
31 # Recalculate rank $n treating all orthographic variants as equivalent
32 my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
33
34=head1 DESCRIPTION
35
36Text::Tradition is a library for representation and analysis of collated
37texts, particularly medieval ones. The Collation is the central feature of
38a Tradition, where the text, its sequence of readings, and its relationships
39between readings are actually kept.
40
41=head1 SUBROUTINES
42
88a6bac5 43=head2 run_analysis( $tradition, %opts )
7f52eac8 44
88a6bac5 45Runs the analysis described in analyze_variant_location on every location in the
46collation of the given tradition, with the given options. These include:
7f52eac8 47
88a6bac5 48=over 4
49
50=item * stemma_id - Specify which of the tradition's stemmata to use. Default
51is 0 (i.e. the first).
52
53=item * ranks - Specify a list of location ranks to analyze; exclude the rest.
54
55=item * merge_types - Specify a list of relationship types, where related readings
56should be treated as identical for the purposes of analysis.
57
58=back
7f52eac8 59
60=begin testing
61
62use Text::Tradition;
63use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
64
65my $datafile = 't/data/florilegium_tei_ps.xml';
66my $tradition = Text::Tradition->new( 'input' => 'TEI',
67 'name' => 'test0',
68 'file' => $datafile );
69my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
70is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
71
f00cefe8 72my %expected_genealogical = (
73 1 => '',
74 2 => 1,
75 3 => '',
76 5 => '',
77 7 => '',
78 8 => '',
79 10 => '',
80 13 => 1,
81 33 => '',
82 34 => '',
83 37 => '',
84 60 => '',
85 81 => 1,
86 84 => '',
87 87 => '',
88 101 => '',
89 102 => '',
90 122 => 1,
91 157 => '',
92 166 => 1,
93 169 => 1,
94 200 => 1,
95 216 => 1,
96 217 => 1,
97 219 => 1,
98 241 => 1,
99 242 => 1,
100 243 => 1,
101);
102
7f52eac8 103my $data = run_analysis( $tradition );
f00cefe8 104foreach my $row ( @{$data->{'variants'}} ) {
105 is( $row->{'genealogical'}, $expected_genealogical{$row->{'id'}},
106 "Got correct genealogical flag for row " . $row->{'id'} );
107}
5be0cdeb 108is( $data->{'conflict_count'}, 16, "Got right conflict count" );
109is( $data->{'variant_count'}, 28, "Got right total variant number" );
7f52eac8 110
111=end testing
112
113=cut
114
d71100ed 115sub run_analysis {
88a6bac5 116 my( $tradition, %opts ) = @_;
f00cefe8 117 my $c = $tradition->collation;
88a6bac5 118
119 my $stemma_id = $opts{'stemma_id'} || 0;
120 my @ranks = @{$opts{'ranks'}} if ref( $opts{'ranks'} ) eq 'ARRAY';
121 my @collapse = @{$opts{'merge_types'}} if ref( $opts{'merge_types'} ) eq 'ARRAY';
122
123 # Get the stemma
124 my $stemma = $tradition->stemma( $stemma_id );
125 # Figure out which witnesses we are working with
126 my @lacunose = $stemma->hypotheticals;
fae07016 127 my @tradition_wits = map { $_->sigil } $tradition->witnesses;
128 map { push( @tradition_wits, $_->sigil."_ac" ) if $_->is_layered }
129 $tradition->witnesses;
130 push( @lacunose, _symmdiff( [ $stemma->witnesses ], \@tradition_wits ) );
88a6bac5 131
132 # Find and mark 'common' ranks for exclusion, unless they were
133 # explicitly specified.
134 unless( @ranks ) {
135 my %common_rank;
136 foreach my $rdg ( $tradition->collation->common_readings ) {
137 $common_rank{$rdg->rank} = 1;
138 }
139 @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
d71100ed 140 }
7f52eac8 141
88a6bac5 142 # Group the variants to send to the solver
143 my @groups;
144 foreach my $rank ( @ranks ) {
145 push( @groups, group_variants( $tradition, $rank, \@lacunose, \@collapse ) );
d71100ed 146 }
147
88a6bac5 148 # Parse the answer
149 my $answer = solve_variants( $stemma->editable( ' ' ), @groups );
fae07016 150 $DB::single = 1;
151
88a6bac5 152 # Do further analysis on the answer
153 foreach my $idx ( 0 .. $#ranks ) {
154 my $location = $answer->{'variants'}->[$idx];
155 # Add the rank back in
156 $location->{'id'} = $ranks[$idx];
157 # Run the extra analysis we need.
88a6bac5 158 analyze_location( $tradition, $stemma->graph, $location );
159 }
f00cefe8 160
88a6bac5 161 return $answer;
d71100ed 162}
163
7f52eac8 164=head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
165
166Groups the variants at the given $rank of the collation, treating any
167relationships in @merge_relationship_types as equivalent. $lacunose should
168be a reference to an array, to which the sigla of lacunose witnesses at this
169rank will be appended.
170
171Returns two ordered lists $readings, $groups, where $readings->[$n] is attested
172by the witnesses listed in $groups->[$n].
173
174=cut
175
176# Return group_readings, groups, lacunose
d1348d38 177sub group_variants {
7f52eac8 178 my( $tradition, $rank, $lacunose, $collapse ) = @_;
179 my $c = $tradition->collation;
fae07016 180 my $aclabel = $c->ac_label;
7f52eac8 181 # Get the alignment table readings
182 my %readings_at_rank;
183 my @gap_wits;
184 foreach my $tablewit ( @{$tradition->collation->alignment_table->{'alignment'}} ) {
185 my $rdg = $tablewit->{'tokens'}->[$rank-1];
fae07016 186 my $wit = $tablewit->{'witness'};
187 $wit =~ s/^(.*)\Q$aclabel\E$/${1}_ac/;
7f52eac8 188 if( $rdg && $rdg->{'t'}->is_lacuna ) {
fae07016 189 _add_to_witlist( $wit, $lacunose, '_ac' );
7f52eac8 190 } elsif( $rdg ) {
191 $readings_at_rank{$rdg->{'t'}->text} = $rdg->{'t'};
192 } else {
fae07016 193 _add_to_witlist( $wit, \@gap_wits, '_ac' );
7f52eac8 194 }
195 }
d1348d38 196
7f52eac8 197 # Group the readings, collapsing groups by relationship if needed
198 my %grouped_readings;
199 foreach my $rdg ( sort { $b->witnesses <=> $a->witnesses } values %readings_at_rank ) {
200 # Skip readings that have been collapsed into others.
f00cefe8 201 next if exists $grouped_readings{$rdg->id} && !$grouped_readings{$rdg->id};
7f52eac8 202 my @wits = $rdg->witnesses;
fae07016 203 map { s/\Q$aclabel\E$/_ac/ } @wits;
7f52eac8 204 if( $collapse ) {
205 my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
206 foreach my $other ( $rdg->related_readings( $filter ) ) {
fae07016 207 my @otherwits = $other->witnesses;
208 map { s/\Q$aclabel\E$/_ac/ } @otherwits;
209 push( @wits, @otherwits );
f00cefe8 210 $grouped_readings{$other->id} = 0;
d1348d38 211 }
212 }
f00cefe8 213 $grouped_readings{$rdg->id} = \@wits;
7f52eac8 214 }
215 $grouped_readings{'(omitted)'} = \@gap_wits if @gap_wits;
216 # Get rid of our collapsed readings
217 map { delete $grouped_readings{$_} unless $grouped_readings{$_} }
218 keys %grouped_readings
219 if $collapse;
220
5be0cdeb 221 return \%grouped_readings;
d1348d38 222}
223
88a6bac5 224=head2 solve_variants( $graph, @groups )
225
226Sends the set of groups to the external graph solver service and returns
227a cleaned-up answer, adding the rank IDs back where they belong.
228
229The JSON has the form
230 { "graph": [ stemmagraph DOT string without newlines ],
231 "groupings": [ array of arrays of groups, one per rank ] }
232
233The answer has the form
234 { "variants" => [ array of variant location structures ],
235 "variant_count" => total,
236 "conflict_count" => number of conflicts detected,
237 "genealogical_count" => number of solutions found }
238
239=cut
240
241sub solve_variants {
242 my( $graph, @groups ) = @_;
243
244 # Make the json with stemma + groups
245 my $jsonstruct = { 'graph' => $graph, 'groupings' => [] };
246 foreach my $ghash ( @groups ) {
247 my @grouping;
248 foreach my $k ( sort keys %$ghash ) {
249 push( @grouping, $ghash->{$k} );
250 }
251 push( @{$jsonstruct->{'groupings'}}, \@grouping );
252 }
253 my $json = encode_json( $jsonstruct );
254
255 # Send it off and get the result
256 my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
257 my $ua = LWP::UserAgent->new();
258 my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
259 'Content' => $json );
260
261 my $answer;
262 if( $resp->is_success ) {
263 $answer = decode_json( $resp->content );
264 } else {
fae07016 265 # Fall back to the old method.
266 warn "IDP solver returned " . $resp->status_line . " / " . $resp->content
267 . "; falling back to perl method";
268 $answer = perl_solver( $graph, @groups );
88a6bac5 269 }
270
271 # Fold the result back into what we know about the groups.
272 my $variants = [];
273 my $genealogical = 0;
274 foreach my $idx ( 0 .. $#groups ) {
275 my( $calc_groups, $result ) = @{$answer->[$idx]};
276 $genealogical++ if $result;
277 my $input_group = $groups[$idx];
278 foreach my $k ( sort keys %$input_group ) {
279 my $cg = shift @$calc_groups;
280 $input_group->{$k} = $cg;
281 }
282 my $vstruct = {
283 'genealogical' => $result,
284 'readings' => [],
fae07016 285 };
88a6bac5 286 foreach my $k ( keys %$input_group ) {
287 push( @{$vstruct->{'readings'}},
fae07016 288 { 'readingid' => $k, 'group' => $input_group->{$k}} );
88a6bac5 289 }
290 push( @$variants, $vstruct );
291 }
292
293 return { 'variants' => $variants,
294 'variant_count' => scalar @$variants,
295 'genealogical_count' => $genealogical };
296}
297
fae07016 298=head2 analyze_location ( $tradition, $graph, $location_hash )
7f52eac8 299
fae07016 300Given the tradition, its stemma graph, and the solution from the graph solver,
301work out the rest of the information we want. For each reading we need missing,
302conflict, reading_parents, independent_occurrence, followed, not_followed, and follow_unknown. Alters the location_hash in place.
7f52eac8 303
304=cut
732152b1 305
fae07016 306sub analyze_location {
307 my ( $tradition, $graph, $variant_row ) = @_;
308
309 # Make a hash of all known node memberships, and make the subgraphs.
310 my $contig = {};
311 my $reading_roots = {};
312 my $subgraph = {};
313 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
314 my $rid = $rdghash->{'readingid'};
315 map { $contig->{$_} = $rid } @{$rdghash->{'group'}};
c4a4fb1b 316
fae07016 317 # Make the subgraph.
318 my $part = $graph->copy;
319 my %these_vertices;
320 map { $these_vertices{$_} = 1 } @{$rdghash->{'group'}};
321 $part->delete_vertices( grep { !$these_vertices{$_} } $part->vertices );
322 $subgraph->{$rid} = $part;
323 # Get the reading roots.
324 map { $reading_roots->{$_} = $rid } $part->predecessorless_vertices;
bebec0e9 325 }
326
fae07016 327 # Now that we have all the node group memberships, calculate followed/
bebec0e9 328 # non-followed/unknown values for each reading. Also figure out the
329 # reading's evident parent(s).
330 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
fae07016 331 # Group string key - TODO do we need this?
332 my $gst = wit_stringify( $rdghash->{'group'} );
333 my $rid = $rdghash->{'readingid'};
334 # Get the subgraph
335 my $part = $subgraph->{$rid};
336
337 # Start figuring things out.
bebec0e9 338 my @roots = $part->predecessorless_vertices;
339 $rdghash->{'independent_occurrence'} = scalar @roots;
340 $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
341 # Find the parent readings, if any, of this reading.
f00cefe8 342 my %rdgparents;
fae07016 343 $DB::single = 1;
bebec0e9 344 foreach my $wit ( @roots ) {
f00cefe8 345 # Look in the main stemma to find this witness's extant or known-reading
346 # immediate ancestor(s), and look up the reading that each ancestor olds.
347 my @check = $graph->predecessors( $wit );
348 while( @check ) {
349 my @next;
350 foreach my $wparent( @check ) {
fae07016 351 my $preading = $contig->{$wparent};
352 if( $preading ) {
353 $rdgparents{$preading} = 1;
f00cefe8 354 } else {
355 push( @next, $graph->predecessors( $wparent ) );
356 }
357 }
358 @check = @next;
359 }
bebec0e9 360 }
f00cefe8 361 $rdghash->{'reading_parents'} = [ keys %rdgparents ];
bebec0e9 362
363 # Find the number of times this reading was altered, and the number of
364 # times we're not sure.
365 my( %nofollow, %unknownfollow );
366 foreach my $wit ( $part->vertices ) {
367 foreach my $wchild ( $graph->successors( $wit ) ) {
368 next if $part->has_vertex( $wchild );
fae07016 369 if( $reading_roots->{$wchild} && $contig->{$wchild} ) {
bebec0e9 370 # It definitely changed here.
371 $nofollow{$wchild} = 1;
372 } elsif( !($contig->{$wchild}) ) {
373 # The child is a hypothetical node not definitely in
374 # any group. Answer is unknown.
375 $unknownfollow{$wchild} = 1;
376 } # else it's a non-root node in a known group, and therefore
377 # is presumed to have its reading from its group, not this link.
378 }
379 }
380 $rdghash->{'not_followed'} = keys %nofollow;
381 $rdghash->{'follow_unknown'} = keys %unknownfollow;
fae07016 382
383 # Now say whether this reading represents a conflict.
384 unless( $variant_row->{'genealogical'} ) {
385 $rdghash->{'conflict'} = @roots != 1;
386 }
c4a4fb1b 387 }
d71100ed 388}
389
fae07016 390
391=head2 perl_solver( $tradition, $rank, $stemma_id, @merge_relationship_types )
392
393** NOTE ** This method should hopefully not be called - it is not guaranteed
394to be correct. Serves as a backup for the real solver.
395
396Runs an analysis of the given tradition, at the location given in $rank,
397against the graph of the stemma specified in $stemma_id. The argument
398@merge_relationship_types is an optional list of relationship types for
399which readings so related should be treated as equivalent.
400
401Returns a nested array data structure as follows:
402
403 [ [ group_list, is_genealogical ], [ group_list, is_genealogical ] ... ]
404
405where the group list is the array of arrays passed in for each element of @groups,
406possibly with the addition of hypothetical readings.
407
408
409=cut
410
411sub perl_solver {
412 my( $graph, @groups ) = @_;
413
414 warn "Not implemented yet";
415 return [];
416}
417
418 # Now do the work.
419# my $contig = {};
420# my $subgraph = {};
421# my $is_conflicted;
422# my $conflict = {};
423# my %reading_roots;
424# my $variant_row = { 'id' => $rank, 'readings' => [] };
425# # Mark each ms as in its own group, first.
426# foreach my $g ( @$groups ) {
427# my $gst = wit_stringify( $g );
428# map { $contig->{$_} = $gst } @$g;
429# }
430# # Now for each unmarked node in the graph, initialize an array
431# # for possible group memberships. We will use this later to
432# # resolve potential conflicts.
433# map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
434# foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
435# my $gst = wit_stringify( $g ); # This is the group name
436# # Copy the graph, and delete all non-members from the new graph.
437# my $part = $graph->copy;
438# my @group_roots;
439# $part->delete_vertices(
440# grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
441#
442# # Now look to see if our group is connected.
443# if( $undirected ) { # For use with distance trees etc.
444# # Find all vertices reachable from the first (arbitrary) group
445# # member. If we are genealogical this should include them all.
446# my $reachable = {};
447# map { $reachable->{$_} = 1 } $part->all_reachable( $g->[0] );
448# # TODO This is a terrible way to do distance trees, since all
449# # non-leaf nodes are included in every graph part now. We may
450# # have to go back to SPDP.
451# } else {
452# if( @$g > 1 ) {
453# # We have to take directionality into account.
454# # How many root nodes do we have?
455# my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
456# $part->predecessorless_vertices;
457# # Assuming that @$g > 1, find the first root node that has at
458# # least one successor belonging to our group. If this reading
459# # is genealogical, there should be only one, but we will check
460# # that implicitly later.
461# foreach my $root ( @roots ) {
462# # Prune the tree to get rid of extraneous hypotheticals.
463# $root = _prune_subtree( $part, $root, $contig );
464# next unless $root;
465# # Save this root for our group.
466# push( @group_roots, $root );
467# # Get all the successor nodes of our root.
468# }
469# } else {
470# # Dispense with the trivial case of one reading.
471# my $wit = pop @$g;
472# @group_roots = ( $wit );
473# foreach my $v ( $part->vertices ) {
474# $part->delete_vertex( $v ) unless $v eq $wit;
475# }
476# }
477# }
478#
479# map { $reading_roots{$_} = 1 } @group_roots;
480# if( @group_roots > 1 ) {
481# $conflict->{$group_readings->{$gst}} = 1;
482# $is_conflicted = 1;
483# }
484# # Paint the 'hypotheticals' with our group.
485# foreach my $wit ( $part->vertices ) {
486# if( ref( $contig->{$wit} ) ) {
487# push( @{$contig->{$wit}}, $gst );
488# } elsif( $contig->{$wit} ne $gst ) {
489# warn "How did we get here?";
490# }
491# }
492#
493#
494# # Start to write the reading, and save the group subgraph.
495# my $reading = { 'readingid' => $group_readings->{$gst},
496# 'missing' => wit_stringify( \@lacunose ),
497# 'group' => $gst }; # This will change if we find no conflict
498# # Save the relevant subgraph.
499# $subgraph->{$gst} = $part;
500# push( @{$variant_row->{'readings'}}, $reading );
501# }
502#
503# # For each of our hypothetical readings, flatten its 'contig' array if
504# # the array contains zero or one group. If we have any unflattened arrays,
505# # we may need to run the resolution process. If the reading is already known
506# # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
507# # it.
508# my @resolve;
509# foreach my $wit ( keys %$contig ) {
510# next unless ref( $contig->{$wit} );
511# if( @{$contig->{$wit}} > 1 ) {
512# if( $is_conflicted ) {
513# $contig->{$wit} = ''; # We aren't going to decide.
514# } else {
515# push( @resolve, $wit );
516# }
517# } else {
518# my $gst = pop @{$contig->{$wit}};
519# $contig->{$wit} = $gst || '';
520# }
521# }
522#
523# if( @resolve ) {
524# my $still_contig = {};
525# foreach my $h ( @resolve ) {
526# # For each of the hypothetical readings with more than one possibility,
527# # try deleting it from each of its member subgraphs in turn, and see
528# # if that breaks the contiguous grouping.
529# # TODO This can still break in a corner case where group A can use
530# # either vertex 1 or 2, and group B can use either vertex 2 or 1.
531# # Revisit this if necessary; it could get brute-force nasty.
532# foreach my $gst ( @{$contig->{$h}} ) {
533# my $gpart = $subgraph->{$gst}->copy();
534# # If we have come this far, there is only one root and everything
535# # is reachable from it.
536# my( $root ) = $gpart->predecessorless_vertices;
537# my $reachable = {};
538# map { $reachable->{$_} = 1 } $gpart->vertices;
539#
540# # Try deleting the hypothetical node.
541# $gpart->delete_vertex( $h );
542# if( $h eq $root ) {
543# # See if we still have a single root.
544# my @roots = $gpart->predecessorless_vertices;
545# warn "This shouldn't have happened" unless @roots;
546# if( @roots > 1 ) {
547# # $h is needed by this group.
548# if( exists( $still_contig->{$h} ) ) {
549# # Conflict!
550# $conflict->{$group_readings->{$gst}} = 1;
551# $still_contig->{$h} = '';
552# } else {
553# $still_contig->{$h} = $gst;
554# }
555# }
556# } else {
557# # $h is somewhere in the middle. See if everything
558# # else can still be reached from the root.
559# my %still_reachable = ( $root => 1 );
560# map { $still_reachable{$_} = 1 }
561# $gpart->all_successors( $root );
562# foreach my $v ( keys %$reachable ) {
563# next if $v eq $h;
564# if( !$still_reachable{$v}
565# && ( $contig->{$v} eq $gst
566# || ( exists $still_contig->{$v}
567# && $still_contig->{$v} eq $gst ) ) ) {
568# # We need $h.
569# if( exists $still_contig->{$h} ) {
570# # Conflict!
571# $conflict->{$group_readings->{$gst}} = 1;
572# $still_contig->{$h} = '';
573# } else {
574# $still_contig->{$h} = $gst;
575# }
576# last;
577# } # else we don't need $h in this group.
578# } # end foreach $v
579# } # endif $h eq $root
580# } # end foreach $gst
581# } # end foreach $h
582#
583# # Now we have some hypothetical vertices in $still_contig that are the
584# # "real" group memberships. Replace these in $contig.
585# foreach my $v ( keys %$contig ) {
586# next unless ref $contig->{$v};
587# $contig->{$v} = $still_contig->{$v};
588# }
589# } # end if @resolve
590#
591#
592# $variant_row->{'genealogical'} = !( keys %$conflict );
593# return $variant_row;
594# }
595
7f52eac8 596sub _prune_subtree {
231d71fc 597 my( $tree, $root, $contighash ) = @_;
598 # First, delete hypothetical leaves / orphans until there are none left.
599 my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
600 $tree->successorless_vertices;
601 while( @orphan_hypotheticals ) {
602 $tree->delete_vertices( @orphan_hypotheticals );
603 @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
604 $tree->successorless_vertices;
605 }
606 # Then delete a hypothetical root with only one successor, moving the
bebec0e9 607 # root to the first child that has no other predecessors.
231d71fc 608 while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
609 my @nextroot = $tree->successors( $root );
610 $tree->delete_vertex( $root );
bebec0e9 611 ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
231d71fc 612 }
613 # The tree has been modified in place, but we need to know the new root.
bebec0e9 614 $root = undef unless $root && $tree->has_vertex( $root );
231d71fc 615 return $root;
616}
d71100ed 617# Add the variant, subject to a.c. representation logic.
618# This assumes that we will see the 'main' version before the a.c. version.
619sub add_variant_wit {
620 my( $arr, $wit, $acstr ) = @_;
621 my $skip;
622 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
623 my $real = $1;
624 $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
625 }
626 push( @$arr, $wit ) unless $skip;
627}
628
5be0cdeb 629sub _useful_variant {
630 my( $group_readings, $graph, $acstr ) = @_;
631
632 # TODO Decide what to do with AC witnesses
633
634 # Sort by group size and return
635 my $is_useful = 0;
636 my( @readings, @groups ); # The sorted groups for our answer.
637 foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
638 keys %$group_readings ) {
639 push( @readings, $rdg );
640 push( @groups, $group_readings->{$rdg} );
641 if( @{$group_readings->{$rdg}} > 1 ) {
642 $is_useful++;
643 } else {
644 my( $wit ) = @{$group_readings->{$rdg}};
645 $wit =~ s/^(.*)\Q$acstr\E$/$1/;
646 $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
647 }
648 }
649 if( $is_useful > 1 ) {
650 return( \@readings, \@groups );
651 } else {
652 return( [], [] );
653 }
654}
655
7f52eac8 656=head2 wit_stringify( $groups )
657
658Takes an array of witness groupings and produces a string like
659['A','B'] / ['C','D','E'] / ['F']
d71100ed 660
7f52eac8 661=cut
d71100ed 662
663sub wit_stringify {
664 my $groups = shift;
665 my @gst;
666 # If we were passed an array of witnesses instead of an array of
667 # groupings, then "group" the witnesses first.
668 unless( ref( $groups->[0] ) ) {
669 my $mkgrp = [ $groups ];
670 $groups = $mkgrp;
671 }
672 foreach my $g ( @$groups ) {
673 push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
674 }
675 return join( ' / ', @gst );
676}
7f52eac8 677
5be0cdeb 678# Helper function to ensure that X and X a.c. never appear in the same list.
679sub _add_to_witlist {
680 my( $wit, $list, $acstr ) = @_;
681 my %inlist;
682 my $idx = 0;
683 map { $inlist{$_} = $idx++ } @$list;
684 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
685 my $acwit = $1;
686 unless( exists $inlist{$acwit} ) {
687 push( @$list, $acwit.$acstr );
688 }
689 } else {
690 if( exists( $inlist{$wit.$acstr} ) ) {
691 # Replace the a.c. version with the main witness
692 my $i = $inlist{$wit.$acstr};
693 $list->[$i] = $wit;
694 } else {
695 push( @$list, $wit );
696 }
697 }
698}
699
bebec0e9 700sub _symmdiff {
701 my( $lista, $listb ) = @_;
7f52eac8 702 my %union;
703 my %scalars;
704 map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
705 map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
bebec0e9 706 my @set = grep { $union{$_} == 1 } keys %union;
7f52eac8 707 return map { $scalars{$_} } @set;
708}
709
7101;
711
712=head1 LICENSE
713
714This package is free software and is provided "as is" without express
715or implied warranty. You can redistribute it and/or modify it under
716the same terms as Perl itself.
717
718=head1 AUTHOR
719
720Tara L Andrews E<lt>aurum@cpan.orgE<gt>