UNFINISHED change to Analysis to incorporate IDP solver
[scpubgit/stemmatology.git] / lib / Text / Tradition / Analysis.pm
CommitLineData
d71100ed 1package Text::Tradition::Analysis;
2
3use strict;
4use warnings;
e4386ba9 5use Benchmark;
88a6bac5 6use Encode qw/ encode_utf8 /;
d1348d38 7use Exporter 'import';
88a6bac5 8use JSON qw/ encode_json decode_json /;
9use LWP::UserAgent;
d71100ed 10use Text::Tradition;
11use Text::Tradition::Stemma;
12
d1348d38 13use vars qw/ @EXPORT_OK /;
a2cf85dd 14@EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
d1348d38 15
7f52eac8 16=head1 NAME
17
18Text::Tradition::Analysis - functions for stemma analysis of a tradition
19
20=head1 SYNOPSIS
21
22 use Text::Tradition;
23 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
24 my $t = Text::Tradition->new(
25 'name' => 'this is a text',
26 'input' => 'TEI',
27 'file' => '/path/to/tei_parallel_seg_file.xml' );
28 $t->add_stemma( 'dotfile' => $stemmafile );
29
30 my $variant_data = run_analysis( $tradition );
31 # Recalculate rank $n treating all orthographic variants as equivalent
32 my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
33
34=head1 DESCRIPTION
35
36Text::Tradition is a library for representation and analysis of collated
37texts, particularly medieval ones. The Collation is the central feature of
38a Tradition, where the text, its sequence of readings, and its relationships
39between readings are actually kept.
40
41=head1 SUBROUTINES
42
88a6bac5 43=head2 run_analysis( $tradition, %opts )
7f52eac8 44
88a6bac5 45Runs the analysis described in analyze_variant_location on every location in the
46collation of the given tradition, with the given options. These include:
7f52eac8 47
88a6bac5 48=over 4
49
50=item * stemma_id - Specify which of the tradition's stemmata to use. Default
51is 0 (i.e. the first).
52
53=item * ranks - Specify a list of location ranks to analyze; exclude the rest.
54
55=item * merge_types - Specify a list of relationship types, where related readings
56should be treated as identical for the purposes of analysis.
57
58=back
7f52eac8 59
60=begin testing
61
62use Text::Tradition;
63use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
64
65my $datafile = 't/data/florilegium_tei_ps.xml';
66my $tradition = Text::Tradition->new( 'input' => 'TEI',
67 'name' => 'test0',
68 'file' => $datafile );
69my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
70is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
71
f00cefe8 72my %expected_genealogical = (
73 1 => '',
74 2 => 1,
75 3 => '',
76 5 => '',
77 7 => '',
78 8 => '',
79 10 => '',
80 13 => 1,
81 33 => '',
82 34 => '',
83 37 => '',
84 60 => '',
85 81 => 1,
86 84 => '',
87 87 => '',
88 101 => '',
89 102 => '',
90 122 => 1,
91 157 => '',
92 166 => 1,
93 169 => 1,
94 200 => 1,
95 216 => 1,
96 217 => 1,
97 219 => 1,
98 241 => 1,
99 242 => 1,
100 243 => 1,
101);
102
7f52eac8 103my $data = run_analysis( $tradition );
f00cefe8 104foreach my $row ( @{$data->{'variants'}} ) {
105 is( $row->{'genealogical'}, $expected_genealogical{$row->{'id'}},
106 "Got correct genealogical flag for row " . $row->{'id'} );
107}
5be0cdeb 108is( $data->{'conflict_count'}, 16, "Got right conflict count" );
109is( $data->{'variant_count'}, 28, "Got right total variant number" );
7f52eac8 110
111=end testing
112
113=cut
114
d71100ed 115sub run_analysis {
88a6bac5 116 my( $tradition, %opts ) = @_;
f00cefe8 117 my $c = $tradition->collation;
88a6bac5 118
119 my $stemma_id = $opts{'stemma_id'} || 0;
120 my @ranks = @{$opts{'ranks'}} if ref( $opts{'ranks'} ) eq 'ARRAY';
121 my @collapse = @{$opts{'merge_types'}} if ref( $opts{'merge_types'} ) eq 'ARRAY';
122
123 # Get the stemma
124 my $stemma = $tradition->stemma( $stemma_id );
125 # Figure out which witnesses we are working with
126 my @lacunose = $stemma->hypotheticals;
127 push( @lacunose, _symmdiff( [ $stemma->witnesses ],
128 [ map { $_->sigil } $tradition->witnesses ] ) );
129
130 # Find and mark 'common' ranks for exclusion, unless they were
131 # explicitly specified.
132 unless( @ranks ) {
133 my %common_rank;
134 foreach my $rdg ( $tradition->collation->common_readings ) {
135 $common_rank{$rdg->rank} = 1;
136 }
137 @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
d71100ed 138 }
7f52eac8 139
88a6bac5 140 # Group the variants to send to the solver
141 my @groups;
142 foreach my $rank ( @ranks ) {
143 push( @groups, group_variants( $tradition, $rank, \@lacunose, \@collapse ) );
d71100ed 144 }
145
88a6bac5 146 # Parse the answer
147 my $answer = solve_variants( $stemma->editable( ' ' ), @groups );
148
149 # Do further analysis on the answer
150 foreach my $idx ( 0 .. $#ranks ) {
151 my $location = $answer->{'variants'}->[$idx];
152 # Add the rank back in
153 $location->{'id'} = $ranks[$idx];
154 # Run the extra analysis we need.
155 # For each reading we need missing, conflict, reading_parents,
156 # independent_occurrence, followed, not_followed, follow_unknown
157 analyze_location( $tradition, $stemma->graph, $location );
158 }
f00cefe8 159
88a6bac5 160 return $answer;
d71100ed 161}
162
7f52eac8 163=head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
164
165Groups the variants at the given $rank of the collation, treating any
166relationships in @merge_relationship_types as equivalent. $lacunose should
167be a reference to an array, to which the sigla of lacunose witnesses at this
168rank will be appended.
169
170Returns two ordered lists $readings, $groups, where $readings->[$n] is attested
171by the witnesses listed in $groups->[$n].
172
173=cut
174
175# Return group_readings, groups, lacunose
d1348d38 176sub group_variants {
7f52eac8 177 my( $tradition, $rank, $lacunose, $collapse ) = @_;
178 my $c = $tradition->collation;
179 # Get the alignment table readings
180 my %readings_at_rank;
181 my @gap_wits;
182 foreach my $tablewit ( @{$tradition->collation->alignment_table->{'alignment'}} ) {
183 my $rdg = $tablewit->{'tokens'}->[$rank-1];
184 if( $rdg && $rdg->{'t'}->is_lacuna ) {
5be0cdeb 185 _add_to_witlist( $tablewit->{'witness'}, $lacunose,
186 $tradition->collation->ac_label );
7f52eac8 187 } elsif( $rdg ) {
188 $readings_at_rank{$rdg->{'t'}->text} = $rdg->{'t'};
189 } else {
5be0cdeb 190 _add_to_witlist( $tablewit->{'witness'}, \@gap_wits,
191 $tradition->collation->ac_label );
7f52eac8 192 }
193 }
d1348d38 194
7f52eac8 195 # Group the readings, collapsing groups by relationship if needed
196 my %grouped_readings;
197 foreach my $rdg ( sort { $b->witnesses <=> $a->witnesses } values %readings_at_rank ) {
198 # Skip readings that have been collapsed into others.
f00cefe8 199 next if exists $grouped_readings{$rdg->id} && !$grouped_readings{$rdg->id};
7f52eac8 200 my @wits = $rdg->witnesses;
201 if( $collapse ) {
202 my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
203 foreach my $other ( $rdg->related_readings( $filter ) ) {
204 push( @wits, $other->witnesses );
f00cefe8 205 $grouped_readings{$other->id} = 0;
d1348d38 206 }
207 }
f00cefe8 208 $grouped_readings{$rdg->id} = \@wits;
7f52eac8 209 }
210 $grouped_readings{'(omitted)'} = \@gap_wits if @gap_wits;
211 # Get rid of our collapsed readings
212 map { delete $grouped_readings{$_} unless $grouped_readings{$_} }
213 keys %grouped_readings
214 if $collapse;
215
5be0cdeb 216 return \%grouped_readings;
d1348d38 217}
218
88a6bac5 219=head2 solve_variants( $graph, @groups )
220
221Sends the set of groups to the external graph solver service and returns
222a cleaned-up answer, adding the rank IDs back where they belong.
223
224The JSON has the form
225 { "graph": [ stemmagraph DOT string without newlines ],
226 "groupings": [ array of arrays of groups, one per rank ] }
227
228The answer has the form
229 { "variants" => [ array of variant location structures ],
230 "variant_count" => total,
231 "conflict_count" => number of conflicts detected,
232 "genealogical_count" => number of solutions found }
233
234=cut
235
236sub solve_variants {
237 my( $graph, @groups ) = @_;
238
239 # Make the json with stemma + groups
240 my $jsonstruct = { 'graph' => $graph, 'groupings' => [] };
241 foreach my $ghash ( @groups ) {
242 my @grouping;
243 foreach my $k ( sort keys %$ghash ) {
244 push( @grouping, $ghash->{$k} );
245 }
246 push( @{$jsonstruct->{'groupings'}}, \@grouping );
247 }
248 my $json = encode_json( $jsonstruct );
249
250 # Send it off and get the result
251 my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
252 my $ua = LWP::UserAgent->new();
253 my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
254 'Content' => $json );
255
256 my $answer;
257 if( $resp->is_success ) {
258 $answer = decode_json( $resp->content );
259 } else {
260 # Either throw an error or fall back to the old method.
261 die "Solver returned " . $resp->status_line . " / " . $resp->content;
262 }
263
264 # Fold the result back into what we know about the groups.
265 my $variants = [];
266 my $genealogical = 0;
267 foreach my $idx ( 0 .. $#groups ) {
268 my( $calc_groups, $result ) = @{$answer->[$idx]};
269 $genealogical++ if $result;
270 my $input_group = $groups[$idx];
271 foreach my $k ( sort keys %$input_group ) {
272 my $cg = shift @$calc_groups;
273 $input_group->{$k} = $cg;
274 }
275 my $vstruct = {
276 'genealogical' => $result,
277 'readings' => [],
278 }
279 foreach my $k ( keys %$input_group ) {
280 push( @{$vstruct->{'readings'}},
281 { 'readingid' => $k, 'group' => $dg } );
282 }
283 push( @$variants, $vstruct );
284 }
285
286 return { 'variants' => $variants,
287 'variant_count' => scalar @$variants,
288 'genealogical_count' => $genealogical };
289}
290
7f52eac8 291=head2 analyze_variant_location( $tradition, $rank, $stemma_id, @merge_relationship_types )
f6c8ea08 292
7f52eac8 293Runs an analysis of the given tradition, at the location given in $rank,
294against the graph of the stemma specified in $stemma_id. The argument
295@merge_relationship_types is an optional list of relationship types for
296which readings so related should be treated as equivalent.
f6c8ea08 297
7f52eac8 298Returns a data structure as follows:
299
300 { 'id' => $rank,
301 'genealogical' => boolean,
f00cefe8 302 'readings => [ { readingid => $reading_id,
7f52eac8 303 group => [ witnesses ],
304 conflict => [ conflicting ],
305 missing => [ excluded ] }, ... ]
306 }
307where 'conflicting' is the list of witnesses whose readings conflict with
308this group, and 'excluded' is the list of witnesses either not present in
309the stemma or lacunose at this location.
310
311=cut
732152b1 312
d71100ed 313sub analyze_variant_location {
7f52eac8 314 my( $tradition, $rank, $sid, @collapse ) = @_;
7f52eac8 315 # Get the readings in this tradition at this rank
316 my @rank_rdgs = grep { $_->rank == $rank } $tradition->collation->readings;
317 # Get the applicable stemma
318 my $undirected; # TODO Allow undirected distance tree analysis too
319 my $stemma = $tradition->stemma( $sid );
320 my $graph = $stemma->graph;
321 # Figure out which witnesses we are working with
bebec0e9 322 my @lacunose = $stemma->hypotheticals;
323 push( @lacunose, _symmdiff( [ $stemma->witnesses ],
324 [ map { $_->sigil } $tradition->witnesses ] ) );
7f52eac8 325
326 # Now group the readings
5be0cdeb 327 my( $readings, $groups ) = _useful_variant(
328 group_variants( $tradition, $rank, \@lacunose, \@collapse ),
329 $graph, $tradition->collation->ac_label );
330 return unless scalar @$readings;
7f52eac8 331 my $group_readings = {};
332 # Lookup table group string -> readings
333 foreach my $x ( 0 .. $#$groups ) {
334 $group_readings->{wit_stringify( $groups->[$x] )} = $readings->[$x];
335 }
336
337 # Now do the work.
231d71fc 338 my $contig = {};
339 my $subgraph = {};
c4a4fb1b 340 my $is_conflicted;
d71100ed 341 my $conflict = {};
bebec0e9 342 my %reading_roots;
7f52eac8 343 my $variant_row = { 'id' => $rank, 'readings' => [] };
94a077d6 344 # Mark each ms as in its own group, first.
f00cefe8 345 $DB::single = 1 if $rank == 81;
94a077d6 346 foreach my $g ( @$groups ) {
347 my $gst = wit_stringify( $g );
231d71fc 348 map { $contig->{$_} = $gst } @$g;
94a077d6 349 }
c4a4fb1b 350 # Now for each unmarked node in the graph, initialize an array
351 # for possible group memberships. We will use this later to
352 # resolve potential conflicts.
231d71fc 353 map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
d71100ed 354 foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
c4a4fb1b 355 my $gst = wit_stringify( $g ); # This is the group name
08e0fb85 356 # Copy the graph, and delete all non-members from the new graph.
c4a4fb1b 357 my $part = $graph->copy;
bebec0e9 358 my @group_roots;
c4a4fb1b 359 $part->delete_vertices(
231d71fc 360 grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
c4a4fb1b 361
362 # Now look to see if our group is connected.
363 if( $undirected ) { # For use with distance trees etc.
364 # Find all vertices reachable from the first (arbitrary) group
bebec0e9 365 # member. If we are genealogical this should include them all.
366 my $reachable = {};
c4a4fb1b 367 map { $reachable->{$_} = 1 } $part->all_reachable( $g->[0] );
368 # TODO This is a terrible way to do distance trees, since all
369 # non-leaf nodes are included in every graph part now. We may
370 # have to go back to SPDP.
371 } else {
372 if( @$g > 1 ) {
c4a4fb1b 373 # We have to take directionality into account.
374 # How many root nodes do we have?
231d71fc 375 my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
bebec0e9 376 $part->predecessorless_vertices;
c4a4fb1b 377 # Assuming that @$g > 1, find the first root node that has at
378 # least one successor belonging to our group. If this reading
379 # is genealogical, there should be only one, but we will check
380 # that implicitly later.
c4a4fb1b 381 foreach my $root ( @roots ) {
231d71fc 382 # Prune the tree to get rid of extraneous hypotheticals.
7f52eac8 383 $root = _prune_subtree( $part, $root, $contig );
bebec0e9 384 next unless $root;
385 # Save this root for our group.
386 push( @group_roots, $root );
c4a4fb1b 387 # Get all the successor nodes of our root.
c4a4fb1b 388 }
bebec0e9 389 } else {
390 # Dispense with the trivial case of one reading.
f00cefe8 391 my $wit = pop @$g;
392 @group_roots = ( $wit );
393 foreach my $v ( $part->vertices ) {
394 $part->delete_vertex( $v ) unless $v eq $wit;
395 }
bebec0e9 396 }
c4a4fb1b 397 }
398
bebec0e9 399 map { $reading_roots{$_} = 1 } @group_roots;
400 if( @group_roots > 1 ) {
401 $conflict->{$group_readings->{$gst}} = 1;
402 $is_conflicted = 1;
08e0fb85 403 }
bebec0e9 404 # Paint the 'hypotheticals' with our group.
405 foreach my $wit ( $part->vertices ) {
406 if( ref( $contig->{$wit} ) ) {
407 push( @{$contig->{$wit}}, $gst );
408 } elsif( $contig->{$wit} ne $gst ) {
409 warn "How did we get here?";
410 }
411 }
08e0fb85 412
c4a4fb1b 413
bebec0e9 414 # Start to write the reading, and save the group subgraph.
f00cefe8 415 my $reading = { 'readingid' => $group_readings->{$gst},
7f52eac8 416 'missing' => wit_stringify( \@lacunose ),
c4a4fb1b 417 'group' => $gst }; # This will change if we find no conflict
bebec0e9 418 # Save the relevant subgraph.
419 $subgraph->{$gst} = $part;
732152b1 420 push( @{$variant_row->{'readings'}}, $reading );
d71100ed 421 }
c4a4fb1b 422
bebec0e9 423 # For each of our hypothetical readings, flatten its 'contig' array if
424 # the array contains zero or one group. If we have any unflattened arrays,
425 # we may need to run the resolution process. If the reading is already known
426 # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
427 # it.
428 my @resolve;
429 foreach my $wit ( keys %$contig ) {
430 next unless ref( $contig->{$wit} );
431 if( @{$contig->{$wit}} > 1 ) {
432 if( $is_conflicted ) {
433 $contig->{$wit} = ''; # We aren't going to decide.
434 } else {
435 push( @resolve, $wit );
436 }
437 } else {
438 my $gst = pop @{$contig->{$wit}};
439 $contig->{$wit} = $gst || '';
440 }
441 }
442
443 if( @resolve ) {
231d71fc 444 my $still_contig = {};
c4a4fb1b 445 foreach my $h ( @resolve ) {
446 # For each of the hypothetical readings with more than one possibility,
447 # try deleting it from each of its member subgraphs in turn, and see
448 # if that breaks the contiguous grouping.
449 # TODO This can still break in a corner case where group A can use
450 # either vertex 1 or 2, and group B can use either vertex 2 or 1.
451 # Revisit this if necessary; it could get brute-force nasty.
231d71fc 452 foreach my $gst ( @{$contig->{$h}} ) {
bebec0e9 453 my $gpart = $subgraph->{$gst}->copy();
454 # If we have come this far, there is only one root and everything
455 # is reachable from it.
456 my( $root ) = $gpart->predecessorless_vertices;
457 my $reachable = {};
458 map { $reachable->{$_} = 1 } $gpart->vertices;
459
460 # Try deleting the hypothetical node.
c4a4fb1b 461 $gpart->delete_vertex( $h );
bebec0e9 462 if( $h eq $root ) {
463 # See if we still have a single root.
464 my @roots = $gpart->predecessorless_vertices;
465 warn "This shouldn't have happened" unless @roots;
466 if( @roots > 1 ) {
467 # $h is needed by this group.
468 if( exists( $still_contig->{$h} ) ) {
469 # Conflict!
470 $conflict->{$group_readings->{$gst}} = 1;
471 $still_contig->{$h} = '';
472 } else {
473 $still_contig->{$h} = $gst;
474 }
475 }
476 } else {
477 # $h is somewhere in the middle. See if everything
478 # else can still be reached from the root.
479 my %still_reachable = ( $root => 1 );
480 map { $still_reachable{$_} = 1 }
481 $gpart->all_successors( $root );
482 foreach my $v ( keys %$reachable ) {
483 next if $v eq $h;
484 if( !$still_reachable{$v}
485 && ( $contig->{$v} eq $gst
486 || ( exists $still_contig->{$v}
487 && $still_contig->{$v} eq $gst ) ) ) {
488 # We need $h.
489 if( exists $still_contig->{$h} ) {
490 # Conflict!
491 $conflict->{$group_readings->{$gst}} = 1;
492 $still_contig->{$h} = '';
493 } else {
494 $still_contig->{$h} = $gst;
495 }
496 last;
497 } # else we don't need $h in this group.
498 } # end foreach $v
499 } # endif $h eq $root
500 } # end foreach $gst
501 } # end foreach $h
c4a4fb1b 502
bebec0e9 503 # Now we have some hypothetical vertices in $still_contig that are the
504 # "real" group memberships. Replace these in $contig.
505 foreach my $v ( keys %$contig ) {
506 next unless ref $contig->{$v};
507 $contig->{$v} = $still_contig->{$v};
508 }
509 } # end if @resolve
510
511 # Now that we have all the node group memberships, calculate followed/
512 # non-followed/unknown values for each reading. Also figure out the
513 # reading's evident parent(s).
514 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
515 my $gst = $rdghash->{'group'};
516 my $part = $subgraph->{$gst};
517 my @roots = $part->predecessorless_vertices;
518 $rdghash->{'independent_occurrence'} = scalar @roots;
519 $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
520 # Find the parent readings, if any, of this reading.
f00cefe8 521 my %rdgparents;
bebec0e9 522 foreach my $wit ( @roots ) {
f00cefe8 523 # Look in the main stemma to find this witness's extant or known-reading
524 # immediate ancestor(s), and look up the reading that each ancestor olds.
525 my @check = $graph->predecessors( $wit );
526 while( @check ) {
527 my @next;
528 foreach my $wparent( @check ) {
529 my $pgroup = $contig->{$wparent};
530 if( $pgroup ) {
531 $rdgparents{$group_readings->{$pgroup}} = 1;
532 } else {
533 push( @next, $graph->predecessors( $wparent ) );
534 }
535 }
536 @check = @next;
537 }
bebec0e9 538 }
f00cefe8 539 $rdghash->{'reading_parents'} = [ keys %rdgparents ];
bebec0e9 540
541 # Find the number of times this reading was altered, and the number of
542 # times we're not sure.
543 my( %nofollow, %unknownfollow );
544 foreach my $wit ( $part->vertices ) {
545 foreach my $wchild ( $graph->successors( $wit ) ) {
546 next if $part->has_vertex( $wchild );
547 if( $reading_roots{$wchild} && $contig->{$wchild} ) {
548 # It definitely changed here.
549 $nofollow{$wchild} = 1;
550 } elsif( !($contig->{$wchild}) ) {
551 # The child is a hypothetical node not definitely in
552 # any group. Answer is unknown.
553 $unknownfollow{$wchild} = 1;
554 } # else it's a non-root node in a known group, and therefore
555 # is presumed to have its reading from its group, not this link.
556 }
557 }
558 $rdghash->{'not_followed'} = keys %nofollow;
559 $rdghash->{'follow_unknown'} = keys %unknownfollow;
c4a4fb1b 560 }
bebec0e9 561
c4a4fb1b 562 # Now write the group and conflict information into the respective rows.
bebec0e9 563 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
f00cefe8 564 $rdghash->{'conflict'} = $conflict->{$rdghash->{'readingid'}};
bebec0e9 565 my @members = grep { $contig->{$_} eq $rdghash->{'group'} } keys %$contig;
566 $rdghash->{'group'} = wit_stringify( \@members );
c4a4fb1b 567 }
568
08e0fb85 569 $variant_row->{'genealogical'} = !( keys %$conflict );
732152b1 570 return $variant_row;
d71100ed 571}
572
7f52eac8 573sub _prune_subtree {
231d71fc 574 my( $tree, $root, $contighash ) = @_;
575 # First, delete hypothetical leaves / orphans until there are none left.
576 my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
577 $tree->successorless_vertices;
578 while( @orphan_hypotheticals ) {
579 $tree->delete_vertices( @orphan_hypotheticals );
580 @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
581 $tree->successorless_vertices;
582 }
583 # Then delete a hypothetical root with only one successor, moving the
bebec0e9 584 # root to the first child that has no other predecessors.
231d71fc 585 while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
586 my @nextroot = $tree->successors( $root );
587 $tree->delete_vertex( $root );
bebec0e9 588 ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
231d71fc 589 }
590 # The tree has been modified in place, but we need to know the new root.
bebec0e9 591 $root = undef unless $root && $tree->has_vertex( $root );
231d71fc 592 return $root;
593}
d71100ed 594# Add the variant, subject to a.c. representation logic.
595# This assumes that we will see the 'main' version before the a.c. version.
596sub add_variant_wit {
597 my( $arr, $wit, $acstr ) = @_;
598 my $skip;
599 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
600 my $real = $1;
601 $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
602 }
603 push( @$arr, $wit ) unless $skip;
604}
605
5be0cdeb 606sub _useful_variant {
607 my( $group_readings, $graph, $acstr ) = @_;
608
609 # TODO Decide what to do with AC witnesses
610
611 # Sort by group size and return
612 my $is_useful = 0;
613 my( @readings, @groups ); # The sorted groups for our answer.
614 foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
615 keys %$group_readings ) {
616 push( @readings, $rdg );
617 push( @groups, $group_readings->{$rdg} );
618 if( @{$group_readings->{$rdg}} > 1 ) {
619 $is_useful++;
620 } else {
621 my( $wit ) = @{$group_readings->{$rdg}};
622 $wit =~ s/^(.*)\Q$acstr\E$/$1/;
623 $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
624 }
625 }
626 if( $is_useful > 1 ) {
627 return( \@readings, \@groups );
628 } else {
629 return( [], [] );
630 }
631}
632
7f52eac8 633=head2 wit_stringify( $groups )
634
635Takes an array of witness groupings and produces a string like
636['A','B'] / ['C','D','E'] / ['F']
d71100ed 637
7f52eac8 638=cut
d71100ed 639
640sub wit_stringify {
641 my $groups = shift;
642 my @gst;
643 # If we were passed an array of witnesses instead of an array of
644 # groupings, then "group" the witnesses first.
645 unless( ref( $groups->[0] ) ) {
646 my $mkgrp = [ $groups ];
647 $groups = $mkgrp;
648 }
649 foreach my $g ( @$groups ) {
650 push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
651 }
652 return join( ' / ', @gst );
653}
7f52eac8 654
5be0cdeb 655# Helper function to ensure that X and X a.c. never appear in the same list.
656sub _add_to_witlist {
657 my( $wit, $list, $acstr ) = @_;
658 my %inlist;
659 my $idx = 0;
660 map { $inlist{$_} = $idx++ } @$list;
661 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
662 my $acwit = $1;
663 unless( exists $inlist{$acwit} ) {
664 push( @$list, $acwit.$acstr );
665 }
666 } else {
667 if( exists( $inlist{$wit.$acstr} ) ) {
668 # Replace the a.c. version with the main witness
669 my $i = $inlist{$wit.$acstr};
670 $list->[$i] = $wit;
671 } else {
672 push( @$list, $wit );
673 }
674 }
675}
676
bebec0e9 677sub _symmdiff {
678 my( $lista, $listb ) = @_;
7f52eac8 679 my %union;
680 my %scalars;
681 map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
682 map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
bebec0e9 683 my @set = grep { $union{$_} == 1 } keys %union;
7f52eac8 684 return map { $scalars{$_} } @set;
685}
686
6871;
688
689=head1 LICENSE
690
691This package is free software and is provided "as is" without express
692or implied warranty. You can redistribute it and/or modify it under
693the same terms as Perl itself.
694
695=head1 AUTHOR
696
697Tara L Andrews E<lt>aurum@cpan.orgE<gt>