1 package Text::Tradition::Analysis;
6 use Encode qw/ encode_utf8 /;
9 use JSON qw/ encode_json decode_json /;
12 use Text::Tradition::Stemma;
14 use vars qw/ @EXPORT_OK /;
15 @EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
19 Text::Tradition::Analysis - functions for stemma analysis of a tradition
24 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
25 my $t = Text::Tradition->new(
26 'name' => 'this is a text',
28 'file' => '/path/to/tei_parallel_seg_file.xml' );
29 $t->add_stemma( 'dotfile' => $stemmafile );
31 my $variant_data = run_analysis( $tradition );
32 # Recalculate rank $n treating all orthographic variants as equivalent
33 my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
37 Text::Tradition is a library for representation and analysis of collated
38 texts, particularly medieval ones. The Collation is the central feature of
39 a Tradition, where the text, its sequence of readings, and its relationships
40 between readings are actually kept.
44 =head2 run_analysis( $tradition, %opts )
46 Runs the analysis described in analyze_variant_location on every location in the
47 collation of the given tradition, with the given options. These include:
51 =item * stemma_id - Specify which of the tradition's stemmata to use. Default
52 is 0 (i.e. the first).
54 =item * ranks - Specify a list of location ranks to analyze; exclude the rest.
56 =item * merge_types - Specify a list of relationship types, where related readings
57 should be treated as identical for the purposes of analysis.
59 =item * exclude_type1 - Exclude those ranks whose groupings have only type-1 variants.
66 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
68 my $datafile = 't/data/florilegium_tei_ps.xml';
69 my $tradition = Text::Tradition->new( 'input' => 'TEI',
71 'file' => $datafile );
72 my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
73 is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
75 my %expected_genealogical = (
106 my $data = run_analysis( $tradition );
107 foreach my $row ( @{$data->{'variants'}} ) {
108 # Account for rows that used to be "not useful"
109 unless( exists $expected_genealogical{$row->{'id'}} ) {
110 $expected_genealogical{$row->{'id'}} = 1;
112 is( $row->{'genealogical'}, $expected_genealogical{$row->{'id'}},
113 "Got correct genealogical flag for row " . $row->{'id'} );
115 is( $data->{'variant_count'}, 58, "Got right total variant number" );
116 # TODO Make something meaningful of conflict count, maybe test other bits
123 my( $tradition, %opts ) = @_;
124 my $c = $tradition->collation;
126 my $stemma_id = $opts{'stemma_id'} || 0;
127 my @ranks = ref( $opts{'ranks'} ) eq 'ARRAY' ? @{$opts{'ranks'}} : ();
128 my @collapse = ref( $opts{'merge_types'} ) eq 'ARRAY' ? @{$opts{'merge_types'}} : ();
131 my $stemma = $tradition->stemma( $stemma_id );
133 # Figure out which witnesses we are working with
134 my @lacunose = $stemma->hypotheticals;
135 my @tradition_wits = map { $_->sigil } $tradition->witnesses;
136 map { push( @tradition_wits, $_->sigil.$c->ac_label ) if $_->is_layered }
137 $tradition->witnesses;
138 push( @lacunose, _symmdiff( [ $stemma->witnesses ], \@tradition_wits ) );
140 # Find and mark 'common' ranks for exclusion, unless they were
141 # explicitly specified.
144 foreach my $rdg ( $c->common_readings ) {
145 $common_rank{$rdg->rank} = 1;
147 @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
150 # Group the variants to send to the solver
154 foreach my $rank ( @ranks ) {
155 $DB::single = 1 if $rank == 1003;
156 my $missing = [ @lacunose ];
157 my $rankgroup = group_variants( $tradition, $rank, $missing, \@collapse );
158 if( $opts{'exclude_type1'} ) {
159 # Check to see whether this is a "useful" group.
160 my( $rdgs, $grps ) = _useful_variant( $rankgroup,
161 $stemma->graph, $c->ac_label );
164 push( @use_ranks, $rank );
165 push( @groups, $rankgroup );
166 $lacunae{$rank} = $missing;
169 my $answer = solve_variants( $stemma, @groups );
171 # Do further analysis on the answer
172 my $conflict_count = 0;
173 foreach my $idx ( 0 .. $#use_ranks ) {
174 my $location = $answer->{'variants'}->[$idx];
175 # Add the rank back in
176 $location->{'id'} = $use_ranks[$idx];
177 # Add the lacunae back in
178 $location->{'missing'} = $lacunae{$use_ranks[$idx]};
180 map { $lmiss{$_} = 1 } @{$location->{'missing'}};
181 # Run the extra analysis we need.
182 analyze_location( $tradition, $stemma->graph, $location );
183 foreach my $rdghash ( @{$location->{'readings'}} ) {
185 if exists $rdghash->{'conflict'} && $rdghash->{'conflict'};
186 # Add the reading text back in
187 my $rdg = $c->reading( $rdghash->{'readingid'} );
188 $rdghash->{'text'} = $rdg ? $rdg->text : $rdghash->{'readingid'};
189 # Remove lacunose witnesses from this reading's list now that the
192 map { push( @realgroup, $_ ) unless $lmiss{$_} } $rdghash->{'group'};
193 $rdghash->{'group'} = \@realgroup;
196 $answer->{'conflict_count'} = $conflict_count;
201 =head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
203 Groups the variants at the given $rank of the collation, treating any
204 relationships in @merge_relationship_types as equivalent. $lacunose should
205 be a reference to an array, to which the sigla of lacunose witnesses at this
206 rank will be appended.
208 Returns a hash $group_readings where $rdg is attested by the witnesses listed
209 in $group_readings->{$rdg}.
213 # Return group_readings, groups, lacunose
215 my( $tradition, $rank, $lacunose, $collapse ) = @_;
216 my $c = $tradition->collation;
217 my $aclabel = $c->ac_label;
218 # Get the alignment table readings
219 my %readings_at_rank;
220 my %is_lacunose; # lookup table for $lacunose
221 map { $is_lacunose{$_} = 1 } @$lacunose;
223 foreach my $tablewit ( @{$c->alignment_table->{'alignment'}} ) {
224 my $rdg = $tablewit->{'tokens'}->[$rank-1];
225 my $wit = $tablewit->{'witness'};
226 # Exclude the witness if it is "lacunose" which if we got here
227 # means "not in the stemma".
228 next if $is_lacunose{$wit};
229 if( $rdg && $rdg->{'t'}->is_lacuna ) {
230 _add_to_witlist( $wit, $lacunose, $aclabel );
232 $readings_at_rank{$rdg->{'t'}->text} = $rdg->{'t'};
234 _add_to_witlist( $wit, \@gap_wits, $aclabel );
238 # Group the readings, collapsing groups by relationship if needed
239 my %grouped_readings;
240 foreach my $rdg ( sort { $b->witnesses <=> $a->witnesses }
241 values %readings_at_rank ) {
242 # Skip readings that have been collapsed into others.
243 next if exists $grouped_readings{$rdg->id} && !$grouped_readings{$rdg->id};
244 my @wits = $rdg->witnesses;
246 my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
247 foreach my $other ( $rdg->related_readings( $filter ) ) {
248 my @otherwits = $other->witnesses;
249 push( @wits, @otherwits );
250 $grouped_readings{$other->id} = 0;
253 my @use_wits = grep { !$is_lacunose{$_} } @wits;
254 $grouped_readings{$rdg->id} = \@use_wits;
256 $grouped_readings{'(omitted)'} = \@gap_wits if @gap_wits;
257 # Get rid of our collapsed readings
258 map { delete $grouped_readings{$_} unless $grouped_readings{$_} }
259 keys %grouped_readings
262 return \%grouped_readings;
265 =head2 solve_variants( $graph, @groups )
267 Sends the set of groups to the external graph solver service and returns
268 a cleaned-up answer, adding the rank IDs back where they belong.
270 The JSON has the form
271 { "graph": [ stemmagraph DOT string without newlines ],
272 "groupings": [ array of arrays of groups, one per rank ] }
274 The answer has the form
275 { "variants" => [ array of variant location structures ],
276 "variant_count" => total,
277 "conflict_count" => number of conflicts detected,
278 "genealogical_count" => number of solutions found }
283 my( $stemma, @groups ) = @_;
285 # Make the json with stemma + groups
287 foreach my $ghash ( @groups ) {
289 foreach my $k ( keys %$ghash ) {
290 push( @grouping, $ghash->{$k} );
292 push( @$groupings, \@grouping );
294 ## Witness map is a HACK to get around limitations in node names from IDP
295 my $witness_map = {};
296 my $json = encode_json( _safe_wit_strings( $stemma, $groupings, $witness_map ) );
298 # Send it off and get the result
299 my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
300 my $ua = LWP::UserAgent->new();
301 my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
302 'Content' => $json );
306 if( $resp->is_success ) {
307 $answer = _desanitize_names( decode_json( $resp->content ), $witness_map );
310 # Fall back to the old method.
311 warn "IDP solver returned " . $resp->status_line . " / " . $resp->content
312 . "; falling back to perl method";
313 $answer = perl_solver( $stemma, @$groupings );
316 # Fold the result back into what we know about the groups.
318 my $genealogical = 0;
319 foreach my $idx ( 0 .. $#groups ) {
320 my( $calc_groups, $result ) = @{$answer->[$idx]};
323 # Prune the calculated groups, in case the IDP solver failed to.
326 foreach my $cg ( @$calc_groups ) {
327 my @pg = _prune_group( $cg, $stemma );
328 push( @pruned_groups, \@pg );
330 $calc_groups = \@pruned_groups;
333 my $input_group = $groups[$idx];
334 foreach my $k ( keys %$input_group ) {
335 my $cg = shift @$calc_groups;
336 $input_group->{$k} = $cg;
339 'genealogical' => $result,
342 foreach my $k ( sort { @{$input_group->{$b}} <=> @{$input_group->{$a}} }
343 keys %$input_group ) {
344 push( @{$vstruct->{'readings'}},
345 { 'readingid' => $k, 'group' => $input_group->{$k}} );
347 push( @$variants, $vstruct );
350 return { 'variants' => $variants,
351 'variant_count' => scalar @$variants,
352 'genealogical_count' => $genealogical };
355 #### HACKERY to cope with IDP's limited idea of what a node name looks like ###
357 sub _safe_wit_strings {
358 my( $stemma, $groupings, $witness_map ) = @_;
359 my $safegraph = Graph->new();
360 # Convert the graph to a safe representation and store the conversion.
361 foreach my $n ( $stemma->graph->vertices ) {
362 my $sn = _safe_witstr( $n );
363 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
364 if exists $witness_map->{$sn};
365 $witness_map->{$sn} = $n;
366 $safegraph->add_vertex( $sn );
367 $safegraph->set_vertex_attributes( $sn,
368 $stemma->graph->get_vertex_attributes( $n ) );
370 foreach my $e ( $stemma->graph->edges ) {
371 my @safe_e = ( _safe_witstr( $e->[0] ), _safe_witstr( $e->[1] ) );
372 $safegraph->add_edge( @safe_e );
374 my $safe_stemma = Text::Tradition::Stemma->new(
375 'collation' => $stemma->collation, 'graph' => $safegraph );
377 # Now convert the witness groupings to a safe representation.
378 my $safe_groupings = [];
379 foreach my $grouping ( @$groupings ) {
380 my $safe_grouping = [];
381 foreach my $group ( @$grouping ) {
383 foreach my $n ( @$group ) {
384 my $sn = _safe_witstr( $n );
385 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
386 if exists $witness_map->{$sn} && $witness_map->{$sn} ne $n;
387 $witness_map->{$sn} = $n;
388 push( @$safe_group, $sn );
390 push( @$safe_grouping, $safe_group );
392 push( @$safe_groupings, $safe_grouping );
395 # Return it all in the struct we expect. We have stored the reductions
396 # in the $witness_map that we were passed.
397 return { 'graph' => $safe_stemma->editable( ' ' ), 'groupings' => $safe_groupings };
402 $witstr =~ s/\s+/_/g;
403 $witstr =~ s/[^\w\d-]//g;
407 sub _desanitize_names {
408 my( $jsonstruct, $witness_map ) = @_;
410 foreach my $grouping ( @$jsonstruct ) {
411 my $real_grouping = [];
412 foreach my $element ( @$grouping ) {
413 if( ref( $element ) eq 'ARRAY' ) {
415 my $real_groupset = [];
416 foreach my $group ( @$element ) {
418 foreach my $n ( @$group ) {
419 my $rn = $witness_map->{$n};
420 push( @$real_group, $rn );
422 push( @$real_groupset, $real_group );
424 push( @$real_grouping, $real_groupset );
426 # It is the boolean, not actually a group.
427 push( @$real_grouping, $element );
430 push( @$result, $real_grouping );
437 =head2 analyze_location ( $tradition, $graph, $location_hash )
439 Given the tradition, its stemma graph, and the solution from the graph solver,
440 work out the rest of the information we want. For each reading we need missing,
441 conflict, reading_parents, independent_occurrence, followed, not_followed, and follow_unknown. Alters the location_hash in place.
445 sub analyze_location {
446 my ( $tradition, $graph, $variant_row ) = @_;
448 # Make a hash of all known node memberships, and make the subgraphs.
450 my $reading_roots = {};
452 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
453 my $rid = $rdghash->{'readingid'};
454 map { $contig->{$_} = $rid } @{$rdghash->{'group'}};
457 my $part = $graph->copy;
459 map { $these_vertices{$_} = 1 } @{$rdghash->{'group'}};
460 $part->delete_vertices( grep { !$these_vertices{$_} } $part->vertices );
461 $subgraph->{$rid} = $part;
462 # Get the reading roots.
463 map { $reading_roots->{$_} = $rid } $part->predecessorless_vertices;
466 # Now that we have all the node group memberships, calculate followed/
467 # non-followed/unknown values for each reading. Also figure out the
468 # reading's evident parent(s).
469 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
470 # Group string key - TODO do we need this?
471 my $gst = wit_stringify( $rdghash->{'group'} );
472 my $rid = $rdghash->{'readingid'};
474 my $part = $subgraph->{$rid};
476 # Start figuring things out.
477 my @roots = $part->predecessorless_vertices;
478 $rdghash->{'independent_occurrence'} = scalar @roots;
479 $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
480 # Find the parent readings, if any, of this reading.
482 foreach my $wit ( @roots ) {
483 # Look in the main stemma to find this witness's extant or known-reading
484 # immediate ancestor(s), and look up the reading that each ancestor olds.
485 my @check = $graph->predecessors( $wit );
488 foreach my $wparent( @check ) {
489 my $preading = $contig->{$wparent};
491 $rdgparents{$preading} = 1;
493 push( @next, $graph->predecessors( $wparent ) );
499 $rdghash->{'reading_parents'} = [ keys %rdgparents ];
501 # Find the number of times this reading was altered, and the number of
502 # times we're not sure.
503 my( %nofollow, %unknownfollow );
504 foreach my $wit ( $part->vertices ) {
505 foreach my $wchild ( $graph->successors( $wit ) ) {
506 next if $part->has_vertex( $wchild );
507 if( $reading_roots->{$wchild} && $contig->{$wchild} ) {
508 # It definitely changed here.
509 $nofollow{$wchild} = 1;
510 } elsif( !($contig->{$wchild}) ) {
511 # The child is a hypothetical node not definitely in
512 # any group. Answer is unknown.
513 $unknownfollow{$wchild} = 1;
514 } # else it's a non-root node in a known group, and therefore
515 # is presumed to have its reading from its group, not this link.
518 $rdghash->{'not_followed'} = keys %nofollow;
519 $rdghash->{'follow_unknown'} = keys %unknownfollow;
521 # Now say whether this reading represents a conflict.
522 unless( $variant_row->{'genealogical'} ) {
523 $rdghash->{'conflict'} = @roots != 1;
529 =head2 perl_solver( $tradition, $rank, $stemma_id, @merge_relationship_types )
531 ** NOTE ** This method should hopefully not be called - it is not guaranteed
532 to be correct. Serves as a backup for the real solver.
534 Runs an analysis of the given tradition, at the location given in $rank,
535 against the graph of the stemma specified in $stemma_id. The argument
536 @merge_relationship_types is an optional list of relationship types for
537 which readings so related should be treated as equivalent.
539 Returns a nested array data structure as follows:
541 [ [ group_list, is_genealogical ], [ group_list, is_genealogical ] ... ]
543 where the group list is the array of arrays passed in for each element of @groups,
544 possibly with the addition of hypothetical readings.
550 my( $stemma, @groups ) = @_;
551 my $graph = $stemma->graph;
553 foreach my $g ( @groups ) {
554 push( @answer, _solve_variant_location( $graph, $g ) );
559 sub _solve_variant_location {
560 my( $graph, $groups ) = @_;
567 # Mark each ms as in its own group, first.
568 foreach my $g ( @$groups ) {
569 my $gst = wit_stringify( $g );
570 map { $contig->{$_} = $gst } @$g;
573 # Now for each unmarked node in the graph, initialize an array
574 # for possible group memberships. We will use this later to
575 # resolve potential conflicts.
576 map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
577 foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
578 my $gst = wit_stringify( $g ); # This is the group name
579 # Copy the graph, and delete all non-members from the new graph.
580 my $part = $graph->copy;
582 $part->delete_vertices(
583 grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
585 # Now look to see if our group is connected.
587 # We have to take directionality into account.
588 # How many root nodes do we have?
589 my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
590 $part->predecessorless_vertices;
591 # Assuming that @$g > 1, find the first root node that has at
592 # least one successor belonging to our group. If this reading
593 # is genealogical, there should be only one, but we will check
594 # that implicitly later.
595 foreach my $root ( @roots ) {
596 # Prune the tree to get rid of extraneous hypotheticals.
597 $root = _prune_subtree( $part, $root, $contig );
599 # Save this root for our group.
600 push( @group_roots, $root );
601 # Get all the successor nodes of our root.
604 # Dispense with the trivial case of one reading.
606 @group_roots = ( $wit );
607 foreach my $v ( $part->vertices ) {
608 $part->delete_vertex( $v ) unless $v eq $wit;
612 if( @group_roots > 1 ) {
613 $conflict->{$gst} = 1;
616 # Paint the 'hypotheticals' with our group.
617 foreach my $wit ( $part->vertices ) {
618 if( ref( $contig->{$wit} ) ) {
619 push( @{$contig->{$wit}}, $gst );
620 } elsif( $contig->{$wit} ne $gst ) {
621 warn "How did we get here?";
626 # Save the relevant subgraph.
627 $subgraph->{$gst} = $part;
630 # For each of our hypothetical readings, flatten its 'contig' array if
631 # the array contains zero or one group. If we have any unflattened arrays,
632 # we may need to run the resolution process. If the reading is already known
633 # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
636 foreach my $wit ( keys %$contig ) {
637 next unless ref( $contig->{$wit} );
638 if( @{$contig->{$wit}} > 1 ) {
639 if( $is_conflicted ) {
640 $contig->{$wit} = ''; # We aren't going to decide.
642 push( @resolve, $wit );
645 my $gst = pop @{$contig->{$wit}};
646 $contig->{$wit} = $gst || '';
651 my $still_contig = {};
652 foreach my $h ( @resolve ) {
653 # For each of the hypothetical readings with more than one possibility,
654 # try deleting it from each of its member subgraphs in turn, and see
655 # if that breaks the contiguous grouping.
656 # TODO This can still break in a corner case where group A can use
657 # either vertex 1 or 2, and group B can use either vertex 2 or 1.
658 # Revisit this if necessary; it could get brute-force nasty.
659 foreach my $gst ( @{$contig->{$h}} ) {
660 my $gpart = $subgraph->{$gst}->copy();
661 # If we have come this far, there is only one root and everything
662 # is reachable from it.
663 my( $root ) = $gpart->predecessorless_vertices;
665 map { $reachable->{$_} = 1 } $gpart->vertices;
667 # Try deleting the hypothetical node.
668 $gpart->delete_vertex( $h );
670 # See if we still have a single root.
671 my @roots = $gpart->predecessorless_vertices;
672 warn "This shouldn't have happened" unless @roots;
674 # $h is needed by this group.
675 if( exists( $still_contig->{$h} ) ) {
677 $conflict->{$gst} = 1;
678 $still_contig->{$h} = '';
680 $still_contig->{$h} = $gst;
684 # $h is somewhere in the middle. See if everything
685 # else can still be reached from the root.
686 my %still_reachable = ( $root => 1 );
687 map { $still_reachable{$_} = 1 }
688 $gpart->all_successors( $root );
689 foreach my $v ( keys %$reachable ) {
691 if( !$still_reachable{$v}
692 && ( $contig->{$v} eq $gst
693 || ( exists $still_contig->{$v}
694 && $still_contig->{$v} eq $gst ) ) ) {
696 if( exists $still_contig->{$h} ) {
698 $conflict->{$gst} = 1;
699 $still_contig->{$h} = '';
701 $still_contig->{$h} = $gst;
704 } # else we don't need $h in this group.
706 } # endif $h eq $root
710 # Now we have some hypothetical vertices in $still_contig that are the
711 # "real" group memberships. Replace these in $contig.
712 foreach my $v ( keys %$contig ) {
713 next unless ref $contig->{$v};
714 $contig->{$v} = $still_contig->{$v};
718 my $is_genealogical = keys %$conflict ? JSON::false : JSON::true;
719 my $variant_row = [ [], $is_genealogical ];
720 # Fill in the groupings from $contig.
721 foreach my $g ( @$groups ) {
722 my $gst = wit_stringify( $g );
723 my @realgroup = grep { $contig->{$_} eq $gst } keys %$contig;
724 push( @{$variant_row->[0]}, \@realgroup );
730 my( $group, $stemma ) = @_;
731 # Get these into a form prune_subtree will recognize. Make a "contighash"
733 map { $hypohash->{$_} = 1 } @$group;
734 # ...with reference values for hypotheticals.
735 map { $hypohash->{$_} = [] } $stemma->hypotheticals;
737 my $subgraph = $stemma->graph->copy;
738 map { $subgraph->delete_vertex( $_ ) unless exists $hypohash->{$_} }
740 # ...and find the root.
741 my( $root ) = $subgraph->predecessorless_vertices;
742 # Now prune and return the remaining vertices.
743 _prune_subtree( $subgraph, $root, $hypohash );
744 return $subgraph->vertices;
748 my( $tree, $root, $contighash ) = @_;
749 # First, delete hypothetical leaves / orphans until there are none left.
750 my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
751 $tree->successorless_vertices;
752 while( @orphan_hypotheticals ) {
753 $tree->delete_vertices( @orphan_hypotheticals );
754 @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
755 $tree->successorless_vertices;
757 # Then delete a hypothetical root with only one successor, moving the
758 # root to the first child that has no other predecessors.
759 while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
760 my @nextroot = $tree->successors( $root );
761 $tree->delete_vertex( $root );
762 ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
764 # The tree has been modified in place, but we need to know the new root.
765 $root = undef unless $root && $tree->has_vertex( $root );
768 # Add the variant, subject to a.c. representation logic.
769 # This assumes that we will see the 'main' version before the a.c. version.
770 sub add_variant_wit {
771 my( $arr, $wit, $acstr ) = @_;
773 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
775 $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
777 push( @$arr, $wit ) unless $skip;
780 sub _useful_variant {
781 my( $group_readings, $graph, $acstr ) = @_;
783 # TODO Decide what to do with AC witnesses
785 # Sort by group size and return
787 my( @readings, @groups ); # The sorted groups for our answer.
788 foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
789 keys %$group_readings ) {
790 push( @readings, $rdg );
791 push( @groups, $group_readings->{$rdg} );
792 if( @{$group_readings->{$rdg}} > 1 ) {
795 my( $wit ) = @{$group_readings->{$rdg}};
796 $wit =~ s/^(.*)\Q$acstr\E$/$1/;
797 $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
800 if( $is_useful > 1 ) {
801 return( \@readings, \@groups );
807 =head2 wit_stringify( $groups )
809 Takes an array of witness groupings and produces a string like
810 ['A','B'] / ['C','D','E'] / ['F']
817 # If we were passed an array of witnesses instead of an array of
818 # groupings, then "group" the witnesses first.
819 unless( ref( $groups->[0] ) ) {
820 my $mkgrp = [ $groups ];
823 foreach my $g ( @$groups ) {
824 push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
826 return join( ' / ', @gst );
829 # Helper function to ensure that X and X a.c. never appear in the same list.
830 sub _add_to_witlist {
831 my( $wit, $list, $acstr ) = @_;
834 map { $inlist{$_} = $idx++ } @$list;
835 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
837 unless( exists $inlist{$acwit} ) {
838 push( @$list, $acwit.$acstr );
841 if( exists( $inlist{$wit.$acstr} ) ) {
842 # Replace the a.c. version with the main witness
843 my $i = $inlist{$wit.$acstr};
846 push( @$list, $wit );
852 my( $lista, $listb ) = @_;
855 map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
856 map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
857 my @set = grep { $union{$_} == 1 } keys %union;
858 return map { $scalars{$_} } @set;
865 This package is free software and is provided "as is" without express
866 or implied warranty. You can redistribute it and/or modify it under
867 the same terms as Perl itself.
871 Tara L Andrews E<lt>aurum@cpan.orgE<gt>