1 package Text::Tradition::Analysis;
6 use Encode qw/ encode_utf8 /;
9 use JSON qw/ encode_json decode_json /;
12 use Text::Tradition::Stemma;
14 use vars qw/ @EXPORT_OK /;
15 @EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
19 Text::Tradition::Analysis - functions for stemma analysis of a tradition
24 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
25 my $t = Text::Tradition->new(
26 'name' => 'this is a text',
28 'file' => '/path/to/tei_parallel_seg_file.xml' );
29 $t->add_stemma( 'dotfile' => $stemmafile );
31 my $variant_data = run_analysis( $tradition );
32 # Recalculate rank $n treating all orthographic variants as equivalent
33 my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
37 Text::Tradition is a library for representation and analysis of collated
38 texts, particularly medieval ones. The Collation is the central feature of
39 a Tradition, where the text, its sequence of readings, and its relationships
40 between readings are actually kept.
44 =head2 run_analysis( $tradition, %opts )
46 Runs the analysis described in analyze_variant_location on every location in the
47 collation of the given tradition, with the given options. These include:
51 =item * stemma_id - Specify which of the tradition's stemmata to use. Default
52 is 0 (i.e. the first).
54 =item * ranks - Specify a list of location ranks to analyze; exclude the rest.
56 =item * merge_types - Specify a list of relationship types, where related readings
57 should be treated as identical for the purposes of analysis.
59 =item * exclude_type1 - Exclude those ranks whose groupings have only type-1 variants.
66 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
68 my $datafile = 't/data/florilegium_tei_ps.xml';
69 my $tradition = Text::Tradition->new( 'input' => 'TEI',
71 'file' => $datafile );
72 my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
73 is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
75 my %expected_genealogical = (
106 my $data = run_analysis( $tradition );
107 foreach my $row ( @{$data->{'variants'}} ) {
108 # Account for rows that used to be "not useful"
109 unless( exists $expected_genealogical{$row->{'id'}} ) {
110 $expected_genealogical{$row->{'id'}} = 1;
112 is( $row->{'genealogical'}, $expected_genealogical{$row->{'id'}},
113 "Got correct genealogical flag for row " . $row->{'id'} );
115 is( $data->{'variant_count'}, 58, "Got right total variant number" );
116 # TODO Make something meaningful of conflict count, maybe test other bits
123 my( $tradition, %opts ) = @_;
124 my $c = $tradition->collation;
126 my $stemma_id = $opts{'stemma_id'} || 0;
127 my @ranks = ref( $opts{'ranks'} ) eq 'ARRAY' ? @{$opts{'ranks'}} : ();
128 my @collapse = ref( $opts{'merge_types'} ) eq 'ARRAY' ? @{$opts{'merge_types'}} : ();
131 my $stemma = $tradition->stemma( $stemma_id );
133 # Figure out which witnesses we are working with
134 my @lacunose = $stemma->hypotheticals;
135 my @tradition_wits = map { $_->sigil } $tradition->witnesses;
136 map { push( @tradition_wits, $_->sigil.$c->ac_label ) if $_->is_layered }
137 $tradition->witnesses;
138 push( @lacunose, _symmdiff( [ $stemma->witnesses ], \@tradition_wits ) );
140 # Find and mark 'common' ranks for exclusion, unless they were
141 # explicitly specified.
144 foreach my $rdg ( $c->common_readings ) {
145 $common_rank{$rdg->rank} = 1;
147 @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
150 # Group the variants to send to the solver
153 foreach my $rank ( @ranks ) {
154 my $missing = [ @lacunose ];
155 my $rankgroup = group_variants( $tradition, $rank, $missing, \@collapse );
156 if( $opts{'exclude_type1'} ) {
157 # Check to see whether this is a "useful" group.
158 my( $rdgs, $grps ) = _useful_variant( $rankgroup,
159 $stemma->graph, $c->ac_label );
162 push( @groups, $rankgroup );
163 $lacunae{$rank} = $missing;
167 my $answer = solve_variants( $stemma, @groups );
169 # Do further analysis on the answer
170 my $conflict_count = 0;
171 foreach my $idx ( 0 .. $#ranks ) {
172 my $location = $answer->{'variants'}->[$idx];
173 # Add the rank back in
174 $location->{'id'} = $ranks[$idx];
175 # Add the lacunae back in
176 $location->{'missing'} = $lacunae{$ranks[$idx]};
177 # Run the extra analysis we need.
178 analyze_location( $tradition, $stemma->graph, $location );
179 # Add the reading text back in
180 foreach my $rdghash ( @{$location->{'readings'}} ) {
182 if exists $rdghash->{'conflict'} && $rdghash->{'conflict'};
183 my $rdg = $c->reading( $rdghash->{'readingid'} );
184 $rdghash->{'text'} = $rdg ? $rdg->text : $rdghash->{'readingid'};
187 $answer->{'conflict_count'} = $conflict_count;
192 =head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
194 Groups the variants at the given $rank of the collation, treating any
195 relationships in @merge_relationship_types as equivalent. $lacunose should
196 be a reference to an array, to which the sigla of lacunose witnesses at this
197 rank will be appended.
199 Returns a hash $group_readings where $rdg is attested by the witnesses listed
200 in $group_readings->{$rdg}.
204 # Return group_readings, groups, lacunose
206 my( $tradition, $rank, $lacunose, $collapse ) = @_;
207 my $c = $tradition->collation;
208 my $aclabel = $c->ac_label;
209 # Get the alignment table readings
210 my %readings_at_rank;
211 my %is_lacunose; # lookup table for $lacunose
212 map { $is_lacunose{$_} = 1 } @$lacunose;
214 foreach my $tablewit ( @{$c->alignment_table->{'alignment'}} ) {
215 my $rdg = $tablewit->{'tokens'}->[$rank-1];
216 my $wit = $tablewit->{'witness'};
217 # Exclude the witness if it is "lacunose" which if we got here
218 # means "not in the stemma".
219 next if $is_lacunose{$wit};
220 if( $rdg && $rdg->{'t'}->is_lacuna ) {
221 _add_to_witlist( $wit, $lacunose, $aclabel );
223 $readings_at_rank{$rdg->{'t'}->text} = $rdg->{'t'};
225 _add_to_witlist( $wit, \@gap_wits, $aclabel );
229 # Group the readings, collapsing groups by relationship if needed
230 my %grouped_readings;
231 foreach my $rdg ( sort { $b->witnesses <=> $a->witnesses }
232 values %readings_at_rank ) {
233 # Skip readings that have been collapsed into others.
234 next if exists $grouped_readings{$rdg->id} && !$grouped_readings{$rdg->id};
235 my @wits = $rdg->witnesses;
237 my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
238 foreach my $other ( $rdg->related_readings( $filter ) ) {
239 my @otherwits = $other->witnesses;
240 push( @wits, @otherwits );
241 $grouped_readings{$other->id} = 0;
244 my @use_wits = grep { !$is_lacunose{$_} } @wits;
245 $grouped_readings{$rdg->id} = \@use_wits;
247 $grouped_readings{'(omitted)'} = \@gap_wits if @gap_wits;
248 # Get rid of our collapsed readings
249 map { delete $grouped_readings{$_} unless $grouped_readings{$_} }
250 keys %grouped_readings
253 return \%grouped_readings;
256 =head2 solve_variants( $graph, @groups )
258 Sends the set of groups to the external graph solver service and returns
259 a cleaned-up answer, adding the rank IDs back where they belong.
261 The JSON has the form
262 { "graph": [ stemmagraph DOT string without newlines ],
263 "groupings": [ array of arrays of groups, one per rank ] }
265 The answer has the form
266 { "variants" => [ array of variant location structures ],
267 "variant_count" => total,
268 "conflict_count" => number of conflicts detected,
269 "genealogical_count" => number of solutions found }
274 my( $stemma, @groups ) = @_;
276 # Make the json with stemma + groups
278 foreach my $ghash ( @groups ) {
280 foreach my $k ( keys %$ghash ) {
281 push( @grouping, $ghash->{$k} );
283 push( @$groupings, \@grouping );
285 ## Witness map is a HACK to get around limitations in node names from IDP
286 my $witness_map = {};
287 my $json = encode_json( _safe_wit_strings( $stemma, $groupings, $witness_map ) );
289 # Send it off and get the result
290 my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
291 my $ua = LWP::UserAgent->new();
292 my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
293 'Content' => $json );
297 if( $resp->is_success ) {
298 $answer = _desanitize_names( decode_json( $resp->content ), $witness_map );
301 # Fall back to the old method.
302 warn "IDP solver returned " . $resp->status_line . " / " . $resp->content
303 . "; falling back to perl method";
304 $answer = perl_solver( $stemma, @$groupings );
307 # Fold the result back into what we know about the groups.
309 my $genealogical = 0;
310 foreach my $idx ( 0 .. $#groups ) {
311 my( $calc_groups, $result ) = @{$answer->[$idx]};
314 # Prune the calculated groups, in case the IDP solver failed to.
317 foreach my $cg ( @$calc_groups ) {
318 my @pg = _prune_group( $cg, $stemma );
319 push( @pruned_groups, \@pg );
321 $calc_groups = \@pruned_groups;
324 my $input_group = $groups[$idx];
325 foreach my $k ( sort keys %$input_group ) {
326 my $cg = shift @$calc_groups;
327 $input_group->{$k} = $cg;
330 'genealogical' => $result,
333 foreach my $k ( keys %$input_group ) {
334 push( @{$vstruct->{'readings'}},
335 { 'readingid' => $k, 'group' => $input_group->{$k}} );
337 push( @$variants, $vstruct );
340 return { 'variants' => $variants,
341 'variant_count' => scalar @$variants,
342 'genealogical_count' => $genealogical };
345 #### HACKERY to cope with IDP's limited idea of what a node name looks like ###
347 sub _safe_wit_strings {
348 my( $stemma, $groupings, $witness_map ) = @_;
349 my $safegraph = Graph->new();
350 # Convert the graph to a safe representation and store the conversion.
351 foreach my $n ( $stemma->graph->vertices ) {
352 my $sn = _safe_witstr( $n );
353 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
354 if exists $witness_map->{$sn};
355 $witness_map->{$sn} = $n;
356 $safegraph->add_vertex( $sn );
357 $safegraph->set_vertex_attributes( $sn,
358 $stemma->graph->get_vertex_attributes( $n ) );
360 foreach my $e ( $stemma->graph->edges ) {
361 my @safe_e = ( _safe_witstr( $e->[0] ), _safe_witstr( $e->[1] ) );
362 $safegraph->add_edge( @safe_e );
364 my $safe_stemma = Text::Tradition::Stemma->new(
365 'collation' => $stemma->collation, 'graph' => $safegraph );
367 # Now convert the witness groupings to a safe representation.
368 my $safe_groupings = [];
369 foreach my $grouping ( @$groupings ) {
370 my $safe_grouping = [];
371 foreach my $group ( @$grouping ) {
373 foreach my $n ( @$group ) {
374 my $sn = _safe_witstr( $n );
375 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
376 if exists $witness_map->{$sn} && $witness_map->{$sn} ne $n;
377 $witness_map->{$sn} = $n;
378 push( @$safe_group, $sn );
380 push( @$safe_grouping, $safe_group );
382 push( @$safe_groupings, $safe_grouping );
385 # Return it all in the struct we expect. We have stored the reductions
386 # in the $witness_map that we were passed.
387 return { 'graph' => $safe_stemma->editable( ' ' ), 'groupings' => $safe_groupings };
392 $witstr =~ s/\s+/_/g;
393 $witstr =~ s/[^\w\d-]//g;
397 sub _desanitize_names {
398 my( $jsonstruct, $witness_map ) = @_;
400 foreach my $grouping ( @$jsonstruct ) {
401 my $real_grouping = [];
402 foreach my $element ( @$grouping ) {
403 if( ref( $element ) eq 'ARRAY' ) {
405 my $real_groupset = [];
406 foreach my $group ( @$element ) {
408 foreach my $n ( @$group ) {
409 my $rn = $witness_map->{$n};
410 push( @$real_group, $rn );
412 push( @$real_groupset, $real_group );
414 push( @$real_grouping, $real_groupset );
416 # It is the boolean, not actually a group.
417 push( @$real_grouping, $element );
420 push( @$result, $real_grouping );
427 =head2 analyze_location ( $tradition, $graph, $location_hash )
429 Given the tradition, its stemma graph, and the solution from the graph solver,
430 work out the rest of the information we want. For each reading we need missing,
431 conflict, reading_parents, independent_occurrence, followed, not_followed, and follow_unknown. Alters the location_hash in place.
435 sub analyze_location {
436 my ( $tradition, $graph, $variant_row ) = @_;
438 # Make a hash of all known node memberships, and make the subgraphs.
440 my $reading_roots = {};
442 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
443 my $rid = $rdghash->{'readingid'};
444 map { $contig->{$_} = $rid } @{$rdghash->{'group'}};
447 my $part = $graph->copy;
449 map { $these_vertices{$_} = 1 } @{$rdghash->{'group'}};
450 $part->delete_vertices( grep { !$these_vertices{$_} } $part->vertices );
451 $subgraph->{$rid} = $part;
452 # Get the reading roots.
453 map { $reading_roots->{$_} = $rid } $part->predecessorless_vertices;
456 # Now that we have all the node group memberships, calculate followed/
457 # non-followed/unknown values for each reading. Also figure out the
458 # reading's evident parent(s).
459 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
460 # Group string key - TODO do we need this?
461 my $gst = wit_stringify( $rdghash->{'group'} );
462 my $rid = $rdghash->{'readingid'};
464 my $part = $subgraph->{$rid};
466 # Start figuring things out.
467 my @roots = $part->predecessorless_vertices;
468 $rdghash->{'independent_occurrence'} = scalar @roots;
469 $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
470 # Find the parent readings, if any, of this reading.
472 foreach my $wit ( @roots ) {
473 # Look in the main stemma to find this witness's extant or known-reading
474 # immediate ancestor(s), and look up the reading that each ancestor olds.
475 my @check = $graph->predecessors( $wit );
478 foreach my $wparent( @check ) {
479 my $preading = $contig->{$wparent};
481 $rdgparents{$preading} = 1;
483 push( @next, $graph->predecessors( $wparent ) );
489 $rdghash->{'reading_parents'} = [ keys %rdgparents ];
491 # Find the number of times this reading was altered, and the number of
492 # times we're not sure.
493 my( %nofollow, %unknownfollow );
494 foreach my $wit ( $part->vertices ) {
495 foreach my $wchild ( $graph->successors( $wit ) ) {
496 next if $part->has_vertex( $wchild );
497 if( $reading_roots->{$wchild} && $contig->{$wchild} ) {
498 # It definitely changed here.
499 $nofollow{$wchild} = 1;
500 } elsif( !($contig->{$wchild}) ) {
501 # The child is a hypothetical node not definitely in
502 # any group. Answer is unknown.
503 $unknownfollow{$wchild} = 1;
504 } # else it's a non-root node in a known group, and therefore
505 # is presumed to have its reading from its group, not this link.
508 $rdghash->{'not_followed'} = keys %nofollow;
509 $rdghash->{'follow_unknown'} = keys %unknownfollow;
511 # Now say whether this reading represents a conflict.
512 unless( $variant_row->{'genealogical'} ) {
513 $rdghash->{'conflict'} = @roots != 1;
519 =head2 perl_solver( $tradition, $rank, $stemma_id, @merge_relationship_types )
521 ** NOTE ** This method should hopefully not be called - it is not guaranteed
522 to be correct. Serves as a backup for the real solver.
524 Runs an analysis of the given tradition, at the location given in $rank,
525 against the graph of the stemma specified in $stemma_id. The argument
526 @merge_relationship_types is an optional list of relationship types for
527 which readings so related should be treated as equivalent.
529 Returns a nested array data structure as follows:
531 [ [ group_list, is_genealogical ], [ group_list, is_genealogical ] ... ]
533 where the group list is the array of arrays passed in for each element of @groups,
534 possibly with the addition of hypothetical readings.
540 my( $stemma, @groups ) = @_;
541 my $graph = $stemma->graph;
543 foreach my $g ( @groups ) {
544 push( @answer, _solve_variant_location( $graph, $g ) );
549 sub _solve_variant_location {
550 my( $graph, $groups ) = @_;
557 # Mark each ms as in its own group, first.
558 foreach my $g ( @$groups ) {
559 my $gst = wit_stringify( $g );
560 map { $contig->{$_} = $gst } @$g;
563 # Now for each unmarked node in the graph, initialize an array
564 # for possible group memberships. We will use this later to
565 # resolve potential conflicts.
566 map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
567 foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
568 my $gst = wit_stringify( $g ); # This is the group name
569 # Copy the graph, and delete all non-members from the new graph.
570 my $part = $graph->copy;
572 $part->delete_vertices(
573 grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
575 # Now look to see if our group is connected.
577 # We have to take directionality into account.
578 # How many root nodes do we have?
579 my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
580 $part->predecessorless_vertices;
581 # Assuming that @$g > 1, find the first root node that has at
582 # least one successor belonging to our group. If this reading
583 # is genealogical, there should be only one, but we will check
584 # that implicitly later.
585 foreach my $root ( @roots ) {
586 # Prune the tree to get rid of extraneous hypotheticals.
587 $root = _prune_subtree( $part, $root, $contig );
589 # Save this root for our group.
590 push( @group_roots, $root );
591 # Get all the successor nodes of our root.
594 # Dispense with the trivial case of one reading.
596 @group_roots = ( $wit );
597 foreach my $v ( $part->vertices ) {
598 $part->delete_vertex( $v ) unless $v eq $wit;
602 if( @group_roots > 1 ) {
603 $conflict->{$gst} = 1;
606 # Paint the 'hypotheticals' with our group.
607 foreach my $wit ( $part->vertices ) {
608 if( ref( $contig->{$wit} ) ) {
609 push( @{$contig->{$wit}}, $gst );
610 } elsif( $contig->{$wit} ne $gst ) {
611 warn "How did we get here?";
616 # Save the relevant subgraph.
617 $subgraph->{$gst} = $part;
620 # For each of our hypothetical readings, flatten its 'contig' array if
621 # the array contains zero or one group. If we have any unflattened arrays,
622 # we may need to run the resolution process. If the reading is already known
623 # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
626 foreach my $wit ( keys %$contig ) {
627 next unless ref( $contig->{$wit} );
628 if( @{$contig->{$wit}} > 1 ) {
629 if( $is_conflicted ) {
630 $contig->{$wit} = ''; # We aren't going to decide.
632 push( @resolve, $wit );
635 my $gst = pop @{$contig->{$wit}};
636 $contig->{$wit} = $gst || '';
641 my $still_contig = {};
642 foreach my $h ( @resolve ) {
643 # For each of the hypothetical readings with more than one possibility,
644 # try deleting it from each of its member subgraphs in turn, and see
645 # if that breaks the contiguous grouping.
646 # TODO This can still break in a corner case where group A can use
647 # either vertex 1 or 2, and group B can use either vertex 2 or 1.
648 # Revisit this if necessary; it could get brute-force nasty.
649 foreach my $gst ( @{$contig->{$h}} ) {
650 my $gpart = $subgraph->{$gst}->copy();
651 # If we have come this far, there is only one root and everything
652 # is reachable from it.
653 my( $root ) = $gpart->predecessorless_vertices;
655 map { $reachable->{$_} = 1 } $gpart->vertices;
657 # Try deleting the hypothetical node.
658 $gpart->delete_vertex( $h );
660 # See if we still have a single root.
661 my @roots = $gpart->predecessorless_vertices;
662 warn "This shouldn't have happened" unless @roots;
664 # $h is needed by this group.
665 if( exists( $still_contig->{$h} ) ) {
667 $conflict->{$gst} = 1;
668 $still_contig->{$h} = '';
670 $still_contig->{$h} = $gst;
674 # $h is somewhere in the middle. See if everything
675 # else can still be reached from the root.
676 my %still_reachable = ( $root => 1 );
677 map { $still_reachable{$_} = 1 }
678 $gpart->all_successors( $root );
679 foreach my $v ( keys %$reachable ) {
681 if( !$still_reachable{$v}
682 && ( $contig->{$v} eq $gst
683 || ( exists $still_contig->{$v}
684 && $still_contig->{$v} eq $gst ) ) ) {
686 if( exists $still_contig->{$h} ) {
688 $conflict->{$gst} = 1;
689 $still_contig->{$h} = '';
691 $still_contig->{$h} = $gst;
694 } # else we don't need $h in this group.
696 } # endif $h eq $root
700 # Now we have some hypothetical vertices in $still_contig that are the
701 # "real" group memberships. Replace these in $contig.
702 foreach my $v ( keys %$contig ) {
703 next unless ref $contig->{$v};
704 $contig->{$v} = $still_contig->{$v};
708 my $is_genealogical = keys %$conflict ? JSON::false : JSON::true;
709 my $variant_row = [ [], $is_genealogical ];
710 # Fill in the groupings from $contig.
711 foreach my $g ( @$groups ) {
712 my $gst = wit_stringify( $g );
713 my @realgroup = grep { $contig->{$_} eq $gst } keys %$contig;
714 push( @{$variant_row->[0]}, \@realgroup );
720 my( $group, $stemma ) = @_;
721 # Get these into a form prune_subtree will recognize. Make a "contighash"
723 map { $hypohash->{$_} = 1 } @$group;
724 # ...with reference values for hypotheticals.
725 map { $hypohash->{$_} = [] } $stemma->hypotheticals;
727 my $subgraph = $stemma->graph->copy;
728 map { $subgraph->delete_vertex( $_ ) unless exists $hypohash->{$_} }
730 # ...and find the root.
731 my( $root ) = $subgraph->predecessorless_vertices;
732 # Now prune and return the remaining vertices.
733 _prune_subtree( $subgraph, $root, $hypohash );
734 return $subgraph->vertices;
738 my( $tree, $root, $contighash ) = @_;
739 # First, delete hypothetical leaves / orphans until there are none left.
740 my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
741 $tree->successorless_vertices;
742 while( @orphan_hypotheticals ) {
743 $tree->delete_vertices( @orphan_hypotheticals );
744 @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
745 $tree->successorless_vertices;
747 # Then delete a hypothetical root with only one successor, moving the
748 # root to the first child that has no other predecessors.
749 while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
750 my @nextroot = $tree->successors( $root );
751 $tree->delete_vertex( $root );
752 ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
754 # The tree has been modified in place, but we need to know the new root.
755 $root = undef unless $root && $tree->has_vertex( $root );
758 # Add the variant, subject to a.c. representation logic.
759 # This assumes that we will see the 'main' version before the a.c. version.
760 sub add_variant_wit {
761 my( $arr, $wit, $acstr ) = @_;
763 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
765 $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
767 push( @$arr, $wit ) unless $skip;
770 sub _useful_variant {
771 my( $group_readings, $graph, $acstr ) = @_;
773 # TODO Decide what to do with AC witnesses
775 # Sort by group size and return
777 my( @readings, @groups ); # The sorted groups for our answer.
778 foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
779 keys %$group_readings ) {
780 push( @readings, $rdg );
781 push( @groups, $group_readings->{$rdg} );
782 if( @{$group_readings->{$rdg}} > 1 ) {
785 my( $wit ) = @{$group_readings->{$rdg}};
786 $wit =~ s/^(.*)\Q$acstr\E$/$1/;
787 $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
790 if( $is_useful > 1 ) {
791 return( \@readings, \@groups );
797 =head2 wit_stringify( $groups )
799 Takes an array of witness groupings and produces a string like
800 ['A','B'] / ['C','D','E'] / ['F']
807 # If we were passed an array of witnesses instead of an array of
808 # groupings, then "group" the witnesses first.
809 unless( ref( $groups->[0] ) ) {
810 my $mkgrp = [ $groups ];
813 foreach my $g ( @$groups ) {
814 push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
816 return join( ' / ', @gst );
819 # Helper function to ensure that X and X a.c. never appear in the same list.
820 sub _add_to_witlist {
821 my( $wit, $list, $acstr ) = @_;
824 map { $inlist{$_} = $idx++ } @$list;
825 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
827 unless( exists $inlist{$acwit} ) {
828 push( @$list, $acwit.$acstr );
831 if( exists( $inlist{$wit.$acstr} ) ) {
832 # Replace the a.c. version with the main witness
833 my $i = $inlist{$wit.$acstr};
836 push( @$list, $wit );
842 my( $lista, $listb ) = @_;
845 map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
846 map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
847 my @set = grep { $union{$_} == 1 } keys %union;
848 return map { $scalars{$_} } @set;
855 This package is free software and is provided "as is" without express
856 or implied warranty. You can redistribute it and/or modify it under
857 the same terms as Perl itself.
861 Tara L Andrews E<lt>aurum@cpan.orgE<gt>