1 package Text::Tradition::Analysis;
6 use Encode qw/ encode_utf8 /;
9 use JSON qw/ encode_json decode_json /;
12 use Text::Tradition::Stemma;
14 use vars qw/ @EXPORT_OK /;
15 @EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
19 Text::Tradition::Analysis - functions for stemma analysis of a tradition
24 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
25 my $t = Text::Tradition->new(
26 'name' => 'this is a text',
28 'file' => '/path/to/tei_parallel_seg_file.xml' );
29 $t->add_stemma( 'dotfile' => $stemmafile );
31 my $variant_data = run_analysis( $tradition );
32 # Recalculate rank $n treating all orthographic variants as equivalent
33 my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
37 Text::Tradition is a library for representation and analysis of collated
38 texts, particularly medieval ones. The Collation is the central feature of
39 a Tradition, where the text, its sequence of readings, and its relationships
40 between readings are actually kept.
44 =head2 run_analysis( $tradition, %opts )
46 Runs the analysis described in analyze_variant_location on every location in the
47 collation of the given tradition, with the given options. These include:
51 =item * stemma_id - Specify which of the tradition's stemmata to use. Default
52 is 0 (i.e. the first).
54 =item * ranks - Specify a list of location ranks to analyze; exclude the rest.
56 =item * merge_types - Specify a list of relationship types, where related readings
57 should be treated as identical for the purposes of analysis.
59 =item * exclude_type1 - Exclude those ranks whose groupings have only type-1 variants.
66 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
68 my $datafile = 't/data/florilegium_tei_ps.xml';
69 my $tradition = Text::Tradition->new( 'input' => 'TEI',
71 'file' => $datafile );
72 my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
73 is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
75 my %expected_genealogical = (
106 my $data = run_analysis( $tradition );
107 my $c = $tradition->collation;
108 foreach my $row ( @{$data->{'variants'}} ) {
109 # Account for rows that used to be "not useful"
110 unless( exists $expected_genealogical{$row->{'id'}} ) {
111 $expected_genealogical{$row->{'id'}} = 1;
113 is( $row->{'genealogical'}, $expected_genealogical{$row->{'id'}},
114 "Got correct genealogical flag for row " . $row->{'id'} );
115 # Check that we have the right row with the right groups
116 my $rank = $row->{'id'};
117 foreach my $rdghash ( @{$row->{'readings'}} ) {
118 # Skip 'readings' that aren't really
119 next unless $c->reading( $rdghash->{'readingid'} );
121 is( $c->reading( $rdghash->{'readingid'} )->rank, $rank,
122 "Got correct reading rank" );
123 # Check the witnesses
124 my @realwits = sort $c->reading_witnesses( $rdghash->{'readingid'} );
125 my @sgrp = sort @{$rdghash->{'group'}};
126 is_deeply( \@sgrp, \@realwits, "Reading analyzed with correct groups" );
129 is( $data->{'variant_count'}, 58, "Got right total variant number" );
130 # TODO Make something meaningful of conflict count, maybe test other bits
137 my( $tradition, %opts ) = @_;
138 my $c = $tradition->collation;
140 my $stemma_id = $opts{'stemma_id'} || 0;
141 my @ranks = ref( $opts{'ranks'} ) eq 'ARRAY' ? @{$opts{'ranks'}} : ();
142 my @collapse = ref( $opts{'merge_types'} ) eq 'ARRAY' ? @{$opts{'merge_types'}} : ();
145 my $stemma = $tradition->stemma( $stemma_id );
147 # Figure out which witnesses we are working with - that is, the ones that
148 # appear both in the stemma and in the tradition. All others are 'lacunose'
150 my @lacunose = $stemma->hypotheticals;
151 my @tradition_wits = map { $_->sigil } $tradition->witnesses;
152 map { push( @tradition_wits, $_->sigil.$c->ac_label ) if $_->is_layered }
153 $tradition->witnesses;
154 push( @lacunose, _symmdiff( [ $stemma->witnesses ], \@tradition_wits ) );
156 # Find and mark 'common' ranks for exclusion, unless they were
157 # explicitly specified.
160 foreach my $rdg ( $c->common_readings ) {
161 $common_rank{$rdg->rank} = 1;
163 @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
166 # Group the variants to send to the solver
170 foreach my $rank ( @ranks ) {
171 my $missing = [ @lacunose ];
172 my $rankgroup = group_variants( $tradition, $rank, $missing, \@collapse );
173 if( $opts{'exclude_type1'} ) {
174 # Check to see whether this is a "useful" group.
175 my( $rdgs, $grps ) = _useful_variant( $rankgroup,
176 $stemma->graph, $c->ac_label );
179 push( @use_ranks, $rank );
180 push( @groups, $rankgroup );
181 $lacunae{$rank} = $missing;
184 my $answer = solve_variants( $stemma, @groups );
186 # Do further analysis on the answer
187 my $conflict_count = 0;
188 my $aclabel = $c->ac_label;
189 foreach my $idx ( 0 .. $#use_ranks ) {
190 my $location = $answer->{'variants'}->[$idx];
191 # Add the rank back in
192 $location->{'id'} = $use_ranks[$idx];
193 $DB::single = 1 if $use_ranks[$idx] == 87;
194 # Note what our lacunae are
196 map { $lmiss{$_} = 1 } @{$lacunae{$use_ranks[$idx]}};
197 # Run through the reading groups and add as 'lacunae' any redundant
198 # a.c. witnesses (yes, we have to do this before the analysis, thus
199 # identical loops before and after. Boo.)
200 # TODO Consider making these callbacks to analyze_location
201 foreach my $rdghash ( @{$location->{'readings'}} ) {
203 map { $rwits{$_} = 1 } @{$rdghash->{'group'}};
204 foreach my $rw ( keys %rwits ) {
205 if( $rw =~ /^(.*)\Q$aclabel\E$/ ) {
206 if( exists $rwits{$1} ) {
212 $rdghash->{'group'} = [ keys %rwits ];
214 $location->{'missing'} = [ keys %lmiss ];
216 # Run the extra analysis we need.
217 analyze_location( $tradition, $stemma->graph, $location );
219 # Do the final post-analysis tidying up of the data.
220 foreach my $rdghash ( @{$location->{'readings'}} ) {
222 if exists $rdghash->{'conflict'} && $rdghash->{'conflict'};
223 # Add the reading text back in
224 my $rdg = $c->reading( $rdghash->{'readingid'} );
225 $rdghash->{'text'} = $rdg ? $rdg->text : $rdghash->{'readingid'};
226 # Remove lacunose witnesses from this reading's list now that the
229 map { push( @realgroup, $_ ) unless $lmiss{$_} } @{$rdghash->{'group'}};
230 $rdghash->{'group'} = \@realgroup;
231 # TODO Record hypotheticals used to create group, if we end up
235 $answer->{'conflict_count'} = $conflict_count;
240 =head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
242 Groups the variants at the given $rank of the collation, treating any
243 relationships in @merge_relationship_types as equivalent. $lacunose should
244 be a reference to an array, to which the sigla of lacunose witnesses at this
245 rank will be appended.
247 Returns a hash $group_readings where $rdg is attested by the witnesses listed
248 in $group_readings->{$rdg}.
252 # Return group_readings, groups, lacunose
254 my( $tradition, $rank, $lacunose, $collapse ) = @_;
255 my $c = $tradition->collation;
256 my $aclabel = $c->ac_label;
258 map { $seen_acwits{$_->sigil.$aclabel} = 0 if $_->is_layered } $tradition->witnesses;
259 # Get the alignment table readings
260 my %readings_at_rank;
261 my %is_lacunose; # lookup table for $lacunose
262 map { $is_lacunose{$_} = 1 } @$lacunose;
264 foreach my $tablewit ( @{$c->alignment_table->{'alignment'}} ) {
265 my $rdg = $tablewit->{'tokens'}->[$rank-1];
266 my $wit = $tablewit->{'witness'};
267 # Exclude the witness if it is "lacunose" which if we got here
268 # means "not in the stemma".
269 next if $is_lacunose{$wit};
270 if( $rdg && $rdg->{'t'}->is_lacuna ) {
271 push( @$lacunose, $wit );
273 $readings_at_rank{$rdg->{'t'}->text} = $rdg->{'t'};
275 $seen_acwits{$wit} = 1 if exists $seen_acwits{$wit};
276 push( @gap_wits, $wit );
280 # Group the readings, collapsing groups by relationship if needed
281 my %grouped_readings;
282 foreach my $rdg ( values %readings_at_rank ) {
283 # Skip readings that have been collapsed into others.
284 next if exists $grouped_readings{$rdg->id} && !$grouped_readings{$rdg->id};
285 # Get the witness list, including from readings collapsed into this one.
286 my @wits = $rdg->witnesses;
288 my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
289 foreach my $other ( $rdg->related_readings( $filter ) ) {
290 my @otherwits = $other->witnesses;
291 push( @wits, @otherwits );
292 $grouped_readings{$other->id} = 0;
295 # Filter the group to those witnesses in the stemma, and note any
296 # a.c. witnesses explicitly returned.
298 foreach my $wit ( @wits ) {
299 next if $is_lacunose{$wit};
300 push( @use_wits, $wit );
301 $seen_acwits{$wit} = 1 if exists $seen_acwits{$wit};
303 $grouped_readings{$rdg->id} = \@use_wits;
305 $grouped_readings{'(omitted)'} = \@gap_wits if @gap_wits;
306 # Get rid of our collapsed readings
307 map { delete $grouped_readings{$_} unless $grouped_readings{$_} }
308 keys %grouped_readings
310 # Any unseen a.c. witnesses should be made lacunose
311 map { push( @$lacunose, $_ ) unless $seen_acwits{$_} } keys %seen_acwits;
314 return \%grouped_readings;
317 =head2 solve_variants( $graph, @groups )
319 Sends the set of groups to the external graph solver service and returns
320 a cleaned-up answer, adding the rank IDs back where they belong.
322 The JSON has the form
323 { "graph": [ stemmagraph DOT string without newlines ],
324 "groupings": [ array of arrays of groups, one per rank ] }
326 The answer has the form
327 { "variants" => [ array of variant location structures ],
328 "variant_count" => total,
329 "conflict_count" => number of conflicts detected,
330 "genealogical_count" => number of solutions found }
335 my( $stemma, @groups ) = @_;
337 # Make the json with stemma + groups
339 foreach my $ghash ( @groups ) {
341 foreach my $k ( sort keys %$ghash ) {
342 push( @grouping, $ghash->{$k} );
344 push( @$groupings, \@grouping );
346 ## Witness map is a HACK to get around limitations in node names from IDP
347 my $witness_map = {};
348 my $json = encode_json( _safe_wit_strings( $stemma, $groupings, $witness_map ) );
350 # Send it off and get the result
351 my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
352 my $ua = LWP::UserAgent->new();
353 my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
354 'Content' => $json );
358 if( $resp->is_success ) {
359 $answer = _desanitize_names( decode_json( $resp->content ), $witness_map );
362 # Fall back to the old method.
363 warn "IDP solver returned " . $resp->status_line . " / " . $resp->content
364 . "; falling back to perl method";
365 $answer = perl_solver( $stemma, @$groupings );
368 # Fold the result back into what we know about the groups.
370 my $genealogical = 0;
371 foreach my $idx ( 0 .. $#groups ) {
372 my( $calc_groups, $result ) = @{$answer->[$idx]};
375 # Prune the calculated groups, in case the IDP solver failed to.
378 foreach my $cg ( @$calc_groups ) {
379 my @pg = _prune_group( $cg, $stemma );
380 push( @pruned_groups, \@pg );
382 $calc_groups = \@pruned_groups;
385 my $input_group = $groups[$idx];
386 foreach my $k ( sort keys %$input_group ) {
387 my $cg = shift @$calc_groups;
388 $input_group->{$k} = $cg;
391 'genealogical' => $result,
394 foreach my $k ( sort { @{$input_group->{$b}} <=> @{$input_group->{$a}} }
395 keys %$input_group ) {
396 push( @{$vstruct->{'readings'}},
397 { 'readingid' => $k, 'group' => $input_group->{$k}} );
399 push( @$variants, $vstruct );
402 return { 'variants' => $variants,
403 'variant_count' => scalar @$variants,
404 'genealogical_count' => $genealogical };
407 #### HACKERY to cope with IDP's limited idea of what a node name looks like ###
409 sub _safe_wit_strings {
410 my( $stemma, $groupings, $witness_map ) = @_;
411 my $safegraph = Graph->new();
412 # Convert the graph to a safe representation and store the conversion.
413 foreach my $n ( $stemma->graph->vertices ) {
414 my $sn = _safe_witstr( $n );
415 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
416 if exists $witness_map->{$sn};
417 $witness_map->{$sn} = $n;
418 $safegraph->add_vertex( $sn );
419 $safegraph->set_vertex_attributes( $sn,
420 $stemma->graph->get_vertex_attributes( $n ) );
422 foreach my $e ( $stemma->graph->edges ) {
423 my @safe_e = ( _safe_witstr( $e->[0] ), _safe_witstr( $e->[1] ) );
424 $safegraph->add_edge( @safe_e );
426 my $safe_stemma = Text::Tradition::Stemma->new(
427 'collation' => $stemma->collation, 'graph' => $safegraph );
429 # Now convert the witness groupings to a safe representation.
430 my $safe_groupings = [];
431 foreach my $grouping ( @$groupings ) {
432 my $safe_grouping = [];
433 foreach my $group ( @$grouping ) {
435 foreach my $n ( @$group ) {
436 my $sn = _safe_witstr( $n );
437 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
438 if exists $witness_map->{$sn} && $witness_map->{$sn} ne $n;
439 $witness_map->{$sn} = $n;
440 push( @$safe_group, $sn );
442 push( @$safe_grouping, $safe_group );
444 push( @$safe_groupings, $safe_grouping );
447 # Return it all in the struct we expect. We have stored the reductions
448 # in the $witness_map that we were passed.
449 return { 'graph' => $safe_stemma->editable( ' ' ), 'groupings' => $safe_groupings };
454 $witstr =~ s/\s+/_/g;
455 $witstr =~ s/[^\w\d-]//g;
459 sub _desanitize_names {
460 my( $jsonstruct, $witness_map ) = @_;
462 foreach my $grouping ( @$jsonstruct ) {
463 my $real_grouping = [];
464 foreach my $element ( @$grouping ) {
465 if( ref( $element ) eq 'ARRAY' ) {
467 my $real_groupset = [];
468 foreach my $group ( @$element ) {
470 foreach my $n ( @$group ) {
471 my $rn = $witness_map->{$n};
472 push( @$real_group, $rn );
474 push( @$real_groupset, $real_group );
476 push( @$real_grouping, $real_groupset );
478 # It is the boolean, not actually a group.
479 push( @$real_grouping, $element );
482 push( @$result, $real_grouping );
489 =head2 analyze_location ( $tradition, $graph, $location_hash )
491 Given the tradition, its stemma graph, and the solution from the graph solver,
492 work out the rest of the information we want. For each reading we need missing,
493 conflict, reading_parents, independent_occurrence, followed, not_followed, and follow_unknown. Alters the location_hash in place.
497 sub analyze_location {
498 my ( $tradition, $graph, $variant_row ) = @_;
500 # Make a hash of all known node memberships, and make the subgraphs.
502 my $reading_roots = {};
504 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
505 my $rid = $rdghash->{'readingid'};
506 map { $contig->{$_} = $rid } @{$rdghash->{'group'}};
509 my $part = $graph->copy;
511 map { $these_vertices{$_} = 1 } @{$rdghash->{'group'}};
512 $part->delete_vertices( grep { !$these_vertices{$_} } $part->vertices );
513 $subgraph->{$rid} = $part;
514 # Get the reading roots.
515 map { $reading_roots->{$_} = $rid } $part->predecessorless_vertices;
518 # Now that we have all the node group memberships, calculate followed/
519 # non-followed/unknown values for each reading. Also figure out the
520 # reading's evident parent(s).
521 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
522 # Group string key - TODO do we need this?
523 my $gst = wit_stringify( $rdghash->{'group'} );
524 my $rid = $rdghash->{'readingid'};
526 my $part = $subgraph->{$rid};
528 # Start figuring things out.
529 my @roots = $part->predecessorless_vertices;
530 $rdghash->{'independent_occurrence'} = scalar @roots;
531 $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
532 # Find the parent readings, if any, of this reading.
534 foreach my $wit ( @roots ) {
535 # Look in the main stemma to find this witness's extant or known-reading
536 # immediate ancestor(s), and look up the reading that each ancestor olds.
537 my @check = $graph->predecessors( $wit );
540 foreach my $wparent( @check ) {
541 my $preading = $contig->{$wparent};
543 $rdgparents{$preading} = 1;
545 push( @next, $graph->predecessors( $wparent ) );
551 $rdghash->{'reading_parents'} = [ keys %rdgparents ];
553 # Find the number of times this reading was altered, and the number of
554 # times we're not sure.
555 my( %nofollow, %unknownfollow );
556 foreach my $wit ( $part->vertices ) {
557 foreach my $wchild ( $graph->successors( $wit ) ) {
558 next if $part->has_vertex( $wchild );
559 if( $reading_roots->{$wchild} && $contig->{$wchild} ) {
560 # It definitely changed here.
561 $nofollow{$wchild} = 1;
562 } elsif( !($contig->{$wchild}) ) {
563 # The child is a hypothetical node not definitely in
564 # any group. Answer is unknown.
565 $unknownfollow{$wchild} = 1;
566 } # else it's a non-root node in a known group, and therefore
567 # is presumed to have its reading from its group, not this link.
570 $rdghash->{'not_followed'} = keys %nofollow;
571 $rdghash->{'follow_unknown'} = keys %unknownfollow;
573 # Now say whether this reading represents a conflict.
574 unless( $variant_row->{'genealogical'} ) {
575 $rdghash->{'conflict'} = @roots != 1;
581 =head2 perl_solver( $tradition, $rank, $stemma_id, @merge_relationship_types )
583 ** NOTE ** This method should hopefully not be called - it is not guaranteed
584 to be correct. Serves as a backup for the real solver.
586 Runs an analysis of the given tradition, at the location given in $rank,
587 against the graph of the stemma specified in $stemma_id. The argument
588 @merge_relationship_types is an optional list of relationship types for
589 which readings so related should be treated as equivalent.
591 Returns a nested array data structure as follows:
593 [ [ group_list, is_genealogical ], [ group_list, is_genealogical ] ... ]
595 where the group list is the array of arrays passed in for each element of @groups,
596 possibly with the addition of hypothetical readings.
602 my( $stemma, @groups ) = @_;
603 my $graph = $stemma->graph;
605 foreach my $g ( @groups ) {
606 push( @answer, _solve_variant_location( $graph, $g ) );
611 sub _solve_variant_location {
612 my( $graph, $groups ) = @_;
619 # Mark each ms as in its own group, first.
620 foreach my $g ( @$groups ) {
621 my $gst = wit_stringify( $g );
622 map { $contig->{$_} = $gst } @$g;
625 # Now for each unmarked node in the graph, initialize an array
626 # for possible group memberships. We will use this later to
627 # resolve potential conflicts.
628 map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
629 foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
630 my $gst = wit_stringify( $g ); # This is the group name
631 # Copy the graph, and delete all non-members from the new graph.
632 my $part = $graph->copy;
634 $part->delete_vertices(
635 grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
637 # Now look to see if our group is connected.
639 # We have to take directionality into account.
640 # How many root nodes do we have?
641 my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
642 $part->predecessorless_vertices;
643 # Assuming that @$g > 1, find the first root node that has at
644 # least one successor belonging to our group. If this reading
645 # is genealogical, there should be only one, but we will check
646 # that implicitly later.
647 foreach my $root ( @roots ) {
648 # Prune the tree to get rid of extraneous hypotheticals.
649 $root = _prune_subtree( $part, $root, $contig );
651 # Save this root for our group.
652 push( @group_roots, $root );
653 # Get all the successor nodes of our root.
656 # Dispense with the trivial case of one reading.
658 @group_roots = ( $wit );
659 foreach my $v ( $part->vertices ) {
660 $part->delete_vertex( $v ) unless $v eq $wit;
664 if( @group_roots > 1 ) {
665 $conflict->{$gst} = 1;
668 # Paint the 'hypotheticals' with our group.
669 foreach my $wit ( $part->vertices ) {
670 if( ref( $contig->{$wit} ) ) {
671 push( @{$contig->{$wit}}, $gst );
672 } elsif( $contig->{$wit} ne $gst ) {
673 warn "How did we get here?";
678 # Save the relevant subgraph.
679 $subgraph->{$gst} = $part;
682 # For each of our hypothetical readings, flatten its 'contig' array if
683 # the array contains zero or one group. If we have any unflattened arrays,
684 # we may need to run the resolution process. If the reading is already known
685 # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
688 foreach my $wit ( keys %$contig ) {
689 next unless ref( $contig->{$wit} );
690 if( @{$contig->{$wit}} > 1 ) {
691 if( $is_conflicted ) {
692 $contig->{$wit} = ''; # We aren't going to decide.
694 push( @resolve, $wit );
697 my $gst = pop @{$contig->{$wit}};
698 $contig->{$wit} = $gst || '';
703 my $still_contig = {};
704 foreach my $h ( @resolve ) {
705 # For each of the hypothetical readings with more than one possibility,
706 # try deleting it from each of its member subgraphs in turn, and see
707 # if that breaks the contiguous grouping.
708 # TODO This can still break in a corner case where group A can use
709 # either vertex 1 or 2, and group B can use either vertex 2 or 1.
710 # Revisit this if necessary; it could get brute-force nasty.
711 foreach my $gst ( @{$contig->{$h}} ) {
712 my $gpart = $subgraph->{$gst}->copy();
713 # If we have come this far, there is only one root and everything
714 # is reachable from it.
715 my( $root ) = $gpart->predecessorless_vertices;
717 map { $reachable->{$_} = 1 } $gpart->vertices;
719 # Try deleting the hypothetical node.
720 $gpart->delete_vertex( $h );
722 # See if we still have a single root.
723 my @roots = $gpart->predecessorless_vertices;
724 warn "This shouldn't have happened" unless @roots;
726 # $h is needed by this group.
727 if( exists( $still_contig->{$h} ) ) {
729 $conflict->{$gst} = 1;
730 $still_contig->{$h} = '';
732 $still_contig->{$h} = $gst;
736 # $h is somewhere in the middle. See if everything
737 # else can still be reached from the root.
738 my %still_reachable = ( $root => 1 );
739 map { $still_reachable{$_} = 1 }
740 $gpart->all_successors( $root );
741 foreach my $v ( keys %$reachable ) {
743 if( !$still_reachable{$v}
744 && ( $contig->{$v} eq $gst
745 || ( exists $still_contig->{$v}
746 && $still_contig->{$v} eq $gst ) ) ) {
748 if( exists $still_contig->{$h} ) {
750 $conflict->{$gst} = 1;
751 $still_contig->{$h} = '';
753 $still_contig->{$h} = $gst;
756 } # else we don't need $h in this group.
758 } # endif $h eq $root
762 # Now we have some hypothetical vertices in $still_contig that are the
763 # "real" group memberships. Replace these in $contig.
764 foreach my $v ( keys %$contig ) {
765 next unless ref $contig->{$v};
766 $contig->{$v} = $still_contig->{$v};
770 my $is_genealogical = keys %$conflict ? JSON::false : JSON::true;
771 my $variant_row = [ [], $is_genealogical ];
772 # Fill in the groupings from $contig.
773 foreach my $g ( @$groups ) {
774 my $gst = wit_stringify( $g );
775 my @realgroup = grep { $contig->{$_} eq $gst } keys %$contig;
776 push( @{$variant_row->[0]}, \@realgroup );
782 my( $group, $stemma ) = @_;
783 # Get these into a form prune_subtree will recognize. Make a "contighash"
785 map { $hypohash->{$_} = 1 } @$group;
786 # ...with reference values for hypotheticals.
787 map { $hypohash->{$_} = [] } $stemma->hypotheticals;
789 my $subgraph = $stemma->graph->copy;
790 map { $subgraph->delete_vertex( $_ ) unless exists $hypohash->{$_} }
792 # ...and find the root.
793 my( $root ) = $subgraph->predecessorless_vertices;
794 # Now prune and return the remaining vertices.
795 _prune_subtree( $subgraph, $root, $hypohash );
796 return $subgraph->vertices;
800 my( $tree, $root, $contighash ) = @_;
801 # First, delete hypothetical leaves / orphans until there are none left.
802 my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
803 $tree->successorless_vertices;
804 while( @orphan_hypotheticals ) {
805 $tree->delete_vertices( @orphan_hypotheticals );
806 @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
807 $tree->successorless_vertices;
809 # Then delete a hypothetical root with only one successor, moving the
810 # root to the first child that has no other predecessors.
811 while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
812 my @nextroot = $tree->successors( $root );
813 $tree->delete_vertex( $root );
814 ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
816 # The tree has been modified in place, but we need to know the new root.
817 $root = undef unless $root && $tree->has_vertex( $root );
820 # Add the variant, subject to a.c. representation logic.
821 # This assumes that we will see the 'main' version before the a.c. version.
822 sub add_variant_wit {
823 my( $arr, $wit, $acstr ) = @_;
825 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
827 $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
829 push( @$arr, $wit ) unless $skip;
832 sub _useful_variant {
833 my( $group_readings, $graph, $acstr ) = @_;
835 # TODO Decide what to do with AC witnesses
837 # Sort by group size and return
839 my( @readings, @groups ); # The sorted groups for our answer.
840 foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
841 keys %$group_readings ) {
842 push( @readings, $rdg );
843 push( @groups, $group_readings->{$rdg} );
844 if( @{$group_readings->{$rdg}} > 1 ) {
847 my( $wit ) = @{$group_readings->{$rdg}};
848 $wit =~ s/^(.*)\Q$acstr\E$/$1/;
849 $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
852 if( $is_useful > 1 ) {
853 return( \@readings, \@groups );
859 =head2 wit_stringify( $groups )
861 Takes an array of witness groupings and produces a string like
862 ['A','B'] / ['C','D','E'] / ['F']
869 # If we were passed an array of witnesses instead of an array of
870 # groupings, then "group" the witnesses first.
871 unless( ref( $groups->[0] ) ) {
872 my $mkgrp = [ $groups ];
875 foreach my $g ( @$groups ) {
876 push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
878 return join( ' / ', @gst );
882 my( $lista, $listb ) = @_;
885 map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
886 map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
887 my @set = grep { $union{$_} == 1 } keys %union;
888 return map { $scalars{$_} } @set;
895 This package is free software and is provided "as is" without express
896 or implied warranty. You can redistribute it and/or modify it under
897 the same terms as Perl itself.
901 Tara L Andrews E<lt>aurum@cpan.orgE<gt>