lib/Text/Tradition/Analysis.pm

   1 package Text::Tradition::Analysis;
   2
   3 use strict;
   4 use warnings;
   5 use Benchmark;
   6 use Encode qw/ encode_utf8 /;
   7 use Exporter 'import';
   8 use JSON qw/ encode_json decode_json /;
   9 use LWP::UserAgent;
  10 use Text::Tradition;
  11 use Text::Tradition::Stemma;
  12
  13 use vars qw/ @EXPORT_OK /;
  14 @EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
  15
  16 =head1 NAME
  17
  18 Text::Tradition::Analysis - functions for stemma analysis of a tradition
  19
  20 =head1 SYNOPSIS
  21
  22   use Text::Tradition;
  23   use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
  24   my $t = Text::Tradition->new(
  25     'name' => 'this is a text',
  26     'input' => 'TEI',
  27     'file' => '/path/to/tei_parallel_seg_file.xml' );
  28   $t->add_stemma( 'dotfile' => $stemmafile );
  29
  30   my $variant_data = run_analysis( $tradition );
  31   # Recalculate rank $n treating all orthographic variants as equivalent
  32   my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
  33
  34 =head1 DESCRIPTION
  35
  36 Text::Tradition is a library for representation and analysis of collated
  37 texts, particularly medieval ones.  The Collation is the central feature of
  38 a Tradition, where the text, its sequence of readings, and its relationships
  39 between readings are actually kept.
  40
  41 =head1 SUBROUTINES
  42
  43 =head2 run_analysis( $tradition, %opts )
  44
  45 Runs the analysis described in analyze_variant_location on every location in the
  46 collation of the given tradition, with the given options. These include:
  47
  48 =over 4
  49
  50 =item * stemma_id - Specify which of the tradition's stemmata to use. Default
  51 is 0 (i.e. the first).
  52
  53 =item * ranks - Specify a list of location ranks to analyze; exclude the rest.
  54
  55 =item * merge_types - Specify a list of relationship types, where related readings
  56 should be treated as identical for the purposes of analysis.
  57
  58 =back
  59
  60 =begin testing
  61
  62 use Text::Tradition;
  63 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
  64
  65 my $datafile = 't/data/florilegium_tei_ps.xml';
  66 my $tradition = Text::Tradition->new( 'input' => 'TEI',
  67                                       'name' => 'test0',
  68                                       'file' => $datafile );
  69 my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
  70 is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
  71
  72 my %expected_genealogical = (
  73         1 => 0,
  74         2 => 1,
  75         3 =>  0,
  76         5 =>  0,
  77         7 =>  0,
  78         8 =>  0,
  79         10 => 0,
  80         13 => 1,
  81         33 => 0,
  82         34 => 0,
  83         37 => 0,
  84         60 => 0,
  85         81 => 1,
  86         84 => 0,
  87         87 => 0,
  88         101 => 0,
  89         102 => 0,
  90         122 => 1,
  91         157 => 0,
  92         166 => 1,
  93         169 => 1,
  94         200 => 0,
  95         216 => 1,
  96         217 => 1,
  97         219 => 1,
  98         241 => 1,
  99         242 => 1,
 100         243 => 1,
 101 );
 102
 103 my $data = run_analysis( $tradition );
 104 foreach my $row ( @{$data->{'variants'}} ) {
 105         # Account for rows that used to be "not useful"
 106         unless( exists $expected_genealogical{$row->{'id'}} ) {
 107                 $expected_genealogical{$row->{'id'}} = 1;
 108         }
 109         is( $row->{'genealogical'}, $expected_genealogical{$row->{'id'}},
 110                 "Got correct genealogical flag for row " . $row->{'id'} );
 111 }
 112 is( $data->{'conflict_count'}, 34, "Got right conflict count" );
 113 is( $data->{'variant_count'}, 58, "Got right total variant number" );
 114
 115 =end testing
 116
 117 =cut
 118
 119 sub run_analysis {
 120         my( $tradition, %opts ) = @_;
 121         my $c = $tradition->collation;
 122
 123         my $stemma_id = $opts{'stemma_id'} || 0;
 124         my @ranks = @{$opts{'ranks'}} if ref( $opts{'ranks'} ) eq 'ARRAY';
 125         my @collapse = @{$opts{'merge_types'}} if ref( $opts{'merge_types'} ) eq 'ARRAY';
 126
 127         # Get the stemma
 128         my $stemma = $tradition->stemma( $stemma_id );
 129         # Figure out which witnesses we are working with
 130         my @lacunose = $stemma->hypotheticals;
 131         my @tradition_wits = map { $_->sigil } $tradition->witnesses;
 132         map { push( @tradition_wits, $_->sigil."_ac" ) if $_->is_layered }
 133                 $tradition->witnesses;
 134         push( @lacunose, _symmdiff( [ $stemma->witnesses ], \@tradition_wits ) );
 135
 136         # Find and mark 'common' ranks for exclusion, unless they were
 137         # explicitly specified.
 138         unless( @ranks ) {
 139                 my %common_rank;
 140                 foreach my $rdg ( $c->common_readings ) {
 141                         $common_rank{$rdg->rank} = 1;
 142                 }
 143                 @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
 144         }
 145
 146         # Group the variants to send to the solver
 147         my @groups;
 148         my %lacunae;
 149         foreach my $rank ( @ranks ) {
 150                 my $missing = [ @lacunose ];
 151                 push( @groups, group_variants( $tradition, $rank, $missing, \@collapse ) );
 152                 $lacunae{$rank} = $missing;
 153         }
 154
 155         # Parse the answer
 156         my $answer = solve_variants( $stemma, @groups );
 157
 158         # Do further analysis on the answer
 159         my $conflict_count = 0;
 160         foreach my $idx ( 0 .. $#ranks ) {
 161                 my $location = $answer->{'variants'}->[$idx];
 162                 # Add the rank back in
 163                 $location->{'id'} = $ranks[$idx];
 164                 # Add the lacunae back in
 165                 $location->{'missing'} = $lacunae{$ranks[$idx]};
 166                 # Run the extra analysis we need.
 167                 analyze_location( $tradition, $stemma->graph, $location );
 168                 # Add the reading text back in
 169                 foreach my $rdghash ( @{$location->{'readings'}} ) {
 170                         $conflict_count++
 171                                 if exists $rdghash->{'conflict'} && $rdghash->{'conflict'};
 172                         my $rdg = $c->reading( $rdghash->{'readingid'} );
 173                         $rdghash->{'text'} = $rdg ? $rdg->text : $rdghash->{'readingid'};
 174                 }
 175         }
 176         $answer->{'conflict_count'} = $conflict_count;
 177
 178         return $answer;
 179 }
 180
 181 =head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
 182
 183 Groups the variants at the given $rank of the collation, treating any
 184 relationships in @merge_relationship_types as equivalent.  $lacunose should
 185 be a reference to an array, to which the sigla of lacunose witnesses at this
 186 rank will be appended.
 187
 188 Returns two ordered lists $readings, $groups, where $readings->[$n] is attested
 189 by the witnesses listed in $groups->[$n].
 190
 191 =cut
 192
 193 # Return group_readings, groups, lacunose
 194 sub group_variants {
 195         my( $tradition, $rank, $lacunose, $collapse ) = @_;
 196         my $c = $tradition->collation;
 197         # All the regexps here are to get rid of space characters in witness names.
 198         my $aclabel = $c->ac_label;
 199         $aclabel =~ s/\s/_/g;
 200         # Get the alignment table readings
 201         my %readings_at_rank;
 202         my @gap_wits;
 203         foreach my $tablewit ( @{$tradition->collation->alignment_table->{'alignment'}} ) {
 204                 my $rdg = $tablewit->{'tokens'}->[$rank-1];
 205                 my $wit = $tablewit->{'witness'};
 206                 $wit =~ s/\s/_/g;
 207                 if( $rdg && $rdg->{'t'}->is_lacuna ) {
 208                         _add_to_witlist( $wit, $lacunose, $aclabel );
 209                 } elsif( $rdg ) {
 210                         $readings_at_rank{$rdg->{'t'}->text} = $rdg->{'t'};
 211                 } else {
 212                         _add_to_witlist( $wit, \@gap_wits, $aclabel );
 213                 }
 214         }
 215
 216         # Group the readings, collapsing groups by relationship if needed
 217         my %grouped_readings;
 218         foreach my $rdg ( sort { $b->witnesses <=> $a->witnesses } values %readings_at_rank ) {
 219                 # Skip readings that have been collapsed into others.
 220                 next if exists $grouped_readings{$rdg->id} && !$grouped_readings{$rdg->id};
 221                 my @wits = $rdg->witnesses;
 222                 map { s/\s/_/g } @wits;
 223                 if( $collapse ) {
 224                         my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
 225                         foreach my $other ( $rdg->related_readings( $filter ) ) {
 226                                 my @otherwits = $other->witnesses;
 227                                 map { s/\s/_/g } @otherwits;
 228                                 push( @wits, @otherwits );
 229                                 $grouped_readings{$other->id} = 0;
 230                         }
 231                 }
 232                 $grouped_readings{$rdg->id} = \@wits;
 233         }
 234         $grouped_readings{'(omitted)'} = \@gap_wits if @gap_wits;
 235         # Get rid of our collapsed readings
 236         map { delete $grouped_readings{$_} unless $grouped_readings{$_} }
 237                 keys %grouped_readings
 238                 if $collapse;
 239
 240         return \%grouped_readings;
 241 }
 242
 243 =head2 solve_variants( $graph, @groups )
 244
 245 Sends the set of groups to the external graph solver service and returns
 246 a cleaned-up answer, adding the rank IDs back where they belong.
 247
 248 The JSON has the form
 249   { "graph": [ stemmagraph DOT string without newlines ],
 250     "groupings": [ array of arrays of groups, one per rank ] }
 251
 252 The answer has the form
 253   { "variants" => [ array of variant location structures ],
 254     "variant_count" => total,
 255     "conflict_count" => number of conflicts detected,
 256     "genealogical_count" => number of solutions found }
 257
 258 =cut
 259
 260 sub solve_variants {
 261         my( $stemma, @groups ) = @_;
 262
 263         # Make the json with stemma + groups
 264         my $jsonstruct = { 'graph' => $stemma->editable( ' ' ), 'groupings' => [] };
 265         foreach my $ghash ( @groups ) {
 266                 my @grouping;
 267                 foreach my $k ( sort keys %$ghash ) {
 268                         push( @grouping, $ghash->{$k} );
 269                 }
 270                 push( @{$jsonstruct->{'groupings'}}, \@grouping );
 271         }
 272         my $json = encode_json( $jsonstruct );
 273
 274         # Send it off and get the result
 275         my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
 276         my $ua = LWP::UserAgent->new();
 277         my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
 278                                                   'Content' => $json );
 279
 280         my $answer;
 281         if( $resp->is_success ) {
 282                 $answer = decode_json( $resp->content );
 283         } else {
 284                 # Fall back to the old method.
 285                 warn "IDP solver returned " . $resp->status_line . " / " . $resp->content
 286                         . "; falling back to perl method";
 287                 $answer = perl_solver( $stemma, @{$jsonstruct->{'groupings'}} );
 288         }
 289
 290         # Fold the result back into what we know about the groups.
 291         my $variants = [];
 292         my $genealogical = 0;
 293         foreach my $idx ( 0 .. $#groups ) {
 294                 my( $calc_groups, $result ) = @{$answer->[$idx]};
 295                 $genealogical++ if $result;
 296                 my $input_group = $groups[$idx];
 297                 foreach my $k ( sort keys %$input_group ) {
 298                         my $cg = shift @$calc_groups;
 299                         $input_group->{$k} = $cg;
 300                 }
 301                 my $vstruct = {
 302                         'genealogical' => $result,
 303                         'readings' => [],
 304                 };
 305                 foreach my $k ( keys %$input_group ) {
 306                         push( @{$vstruct->{'readings'}},
 307                                   { 'readingid' => $k, 'group' => $input_group->{$k}} );
 308                 }
 309                 push( @$variants, $vstruct );
 310         }
 311
 312         return { 'variants' => $variants,
 313                          'variant_count' => scalar @$variants,
 314                          'genealogical_count' => $genealogical };
 315 }
 316
 317 =head2 analyze_location ( $tradition, $graph, $location_hash )
 318
 319 Given the tradition, its stemma graph, and the solution from the graph solver,
 320 work out the rest of the information we want.  For each reading we need missing,
 321 conflict, reading_parents, independent_occurrence, followed, not_followed, and follow_unknown.  Alters the location_hash in place.
 322
 323 =cut
 324
 325 sub analyze_location {
 326         my ( $tradition, $graph, $variant_row ) = @_;
 327
 328         # Make a hash of all known node memberships, and make the subgraphs.
 329         my $contig = {};
 330         my $reading_roots = {};
 331         my $subgraph = {};
 332     foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
 333         my $rid = $rdghash->{'readingid'};
 334                 map { $contig->{$_} = $rid } @{$rdghash->{'group'}};
 335
 336         # Make the subgraph.
 337         my $part = $graph->copy;
 338                 my %these_vertices;
 339                 map { $these_vertices{$_} = 1 } @{$rdghash->{'group'}};
 340                 $part->delete_vertices( grep { !$these_vertices{$_} } $part->vertices );
 341                 $subgraph->{$rid} = $part;
 342                 # Get the reading roots.
 343                 map { $reading_roots->{$_} = $rid } $part->predecessorless_vertices;
 344         }
 345
 346         # Now that we have all the node group memberships, calculate followed/
 347     # non-followed/unknown values for each reading.  Also figure out the
 348     # reading's evident parent(s).
 349     foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
 350         # Group string key - TODO do we need this?
 351         my $gst = wit_stringify( $rdghash->{'group'} );
 352         my $rid = $rdghash->{'readingid'};
 353         # Get the subgraph
 354         my $part = $subgraph->{$rid};
 355
 356         # Start figuring things out.
 357         my @roots = $part->predecessorless_vertices;
 358         $rdghash->{'independent_occurrence'} = scalar @roots;
 359         $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
 360         # Find the parent readings, if any, of this reading.
 361         my %rdgparents;
 362         foreach my $wit ( @roots ) {
 363                 # Look in the main stemma to find this witness's extant or known-reading
 364                 # immediate ancestor(s), and look up the reading that each ancestor olds.
 365                         my @check = $graph->predecessors( $wit );
 366                         while( @check ) {
 367                                 my @next;
 368                                 foreach my $wparent( @check ) {
 369                                         my $preading = $contig->{$wparent};
 370                                         if( $preading ) {
 371                                                 $rdgparents{$preading} = 1;
 372                                         } else {
 373                                                 push( @next, $graph->predecessors( $wparent ) );
 374                                         }
 375                                 }
 376                                 @check = @next;
 377                         }
 378                 }
 379                 $rdghash->{'reading_parents'} = [ keys %rdgparents ];
 380
 381                 # Find the number of times this reading was altered, and the number of
 382                 # times we're not sure.
 383                 my( %nofollow, %unknownfollow );
 384                 foreach my $wit ( $part->vertices ) {
 385                         foreach my $wchild ( $graph->successors( $wit ) ) {
 386                                 next if $part->has_vertex( $wchild );
 387                                 if( $reading_roots->{$wchild} && $contig->{$wchild} ) {
 388                                         # It definitely changed here.
 389                                         $nofollow{$wchild} = 1;
 390                                 } elsif( !($contig->{$wchild}) ) {
 391                                         # The child is a hypothetical node not definitely in
 392                                         # any group. Answer is unknown.
 393                                         $unknownfollow{$wchild} = 1;
 394                                 } # else it's a non-root node in a known group, and therefore
 395                                   # is presumed to have its reading from its group, not this link.
 396                         }
 397                 }
 398                 $rdghash->{'not_followed'} = keys %nofollow;
 399                 $rdghash->{'follow_unknown'} = keys %unknownfollow;
 400
 401                 # Now say whether this reading represents a conflict.
 402                 unless( $variant_row->{'genealogical'} ) {
 403                         $rdghash->{'conflict'} = @roots != 1;
 404                 }
 405     }
 406 }
 407
 408
 409 =head2 perl_solver( $tradition, $rank, $stemma_id, @merge_relationship_types )
 410
 411 ** NOTE ** This method should hopefully not be called - it is not guaranteed
 412 to be correct.  Serves as a backup for the real solver.
 413
 414 Runs an analysis of the given tradition, at the location given in $rank,
 415 against the graph of the stemma specified in $stemma_id.  The argument
 416 @merge_relationship_types is an optional list of relationship types for
 417 which readings so related should be treated as equivalent.
 418
 419 Returns a nested array data structure as follows:
 420
 421  [ [ group_list, is_genealogical ], [ group_list, is_genealogical ] ... ]
 422
 423 where the group list is the array of arrays passed in for each element of @groups,
 424 possibly with the addition of hypothetical readings.
 425
 426
 427 =cut
 428
 429 sub perl_solver {
 430         my( $stemma, @groups ) = @_;
 431         my $graph = $stemma->graph;
 432         my @answer;
 433         foreach my $g ( @groups ) {
 434                 push( @answer, _solve_variant_location( $graph, $g ) );
 435         }
 436         return \@answer;
 437 }
 438
 439 sub _solve_variant_location {
 440         my( $graph, $groups ) = @_;
 441         # Now do the work.
 442     my $contig = {};
 443     my $subgraph = {};
 444     my $is_conflicted;
 445     my $conflict = {};
 446
 447     # Mark each ms as in its own group, first.
 448     foreach my $g ( @$groups ) {
 449         my $gst = wit_stringify( $g );
 450         map { $contig->{$_} = $gst } @$g;
 451     }
 452
 453     # Now for each unmarked node in the graph, initialize an array
 454     # for possible group memberships.  We will use this later to
 455     # resolve potential conflicts.
 456     map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
 457     foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
 458         my $gst = wit_stringify( $g );  # This is the group name
 459         # Copy the graph, and delete all non-members from the new graph.
 460         my $part = $graph->copy;
 461         my @group_roots;
 462         $part->delete_vertices(
 463             grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
 464
 465         # Now look to see if our group is connected.
 466                 if( @$g > 1 ) {
 467                         # We have to take directionality into account.
 468                         # How many root nodes do we have?
 469                         my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
 470                                 $part->predecessorless_vertices;
 471                         # Assuming that @$g > 1, find the first root node that has at
 472                         # least one successor belonging to our group. If this reading
 473                         # is genealogical, there should be only one, but we will check
 474                         # that implicitly later.
 475                         foreach my $root ( @roots ) {
 476                                 # Prune the tree to get rid of extraneous hypotheticals.
 477                                 $root = _prune_subtree( $part, $root, $contig );
 478                                 next unless $root;
 479                                 # Save this root for our group.
 480                                 push( @group_roots, $root );
 481                                 # Get all the successor nodes of our root.
 482                         }
 483                 } else {
 484                         # Dispense with the trivial case of one reading.
 485                         my $wit = $g->[0];
 486                         @group_roots = ( $wit );
 487                         foreach my $v ( $part->vertices ) {
 488                                 $part->delete_vertex( $v ) unless $v eq $wit;
 489                         }
 490         }
 491
 492         if( @group_roots > 1 ) {
 493                 $conflict->{$gst} = 1;
 494                 $is_conflicted = 1;
 495         }
 496         # Paint the 'hypotheticals' with our group.
 497                 foreach my $wit ( $part->vertices ) {
 498                         if( ref( $contig->{$wit} ) ) {
 499                                 push( @{$contig->{$wit}}, $gst );
 500                         } elsif( $contig->{$wit} ne $gst ) {
 501                                 warn "How did we get here?";
 502                         }
 503                 }
 504
 505
 506                 # Save the relevant subgraph.
 507                 $subgraph->{$gst} = $part;
 508     }
 509
 510         # For each of our hypothetical readings, flatten its 'contig' array if
 511         # the array contains zero or one group.  If we have any unflattened arrays,
 512         # we may need to run the resolution process. If the reading is already known
 513         # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
 514         # it.
 515         my @resolve;
 516         foreach my $wit ( keys %$contig ) {
 517                 next unless ref( $contig->{$wit} );
 518                 if( @{$contig->{$wit}} > 1 ) {
 519                         if( $is_conflicted ) {
 520                                 $contig->{$wit} = '';  # We aren't going to decide.
 521                         } else {
 522                                 push( @resolve, $wit );
 523                         }
 524                 } else {
 525                         my $gst = pop @{$contig->{$wit}};
 526                         $contig->{$wit} = $gst || '';
 527                 }
 528         }
 529
 530     if( @resolve ) {
 531         my $still_contig = {};
 532         foreach my $h ( @resolve ) {
 533             # For each of the hypothetical readings with more than one possibility,
 534             # try deleting it from each of its member subgraphs in turn, and see
 535             # if that breaks the contiguous grouping.
 536             # TODO This can still break in a corner case where group A can use
 537             # either vertex 1 or 2, and group B can use either vertex 2 or 1.
 538             # Revisit this if necessary; it could get brute-force nasty.
 539             foreach my $gst ( @{$contig->{$h}} ) {
 540                 my $gpart = $subgraph->{$gst}->copy();
 541                 # If we have come this far, there is only one root and everything
 542                 # is reachable from it.
 543                 my( $root ) = $gpart->predecessorless_vertices;
 544                 my $reachable = {};
 545                 map { $reachable->{$_} = 1 } $gpart->vertices;
 546
 547                 # Try deleting the hypothetical node.
 548                 $gpart->delete_vertex( $h );
 549                 if( $h eq $root ) {
 550                         # See if we still have a single root.
 551                         my @roots = $gpart->predecessorless_vertices;
 552                         warn "This shouldn't have happened" unless @roots;
 553                         if( @roots > 1 ) {
 554                                 # $h is needed by this group.
 555                                 if( exists( $still_contig->{$h} ) ) {
 556                                         # Conflict!
 557                                         $conflict->{$gst} = 1;
 558                                         $still_contig->{$h} = '';
 559                                 } else {
 560                                         $still_contig->{$h} = $gst;
 561                                 }
 562                         }
 563                 } else {
 564                         # $h is somewhere in the middle. See if everything
 565                         # else can still be reached from the root.
 566                                         my %still_reachable = ( $root => 1 );
 567                                         map { $still_reachable{$_} = 1 }
 568                                                 $gpart->all_successors( $root );
 569                                         foreach my $v ( keys %$reachable ) {
 570                                                 next if $v eq $h;
 571                                                 if( !$still_reachable{$v}
 572                                                         && ( $contig->{$v} eq $gst
 573                                                                  || ( exists $still_contig->{$v}
 574                                                                           && $still_contig->{$v} eq $gst ) ) ) {
 575                                                         # We need $h.
 576                                                         if( exists $still_contig->{$h} ) {
 577                                                                 # Conflict!
 578                                                                 $conflict->{$gst} = 1;
 579                                                                 $still_contig->{$h} = '';
 580                                                         } else {
 581                                                                 $still_contig->{$h} = $gst;
 582                                                         }
 583                                                         last;
 584                                                 } # else we don't need $h in this group.
 585                                         } # end foreach $v
 586                                 } # endif $h eq $root
 587             } # end foreach $gst
 588         } # end foreach $h
 589
 590         # Now we have some hypothetical vertices in $still_contig that are the
 591         # "real" group memberships.  Replace these in $contig.
 592                 foreach my $v ( keys %$contig ) {
 593                         next unless ref $contig->{$v};
 594                         $contig->{$v} = $still_contig->{$v};
 595                 }
 596     } # end if @resolve
 597
 598     my $is_genealogical = keys %$conflict ? JSON::false : JSON::true;
 599         my $variant_row = [ [], $is_genealogical ];
 600         # Fill in the groupings from $contig.
 601         foreach my $g ( @$groups ) {
 602         my $gst = wit_stringify( $g );
 603         my @realgroup = grep { $contig->{$_} eq $gst } keys %$contig;
 604         push( @{$variant_row->[0]}, \@realgroup );
 605     }
 606     return $variant_row;
 607 }
 608
 609 sub _prune_subtree {
 610     my( $tree, $root, $contighash ) = @_;
 611     # First, delete hypothetical leaves / orphans until there are none left.
 612     my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
 613         $tree->successorless_vertices;
 614     while( @orphan_hypotheticals ) {
 615         $tree->delete_vertices( @orphan_hypotheticals );
 616         @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
 617             $tree->successorless_vertices;
 618     }
 619     # Then delete a hypothetical root with only one successor, moving the
 620     # root to the first child that has no other predecessors.
 621     while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
 622         my @nextroot = $tree->successors( $root );
 623         $tree->delete_vertex( $root );
 624         ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
 625     }
 626     # The tree has been modified in place, but we need to know the new root.
 627     $root = undef unless $root && $tree->has_vertex( $root );
 628     return $root;
 629 }
 630 # Add the variant, subject to a.c. representation logic.
 631 # This assumes that we will see the 'main' version before the a.c. version.
 632 sub add_variant_wit {
 633     my( $arr, $wit, $acstr ) = @_;
 634     my $skip;
 635     if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
 636         my $real = $1;
 637         $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
 638     }
 639     push( @$arr, $wit ) unless $skip;
 640 }
 641
 642 sub _useful_variant {
 643         my( $group_readings, $graph, $acstr ) = @_;
 644
 645         # TODO Decide what to do with AC witnesses
 646
 647         # Sort by group size and return
 648         my $is_useful = 0;
 649         my( @readings, @groups );   # The sorted groups for our answer.
 650         foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
 651                 keys %$group_readings ) {
 652                 push( @readings, $rdg );
 653                 push( @groups, $group_readings->{$rdg} );
 654                 if( @{$group_readings->{$rdg}} > 1 ) {
 655                         $is_useful++;
 656                 } else {
 657                         my( $wit ) = @{$group_readings->{$rdg}};
 658                         $wit =~ s/^(.*)\Q$acstr\E$/$1/;
 659                         $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
 660                 }
 661         }
 662         if( $is_useful > 1 ) {
 663                 return( \@readings, \@groups );
 664         } else {
 665                 return( [], [] );
 666         }
 667 }
 668
 669 =head2 wit_stringify( $groups )
 670
 671 Takes an array of witness groupings and produces a string like
 672 ['A','B'] / ['C','D','E'] / ['F']
 673
 674 =cut
 675
 676 sub wit_stringify {
 677     my $groups = shift;
 678     my @gst;
 679     # If we were passed an array of witnesses instead of an array of
 680     # groupings, then "group" the witnesses first.
 681     unless( ref( $groups->[0] ) ) {
 682         my $mkgrp = [ $groups ];
 683         $groups = $mkgrp;
 684     }
 685     foreach my $g ( @$groups ) {
 686         push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
 687     }
 688     return join( ' / ', @gst );
 689 }
 690
 691 # Helper function to ensure that X and X a.c. never appear in the same list.
 692 sub _add_to_witlist {
 693         my( $wit, $list, $acstr ) = @_;
 694         my %inlist;
 695         my $idx = 0;
 696         map { $inlist{$_} = $idx++ } @$list;
 697         if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
 698                 my $acwit = $1;
 699                 unless( exists $inlist{$acwit} ) {
 700                         push( @$list, $acwit.$acstr );
 701                 }
 702         } else {
 703                 if( exists( $inlist{$wit.$acstr} ) ) {
 704                         # Replace the a.c. version with the main witness
 705                         my $i = $inlist{$wit.$acstr};
 706                         $list->[$i] = $wit;
 707                 } else {
 708                         push( @$list, $wit );
 709                 }
 710         }
 711 }
 712
 713 sub _symmdiff {
 714         my( $lista, $listb ) = @_;
 715         my %union;
 716         my %scalars;
 717         map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
 718         map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
 719         my @set = grep { $union{$_} == 1 } keys %union;
 720         return map { $scalars{$_} } @set;
 721 }
 722
 723 1;
 724
 725 =head1 LICENSE
 726
 727 This package is free software and is provided "as is" without express
 728 or implied warranty.  You can redistribute it and/or modify it under
 729 the same terms as Perl itself.
 730
 731 =head1 AUTHOR
 732
 733 Tara L Andrews E<lt>aurum@cpan.orgE<gt>