lib/Text/Tradition/Analysis.pm

   1 package Text::Tradition::Analysis;
   2
   3 use strict;
   4 use warnings;
   5 use Benchmark;
   6 use Encode qw/ encode_utf8 /;
   7 use Exporter 'import';
   8 use Graph;
   9 use JSON qw/ encode_json decode_json /;
  10 use LWP::UserAgent;
  11 use Text::LevenshteinXS qw/ distance /;
  12 use Text::Tradition;
  13 use Text::Tradition::Stemma;
  14 use TryCatch;
  15
  16 use vars qw/ @EXPORT_OK /;
  17 @EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
  18
  19 =head1 NAME
  20
  21 Text::Tradition::Analysis - functions for stemma analysis of a tradition
  22
  23 =head1 SYNOPSIS
  24
  25   use Text::Tradition;
  26   use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
  27   my $t = Text::Tradition->new(
  28     'name' => 'this is a text',
  29     'input' => 'TEI',
  30     'file' => '/path/to/tei_parallel_seg_file.xml' );
  31   $t->add_stemma( 'dotfile' => $stemmafile );
  32
  33   my $variant_data = run_analysis( $tradition );
  34   # Recalculate rank $n treating all orthographic variants as equivalent
  35   my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
  36
  37 =head1 DESCRIPTION
  38
  39 Text::Tradition is a library for representation and analysis of collated
  40 texts, particularly medieval ones.  The Collation is the central feature of
  41 a Tradition, where the text, its sequence of readings, and its relationships
  42 between readings are actually kept.
  43
  44 =head1 SUBROUTINES
  45
  46 =head2 run_analysis( $tradition, %opts )
  47
  48 Runs the analysis described in analyze_variant_location on every location in the
  49 collation of the given tradition, with the given options. These include:
  50
  51 =over 4
  52
  53 =item * stemma_id - Specify which of the tradition's stemmata to use. Default
  54 is 0 (i.e. the first).
  55
  56 =item * ranks - Specify a list of location ranks to analyze; exclude the rest.
  57
  58 =item * merge_types - Specify a list of relationship types, where related readings
  59 should be treated as identical for the purposes of analysis.
  60
  61 =item * exclude_type1 - Exclude those ranks whose groupings have only type-1 variants.
  62
  63 =back
  64
  65 =begin testing
  66
  67 use Text::Tradition;
  68 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
  69
  70 my $datafile = 't/data/florilegium_tei_ps.xml';
  71 my $tradition = Text::Tradition->new( 'input' => 'TEI',
  72                                       'name' => 'test0',
  73                                       'file' => $datafile );
  74 my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
  75 is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
  76
  77 my %expected_genealogical = (
  78         1 => 0,
  79         2 => 1,
  80         3 =>  0,
  81         5 =>  0,
  82         7 =>  0,
  83         8 =>  0,
  84         10 => 0,
  85         13 => 1,
  86         33 => 0,
  87         34 => 0,
  88         37 => 0,
  89         60 => 0,
  90         81 => 1,
  91         84 => 0,
  92         87 => 0,
  93         101 => 0,
  94         102 => 0,
  95         122 => 1,
  96         157 => 0,
  97         166 => 1,
  98         169 => 1,
  99         200 => 0,
 100         216 => 1,
 101         217 => 1,
 102         219 => 1,
 103         241 => 1,
 104         242 => 1,
 105         243 => 1,
 106 );
 107
 108 my $data = run_analysis( $tradition );
 109 my $c = $tradition->collation;
 110 foreach my $row ( @{$data->{'variants'}} ) {
 111         # Account for rows that used to be "not useful"
 112         unless( exists $expected_genealogical{$row->{'id'}} ) {
 113                 $expected_genealogical{$row->{'id'}} = 1;
 114         }
 115         my $gen_bool = $row->{'genealogical'} ? 1 : 0;
 116         is( $gen_bool, $expected_genealogical{$row->{'id'}},
 117                 "Got correct genealogical flag for row " . $row->{'id'} );
 118         # Check that we have the right row with the right groups
 119         my $rank = $row->{'id'};
 120         foreach my $rdghash ( @{$row->{'readings'}} ) {
 121                 # Skip 'readings' that aren't really
 122                 next unless $c->reading( $rdghash->{'readingid'} );
 123                 # Check the rank
 124                 is( $c->reading( $rdghash->{'readingid'} )->rank, $rank,
 125                         "Got correct reading rank" );
 126                 # Check the witnesses
 127                 my @realwits = sort $c->reading_witnesses( $rdghash->{'readingid'} );
 128                 my @sgrp = sort @{$rdghash->{'group'}};
 129                 is_deeply( \@sgrp, \@realwits, "Reading analyzed with correct groups" );
 130         }
 131 }
 132 is( $data->{'variant_count'}, 58, "Got right total variant number" );
 133 # TODO Make something meaningful of conflict count, maybe test other bits
 134
 135 =end testing
 136
 137 =cut
 138
 139 sub run_analysis {
 140         my( $tradition, %opts ) = @_;
 141         my $c = $tradition->collation;
 142
 143         my $stemma_id = $opts{'stemma_id'} || 0;
 144         my @ranks = ref( $opts{'ranks'} ) eq 'ARRAY' ? @{$opts{'ranks'}} : ();
 145         my @collapse = ref( $opts{'merge_types'} ) eq 'ARRAY' ? @{$opts{'merge_types'}} : ();
 146
 147         # Get the stemma
 148         my $stemma = $tradition->stemma( $stemma_id );
 149
 150         # Figure out which witnesses we are working with - that is, the ones that
 151         # appear both in the stemma and in the tradition. All others are 'lacunose'
 152         # for our purposes.
 153         my @lacunose = $stemma->hypotheticals;
 154         my @tradition_wits = map { $_->sigil } $tradition->witnesses;
 155         push( @lacunose, _symmdiff( [ $stemma->witnesses ], \@tradition_wits ) );
 156
 157         # Find and mark 'common' ranks for exclusion, unless they were
 158         # explicitly specified.
 159         unless( @ranks ) {
 160                 my %common_rank;
 161                 foreach my $rdg ( $c->common_readings ) {
 162                         $common_rank{$rdg->rank} = 1;
 163                 }
 164                 @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
 165         }
 166
 167         # Group the variants to send to the solver
 168         my @groups;
 169         my @use_ranks;
 170         my %lacunae;
 171         my $moved = {};
 172         foreach my $rank ( @ranks ) {
 173                 my $missing = [ @lacunose ];
 174                 my $rankgroup = group_variants( $tradition, $rank, $missing, $moved, \@collapse );
 175                 # Filter out any empty rankgroups
 176                 # (e.g. from the later rank for a transposition)
 177                 next unless keys %$rankgroup;
 178                 if( $opts{'exclude_type1'} ) {
 179                         # Check to see whether this is a "useful" group.
 180                         my( $rdgs, $grps ) = _useful_variant( $rankgroup,
 181                                 $stemma->graph, $c->ac_label );
 182                         next unless @$rdgs;
 183                 }
 184                 push( @use_ranks, $rank );
 185                 push( @groups, $rankgroup );
 186                 $lacunae{$rank} = $missing;
 187         }
 188         # Run the solver
 189         my $answer = solve_variants( $stemma, @groups );
 190
 191         # Do further analysis on the answer
 192         my $conflict_count = 0;
 193         my $aclabel = $c->ac_label;
 194         foreach my $idx ( 0 .. $#use_ranks ) {
 195                 my $location = $answer->{'variants'}->[$idx];
 196                 # Add the rank back in
 197                 my $rank = $use_ranks[$idx];
 198                 $location->{'id'} = $rank;
 199                 # Note what our lacunae are
 200                 my %lmiss;
 201                 map { $lmiss{$_} = 1 } @{$lacunae{$use_ranks[$idx]}};
 202                 $location->{'missing'} = [ keys %lmiss ];
 203
 204                 # Run the extra analysis we need.
 205                 analyze_location( $tradition, $stemma, $location, \%lmiss );
 206
 207                 my @layerwits;
 208                 # Do the final post-analysis tidying up of the data.
 209                 foreach my $rdghash ( @{$location->{'readings'}} ) {
 210                         $conflict_count++
 211                                 if exists $rdghash->{'conflict'} && $rdghash->{'conflict'};
 212                         # Add the reading text back in, setting display value as needed
 213                         my $rdg = $c->reading( $rdghash->{'readingid'} );
 214                         if( $rdg ) {
 215                                 $rdghash->{'text'} = $rdg->text .
 216                                         ( $rdg->rank == $rank ? '' : ' [' . $rdg->rank . ']' );
 217                                 $rdghash->{'is_ungrammatical'} = $rdg->grammar_invalid;
 218                                 $rdghash->{'is_nonsense'} = $rdg->is_nonsense;
 219                         }
 220                         # Remove lacunose witnesses from this reading's list now that the
 221                         # analysis is done
 222                         my @realgroup;
 223                         map { push( @realgroup, $_ ) unless $lmiss{$_} } @{$rdghash->{'group'}};
 224                         $rdghash->{'group'} = \@realgroup;
 225                         # Note any layered witnesses that appear in this group
 226                         foreach( @realgroup ) {
 227                                 if( $_ =~ /^(.*)\Q$aclabel\E$/ ) {
 228                                         push( @layerwits, $1 );
 229                                 }
 230                         }
 231                 }
 232                 $location->{'layerwits'} = \@layerwits if @layerwits;
 233         }
 234         $answer->{'conflict_count'} = $conflict_count;
 235
 236         return $answer;
 237 }
 238
 239 =head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
 240
 241 Groups the variants at the given $rank of the collation, treating any
 242 relationships in @merge_relationship_types as equivalent.  $lacunose should
 243 be a reference to an array, to which the sigla of lacunose witnesses at this
 244 rank will be appended; $transposed should be a reference to a hash, wherein
 245 the identities of transposed readings and their relatives will be stored.
 246
 247 Returns a hash $group_readings where $rdg is attested by the witnesses listed
 248 in $group_readings->{$rdg}.
 249
 250 =cut
 251
 252 # Return group_readings, groups, lacunose
 253 sub group_variants {
 254         my( $tradition, $rank, $lacunose, $transposed, $collapse ) = @_;
 255         my $c = $tradition->collation;
 256         my $aclabel = $c->ac_label;
 257         my $table = $c->alignment_table;
 258         # Get the alignment table readings
 259         my %readings_at_rank;
 260         my %is_lacunose; # lookup table for witnesses not in stemma
 261         map { $is_lacunose{$_} = 1; $is_lacunose{$_.$aclabel} = 1 } @$lacunose;
 262         my @check_for_gaps;
 263         my %moved_wits;
 264         my $has_transposition;
 265         foreach my $tablewit ( @{$table->{'alignment'}} ) {
 266                 my $rdg = $tablewit->{'tokens'}->[$rank-1];
 267                 my $wit = $tablewit->{'witness'};
 268                 # Exclude the witness if it is "lacunose" which if we got here
 269                 # means "not in the stemma".
 270                 next if $is_lacunose{$wit};
 271                 # Note if the witness is actually in a lacuna
 272                 if( $rdg && $rdg->{'t'}->is_lacuna ) {
 273                         _add_to_witlist( $wit, $lacunose, $aclabel );
 274                 # Otherwise the witness either has a positive reading...
 275                 } elsif( $rdg ) {
 276                         # If the reading has been counted elsewhere as a transposition, ignore it.
 277                         if( $transposed->{$rdg->{'t'}->id} ) {
 278                                 # TODO Does this cope with three-way transpositions?
 279                                 map { $moved_wits{$_} = 1 } @{$transposed->{$rdg->{'t'}->id}};
 280                                 next;
 281                         }
 282                         # Otherwise, record it...
 283                         $readings_at_rank{$rdg->{'t'}->id} = $rdg->{'t'};
 284                         # ...and grab any transpositions, and their relations.
 285                         my @transp = grep { $_->rank != $rank } $rdg->{'t'}->related_readings();
 286                         foreach my $trdg ( @transp ) {
 287                                 next if exists $readings_at_rank{$trdg->id};
 288                                 $has_transposition = 1;
 289                                 my @affected_wits = _table_witnesses(
 290                                         $table, $trdg, \%is_lacunose, $aclabel );
 291                                 next unless @affected_wits;
 292                                 map { $moved_wits{$_} = 1 } @affected_wits;
 293                                 $transposed->{$trdg->id} =
 294                                         [ _table_witnesses( $table, $rdg->{'t'}, \%is_lacunose, $aclabel ) ];
 295                                 $readings_at_rank{$trdg->id} = $trdg;
 296                         }
 297                 # ...or it is empty, ergo a gap.
 298                 } else {
 299                         _add_to_witlist( $wit, \@check_for_gaps, $aclabel );
 300                 }
 301         }
 302         my @gap_wits;
 303         map { _add_to_witlist( $_, \@gap_wits, $aclabel )
 304                 unless $moved_wits{$_} } @check_for_gaps;
 305         # Group the readings, collapsing groups by relationship if needed
 306         my $grouped_readings = {};
 307         foreach my $rdg ( values %readings_at_rank ) {
 308                 # Skip readings that have been collapsed into others.
 309                 next if exists $grouped_readings->{$rdg->id}
 310                         && $grouped_readings->{$rdg->id} eq 'COLLAPSE';
 311                 # Get the witness list, including from readings collapsed into this one.
 312                 my @wits = _table_witnesses( $table, $rdg, \%is_lacunose, $aclabel );
 313                 if( $collapse && @$collapse ) {
 314                         my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
 315                         foreach my $other ( $rdg->related_readings( $filter ) ) {
 316                                 my @otherwits = _table_witnesses(
 317                                         $table, $other, \%is_lacunose, $aclabel );
 318                                 push( @wits, @otherwits );
 319                                 $grouped_readings->{$other->id} = 'COLLAPSE';
 320                         }
 321                 }
 322                 $grouped_readings->{$rdg->id} = \@wits;
 323         }
 324         $grouped_readings->{'(omitted)'} = \@gap_wits if @gap_wits;
 325         # Get rid of our collapsed readings
 326         map { delete $grouped_readings->{$_} if $grouped_readings->{$_} eq 'COLLAPSE' }
 327                 keys %$grouped_readings
 328                 if $collapse;
 329
 330         # If something was transposed, check the groups for doubled-up readings
 331         if( $has_transposition ) {
 332                 # print STDERR "Group for rank $rank:\n";
 333                 # map { print STDERR "\t$_: " . join( ' ' , @{$grouped_readings->{$_}} ) . "\n" }
 334                 #       keys %$grouped_readings;
 335                 _check_transposed_consistency( $c, $rank, $transposed, $grouped_readings );
 336         }
 337
 338         # Return the result
 339         return $grouped_readings;
 340 }
 341
 342 # Helper function to query the alignment table for all witnesses (a.c. included)
 343 # that have a given reading at its rank.
 344 sub _table_witnesses {
 345         my( $table, $trdg, $lacunose, $aclabel ) = @_;
 346         my $tableidx = $trdg->rank - 1;
 347         my @has_reading;
 348         foreach my $row ( @{$table->{'alignment'}} ) {
 349                 my $wit = $row->{'witness'};
 350                 next if $lacunose->{$wit};
 351                 my $rdg = $row->{'tokens'}->[$tableidx];
 352                 next unless exists $rdg->{'t'} && defined $rdg->{'t'};
 353                 _add_to_witlist( $wit, \@has_reading, $aclabel )
 354                         if $rdg->{'t'}->id eq $trdg->id;
 355         }
 356         return @has_reading;
 357 }
 358
 359 # Helper function to ensure that X and X a.c. never appear in the same list.
 360 sub _add_to_witlist {
 361         my( $wit, $list, $acstr ) = @_;
 362         my %inlist;
 363         my $idx = 0;
 364         map { $inlist{$_} = $idx++ } @$list;
 365         if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
 366                 my $acwit = $1;
 367                 unless( exists $inlist{$acwit} ) {
 368                         push( @$list, $acwit.$acstr );
 369                 }
 370         } else {
 371                 if( exists( $inlist{$wit.$acstr} ) ) {
 372                         # Replace the a.c. version with the main witness
 373                         my $i = $inlist{$wit.$acstr};
 374                         $list->[$i] = $wit;
 375                 } else {
 376                         push( @$list, $wit );
 377                 }
 378         }
 379 }
 380
 381 sub _check_transposed_consistency {
 382         my( $c, $rank, $transposed, $groupings ) = @_;
 383         my %seen_wits;
 384         my %thisrank;
 385         # Note which readings are actually at this rank, and which witnesses
 386         # belong to which reading.
 387         foreach my $rdg ( keys %$groupings ) {
 388                 my $rdgobj = $c->reading( $rdg );
 389                 # Count '(omitted)' as a reading at this rank
 390                 $thisrank{$rdg} = 1 if !$rdgobj || $rdgobj->rank == $rank;
 391                 map { push( @{$seen_wits{$_}}, $rdg ) } @{$groupings->{$rdg}};
 392         }
 393         # Our work is done if we have no witness belonging to more than one
 394         # reading.
 395         my @doubled = grep { scalar @{$seen_wits{$_}} > 1 } keys %seen_wits;
 396         return unless @doubled;
 397         # If we have a symmetric related transposition, drop the non-rank readings.
 398         if( @doubled == scalar keys %seen_wits ) {
 399                 foreach my $rdg ( keys %$groupings ) {
 400                         if( !$thisrank{$rdg} ) {
 401                                 my $groupstr = wit_stringify( $groupings->{$rdg} );
 402                                 my ( $matched ) = grep { $groupstr eq wit_stringify( $groupings->{$_} ) }
 403                                         keys %thisrank;
 404                                 delete $groupings->{$rdg};
 405                                 # If we found a group match, assume there is a symmetry happening.
 406                                 # TODO think more about this
 407                                 # print STDERR "*** Deleting symmetric reading $rdg\n";
 408                                 unless( $matched ) {
 409                                         delete $transposed->{$rdg};
 410                                         warn "Found problem in evident symmetry with reading $rdg";
 411                                 }
 412                         }
 413                 }
 414         # Otherwise 'unhook' the transposed reading(s) that have duplicates.
 415         } else {
 416                 foreach my $dup ( @doubled ) {
 417                         foreach my $rdg ( @{$seen_wits{$dup}} ) {
 418                                 next if $thisrank{$rdg};
 419                                 next unless exists $groupings->{$rdg};
 420                                 # print STDERR "*** Deleting asymmetric doubled-up reading $rdg\n";
 421                                 delete $groupings->{$rdg};
 422                                 delete $transposed->{$rdg};
 423                         }
 424                 }
 425                 # and put any now-orphaned readings into an 'omitted' reading.
 426                 foreach my $wit ( keys %seen_wits ) {
 427                         unless( grep { exists $groupings->{$_} } @{$seen_wits{$wit}} ) {
 428                                 $groupings->{'(omitted)'} = [] unless exists $groupings->{'(omitted)'};
 429                                 _add_to_witlist( $wit, $groupings->{'(omitted)'}, $c->ac_label );
 430                         }
 431                 }
 432         }
 433 }
 434
 435 =head2 solve_variants( $graph, @groups )
 436
 437 Sends the set of groups to the external graph solver service and returns
 438 a cleaned-up answer, adding the rank IDs back where they belong.
 439
 440 The JSON has the form
 441   { "graph": [ stemmagraph DOT string without newlines ],
 442     "groupings": [ array of arrays of groups, one per rank ] }
 443
 444 The answer has the form
 445   { "variants" => [ array of variant location structures ],
 446     "variant_count" => total,
 447     "conflict_count" => number of conflicts detected,
 448     "genealogical_count" => number of solutions found }
 449
 450 =cut
 451
 452 sub solve_variants {
 453         my( $stemma, @groups ) = @_;
 454         my $aclabel = $stemma->collation->ac_label;
 455
 456         # Filter the groups down to distinct groups, and work out what graph
 457         # should be used in the calculation of each group. We want to send each
 458         # distinct problem to the solver only once.
 459         # We need a whole bunch of lookup tables for this.
 460         my $index_groupkeys = {};       # Save the order of readings
 461         my $group_indices = {};         # Save the indices that have a given grouping
 462         my $graph_problems = {};        # Save the groupings for the given graph
 463
 464         foreach my $idx ( 0..$#groups ) {
 465                 my $ghash = $groups[$idx];
 466                 my @grouping;
 467                 # Sort the groupings from big to little, and scan for a.c. witnesses
 468                 # that would need an extended graph.
 469                 my @acwits;   # note which AC witnesses crop up at this rank
 470                 my @idxkeys = sort { scalar @{$ghash->{$b}} <=> scalar @{$ghash->{$a}} }
 471                         keys %$ghash;
 472                 foreach my $rdg ( @idxkeys ) {
 473                         my @sg = sort @{$ghash->{$rdg}};
 474                         push( @acwits, grep { $_ =~ /\Q$aclabel\E$/ } @sg );
 475                         push( @grouping, \@sg );
 476                 }
 477                 # Save the reading order
 478                 $index_groupkeys->{$idx} = \@idxkeys;
 479
 480                 # Now associate the distinct group with this index
 481                 my $gstr = wit_stringify( \@grouping );
 482                 push( @{$group_indices->{$gstr}}, $idx );
 483
 484                 # Finally, add the group to the list to be calculated for this graph.
 485                 map { s/\Q$aclabel\E$// } @acwits;
 486                 my $graph;
 487                 try {
 488                         $graph = $stemma->extend_graph( \@acwits );
 489                 } catch {
 490                         die "Unable to extend graph with @acwits";
 491                 }
 492                 unless( exists $graph_problems->{"$graph"} ) {
 493                         $graph_problems->{"$graph"} = { 'object' => $graph, 'groups' => [] };
 494                 }
 495                 push( @{$graph_problems->{"$graph"}->{'groups'}}, \@grouping );
 496         }
 497
 498         ## For each distinct graph, send its groups to the solver.
 499         my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
 500         my $ua = LWP::UserAgent->new();
 501         ## Witness map is a HACK to get around limitations in node names from IDP
 502         my $witness_map = {};
 503         ## Variables to store answers as they come back
 504         my $variants = [ ( undef ) x ( scalar keys %$index_groupkeys ) ];
 505         my $genealogical = 0;
 506         foreach my $graphkey ( keys %$graph_problems ) {
 507                 my $graph = $graph_problems->{$graphkey}->{'object'};
 508                 my $groupings = $graph_problems->{$graphkey}->{'groups'};
 509                 my $json = encode_json( _safe_wit_strings( $graph, $stemma->collation,
 510                         $groupings, $witness_map ) );
 511                 # Send it off and get the result
 512                 #print STDERR "Sending request: $json\n";
 513                 my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
 514                                                           'Content' => $json );
 515                 my $answer;
 516                 my $used_idp;
 517                 if( $resp->is_success ) {
 518                         $answer = _desanitize_names( decode_json( $resp->content ), $witness_map );
 519                         $used_idp = 1;
 520                 } else {
 521                         # Fall back to the old method.
 522                         warn "IDP solver returned " . $resp->status_line . " / " . $resp->content
 523                                 . "; falling back to perl method";
 524                         $answer = perl_solver( $graph, @$groupings );
 525                 }
 526                 ## The answer is the evaluated groupings, plus a boolean for whether
 527                 ## they were genealogical.  Reconstruct our original groups.
 528                 foreach my $gidx ( 0 .. $#{$groupings} ) {
 529                         my( $calc_groups, $result ) = @{$answer->[$gidx]};
 530                         if( $result ) {
 531                                 $genealogical++;
 532                                 # Prune the calculated groups, in case the IDP solver failed to.
 533                                 if( $used_idp ) {
 534                                         my @pruned_groups;
 535                                         foreach my $cg ( @$calc_groups ) {
 536                                                 # This is a little wasteful but the path of least
 537                                                 # resistance. Send both the stemma, which knows what
 538                                                 # its hypotheticals are, and the actual graph used.
 539                                                 my @pg = _prune_group( $cg, $stemma, $graph );
 540                                                 push( @pruned_groups, \@pg );
 541                                         }
 542                                         $calc_groups = \@pruned_groups;
 543                                 }
 544                         }
 545                         # Retrieve the key for the original group that went to the solver
 546                         my $input_group = wit_stringify( $groupings->[$gidx] );
 547                         foreach my $oidx ( @{$group_indices->{$input_group}} ) {
 548                                 my @readings = @{$index_groupkeys->{$oidx}};
 549                                 my $vstruct = {
 550                                         'genealogical' => $result,
 551                                         'readings' => [],
 552                                 };
 553                                 foreach my $ridx ( 0 .. $#readings ) {
 554                                         push( @{$vstruct->{'readings'}},
 555                                                 { 'readingid' => $readings[$ridx],
 556                                                   'group' => $calc_groups->[$ridx] } );
 557                                 }
 558                                 $variants->[$oidx] = $vstruct;
 559                         }
 560                 }
 561         }
 562
 563         return { 'variants' => $variants,
 564                          'variant_count' => scalar @$variants,
 565                          'genealogical_count' => $genealogical };
 566 }
 567
 568 #### HACKERY to cope with IDP's limited idea of what a node name looks like ###
 569
 570 sub _safe_wit_strings {
 571         my( $graph, $c, $groupings, $witness_map ) = @_;
 572         # Parse the graph we were given into a stemma.
 573         my $safegraph = Graph->new();
 574         # Convert the graph to a safe representation and store the conversion.
 575         foreach my $n ( $graph->vertices ) {
 576                 my $sn = _safe_witstr( $n );
 577                 if( exists $witness_map->{$sn} ) {
 578                         warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
 579                                 if $witness_map->{$sn} ne $n;
 580                 } else {
 581                         $witness_map->{$sn} = $n;
 582                 }
 583                 $safegraph->add_vertex( $sn );
 584                 $safegraph->set_vertex_attributes( $sn,
 585                         $graph->get_vertex_attributes( $n ) );
 586         }
 587         foreach my $e ( $graph->edges ) {
 588                 my @safe_e = ( _safe_witstr( $e->[0] ), _safe_witstr( $e->[1] ) );
 589                 $safegraph->add_edge( @safe_e );
 590         }
 591         my $safe_stemma = Text::Tradition::Stemma->new(
 592                 'collation' => $c, 'graph' => $safegraph );
 593
 594         # Now convert the witness groupings to a safe representation.
 595         my $safe_groupings = [];
 596         foreach my $grouping ( @$groupings ) {
 597                 my $safe_grouping = [];
 598                 foreach my $group ( @$grouping ) {
 599                         my $safe_group = [];
 600                         foreach my $n ( @$group ) {
 601                                 my $sn = _safe_witstr( $n );
 602                                 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
 603                                         if exists $witness_map->{$sn} && $witness_map->{$sn} ne $n;
 604                                 $witness_map->{$sn} = $n;
 605                                 push( @$safe_group, $sn );
 606                         }
 607                         push( @$safe_grouping, $safe_group );
 608                 }
 609                 push( @$safe_groupings, $safe_grouping );
 610         }
 611
 612         # Return it all in the struct we expect.  We have stored the reductions
 613         # in the $witness_map that we were passed.
 614         return { 'graph' => $safe_stemma->editable( { 'linesep' => ' ' } ),
 615                          'groupings' => $safe_groupings };
 616 }
 617
 618 sub _safe_witstr {
 619         my $witstr = shift;
 620         $witstr =~ s/\s+/_/g;
 621         $witstr =~ s/[^\w\d-]//g;
 622         return $witstr;
 623 }
 624
 625 sub _desanitize_names {
 626         my( $jsonstruct, $witness_map ) = @_;
 627         my $result = [];
 628         foreach my $grouping ( @$jsonstruct ) {
 629                 my $real_grouping = [];
 630                 foreach my $element ( @$grouping ) {
 631                         if( ref( $element ) eq 'ARRAY' ) {
 632                                 # it's the groupset.
 633                                 my $real_groupset = [];
 634                                 foreach my $group ( @$element ) {
 635                                         my $real_group = [];
 636                                         foreach my $n ( @$group ) {
 637                                                 my $rn = $witness_map->{$n};
 638                                                 push( @$real_group, $rn );
 639                                         }
 640                                         push( @$real_groupset, $real_group );
 641                                 }
 642                                 push( @$real_grouping, $real_groupset );
 643                         } else {
 644                                 # It is the boolean, not actually a group.
 645                                 push( @$real_grouping, $element );
 646                         }
 647                 }
 648                 push( @$result, $real_grouping );
 649         }
 650         return $result;
 651 }
 652
 653 ### END HACKERY ###
 654
 655 =head2 analyze_location ( $tradition, $graph, $location_hash )
 656
 657 Given the tradition, its stemma graph, and the solution from the graph solver,
 658 work out the rest of the information we want.  For each reading we need missing,
 659 conflict, reading_parents, independent_occurrence, followed, not_followed, and follow_unknown.  Alters the location_hash in place.
 660
 661 =cut
 662
 663 sub analyze_location {
 664         my ( $tradition, $stemma, $variant_row, $lacunose ) = @_;
 665         my $c = $tradition->collation;
 666
 667         # Make a hash of all known node memberships, and make the subgraphs.
 668         my $contig = {};
 669         my $reading_roots = {};
 670         my $subgraph = {};
 671         my $acstr = $c->ac_label;
 672         my @acwits;
 673         # Note which witnesses positively belong to which group
 674     foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
 675         my $rid = $rdghash->{'readingid'};
 676         foreach my $wit ( @{$rdghash->{'group'}} ) {
 677                 $contig->{$wit} = $rid;
 678             if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
 679                 push( @acwits, $1 );
 680             }
 681         }
 682         }
 683
 684         # Get the actual graph we should work with
 685         my $graph;
 686         try {
 687                 $graph = @acwits ? $stemma->extend_graph( \@acwits ) : $stemma->graph;
 688         } catch {
 689                 die "Could not extend graph with a.c. witnesses @acwits";
 690         }
 691
 692         # Now, armed with that knowledge, make a subgraph for each reading
 693         # and note the root(s) of each subgraph.
 694         foreach my $rdghash( @{$variant_row->{'readings'}} ) {
 695         my $rid = $rdghash->{'readingid'};
 696         my %rdgwits;
 697         # Make the subgraph.
 698         my $part = $graph->copy;
 699         my @todelete = grep { exists $contig->{$_} && $contig->{$_} ne $rid }
 700                 keys %$contig;
 701         $part->delete_vertices( @todelete );
 702         _prune_subtree( $part, $lacunose );
 703                 $subgraph->{$rid} = $part;
 704                 # Record the remaining lacunose nodes as part of this group, if
 705                 # we are dealing with a non-genealogical reading.
 706                 unless( $variant_row->{'genealogical'} ) {
 707                         map { $contig->{$_} = $rid } $part->vertices;
 708                 }
 709                 # Get the reading roots.
 710                 map { $reading_roots->{$_} = $rid } $part->predecessorless_vertices;
 711         }
 712
 713         # Now that we have all the node group memberships, calculate followed/
 714     # non-followed/unknown values for each reading.  Also figure out the
 715     # reading's evident parent(s).
 716     foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
 717         my $rid = $rdghash->{'readingid'};
 718         # Get the subgraph
 719         my $part = $subgraph->{$rid};
 720
 721         # Start figuring things out.
 722         my @roots = grep { $reading_roots->{$_} eq $rid } keys %$reading_roots;
 723         $rdghash->{'independent_occurrence'} = \@roots;
 724         $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
 725         # Find the parent readings, if any, of this reading.
 726         my $rdgparents = {};
 727         foreach my $wit ( @roots ) {
 728                 # Look in the main stemma to find this witness's extant or known-reading
 729                 # immediate ancestor(s), and look up the reading that each ancestor olds.
 730                         my @check = $graph->predecessors( $wit );
 731                         while( @check ) {
 732                                 my @next;
 733                                 foreach my $wparent( @check ) {
 734                                         my $preading = $contig->{$wparent};
 735                                         if( $preading ) {
 736                                                 $rdgparents->{$preading} = 1;
 737                                         } else {
 738                                                 push( @next, $graph->predecessors( $wparent ) );
 739                                         }
 740                                 }
 741                                 @check = @next;
 742                         }
 743                 }
 744                 foreach my $p ( keys %$rdgparents ) {
 745                         # Resolve the relationship of the parent to the reading, and
 746                         # save it in our hash.
 747                         my $pobj = $c->reading( $p );
 748                         my $prep = $pobj ? $pobj->id . ' (' . $pobj->text . ')' : $p;
 749                         my $phash = { 'label' => $prep };
 750                         if( $pobj ) {
 751                                 my $rel = $c->get_relationship( $p, $rdghash->{readingid} );
 752                                 if( $rel ) {
 753                                         $phash->{relation} = { type => $rel->type };
 754                                         if( $rel->has_annotation ) {
 755                                                 $phash->{relation}->{'annotation'} = $rel->annotation;
 756                                         }
 757                                 } elsif( $rdghash->{readingid} eq '(omitted)' ) {
 758                                         $phash->{relation} = { type => 'deletion' };
 759                                 } elsif( $rdghash->{text} ) {
 760                                         # Check for sheer word similarity.
 761                                         my $rtext = $rdghash->{text};
 762                                         my $ptext = $pobj->text;
 763                                         my $min = length( $rtext ) > length( $ptext )
 764                                                 ? length( $ptext ) : length( $rtext );
 765                                         my $distance = distance( $rtext, $ptext );
 766                                         if( $distance < $min ) {
 767                                                 $phash->{relation} = { type => 'wordsimilar' };
 768                                         }
 769                                 }
 770                                 # Get the attributes of the parent object while we are here
 771                                 $phash->{'text'} = $pobj->text if $pobj;
 772                                 $phash->{'is_nonsense'} = $pobj->is_nonsense;
 773                                 $phash->{'is_ungrammatical'} = $pobj->grammar_invalid;
 774                         } elsif( $p eq '(omitted)' ) {
 775                                 $phash->{relation} = { type => 'addition' };
 776                         }
 777                         # Save it
 778                         $rdgparents->{$p} = $phash;
 779                 }
 780
 781                 $rdghash->{'reading_parents'} = $rdgparents;
 782
 783                 # Find the number of times this reading was altered, and the number of
 784                 # times we're not sure.
 785                 my( %nofollow, %unknownfollow );
 786                 foreach my $wit ( $part->vertices ) {
 787                         foreach my $wchild ( $graph->successors( $wit ) ) {
 788                                 next if $part->has_vertex( $wchild );
 789                                 if( $reading_roots->{$wchild} && $contig->{$wchild} ) {
 790                                         # It definitely changed here.
 791                                         $nofollow{$wchild} = 1;
 792                                 } elsif( !($contig->{$wchild}) ) {
 793                                         # The child is a hypothetical node not definitely in
 794                                         # any group. Answer is unknown.
 795                                         $unknownfollow{$wchild} = 1;
 796                                 } # else it's a non-root node in a known group, and therefore
 797                                   # is presumed to have its reading from its group, not this link.
 798                         }
 799                 }
 800                 $rdghash->{'not_followed'} = keys %nofollow;
 801                 $rdghash->{'follow_unknown'} = keys %unknownfollow;
 802
 803                 # Now say whether this reading represents a conflict.
 804                 unless( $variant_row->{'genealogical'} ) {
 805                         $rdghash->{'conflict'} = @roots != 1;
 806                 }
 807     }
 808 }
 809
 810
 811 =head2 perl_solver( $tradition, $rank, $stemma_id, @merge_relationship_types )
 812
 813 ** NOTE ** This method should hopefully not be called - it is not guaranteed
 814 to be correct.  Serves as a backup for the real solver.
 815
 816 Runs an analysis of the given tradition, at the location given in $rank,
 817 against the graph of the stemma specified in $stemma_id.  The argument
 818 @merge_relationship_types is an optional list of relationship types for
 819 which readings so related should be treated as equivalent.
 820
 821 Returns a nested array data structure as follows:
 822
 823  [ [ group_list, is_genealogical ], [ group_list, is_genealogical ] ... ]
 824
 825 where the group list is the array of arrays passed in for each element of @groups,
 826 possibly with the addition of hypothetical readings.
 827
 828
 829 =cut
 830
 831 sub perl_solver {
 832         my( $graph, @groups ) = @_;
 833         my @answer;
 834         foreach my $g ( @groups ) {
 835                 push( @answer, _solve_variant_location( $graph, $g ) );
 836         }
 837         return \@answer;
 838 }
 839
 840 sub _solve_variant_location {
 841         my( $graph, $groups ) = @_;
 842         # Now do the work.
 843     my $contig = {};
 844     my $subgraph = {};
 845     my $is_conflicted;
 846     my $conflict = {};
 847
 848     # Mark each ms as in its own group, first.
 849     foreach my $g ( @$groups ) {
 850         my $gst = wit_stringify( $g );
 851         map { $contig->{$_} = $gst } @$g;
 852     }
 853
 854     # Now for each unmarked node in the graph, initialize an array
 855     # for possible group memberships.  We will use this later to
 856     # resolve potential conflicts.
 857     map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
 858     foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
 859         my $gst = wit_stringify( $g );  # This is the group name
 860         # Copy the graph, and delete all non-members from the new graph.
 861         my $part = $graph->copy;
 862         my @group_roots;
 863         $part->delete_vertices(
 864             grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
 865
 866         # Now look to see if our group is connected.
 867                 if( @$g > 1 ) {
 868                         # We have to take directionality into account.
 869                         # How many root nodes do we have?
 870                         my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
 871                                 $part->predecessorless_vertices;
 872                         # Assuming that @$g > 1, find the first root node that has at
 873                         # least one successor belonging to our group. If this reading
 874                         # is genealogical, there should be only one, but we will check
 875                         # that implicitly later.
 876                         foreach my $root ( @roots ) {
 877                                 # Prune the tree to get rid of extraneous hypotheticals.
 878                                 $root = _prune_subtree_old( $part, $root, $contig );
 879                                 next unless $root;
 880                                 # Save this root for our group.
 881                                 push( @group_roots, $root );
 882                                 # Get all the successor nodes of our root.
 883                         }
 884                 } else {
 885                         # Dispense with the trivial case of one reading.
 886                         my $wit = $g->[0];
 887                         @group_roots = ( $wit );
 888                         foreach my $v ( $part->vertices ) {
 889                                 $part->delete_vertex( $v ) unless $v eq $wit;
 890                         }
 891         }
 892
 893         if( @group_roots > 1 ) {
 894                 $conflict->{$gst} = 1;
 895                 $is_conflicted = 1;
 896         }
 897         # Paint the 'hypotheticals' with our group.
 898                 foreach my $wit ( $part->vertices ) {
 899                         if( ref( $contig->{$wit} ) ) {
 900                                 push( @{$contig->{$wit}}, $gst );
 901                         } elsif( $contig->{$wit} ne $gst ) {
 902                                 warn "How did we get here?";
 903                         }
 904                 }
 905
 906
 907                 # Save the relevant subgraph.
 908                 $subgraph->{$gst} = $part;
 909     }
 910
 911         # For each of our hypothetical readings, flatten its 'contig' array if
 912         # the array contains zero or one group.  If we have any unflattened arrays,
 913         # we may need to run the resolution process. If the reading is already known
 914         # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
 915         # it.
 916         my @resolve;
 917         foreach my $wit ( keys %$contig ) {
 918                 next unless ref( $contig->{$wit} );
 919                 if( @{$contig->{$wit}} > 1 ) {
 920                         if( $is_conflicted ) {
 921                                 $contig->{$wit} = '';  # We aren't going to decide.
 922                         } else {
 923                                 push( @resolve, $wit );
 924                         }
 925                 } else {
 926                         my $gst = pop @{$contig->{$wit}};
 927                         $contig->{$wit} = $gst || '';
 928                 }
 929         }
 930
 931     if( @resolve ) {
 932         my $still_contig = {};
 933         foreach my $h ( @resolve ) {
 934             # For each of the hypothetical readings with more than one possibility,
 935             # try deleting it from each of its member subgraphs in turn, and see
 936             # if that breaks the contiguous grouping.
 937             # TODO This can still break in a corner case where group A can use
 938             # either vertex 1 or 2, and group B can use either vertex 2 or 1.
 939             # Revisit this if necessary; it could get brute-force nasty.
 940             foreach my $gst ( @{$contig->{$h}} ) {
 941                 my $gpart = $subgraph->{$gst}->copy();
 942                 # If we have come this far, there is only one root and everything
 943                 # is reachable from it.
 944                 my( $root ) = $gpart->predecessorless_vertices;
 945                 my $reachable = {};
 946                 map { $reachable->{$_} = 1 } $gpart->vertices;
 947
 948                 # Try deleting the hypothetical node.
 949                 $gpart->delete_vertex( $h );
 950                 if( $h eq $root ) {
 951                         # See if we still have a single root.
 952                         my @roots = $gpart->predecessorless_vertices;
 953                         warn "This shouldn't have happened" unless @roots;
 954                         if( @roots > 1 ) {
 955                                 # $h is needed by this group.
 956                                 if( exists( $still_contig->{$h} ) ) {
 957                                         # Conflict!
 958                                         $conflict->{$gst} = 1;
 959                                         $still_contig->{$h} = '';
 960                                 } else {
 961                                         $still_contig->{$h} = $gst;
 962                                 }
 963                         }
 964                 } else {
 965                         # $h is somewhere in the middle. See if everything
 966                         # else can still be reached from the root.
 967                                         my %still_reachable = ( $root => 1 );
 968                                         map { $still_reachable{$_} = 1 }
 969                                                 $gpart->all_successors( $root );
 970                                         foreach my $v ( keys %$reachable ) {
 971                                                 next if $v eq $h;
 972                                                 if( !$still_reachable{$v}
 973                                                         && ( $contig->{$v} eq $gst
 974                                                                  || ( exists $still_contig->{$v}
 975                                                                           && $still_contig->{$v} eq $gst ) ) ) {
 976                                                         # We need $h.
 977                                                         if( exists $still_contig->{$h} ) {
 978                                                                 # Conflict!
 979                                                                 $conflict->{$gst} = 1;
 980                                                                 $still_contig->{$h} = '';
 981                                                         } else {
 982                                                                 $still_contig->{$h} = $gst;
 983                                                         }
 984                                                         last;
 985                                                 } # else we don't need $h in this group.
 986                                         } # end foreach $v
 987                                 } # endif $h eq $root
 988             } # end foreach $gst
 989         } # end foreach $h
 990
 991         # Now we have some hypothetical vertices in $still_contig that are the
 992         # "real" group memberships.  Replace these in $contig.
 993                 foreach my $v ( keys %$contig ) {
 994                         next unless ref $contig->{$v};
 995                         $contig->{$v} = $still_contig->{$v};
 996                 }
 997     } # end if @resolve
 998
 999     my $is_genealogical = keys %$conflict ? JSON::false : JSON::true;
1000         my $variant_row = [ [], $is_genealogical ];
1001         # Fill in the groupings from $contig.
1002         foreach my $g ( @$groups ) {
1003         my $gst = wit_stringify( $g );
1004         my @realgroup = grep { $contig->{$_} eq $gst } keys %$contig;
1005         push( @{$variant_row->[0]}, \@realgroup );
1006     }
1007     return $variant_row;
1008 }
1009
1010 sub _prune_group {
1011         my( $group, $stemma, $graph ) = @_;
1012         my $lacunose = {};
1013         map { $lacunose->{$_} = 1 } $stemma->hypotheticals;
1014         map { $lacunose->{$_} = 0 } @$group;
1015         # Make our subgraph
1016         my $subgraph = $graph->copy;
1017         map { $subgraph->delete_vertex( $_ ) unless exists $lacunose->{$_} }
1018                 $subgraph->vertices;
1019         # ...and find the root.
1020         # Now prune and return the remaining vertices.
1021         _prune_subtree( $subgraph, $lacunose );
1022         return $subgraph->vertices;
1023 }
1024
1025 sub _prune_subtree {
1026         my( $tree, $lacunose ) = @_;
1027
1028         # Delete lacunose witnesses that have no successors
1029     my @orphan_hypotheticals;
1030     my $ctr = 0;
1031     do {
1032         die "Infinite loop on leaves" if $ctr > 100;
1033         @orphan_hypotheticals = grep { $lacunose->{$_} }
1034                 $tree->successorless_vertices;
1035         $tree->delete_vertices( @orphan_hypotheticals );
1036         $ctr++;
1037     } while( @orphan_hypotheticals );
1038
1039         # Delete lacunose roots that have a single successor
1040         my @redundant_root;
1041         $ctr = 0;
1042         do {
1043         die "Infinite loop on roots" if $ctr > 100;
1044                 @redundant_root = grep { $lacunose->{$_} && $tree->successors( $_ ) == 1 }
1045                         $tree->predecessorless_vertices;
1046                 $tree->delete_vertices( @redundant_root );
1047                 $ctr++;
1048         } while( @redundant_root );
1049 }
1050
1051 sub _prune_subtree_old {
1052     my( $tree, $root, $contighash ) = @_;
1053     # First, delete hypothetical leaves / orphans until there are none left.
1054     my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
1055         $tree->successorless_vertices;
1056     while( @orphan_hypotheticals ) {
1057         $tree->delete_vertices( @orphan_hypotheticals );
1058         @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
1059             $tree->successorless_vertices;
1060     }
1061     # Then delete a hypothetical root with only one successor, moving the
1062     # root to the first child that has no other predecessors.
1063     while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
1064         my @nextroot = $tree->successors( $root );
1065         $tree->delete_vertex( $root );
1066         ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
1067     }
1068     # The tree has been modified in place, but we need to know the new root.
1069     $root = undef unless $root && $tree->has_vertex( $root );
1070     return $root;
1071 }
1072 # Add the variant, subject to a.c. representation logic.
1073 # This assumes that we will see the 'main' version before the a.c. version.
1074 sub add_variant_wit {
1075     my( $arr, $wit, $acstr ) = @_;
1076     my $skip;
1077     if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
1078         my $real = $1;
1079         $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
1080     }
1081     push( @$arr, $wit ) unless $skip;
1082 }
1083
1084 sub _useful_variant {
1085         my( $group_readings, $graph, $acstr ) = @_;
1086
1087         # TODO Decide what to do with AC witnesses
1088
1089         # Sort by group size and return
1090         my $is_useful = 0;
1091         my( @readings, @groups );   # The sorted groups for our answer.
1092         foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
1093                 keys %$group_readings ) {
1094                 push( @readings, $rdg );
1095                 push( @groups, $group_readings->{$rdg} );
1096                 if( @{$group_readings->{$rdg}} > 1 ) {
1097                         $is_useful++;
1098                 } else {
1099                         my( $wit ) = @{$group_readings->{$rdg}};
1100                         $wit =~ s/^(.*)\Q$acstr\E$/$1/;
1101                         $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
1102                 }
1103         }
1104         if( $is_useful > 1 ) {
1105                 return( \@readings, \@groups );
1106         } else {
1107                 return( [], [] );
1108         }
1109 }
1110
1111 =head2 wit_stringify( $groups )
1112
1113 Takes an array of witness groupings and produces a string like
1114 ['A','B'] / ['C','D','E'] / ['F']
1115
1116 =cut
1117
1118 sub wit_stringify {
1119     my $groups = shift;
1120     my @gst;
1121     # If we were passed an array of witnesses instead of an array of
1122     # groupings, then "group" the witnesses first.
1123     unless( ref( $groups->[0] ) ) {
1124         my $mkgrp = [ $groups ];
1125         $groups = $mkgrp;
1126     }
1127     foreach my $g ( @$groups ) {
1128         push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
1129     }
1130     return join( ' / ', @gst );
1131 }
1132
1133 sub _symmdiff {
1134         my( $lista, $listb ) = @_;
1135         my %union;
1136         my %scalars;
1137         map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
1138         map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
1139         my @set = grep { $union{$_} == 1 } keys %union;
1140         return map { $scalars{$_} } @set;
1141 }
1142
1143 1;
1144
1145 =head1 LICENSE
1146
1147 This package is free software and is provided "as is" without express
1148 or implied warranty.  You can redistribute it and/or modify it under
1149 the same terms as Perl itself.
1150
1151 =head1 AUTHOR
1152
1153 Tara L Andrews E<lt>aurum@cpan.orgE<gt>