add some more data to the analysis structure
[scpubgit/stemmatology.git] / lib / Text / Tradition / Analysis.pm
CommitLineData
d71100ed 1package Text::Tradition::Analysis;
2
3use strict;
4use warnings;
e4386ba9 5use Benchmark;
88a6bac5 6use Encode qw/ encode_utf8 /;
d1348d38 7use Exporter 'import';
b4cb2d60 8use Graph;
88a6bac5 9use JSON qw/ encode_json decode_json /;
10use LWP::UserAgent;
354cc918 11use Text::LevenshteinXS qw/ distance /;
d71100ed 12use Text::Tradition;
13use Text::Tradition::Stemma;
d120c995 14use TryCatch;
d71100ed 15
d1348d38 16use vars qw/ @EXPORT_OK /;
a2cf85dd 17@EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
d1348d38 18
7f52eac8 19=head1 NAME
20
21Text::Tradition::Analysis - functions for stemma analysis of a tradition
22
23=head1 SYNOPSIS
24
25 use Text::Tradition;
26 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
27 my $t = Text::Tradition->new(
28 'name' => 'this is a text',
29 'input' => 'TEI',
30 'file' => '/path/to/tei_parallel_seg_file.xml' );
31 $t->add_stemma( 'dotfile' => $stemmafile );
32
33 my $variant_data = run_analysis( $tradition );
34 # Recalculate rank $n treating all orthographic variants as equivalent
35 my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
36
37=head1 DESCRIPTION
38
39Text::Tradition is a library for representation and analysis of collated
40texts, particularly medieval ones. The Collation is the central feature of
41a Tradition, where the text, its sequence of readings, and its relationships
42between readings are actually kept.
43
44=head1 SUBROUTINES
45
88a6bac5 46=head2 run_analysis( $tradition, %opts )
7f52eac8 47
88a6bac5 48Runs the analysis described in analyze_variant_location on every location in the
49collation of the given tradition, with the given options. These include:
7f52eac8 50
88a6bac5 51=over 4
52
53=item * stemma_id - Specify which of the tradition's stemmata to use. Default
54is 0 (i.e. the first).
55
56=item * ranks - Specify a list of location ranks to analyze; exclude the rest.
57
58=item * merge_types - Specify a list of relationship types, where related readings
59should be treated as identical for the purposes of analysis.
60
ffa22d6f 61=item * exclude_type1 - Exclude those ranks whose groupings have only type-1 variants.
62
88a6bac5 63=back
7f52eac8 64
65=begin testing
66
67use Text::Tradition;
68use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
69
70my $datafile = 't/data/florilegium_tei_ps.xml';
71my $tradition = Text::Tradition->new( 'input' => 'TEI',
72 'name' => 'test0',
73 'file' => $datafile );
74my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
75is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
76
f00cefe8 77my %expected_genealogical = (
a44aaf2a 78 1 => 0,
f00cefe8 79 2 => 1,
a44aaf2a 80 3 => 0,
81 5 => 0,
82 7 => 0,
83 8 => 0,
84 10 => 0,
f00cefe8 85 13 => 1,
a44aaf2a 86 33 => 0,
87 34 => 0,
88 37 => 0,
89 60 => 0,
f00cefe8 90 81 => 1,
a44aaf2a 91 84 => 0,
92 87 => 0,
93 101 => 0,
94 102 => 0,
f00cefe8 95 122 => 1,
a44aaf2a 96 157 => 0,
f00cefe8 97 166 => 1,
98 169 => 1,
a44aaf2a 99 200 => 0,
f00cefe8 100 216 => 1,
101 217 => 1,
102 219 => 1,
103 241 => 1,
104 242 => 1,
105 243 => 1,
106);
107
7f52eac8 108my $data = run_analysis( $tradition );
7234b01d 109my $c = $tradition->collation;
f00cefe8 110foreach my $row ( @{$data->{'variants'}} ) {
a44aaf2a 111 # Account for rows that used to be "not useful"
112 unless( exists $expected_genealogical{$row->{'id'}} ) {
113 $expected_genealogical{$row->{'id'}} = 1;
114 }
18f48b82 115 my $gen_bool = $row->{'genealogical'} ? 1 : 0;
116 is( $gen_bool, $expected_genealogical{$row->{'id'}},
f00cefe8 117 "Got correct genealogical flag for row " . $row->{'id'} );
7234b01d 118 # Check that we have the right row with the right groups
119 my $rank = $row->{'id'};
120 foreach my $rdghash ( @{$row->{'readings'}} ) {
121 # Skip 'readings' that aren't really
122 next unless $c->reading( $rdghash->{'readingid'} );
123 # Check the rank
124 is( $c->reading( $rdghash->{'readingid'} )->rank, $rank,
125 "Got correct reading rank" );
126 # Check the witnesses
127 my @realwits = sort $c->reading_witnesses( $rdghash->{'readingid'} );
128 my @sgrp = sort @{$rdghash->{'group'}};
129 is_deeply( \@sgrp, \@realwits, "Reading analyzed with correct groups" );
130 }
f00cefe8 131}
a44aaf2a 132is( $data->{'variant_count'}, 58, "Got right total variant number" );
b4cb2d60 133# TODO Make something meaningful of conflict count, maybe test other bits
7f52eac8 134
135=end testing
136
137=cut
138
d71100ed 139sub run_analysis {
88a6bac5 140 my( $tradition, %opts ) = @_;
f00cefe8 141 my $c = $tradition->collation;
88a6bac5 142
143 my $stemma_id = $opts{'stemma_id'} || 0;
1d73ecad 144 my @ranks = ref( $opts{'ranks'} ) eq 'ARRAY' ? @{$opts{'ranks'}} : ();
145 my @collapse = ref( $opts{'merge_types'} ) eq 'ARRAY' ? @{$opts{'merge_types'}} : ();
88a6bac5 146
147 # Get the stemma
148 my $stemma = $tradition->stemma( $stemma_id );
b4cb2d60 149
4ce27d42 150 # Figure out which witnesses we are working with - that is, the ones that
151 # appear both in the stemma and in the tradition. All others are 'lacunose'
152 # for our purposes.
88a6bac5 153 my @lacunose = $stemma->hypotheticals;
fae07016 154 my @tradition_wits = map { $_->sigil } $tradition->witnesses;
fae07016 155 push( @lacunose, _symmdiff( [ $stemma->witnesses ], \@tradition_wits ) );
88a6bac5 156
157 # Find and mark 'common' ranks for exclusion, unless they were
158 # explicitly specified.
159 unless( @ranks ) {
160 my %common_rank;
a44aaf2a 161 foreach my $rdg ( $c->common_readings ) {
88a6bac5 162 $common_rank{$rdg->rank} = 1;
163 }
164 @ranks = grep { !$common_rank{$_} } ( 1 .. $c->end->rank-1 );
d71100ed 165 }
7f52eac8 166
88a6bac5 167 # Group the variants to send to the solver
168 my @groups;
f629cb3b 169 my @use_ranks;
a44aaf2a 170 my %lacunae;
94654e27 171 my $moved = {};
88a6bac5 172 foreach my $rank ( @ranks ) {
a44aaf2a 173 my $missing = [ @lacunose ];
94654e27 174 my $rankgroup = group_variants( $tradition, $rank, $missing, $moved, \@collapse );
175 # Filter out any empty rankgroups
176 # (e.g. from the later rank for a transposition)
177 next unless keys %$rankgroup;
ffa22d6f 178 if( $opts{'exclude_type1'} ) {
179 # Check to see whether this is a "useful" group.
180 my( $rdgs, $grps ) = _useful_variant( $rankgroup,
181 $stemma->graph, $c->ac_label );
182 next unless @$rdgs;
183 }
f629cb3b 184 push( @use_ranks, $rank );
ffa22d6f 185 push( @groups, $rankgroup );
a44aaf2a 186 $lacunae{$rank} = $missing;
d71100ed 187 }
4ce27d42 188 # Run the solver
e59b8faa 189 my $answer = solve_variants( $stemma, @groups );
fae07016 190
88a6bac5 191 # Do further analysis on the answer
a44aaf2a 192 my $conflict_count = 0;
7234b01d 193 my $aclabel = $c->ac_label;
f629cb3b 194 foreach my $idx ( 0 .. $#use_ranks ) {
88a6bac5 195 my $location = $answer->{'variants'}->[$idx];
196 # Add the rank back in
94654e27 197 my $rank = $use_ranks[$idx];
198 $location->{'id'} = $rank;
7234b01d 199 # Note what our lacunae are
f629cb3b 200 my %lmiss;
7234b01d 201 map { $lmiss{$_} = 1 } @{$lacunae{$use_ranks[$idx]}};
7234b01d 202 $location->{'missing'} = [ keys %lmiss ];
203
88a6bac5 204 # Run the extra analysis we need.
638e2a95 205 analyze_location( $tradition, $stemma, $location, \%lmiss );
7234b01d 206
638e2a95 207 my @layerwits;
7234b01d 208 # Do the final post-analysis tidying up of the data.
a44aaf2a 209 foreach my $rdghash ( @{$location->{'readings'}} ) {
210 $conflict_count++
211 if exists $rdghash->{'conflict'} && $rdghash->{'conflict'};
94654e27 212 # Add the reading text back in, setting display value as needed
a44aaf2a 213 my $rdg = $c->reading( $rdghash->{'readingid'} );
94654e27 214 if( $rdg ) {
215 $rdghash->{'text'} = $rdg->text .
216 ( $rdg->rank == $rank ? '' : ' [' . $rdg->rank . ']' );
7b7abf10 217 $rdghash->{'is_ungrammatical'} = $rdg->grammar_invalid;
218 $rdghash->{'is_nonsense'} = $rdg->is_nonsense;
94654e27 219 }
f629cb3b 220 # Remove lacunose witnesses from this reading's list now that the
7234b01d 221 # analysis is done
f629cb3b 222 my @realgroup;
7234b01d 223 map { push( @realgroup, $_ ) unless $lmiss{$_} } @{$rdghash->{'group'}};
f629cb3b 224 $rdghash->{'group'} = \@realgroup;
638e2a95 225 # Note any layered witnesses that appear in this group
226 foreach( @realgroup ) {
227 if( $_ =~ /^(.*)\Q$aclabel\E$/ ) {
228 push( @layerwits, $1 );
229 }
230 }
a44aaf2a 231 }
638e2a95 232 $location->{'layerwits'} = \@layerwits if @layerwits;
88a6bac5 233 }
a44aaf2a 234 $answer->{'conflict_count'} = $conflict_count;
f00cefe8 235
88a6bac5 236 return $answer;
d71100ed 237}
238
7f52eac8 239=head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
240
241Groups the variants at the given $rank of the collation, treating any
242relationships in @merge_relationship_types as equivalent. $lacunose should
243be a reference to an array, to which the sigla of lacunose witnesses at this
94654e27 244rank will be appended; $transposed should be a reference to a hash, wherein
245the identities of transposed readings and their relatives will be stored.
7f52eac8 246
ffa22d6f 247Returns a hash $group_readings where $rdg is attested by the witnesses listed
248in $group_readings->{$rdg}.
7f52eac8 249
250=cut
251
252# Return group_readings, groups, lacunose
d1348d38 253sub group_variants {
94654e27 254 my( $tradition, $rank, $lacunose, $transposed, $collapse ) = @_;
7f52eac8 255 my $c = $tradition->collation;
335a62ef 256 my $aclabel = $c->ac_label;
d120c995 257 my $table = $c->alignment_table;
7f52eac8 258 # Get the alignment table readings
259 my %readings_at_rank;
d120c995 260 my %is_lacunose; # lookup table for witnesses not in stemma
261 map { $is_lacunose{$_} = 1; $is_lacunose{$_.$aclabel} = 1 } @$lacunose;
94654e27 262 my @check_for_gaps;
263 my %moved_wits;
d120c995 264 my $has_transposition;
265 foreach my $tablewit ( @{$table->{'alignment'}} ) {
7f52eac8 266 my $rdg = $tablewit->{'tokens'}->[$rank-1];
fae07016 267 my $wit = $tablewit->{'witness'};
ffa22d6f 268 # Exclude the witness if it is "lacunose" which if we got here
269 # means "not in the stemma".
270 next if $is_lacunose{$wit};
94654e27 271 # Note if the witness is actually in a lacuna
7f52eac8 272 if( $rdg && $rdg->{'t'}->is_lacuna ) {
335a62ef 273 _add_to_witlist( $wit, $lacunose, $aclabel );
94654e27 274 # Otherwise the witness either has a positive reading...
7f52eac8 275 } elsif( $rdg ) {
94654e27 276 # If the reading has been counted elsewhere as a transposition, ignore it.
277 if( $transposed->{$rdg->{'t'}->id} ) {
d120c995 278 # TODO Does this cope with three-way transpositions?
94654e27 279 map { $moved_wits{$_} = 1 } @{$transposed->{$rdg->{'t'}->id}};
280 next;
281 }
282 # Otherwise, record it...
283 $readings_at_rank{$rdg->{'t'}->id} = $rdg->{'t'};
284 # ...and grab any transpositions, and their relations.
285 my @transp = grep { $_->rank != $rank } $rdg->{'t'}->related_readings();
286 foreach my $trdg ( @transp ) {
d120c995 287 next if exists $readings_at_rank{$trdg->id};
288 $has_transposition = 1;
289 my @affected_wits = _table_witnesses(
290 $table, $trdg, \%is_lacunose, $aclabel );
291 next unless @affected_wits;
292 map { $moved_wits{$_} = 1 } @affected_wits;
293 $transposed->{$trdg->id} =
294 [ _table_witnesses( $table, $rdg->{'t'}, \%is_lacunose, $aclabel ) ];
94654e27 295 $readings_at_rank{$trdg->id} = $trdg;
296 }
297 # ...or it is empty, ergo a gap.
7f52eac8 298 } else {
d120c995 299 _add_to_witlist( $wit, \@check_for_gaps, $aclabel );
7f52eac8 300 }
301 }
94654e27 302 my @gap_wits;
303 map { _add_to_witlist( $_, \@gap_wits, $aclabel )
304 unless $moved_wits{$_} } @check_for_gaps;
7f52eac8 305 # Group the readings, collapsing groups by relationship if needed
d120c995 306 my $grouped_readings = {};
4ce27d42 307 foreach my $rdg ( values %readings_at_rank ) {
7f52eac8 308 # Skip readings that have been collapsed into others.
d120c995 309 next if exists $grouped_readings->{$rdg->id}
310 && $grouped_readings->{$rdg->id} eq 'COLLAPSE';
4ce27d42 311 # Get the witness list, including from readings collapsed into this one.
d120c995 312 my @wits = _table_witnesses( $table, $rdg, \%is_lacunose, $aclabel );
313 if( $collapse && @$collapse ) {
7f52eac8 314 my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
315 foreach my $other ( $rdg->related_readings( $filter ) ) {
d120c995 316 my @otherwits = _table_witnesses(
317 $table, $other, \%is_lacunose, $aclabel );
fae07016 318 push( @wits, @otherwits );
d120c995 319 $grouped_readings->{$other->id} = 'COLLAPSE';
d1348d38 320 }
321 }
d120c995 322 $grouped_readings->{$rdg->id} = \@wits;
7f52eac8 323 }
d120c995 324 $grouped_readings->{'(omitted)'} = \@gap_wits if @gap_wits;
7f52eac8 325 # Get rid of our collapsed readings
d120c995 326 map { delete $grouped_readings->{$_} if $grouped_readings->{$_} eq 'COLLAPSE' }
327 keys %$grouped_readings
7f52eac8 328 if $collapse;
d120c995 329
330 # If something was transposed, check the groups for doubled-up readings
331 if( $has_transposition ) {
62a39b8f 332 # print STDERR "Group for rank $rank:\n";
333 # map { print STDERR "\t$_: " . join( ' ' , @{$grouped_readings->{$_}} ) . "\n" }
334 # keys %$grouped_readings;
d120c995 335 _check_transposed_consistency( $c, $rank, $transposed, $grouped_readings );
336 }
7f52eac8 337
4ce27d42 338 # Return the result
d120c995 339 return $grouped_readings;
340}
341
342# Helper function to query the alignment table for all witnesses (a.c. included)
343# that have a given reading at its rank.
344sub _table_witnesses {
345 my( $table, $trdg, $lacunose, $aclabel ) = @_;
346 my $tableidx = $trdg->rank - 1;
347 my @has_reading;
348 foreach my $row ( @{$table->{'alignment'}} ) {
349 my $wit = $row->{'witness'};
350 next if $lacunose->{$wit};
351 my $rdg = $row->{'tokens'}->[$tableidx];
352 next unless exists $rdg->{'t'} && defined $rdg->{'t'};
353 _add_to_witlist( $wit, \@has_reading, $aclabel )
354 if $rdg->{'t'}->id eq $trdg->id;
355 }
356 return @has_reading;
d1348d38 357}
358
335a62ef 359# Helper function to ensure that X and X a.c. never appear in the same list.
360sub _add_to_witlist {
361 my( $wit, $list, $acstr ) = @_;
362 my %inlist;
363 my $idx = 0;
364 map { $inlist{$_} = $idx++ } @$list;
365 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
366 my $acwit = $1;
367 unless( exists $inlist{$acwit} ) {
368 push( @$list, $acwit.$acstr );
369 }
370 } else {
371 if( exists( $inlist{$wit.$acstr} ) ) {
372 # Replace the a.c. version with the main witness
373 my $i = $inlist{$wit.$acstr};
374 $list->[$i] = $wit;
375 } else {
376 push( @$list, $wit );
377 }
378 }
379}
380
d120c995 381sub _check_transposed_consistency {
382 my( $c, $rank, $transposed, $groupings ) = @_;
383 my %seen_wits;
384 my %thisrank;
385 # Note which readings are actually at this rank, and which witnesses
386 # belong to which reading.
387 foreach my $rdg ( keys %$groupings ) {
388 my $rdgobj = $c->reading( $rdg );
389 # Count '(omitted)' as a reading at this rank
390 $thisrank{$rdg} = 1 if !$rdgobj || $rdgobj->rank == $rank;
391 map { push( @{$seen_wits{$_}}, $rdg ) } @{$groupings->{$rdg}};
392 }
393 # Our work is done if we have no witness belonging to more than one
394 # reading.
395 my @doubled = grep { scalar @{$seen_wits{$_}} > 1 } keys %seen_wits;
396 return unless @doubled;
397 # If we have a symmetric related transposition, drop the non-rank readings.
398 if( @doubled == scalar keys %seen_wits ) {
399 foreach my $rdg ( keys %$groupings ) {
400 if( !$thisrank{$rdg} ) {
401 my $groupstr = wit_stringify( $groupings->{$rdg} );
402 my ( $matched ) = grep { $groupstr eq wit_stringify( $groupings->{$_} ) }
403 keys %thisrank;
404 delete $groupings->{$rdg};
405 # If we found a group match, assume there is a symmetry happening.
406 # TODO think more about this
62a39b8f 407 # print STDERR "*** Deleting symmetric reading $rdg\n";
d120c995 408 unless( $matched ) {
409 delete $transposed->{$rdg};
410 warn "Found problem in evident symmetry with reading $rdg";
411 }
412 }
413 }
414 # Otherwise 'unhook' the transposed reading(s) that have duplicates.
415 } else {
416 foreach my $dup ( @doubled ) {
417 foreach my $rdg ( @{$seen_wits{$dup}} ) {
418 next if $thisrank{$rdg};
419 next unless exists $groupings->{$rdg};
62a39b8f 420 # print STDERR "*** Deleting asymmetric doubled-up reading $rdg\n";
d120c995 421 delete $groupings->{$rdg};
422 delete $transposed->{$rdg};
423 }
424 }
425 # and put any now-orphaned readings into an 'omitted' reading.
426 foreach my $wit ( keys %seen_wits ) {
427 unless( grep { exists $groupings->{$_} } @{$seen_wits{$wit}} ) {
428 $groupings->{'(omitted)'} = [] unless exists $groupings->{'(omitted)'};
429 _add_to_witlist( $wit, $groupings->{'(omitted)'}, $c->ac_label );
430 }
431 }
432 }
433}
434
88a6bac5 435=head2 solve_variants( $graph, @groups )
436
437Sends the set of groups to the external graph solver service and returns
438a cleaned-up answer, adding the rank IDs back where they belong.
439
440The JSON has the form
441 { "graph": [ stemmagraph DOT string without newlines ],
442 "groupings": [ array of arrays of groups, one per rank ] }
443
444The answer has the form
445 { "variants" => [ array of variant location structures ],
446 "variant_count" => total,
447 "conflict_count" => number of conflicts detected,
448 "genealogical_count" => number of solutions found }
449
450=cut
451
452sub solve_variants {
e59b8faa 453 my( $stemma, @groups ) = @_;
335a62ef 454 my $aclabel = $stemma->collation->ac_label;
455
456 # Filter the groups down to distinct groups, and work out what graph
457 # should be used in the calculation of each group. We want to send each
458 # distinct problem to the solver only once.
459 # We need a whole bunch of lookup tables for this.
460 my $index_groupkeys = {}; # Save the order of readings
461 my $group_indices = {}; # Save the indices that have a given grouping
462 my $graph_problems = {}; # Save the groupings for the given graph
463
464 foreach my $idx ( 0..$#groups ) {
465 my $ghash = $groups[$idx];
88a6bac5 466 my @grouping;
335a62ef 467 # Sort the groupings from big to little, and scan for a.c. witnesses
468 # that would need an extended graph.
469 my @acwits; # note which AC witnesses crop up at this rank
470 my @idxkeys = sort { scalar @{$ghash->{$b}} <=> scalar @{$ghash->{$a}} }
471 keys %$ghash;
472 foreach my $rdg ( @idxkeys ) {
473 my @sg = sort @{$ghash->{$rdg}};
474 push( @acwits, grep { $_ =~ /\Q$aclabel\E$/ } @sg );
475 push( @grouping, \@sg );
476 }
477 # Save the reading order
478 $index_groupkeys->{$idx} = \@idxkeys;
479
480 # Now associate the distinct group with this index
481 my $gstr = wit_stringify( \@grouping );
482 push( @{$group_indices->{$gstr}}, $idx );
483
484 # Finally, add the group to the list to be calculated for this graph.
485 map { s/\Q$aclabel\E$// } @acwits;
d120c995 486 my $graph;
487 try {
488 $graph = $stemma->extend_graph( \@acwits );
489 } catch {
490 die "Unable to extend graph with @acwits";
491 }
335a62ef 492 unless( exists $graph_problems->{"$graph"} ) {
493 $graph_problems->{"$graph"} = { 'object' => $graph, 'groups' => [] };
88a6bac5 494 }
335a62ef 495 push( @{$graph_problems->{"$graph"}->{'groups'}}, \@grouping );
88a6bac5 496 }
335a62ef 497
498 ## For each distinct graph, send its groups to the solver.
88a6bac5 499 my $solver_url = 'http://byzantini.st/cgi-bin/graphcalc.cgi';
500 my $ua = LWP::UserAgent->new();
335a62ef 501 ## Witness map is a HACK to get around limitations in node names from IDP
502 my $witness_map = {};
503 ## Variables to store answers as they come back
504 my $variants = [ ( undef ) x ( scalar keys %$index_groupkeys ) ];
88a6bac5 505 my $genealogical = 0;
335a62ef 506 foreach my $graphkey ( keys %$graph_problems ) {
507 my $graph = $graph_problems->{$graphkey}->{'object'};
508 my $groupings = $graph_problems->{$graphkey}->{'groups'};
509 my $json = encode_json( _safe_wit_strings( $graph, $stemma->collation,
510 $groupings, $witness_map ) );
511 # Send it off and get the result
94654e27 512 #print STDERR "Sending request: $json\n";
335a62ef 513 my $resp = $ua->post( $solver_url, 'Content-Type' => 'application/json',
514 'Content' => $json );
515 my $answer;
516 my $used_idp;
517 if( $resp->is_success ) {
518 $answer = _desanitize_names( decode_json( $resp->content ), $witness_map );
519 $used_idp = 1;
520 } else {
521 # Fall back to the old method.
522 warn "IDP solver returned " . $resp->status_line . " / " . $resp->content
523 . "; falling back to perl method";
524 $answer = perl_solver( $graph, @$groupings );
525 }
526 ## The answer is the evaluated groupings, plus a boolean for whether
527 ## they were genealogical. Reconstruct our original groups.
528 foreach my $gidx ( 0 .. $#{$groupings} ) {
529 my( $calc_groups, $result ) = @{$answer->[$gidx]};
530 if( $result ) {
531 $genealogical++;
532 # Prune the calculated groups, in case the IDP solver failed to.
533 if( $used_idp ) {
534 my @pruned_groups;
535 foreach my $cg ( @$calc_groups ) {
536 # This is a little wasteful but the path of least
537 # resistance. Send both the stemma, which knows what
538 # its hypotheticals are, and the actual graph used.
539 my @pg = _prune_group( $cg, $stemma, $graph );
540 push( @pruned_groups, \@pg );
541 }
542 $calc_groups = \@pruned_groups;
6d25a3a0 543 }
335a62ef 544 }
545 # Retrieve the key for the original group that went to the solver
546 my $input_group = wit_stringify( $groupings->[$gidx] );
547 foreach my $oidx ( @{$group_indices->{$input_group}} ) {
548 my @readings = @{$index_groupkeys->{$oidx}};
549 my $vstruct = {
550 'genealogical' => $result,
551 'readings' => [],
552 };
553 foreach my $ridx ( 0 .. $#readings ) {
554 push( @{$vstruct->{'readings'}},
555 { 'readingid' => $readings[$ridx],
556 'group' => $calc_groups->[$ridx] } );
557 }
558 $variants->[$oidx] = $vstruct;
6d25a3a0 559 }
560 }
88a6bac5 561 }
562
563 return { 'variants' => $variants,
564 'variant_count' => scalar @$variants,
565 'genealogical_count' => $genealogical };
566}
567
b4cb2d60 568#### HACKERY to cope with IDP's limited idea of what a node name looks like ###
569
570sub _safe_wit_strings {
335a62ef 571 my( $graph, $c, $groupings, $witness_map ) = @_;
572 # Parse the graph we were given into a stemma.
b4cb2d60 573 my $safegraph = Graph->new();
574 # Convert the graph to a safe representation and store the conversion.
335a62ef 575 foreach my $n ( $graph->vertices ) {
b4cb2d60 576 my $sn = _safe_witstr( $n );
335a62ef 577 if( exists $witness_map->{$sn} ) {
578 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
579 if $witness_map->{$sn} ne $n;
580 } else {
581 $witness_map->{$sn} = $n;
582 }
b4cb2d60 583 $safegraph->add_vertex( $sn );
584 $safegraph->set_vertex_attributes( $sn,
335a62ef 585 $graph->get_vertex_attributes( $n ) );
b4cb2d60 586 }
335a62ef 587 foreach my $e ( $graph->edges ) {
b4cb2d60 588 my @safe_e = ( _safe_witstr( $e->[0] ), _safe_witstr( $e->[1] ) );
589 $safegraph->add_edge( @safe_e );
590 }
591 my $safe_stemma = Text::Tradition::Stemma->new(
335a62ef 592 'collation' => $c, 'graph' => $safegraph );
b4cb2d60 593
594 # Now convert the witness groupings to a safe representation.
595 my $safe_groupings = [];
596 foreach my $grouping ( @$groupings ) {
597 my $safe_grouping = [];
598 foreach my $group ( @$grouping ) {
599 my $safe_group = [];
600 foreach my $n ( @$group ) {
601 my $sn = _safe_witstr( $n );
602 warn "Ambiguous stringification $sn for $n and " . $witness_map->{$sn}
603 if exists $witness_map->{$sn} && $witness_map->{$sn} ne $n;
604 $witness_map->{$sn} = $n;
605 push( @$safe_group, $sn );
606 }
607 push( @$safe_grouping, $safe_group );
608 }
609 push( @$safe_groupings, $safe_grouping );
610 }
611
612 # Return it all in the struct we expect. We have stored the reductions
613 # in the $witness_map that we were passed.
335a62ef 614 return { 'graph' => $safe_stemma->editable( { 'linesep' => ' ' } ),
615 'groupings' => $safe_groupings };
b4cb2d60 616}
617
618sub _safe_witstr {
619 my $witstr = shift;
620 $witstr =~ s/\s+/_/g;
621 $witstr =~ s/[^\w\d-]//g;
622 return $witstr;
623}
624
625sub _desanitize_names {
626 my( $jsonstruct, $witness_map ) = @_;
627 my $result = [];
628 foreach my $grouping ( @$jsonstruct ) {
629 my $real_grouping = [];
630 foreach my $element ( @$grouping ) {
631 if( ref( $element ) eq 'ARRAY' ) {
632 # it's the groupset.
633 my $real_groupset = [];
634 foreach my $group ( @$element ) {
635 my $real_group = [];
636 foreach my $n ( @$group ) {
637 my $rn = $witness_map->{$n};
638 push( @$real_group, $rn );
639 }
640 push( @$real_groupset, $real_group );
641 }
642 push( @$real_grouping, $real_groupset );
643 } else {
644 # It is the boolean, not actually a group.
645 push( @$real_grouping, $element );
646 }
647 }
648 push( @$result, $real_grouping );
649 }
650 return $result;
651}
652
653### END HACKERY ###
654
fae07016 655=head2 analyze_location ( $tradition, $graph, $location_hash )
7f52eac8 656
fae07016 657Given the tradition, its stemma graph, and the solution from the graph solver,
658work out the rest of the information we want. For each reading we need missing,
659conflict, reading_parents, independent_occurrence, followed, not_followed, and follow_unknown. Alters the location_hash in place.
7f52eac8 660
661=cut
732152b1 662
fae07016 663sub analyze_location {
638e2a95 664 my ( $tradition, $stemma, $variant_row, $lacunose ) = @_;
94654e27 665 my $c = $tradition->collation;
fae07016 666
667 # Make a hash of all known node memberships, and make the subgraphs.
668 my $contig = {};
669 my $reading_roots = {};
670 my $subgraph = {};
638e2a95 671 my $acstr = $c->ac_label;
672 my @acwits;
94654e27 673 # Note which witnesses positively belong to which group
fae07016 674 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
675 my $rid = $rdghash->{'readingid'};
638e2a95 676 foreach my $wit ( @{$rdghash->{'group'}} ) {
677 $contig->{$wit} = $rid;
678 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
679 push( @acwits, $1 );
680 }
681 }
94654e27 682 }
683
638e2a95 684 # Get the actual graph we should work with
d120c995 685 my $graph;
686 try {
687 $graph = @acwits ? $stemma->extend_graph( \@acwits ) : $stemma->graph;
688 } catch {
689 die "Could not extend graph with a.c. witnesses @acwits";
690 }
638e2a95 691
94654e27 692 # Now, armed with that knowledge, make a subgraph for each reading
693 # and note the root(s) of each subgraph.
694 foreach my $rdghash( @{$variant_row->{'readings'}} ) {
695 my $rid = $rdghash->{'readingid'};
696 my %rdgwits;
fae07016 697 # Make the subgraph.
698 my $part = $graph->copy;
94654e27 699 my @todelete = grep { exists $contig->{$_} && $contig->{$_} ne $rid }
700 keys %$contig;
701 $part->delete_vertices( @todelete );
702 _prune_subtree( $part, $lacunose );
fae07016 703 $subgraph->{$rid} = $part;
94654e27 704 # Record the remaining lacunose nodes as part of this group, if
705 # we are dealing with a non-genealogical reading.
706 unless( $variant_row->{'genealogical'} ) {
707 map { $contig->{$_} = $rid } $part->vertices;
708 }
fae07016 709 # Get the reading roots.
710 map { $reading_roots->{$_} = $rid } $part->predecessorless_vertices;
bebec0e9 711 }
712
fae07016 713 # Now that we have all the node group memberships, calculate followed/
bebec0e9 714 # non-followed/unknown values for each reading. Also figure out the
715 # reading's evident parent(s).
716 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
fae07016 717 my $rid = $rdghash->{'readingid'};
718 # Get the subgraph
719 my $part = $subgraph->{$rid};
720
721 # Start figuring things out.
94654e27 722 my @roots = grep { $reading_roots->{$_} eq $rid } keys %$reading_roots;
723 $rdghash->{'independent_occurrence'} = \@roots;
bebec0e9 724 $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
725 # Find the parent readings, if any, of this reading.
94654e27 726 my $rdgparents = {};
bebec0e9 727 foreach my $wit ( @roots ) {
f00cefe8 728 # Look in the main stemma to find this witness's extant or known-reading
729 # immediate ancestor(s), and look up the reading that each ancestor olds.
730 my @check = $graph->predecessors( $wit );
731 while( @check ) {
732 my @next;
733 foreach my $wparent( @check ) {
fae07016 734 my $preading = $contig->{$wparent};
735 if( $preading ) {
94654e27 736 $rdgparents->{$preading} = 1;
f00cefe8 737 } else {
738 push( @next, $graph->predecessors( $wparent ) );
739 }
740 }
741 @check = @next;
742 }
bebec0e9 743 }
94654e27 744 foreach my $p ( keys %$rdgparents ) {
745 # Resolve the relationship of the parent to the reading, and
746 # save it in our hash.
747 my $pobj = $c->reading( $p );
94654e27 748 my $prep = $pobj ? $pobj->id . ' (' . $pobj->text . ')' : $p;
354cc918 749 my $phash = { 'label' => $prep };
94654e27 750 if( $pobj ) {
751 my $rel = $c->get_relationship( $p, $rdghash->{readingid} );
752 if( $rel ) {
354cc918 753 $phash->{relation} = { type => $rel->type };
94654e27 754 if( $rel->has_annotation ) {
354cc918 755 $phash->{relation}->{'annotation'} = $rel->annotation;
756 }
757 } elsif( $rdghash->{readingid} eq '(omitted)' ) {
758 $phash->{relation} = { type => 'deletion' };
759 } elsif( $rdghash->{text} ) {
760 # Check for sheer word similarity.
761 my $rtext = $rdghash->{text};
762 my $ptext = $pobj->text;
763 my $min = length( $rtext ) > length( $ptext )
764 ? length( $ptext ) : length( $rtext );
765 my $distance = distance( $rtext, $ptext );
766 if( $distance < $min ) {
767 $phash->{relation} = { type => 'wordsimilar' };
94654e27 768 }
769 }
354cc918 770 # Get the attributes of the parent object while we are here
771 $phash->{'text'} = $pobj->text if $pobj;
772 $phash->{'is_nonsense'} = $pobj->is_nonsense;
773 $phash->{'is_ungrammatical'} = $pobj->grammar_invalid;
774 } elsif( $p eq '(omitted)' ) {
775 $phash->{relation} = { type => 'addition' };
7b7abf10 776 }
354cc918 777 # Save it
7b7abf10 778 $rdgparents->{$p} = $phash;
94654e27 779 }
780
781 $rdghash->{'reading_parents'} = $rdgparents;
bebec0e9 782
783 # Find the number of times this reading was altered, and the number of
784 # times we're not sure.
785 my( %nofollow, %unknownfollow );
786 foreach my $wit ( $part->vertices ) {
787 foreach my $wchild ( $graph->successors( $wit ) ) {
788 next if $part->has_vertex( $wchild );
fae07016 789 if( $reading_roots->{$wchild} && $contig->{$wchild} ) {
bebec0e9 790 # It definitely changed here.
791 $nofollow{$wchild} = 1;
792 } elsif( !($contig->{$wchild}) ) {
793 # The child is a hypothetical node not definitely in
794 # any group. Answer is unknown.
795 $unknownfollow{$wchild} = 1;
796 } # else it's a non-root node in a known group, and therefore
797 # is presumed to have its reading from its group, not this link.
798 }
799 }
800 $rdghash->{'not_followed'} = keys %nofollow;
801 $rdghash->{'follow_unknown'} = keys %unknownfollow;
fae07016 802
803 # Now say whether this reading represents a conflict.
804 unless( $variant_row->{'genealogical'} ) {
805 $rdghash->{'conflict'} = @roots != 1;
806 }
c4a4fb1b 807 }
d71100ed 808}
809
fae07016 810
811=head2 perl_solver( $tradition, $rank, $stemma_id, @merge_relationship_types )
812
813** NOTE ** This method should hopefully not be called - it is not guaranteed
814to be correct. Serves as a backup for the real solver.
815
816Runs an analysis of the given tradition, at the location given in $rank,
817against the graph of the stemma specified in $stemma_id. The argument
818@merge_relationship_types is an optional list of relationship types for
819which readings so related should be treated as equivalent.
820
821Returns a nested array data structure as follows:
822
823 [ [ group_list, is_genealogical ], [ group_list, is_genealogical ] ... ]
824
825where the group list is the array of arrays passed in for each element of @groups,
826possibly with the addition of hypothetical readings.
827
828
829=cut
830
831sub perl_solver {
335a62ef 832 my( $graph, @groups ) = @_;
e59b8faa 833 my @answer;
834 foreach my $g ( @groups ) {
835 push( @answer, _solve_variant_location( $graph, $g ) );
836 }
837 return \@answer;
fae07016 838}
839
e59b8faa 840sub _solve_variant_location {
841 my( $graph, $groups ) = @_;
fae07016 842 # Now do the work.
e59b8faa 843 my $contig = {};
844 my $subgraph = {};
845 my $is_conflicted;
846 my $conflict = {};
847
848 # Mark each ms as in its own group, first.
849 foreach my $g ( @$groups ) {
850 my $gst = wit_stringify( $g );
851 map { $contig->{$_} = $gst } @$g;
852 }
853
854 # Now for each unmarked node in the graph, initialize an array
855 # for possible group memberships. We will use this later to
856 # resolve potential conflicts.
857 map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
858 foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
859 my $gst = wit_stringify( $g ); # This is the group name
860 # Copy the graph, and delete all non-members from the new graph.
861 my $part = $graph->copy;
862 my @group_roots;
863 $part->delete_vertices(
864 grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
865
866 # Now look to see if our group is connected.
867 if( @$g > 1 ) {
868 # We have to take directionality into account.
869 # How many root nodes do we have?
870 my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
871 $part->predecessorless_vertices;
872 # Assuming that @$g > 1, find the first root node that has at
873 # least one successor belonging to our group. If this reading
874 # is genealogical, there should be only one, but we will check
875 # that implicitly later.
876 foreach my $root ( @roots ) {
877 # Prune the tree to get rid of extraneous hypotheticals.
94654e27 878 $root = _prune_subtree_old( $part, $root, $contig );
e59b8faa 879 next unless $root;
880 # Save this root for our group.
881 push( @group_roots, $root );
882 # Get all the successor nodes of our root.
883 }
884 } else {
885 # Dispense with the trivial case of one reading.
886 my $wit = $g->[0];
887 @group_roots = ( $wit );
888 foreach my $v ( $part->vertices ) {
889 $part->delete_vertex( $v ) unless $v eq $wit;
890 }
891 }
892
893 if( @group_roots > 1 ) {
894 $conflict->{$gst} = 1;
895 $is_conflicted = 1;
896 }
897 # Paint the 'hypotheticals' with our group.
898 foreach my $wit ( $part->vertices ) {
899 if( ref( $contig->{$wit} ) ) {
900 push( @{$contig->{$wit}}, $gst );
901 } elsif( $contig->{$wit} ne $gst ) {
902 warn "How did we get here?";
903 }
904 }
905
906
907 # Save the relevant subgraph.
908 $subgraph->{$gst} = $part;
909 }
910
911 # For each of our hypothetical readings, flatten its 'contig' array if
912 # the array contains zero or one group. If we have any unflattened arrays,
913 # we may need to run the resolution process. If the reading is already known
914 # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
915 # it.
916 my @resolve;
917 foreach my $wit ( keys %$contig ) {
918 next unless ref( $contig->{$wit} );
919 if( @{$contig->{$wit}} > 1 ) {
920 if( $is_conflicted ) {
921 $contig->{$wit} = ''; # We aren't going to decide.
922 } else {
923 push( @resolve, $wit );
924 }
925 } else {
926 my $gst = pop @{$contig->{$wit}};
927 $contig->{$wit} = $gst || '';
928 }
929 }
930
931 if( @resolve ) {
932 my $still_contig = {};
933 foreach my $h ( @resolve ) {
934 # For each of the hypothetical readings with more than one possibility,
935 # try deleting it from each of its member subgraphs in turn, and see
936 # if that breaks the contiguous grouping.
937 # TODO This can still break in a corner case where group A can use
938 # either vertex 1 or 2, and group B can use either vertex 2 or 1.
939 # Revisit this if necessary; it could get brute-force nasty.
940 foreach my $gst ( @{$contig->{$h}} ) {
941 my $gpart = $subgraph->{$gst}->copy();
942 # If we have come this far, there is only one root and everything
943 # is reachable from it.
944 my( $root ) = $gpart->predecessorless_vertices;
945 my $reachable = {};
946 map { $reachable->{$_} = 1 } $gpart->vertices;
947
948 # Try deleting the hypothetical node.
949 $gpart->delete_vertex( $h );
950 if( $h eq $root ) {
951 # See if we still have a single root.
952 my @roots = $gpart->predecessorless_vertices;
953 warn "This shouldn't have happened" unless @roots;
954 if( @roots > 1 ) {
955 # $h is needed by this group.
956 if( exists( $still_contig->{$h} ) ) {
957 # Conflict!
958 $conflict->{$gst} = 1;
959 $still_contig->{$h} = '';
960 } else {
961 $still_contig->{$h} = $gst;
962 }
963 }
964 } else {
965 # $h is somewhere in the middle. See if everything
966 # else can still be reached from the root.
967 my %still_reachable = ( $root => 1 );
968 map { $still_reachable{$_} = 1 }
969 $gpart->all_successors( $root );
970 foreach my $v ( keys %$reachable ) {
971 next if $v eq $h;
972 if( !$still_reachable{$v}
973 && ( $contig->{$v} eq $gst
974 || ( exists $still_contig->{$v}
975 && $still_contig->{$v} eq $gst ) ) ) {
976 # We need $h.
977 if( exists $still_contig->{$h} ) {
978 # Conflict!
979 $conflict->{$gst} = 1;
980 $still_contig->{$h} = '';
981 } else {
982 $still_contig->{$h} = $gst;
983 }
984 last;
985 } # else we don't need $h in this group.
986 } # end foreach $v
987 } # endif $h eq $root
988 } # end foreach $gst
989 } # end foreach $h
990
991 # Now we have some hypothetical vertices in $still_contig that are the
992 # "real" group memberships. Replace these in $contig.
993 foreach my $v ( keys %$contig ) {
994 next unless ref $contig->{$v};
995 $contig->{$v} = $still_contig->{$v};
996 }
997 } # end if @resolve
998
999 my $is_genealogical = keys %$conflict ? JSON::false : JSON::true;
1000 my $variant_row = [ [], $is_genealogical ];
1001 # Fill in the groupings from $contig.
1002 foreach my $g ( @$groups ) {
1003 my $gst = wit_stringify( $g );
1004 my @realgroup = grep { $contig->{$_} eq $gst } keys %$contig;
1005 push( @{$variant_row->[0]}, \@realgroup );
1006 }
1007 return $variant_row;
1008}
fae07016 1009
6d25a3a0 1010sub _prune_group {
335a62ef 1011 my( $group, $stemma, $graph ) = @_;
94654e27 1012 my $lacunose = {};
1013 map { $lacunose->{$_} = 1 } $stemma->hypotheticals;
1014 map { $lacunose->{$_} = 0 } @$group;
6d25a3a0 1015 # Make our subgraph
335a62ef 1016 my $subgraph = $graph->copy;
94654e27 1017 map { $subgraph->delete_vertex( $_ ) unless exists $lacunose->{$_} }
6d25a3a0 1018 $subgraph->vertices;
1019 # ...and find the root.
6d25a3a0 1020 # Now prune and return the remaining vertices.
94654e27 1021 _prune_subtree( $subgraph, $lacunose );
6d25a3a0 1022 return $subgraph->vertices;
1023}
1024
7f52eac8 1025sub _prune_subtree {
94654e27 1026 my( $tree, $lacunose ) = @_;
1027
1028 # Delete lacunose witnesses that have no successors
1029 my @orphan_hypotheticals;
1030 my $ctr = 0;
1031 do {
1032 die "Infinite loop on leaves" if $ctr > 100;
1033 @orphan_hypotheticals = grep { $lacunose->{$_} }
1034 $tree->successorless_vertices;
1035 $tree->delete_vertices( @orphan_hypotheticals );
1036 $ctr++;
1037 } while( @orphan_hypotheticals );
1038
1039 # Delete lacunose roots that have a single successor
1040 my @redundant_root;
1041 $ctr = 0;
1042 do {
1043 die "Infinite loop on roots" if $ctr > 100;
1044 @redundant_root = grep { $lacunose->{$_} && $tree->successors( $_ ) == 1 }
1045 $tree->predecessorless_vertices;
1046 $tree->delete_vertices( @redundant_root );
1047 $ctr++;
1048 } while( @redundant_root );
1049}
1050
1051sub _prune_subtree_old {
231d71fc 1052 my( $tree, $root, $contighash ) = @_;
1053 # First, delete hypothetical leaves / orphans until there are none left.
1054 my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
1055 $tree->successorless_vertices;
1056 while( @orphan_hypotheticals ) {
1057 $tree->delete_vertices( @orphan_hypotheticals );
1058 @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
1059 $tree->successorless_vertices;
1060 }
1061 # Then delete a hypothetical root with only one successor, moving the
bebec0e9 1062 # root to the first child that has no other predecessors.
231d71fc 1063 while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
1064 my @nextroot = $tree->successors( $root );
1065 $tree->delete_vertex( $root );
bebec0e9 1066 ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
231d71fc 1067 }
1068 # The tree has been modified in place, but we need to know the new root.
bebec0e9 1069 $root = undef unless $root && $tree->has_vertex( $root );
231d71fc 1070 return $root;
1071}
d71100ed 1072# Add the variant, subject to a.c. representation logic.
1073# This assumes that we will see the 'main' version before the a.c. version.
1074sub add_variant_wit {
1075 my( $arr, $wit, $acstr ) = @_;
1076 my $skip;
1077 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
1078 my $real = $1;
1079 $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
1080 }
1081 push( @$arr, $wit ) unless $skip;
1082}
1083
5be0cdeb 1084sub _useful_variant {
1085 my( $group_readings, $graph, $acstr ) = @_;
1086
1087 # TODO Decide what to do with AC witnesses
1088
1089 # Sort by group size and return
1090 my $is_useful = 0;
1091 my( @readings, @groups ); # The sorted groups for our answer.
1092 foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
1093 keys %$group_readings ) {
1094 push( @readings, $rdg );
1095 push( @groups, $group_readings->{$rdg} );
1096 if( @{$group_readings->{$rdg}} > 1 ) {
1097 $is_useful++;
1098 } else {
1099 my( $wit ) = @{$group_readings->{$rdg}};
1100 $wit =~ s/^(.*)\Q$acstr\E$/$1/;
1101 $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
1102 }
1103 }
1104 if( $is_useful > 1 ) {
1105 return( \@readings, \@groups );
1106 } else {
1107 return( [], [] );
1108 }
1109}
1110
7f52eac8 1111=head2 wit_stringify( $groups )
1112
1113Takes an array of witness groupings and produces a string like
1114['A','B'] / ['C','D','E'] / ['F']
d71100ed 1115
7f52eac8 1116=cut
d71100ed 1117
1118sub wit_stringify {
1119 my $groups = shift;
1120 my @gst;
1121 # If we were passed an array of witnesses instead of an array of
1122 # groupings, then "group" the witnesses first.
1123 unless( ref( $groups->[0] ) ) {
1124 my $mkgrp = [ $groups ];
1125 $groups = $mkgrp;
1126 }
1127 foreach my $g ( @$groups ) {
1128 push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
1129 }
1130 return join( ' / ', @gst );
1131}
7f52eac8 1132
bebec0e9 1133sub _symmdiff {
1134 my( $lista, $listb ) = @_;
7f52eac8 1135 my %union;
1136 my %scalars;
1137 map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
1138 map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
bebec0e9 1139 my @set = grep { $union{$_} == 1 } keys %union;
7f52eac8 1140 return map { $scalars{$_} } @set;
1141}
1142
11431;
1144
1145=head1 LICENSE
1146
1147This package is free software and is provided "as is" without express
1148or implied warranty. You can redistribute it and/or modify it under
1149the same terms as Perl itself.
1150
1151=head1 AUTHOR
1152
1153Tara L Andrews E<lt>aurum@cpan.orgE<gt>