working fuller analysis plus tests
[scpubgit/stemmatology.git] / lib / Text / Tradition / Analysis.pm
CommitLineData
d71100ed 1package Text::Tradition::Analysis;
2
3use strict;
4use warnings;
e4386ba9 5use Benchmark;
d1348d38 6use Exporter 'import';
d71100ed 7use Text::Tradition;
8use Text::Tradition::Stemma;
9
d1348d38 10use vars qw/ @EXPORT_OK /;
a2cf85dd 11@EXPORT_OK = qw/ run_analysis group_variants analyze_variant_location wit_stringify /;
d1348d38 12
7f52eac8 13=head1 NAME
14
15Text::Tradition::Analysis - functions for stemma analysis of a tradition
16
17=head1 SYNOPSIS
18
19 use Text::Tradition;
20 use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
21 my $t = Text::Tradition->new(
22 'name' => 'this is a text',
23 'input' => 'TEI',
24 'file' => '/path/to/tei_parallel_seg_file.xml' );
25 $t->add_stemma( 'dotfile' => $stemmafile );
26
27 my $variant_data = run_analysis( $tradition );
28 # Recalculate rank $n treating all orthographic variants as equivalent
29 my $reanalyze = analyze_variant_location( $tradition, $n, 0, 'orthographic' );
30
31=head1 DESCRIPTION
32
33Text::Tradition is a library for representation and analysis of collated
34texts, particularly medieval ones. The Collation is the central feature of
35a Tradition, where the text, its sequence of readings, and its relationships
36between readings are actually kept.
37
38=head1 SUBROUTINES
39
40=head2 run_analysis( $tradition, $stemma_id, @merge_relationship_types )
41
42Runs the analysis described in analyze_variant_location on every location
43in the collation of the given tradition, against the stemma specified in
44$stemma_id. If $stemma_id is not specified, it defaults to 0 (referencing
45the first stemma saved for the tradition.)
46
47The optional @merge_relationship_types contains a list of relationship types
48to treat as equivalent for the analysis.
49
50=begin testing
51
52use Text::Tradition;
53use Text::Tradition::Analysis qw/ run_analysis analyze_variant_location /;
54
55my $datafile = 't/data/florilegium_tei_ps.xml';
56my $tradition = Text::Tradition->new( 'input' => 'TEI',
57 'name' => 'test0',
58 'file' => $datafile );
59my $s = $tradition->add_stemma( 'dotfile' => 't/data/florilegium.dot' );
60is( ref( $s ), 'Text::Tradition::Stemma', "Added stemma to tradition" );
61
f00cefe8 62my %expected_genealogical = (
63 1 => '',
64 2 => 1,
65 3 => '',
66 5 => '',
67 7 => '',
68 8 => '',
69 10 => '',
70 13 => 1,
71 33 => '',
72 34 => '',
73 37 => '',
74 60 => '',
75 81 => 1,
76 84 => '',
77 87 => '',
78 101 => '',
79 102 => '',
80 122 => 1,
81 157 => '',
82 166 => 1,
83 169 => 1,
84 200 => 1,
85 216 => 1,
86 217 => 1,
87 219 => 1,
88 241 => 1,
89 242 => 1,
90 243 => 1,
91);
92
7f52eac8 93my $data = run_analysis( $tradition );
f00cefe8 94foreach my $row ( @{$data->{'variants'}} ) {
95 is( $row->{'genealogical'}, $expected_genealogical{$row->{'id'}},
96 "Got correct genealogical flag for row " . $row->{'id'} );
97}
5be0cdeb 98is( $data->{'conflict_count'}, 16, "Got right conflict count" );
99is( $data->{'variant_count'}, 28, "Got right total variant number" );
7f52eac8 100
101=end testing
102
103=cut
104
d71100ed 105sub run_analysis {
7f52eac8 106 my( $tradition, $stemma_id, @collapse ) = @_;
f00cefe8 107 my $c = $tradition->collation;
7f52eac8 108 $stemma_id = 0 unless $stemma_id;
56cf65bd 109
7f52eac8 110 # Run the variant analysis on every rank in the graph that doesn't
111 # have a common reading. Return the results.
112 my @variants; # holds results from analyze_variant_location
113 my $genealogical; # counter of 'genealogical' variants
114 my $conflicts; # counter of conflicting readings
d71100ed 115
7f52eac8 116 # Find and mark 'common' ranks for exclusion.
117 my %common_rank;
118 foreach my $rdg ( $tradition->collation->common_readings ) {
119 $common_rank{$rdg->rank} = 1;
d71100ed 120 }
7f52eac8 121
122 foreach my $rank ( 1 .. $tradition->collation->end->rank-1 ) {
123 next if $common_rank{$rank};
124 my $variant_row = analyze_variant_location(
125 $tradition, $rank, $stemma_id, @collapse );
5be0cdeb 126 next unless $variant_row;
f00cefe8 127 # Add the reading text to the readings in variant_row
128 foreach my $rh ( @{$variant_row->{'readings'}} ) {
129 if( $c->reading( $rh->{'readingid'} ) ) {
130 $rh->{'text'} = $c->reading( $rh->{'readingid'} )->text;
131 } else {
132 $rh->{'text'} = $rh->{'readingid'};
133 }
134 }
7f52eac8 135 push( @variants, $variant_row );
136 $genealogical++ if $variant_row->{'genealogical'};
137 $conflicts += grep { $_->{'conflict'} } @{$variant_row->{'readings'}};
d71100ed 138 }
139
f00cefe8 140
7f52eac8 141 return {
142 'variants' => \@variants,
143 'variant_count' => scalar @variants, # TODO redundant
144 'conflict_count' => $conflicts,
145 'genealogical_count' => $genealogical,
146 };
d71100ed 147}
148
7f52eac8 149=head2 group_variants( $tradition, $rank, $lacunose, @merge_relationship_types )
150
151Groups the variants at the given $rank of the collation, treating any
152relationships in @merge_relationship_types as equivalent. $lacunose should
153be a reference to an array, to which the sigla of lacunose witnesses at this
154rank will be appended.
155
156Returns two ordered lists $readings, $groups, where $readings->[$n] is attested
157by the witnesses listed in $groups->[$n].
158
159=cut
160
161# Return group_readings, groups, lacunose
d1348d38 162sub group_variants {
7f52eac8 163 my( $tradition, $rank, $lacunose, $collapse ) = @_;
164 my $c = $tradition->collation;
165 # Get the alignment table readings
166 my %readings_at_rank;
167 my @gap_wits;
168 foreach my $tablewit ( @{$tradition->collation->alignment_table->{'alignment'}} ) {
169 my $rdg = $tablewit->{'tokens'}->[$rank-1];
170 if( $rdg && $rdg->{'t'}->is_lacuna ) {
5be0cdeb 171 _add_to_witlist( $tablewit->{'witness'}, $lacunose,
172 $tradition->collation->ac_label );
7f52eac8 173 } elsif( $rdg ) {
174 $readings_at_rank{$rdg->{'t'}->text} = $rdg->{'t'};
175 } else {
5be0cdeb 176 _add_to_witlist( $tablewit->{'witness'}, \@gap_wits,
177 $tradition->collation->ac_label );
7f52eac8 178 }
179 }
d1348d38 180
7f52eac8 181 # Group the readings, collapsing groups by relationship if needed
182 my %grouped_readings;
183 foreach my $rdg ( sort { $b->witnesses <=> $a->witnesses } values %readings_at_rank ) {
184 # Skip readings that have been collapsed into others.
f00cefe8 185 next if exists $grouped_readings{$rdg->id} && !$grouped_readings{$rdg->id};
7f52eac8 186 my @wits = $rdg->witnesses;
187 if( $collapse ) {
188 my $filter = sub { my $r = $_[0]; grep { $_ eq $r->type } @$collapse; };
189 foreach my $other ( $rdg->related_readings( $filter ) ) {
190 push( @wits, $other->witnesses );
f00cefe8 191 $grouped_readings{$other->id} = 0;
d1348d38 192 }
193 }
f00cefe8 194 $grouped_readings{$rdg->id} = \@wits;
7f52eac8 195 }
196 $grouped_readings{'(omitted)'} = \@gap_wits if @gap_wits;
197 # Get rid of our collapsed readings
198 map { delete $grouped_readings{$_} unless $grouped_readings{$_} }
199 keys %grouped_readings
200 if $collapse;
201
5be0cdeb 202 return \%grouped_readings;
d1348d38 203}
204
7f52eac8 205=head2 analyze_variant_location( $tradition, $rank, $stemma_id, @merge_relationship_types )
f6c8ea08 206
7f52eac8 207Runs an analysis of the given tradition, at the location given in $rank,
208against the graph of the stemma specified in $stemma_id. The argument
209@merge_relationship_types is an optional list of relationship types for
210which readings so related should be treated as equivalent.
f6c8ea08 211
7f52eac8 212Returns a data structure as follows:
213
214 { 'id' => $rank,
215 'genealogical' => boolean,
f00cefe8 216 'readings => [ { readingid => $reading_id,
7f52eac8 217 group => [ witnesses ],
218 conflict => [ conflicting ],
219 missing => [ excluded ] }, ... ]
220 }
221where 'conflicting' is the list of witnesses whose readings conflict with
222this group, and 'excluded' is the list of witnesses either not present in
223the stemma or lacunose at this location.
224
225=cut
732152b1 226
d71100ed 227sub analyze_variant_location {
7f52eac8 228 my( $tradition, $rank, $sid, @collapse ) = @_;
7f52eac8 229 # Get the readings in this tradition at this rank
230 my @rank_rdgs = grep { $_->rank == $rank } $tradition->collation->readings;
231 # Get the applicable stemma
232 my $undirected; # TODO Allow undirected distance tree analysis too
233 my $stemma = $tradition->stemma( $sid );
234 my $graph = $stemma->graph;
235 # Figure out which witnesses we are working with
bebec0e9 236 my @lacunose = $stemma->hypotheticals;
237 push( @lacunose, _symmdiff( [ $stemma->witnesses ],
238 [ map { $_->sigil } $tradition->witnesses ] ) );
7f52eac8 239
240 # Now group the readings
5be0cdeb 241 my( $readings, $groups ) = _useful_variant(
242 group_variants( $tradition, $rank, \@lacunose, \@collapse ),
243 $graph, $tradition->collation->ac_label );
244 return unless scalar @$readings;
7f52eac8 245 my $group_readings = {};
246 # Lookup table group string -> readings
247 foreach my $x ( 0 .. $#$groups ) {
248 $group_readings->{wit_stringify( $groups->[$x] )} = $readings->[$x];
249 }
250
251 # Now do the work.
231d71fc 252 my $contig = {};
253 my $subgraph = {};
c4a4fb1b 254 my $is_conflicted;
d71100ed 255 my $conflict = {};
bebec0e9 256 my %reading_roots;
7f52eac8 257 my $variant_row = { 'id' => $rank, 'readings' => [] };
94a077d6 258 # Mark each ms as in its own group, first.
f00cefe8 259 $DB::single = 1 if $rank == 81;
94a077d6 260 foreach my $g ( @$groups ) {
261 my $gst = wit_stringify( $g );
231d71fc 262 map { $contig->{$_} = $gst } @$g;
94a077d6 263 }
c4a4fb1b 264 # Now for each unmarked node in the graph, initialize an array
265 # for possible group memberships. We will use this later to
266 # resolve potential conflicts.
231d71fc 267 map { $contig->{$_} = [] unless $contig->{$_} } $graph->vertices;
d71100ed 268 foreach my $g ( sort { scalar @$b <=> scalar @$a } @$groups ) {
c4a4fb1b 269 my $gst = wit_stringify( $g ); # This is the group name
08e0fb85 270 # Copy the graph, and delete all non-members from the new graph.
c4a4fb1b 271 my $part = $graph->copy;
bebec0e9 272 my @group_roots;
c4a4fb1b 273 $part->delete_vertices(
231d71fc 274 grep { !ref( $contig->{$_} ) && $contig->{$_} ne $gst } $graph->vertices );
c4a4fb1b 275
276 # Now look to see if our group is connected.
277 if( $undirected ) { # For use with distance trees etc.
278 # Find all vertices reachable from the first (arbitrary) group
bebec0e9 279 # member. If we are genealogical this should include them all.
280 my $reachable = {};
c4a4fb1b 281 map { $reachable->{$_} = 1 } $part->all_reachable( $g->[0] );
282 # TODO This is a terrible way to do distance trees, since all
283 # non-leaf nodes are included in every graph part now. We may
284 # have to go back to SPDP.
285 } else {
286 if( @$g > 1 ) {
c4a4fb1b 287 # We have to take directionality into account.
288 # How many root nodes do we have?
231d71fc 289 my @roots = grep { ref( $contig->{$_} ) || $contig->{$_} eq $gst }
bebec0e9 290 $part->predecessorless_vertices;
c4a4fb1b 291 # Assuming that @$g > 1, find the first root node that has at
292 # least one successor belonging to our group. If this reading
293 # is genealogical, there should be only one, but we will check
294 # that implicitly later.
c4a4fb1b 295 foreach my $root ( @roots ) {
231d71fc 296 # Prune the tree to get rid of extraneous hypotheticals.
7f52eac8 297 $root = _prune_subtree( $part, $root, $contig );
bebec0e9 298 next unless $root;
299 # Save this root for our group.
300 push( @group_roots, $root );
c4a4fb1b 301 # Get all the successor nodes of our root.
c4a4fb1b 302 }
bebec0e9 303 } else {
304 # Dispense with the trivial case of one reading.
f00cefe8 305 my $wit = pop @$g;
306 @group_roots = ( $wit );
307 foreach my $v ( $part->vertices ) {
308 $part->delete_vertex( $v ) unless $v eq $wit;
309 }
bebec0e9 310 }
c4a4fb1b 311 }
312
bebec0e9 313 map { $reading_roots{$_} = 1 } @group_roots;
314 if( @group_roots > 1 ) {
315 $conflict->{$group_readings->{$gst}} = 1;
316 $is_conflicted = 1;
08e0fb85 317 }
bebec0e9 318 # Paint the 'hypotheticals' with our group.
319 foreach my $wit ( $part->vertices ) {
320 if( ref( $contig->{$wit} ) ) {
321 push( @{$contig->{$wit}}, $gst );
322 } elsif( $contig->{$wit} ne $gst ) {
323 warn "How did we get here?";
324 }
325 }
08e0fb85 326
c4a4fb1b 327
bebec0e9 328 # Start to write the reading, and save the group subgraph.
f00cefe8 329 my $reading = { 'readingid' => $group_readings->{$gst},
7f52eac8 330 'missing' => wit_stringify( \@lacunose ),
c4a4fb1b 331 'group' => $gst }; # This will change if we find no conflict
bebec0e9 332 # Save the relevant subgraph.
333 $subgraph->{$gst} = $part;
732152b1 334 push( @{$variant_row->{'readings'}}, $reading );
d71100ed 335 }
c4a4fb1b 336
bebec0e9 337 # For each of our hypothetical readings, flatten its 'contig' array if
338 # the array contains zero or one group. If we have any unflattened arrays,
339 # we may need to run the resolution process. If the reading is already known
340 # to have a conflict, flatten the 'contig' array to nothing; we won't resolve
341 # it.
342 my @resolve;
343 foreach my $wit ( keys %$contig ) {
344 next unless ref( $contig->{$wit} );
345 if( @{$contig->{$wit}} > 1 ) {
346 if( $is_conflicted ) {
347 $contig->{$wit} = ''; # We aren't going to decide.
348 } else {
349 push( @resolve, $wit );
350 }
351 } else {
352 my $gst = pop @{$contig->{$wit}};
353 $contig->{$wit} = $gst || '';
354 }
355 }
356
357 if( @resolve ) {
231d71fc 358 my $still_contig = {};
c4a4fb1b 359 foreach my $h ( @resolve ) {
360 # For each of the hypothetical readings with more than one possibility,
361 # try deleting it from each of its member subgraphs in turn, and see
362 # if that breaks the contiguous grouping.
363 # TODO This can still break in a corner case where group A can use
364 # either vertex 1 or 2, and group B can use either vertex 2 or 1.
365 # Revisit this if necessary; it could get brute-force nasty.
231d71fc 366 foreach my $gst ( @{$contig->{$h}} ) {
bebec0e9 367 my $gpart = $subgraph->{$gst}->copy();
368 # If we have come this far, there is only one root and everything
369 # is reachable from it.
370 my( $root ) = $gpart->predecessorless_vertices;
371 my $reachable = {};
372 map { $reachable->{$_} = 1 } $gpart->vertices;
373
374 # Try deleting the hypothetical node.
c4a4fb1b 375 $gpart->delete_vertex( $h );
bebec0e9 376 if( $h eq $root ) {
377 # See if we still have a single root.
378 my @roots = $gpart->predecessorless_vertices;
379 warn "This shouldn't have happened" unless @roots;
380 if( @roots > 1 ) {
381 # $h is needed by this group.
382 if( exists( $still_contig->{$h} ) ) {
383 # Conflict!
384 $conflict->{$group_readings->{$gst}} = 1;
385 $still_contig->{$h} = '';
386 } else {
387 $still_contig->{$h} = $gst;
388 }
389 }
390 } else {
391 # $h is somewhere in the middle. See if everything
392 # else can still be reached from the root.
393 my %still_reachable = ( $root => 1 );
394 map { $still_reachable{$_} = 1 }
395 $gpart->all_successors( $root );
396 foreach my $v ( keys %$reachable ) {
397 next if $v eq $h;
398 if( !$still_reachable{$v}
399 && ( $contig->{$v} eq $gst
400 || ( exists $still_contig->{$v}
401 && $still_contig->{$v} eq $gst ) ) ) {
402 # We need $h.
403 if( exists $still_contig->{$h} ) {
404 # Conflict!
405 $conflict->{$group_readings->{$gst}} = 1;
406 $still_contig->{$h} = '';
407 } else {
408 $still_contig->{$h} = $gst;
409 }
410 last;
411 } # else we don't need $h in this group.
412 } # end foreach $v
413 } # endif $h eq $root
414 } # end foreach $gst
415 } # end foreach $h
c4a4fb1b 416
bebec0e9 417 # Now we have some hypothetical vertices in $still_contig that are the
418 # "real" group memberships. Replace these in $contig.
419 foreach my $v ( keys %$contig ) {
420 next unless ref $contig->{$v};
421 $contig->{$v} = $still_contig->{$v};
422 }
423 } # end if @resolve
424
425 # Now that we have all the node group memberships, calculate followed/
426 # non-followed/unknown values for each reading. Also figure out the
427 # reading's evident parent(s).
428 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
429 my $gst = $rdghash->{'group'};
430 my $part = $subgraph->{$gst};
431 my @roots = $part->predecessorless_vertices;
432 $rdghash->{'independent_occurrence'} = scalar @roots;
433 $rdghash->{'followed'} = scalar( $part->vertices ) - scalar( @roots );
434 # Find the parent readings, if any, of this reading.
f00cefe8 435 my %rdgparents;
bebec0e9 436 foreach my $wit ( @roots ) {
f00cefe8 437 # Look in the main stemma to find this witness's extant or known-reading
438 # immediate ancestor(s), and look up the reading that each ancestor olds.
439 my @check = $graph->predecessors( $wit );
440 while( @check ) {
441 my @next;
442 foreach my $wparent( @check ) {
443 my $pgroup = $contig->{$wparent};
444 if( $pgroup ) {
445 $rdgparents{$group_readings->{$pgroup}} = 1;
446 } else {
447 push( @next, $graph->predecessors( $wparent ) );
448 }
449 }
450 @check = @next;
451 }
bebec0e9 452 }
f00cefe8 453 $rdghash->{'reading_parents'} = [ keys %rdgparents ];
bebec0e9 454
455 # Find the number of times this reading was altered, and the number of
456 # times we're not sure.
457 my( %nofollow, %unknownfollow );
458 foreach my $wit ( $part->vertices ) {
459 foreach my $wchild ( $graph->successors( $wit ) ) {
460 next if $part->has_vertex( $wchild );
461 if( $reading_roots{$wchild} && $contig->{$wchild} ) {
462 # It definitely changed here.
463 $nofollow{$wchild} = 1;
464 } elsif( !($contig->{$wchild}) ) {
465 # The child is a hypothetical node not definitely in
466 # any group. Answer is unknown.
467 $unknownfollow{$wchild} = 1;
468 } # else it's a non-root node in a known group, and therefore
469 # is presumed to have its reading from its group, not this link.
470 }
471 }
472 $rdghash->{'not_followed'} = keys %nofollow;
473 $rdghash->{'follow_unknown'} = keys %unknownfollow;
c4a4fb1b 474 }
bebec0e9 475
c4a4fb1b 476 # Now write the group and conflict information into the respective rows.
bebec0e9 477 foreach my $rdghash ( @{$variant_row->{'readings'}} ) {
f00cefe8 478 $rdghash->{'conflict'} = $conflict->{$rdghash->{'readingid'}};
bebec0e9 479 my @members = grep { $contig->{$_} eq $rdghash->{'group'} } keys %$contig;
480 $rdghash->{'group'} = wit_stringify( \@members );
c4a4fb1b 481 }
482
08e0fb85 483 $variant_row->{'genealogical'} = !( keys %$conflict );
732152b1 484 return $variant_row;
d71100ed 485}
486
7f52eac8 487sub _prune_subtree {
231d71fc 488 my( $tree, $root, $contighash ) = @_;
489 # First, delete hypothetical leaves / orphans until there are none left.
490 my @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
491 $tree->successorless_vertices;
492 while( @orphan_hypotheticals ) {
493 $tree->delete_vertices( @orphan_hypotheticals );
494 @orphan_hypotheticals = grep { ref( $contighash->{$_} ) }
495 $tree->successorless_vertices;
496 }
497 # Then delete a hypothetical root with only one successor, moving the
bebec0e9 498 # root to the first child that has no other predecessors.
231d71fc 499 while( $tree->successors( $root ) == 1 && ref $contighash->{$root} ) {
500 my @nextroot = $tree->successors( $root );
501 $tree->delete_vertex( $root );
bebec0e9 502 ( $root ) = grep { $tree->is_predecessorless_vertex( $_ ) } @nextroot;
231d71fc 503 }
504 # The tree has been modified in place, but we need to know the new root.
bebec0e9 505 $root = undef unless $root && $tree->has_vertex( $root );
231d71fc 506 return $root;
507}
d71100ed 508# Add the variant, subject to a.c. representation logic.
509# This assumes that we will see the 'main' version before the a.c. version.
510sub add_variant_wit {
511 my( $arr, $wit, $acstr ) = @_;
512 my $skip;
513 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
514 my $real = $1;
515 $skip = grep { $_ =~ /^\Q$real\E$/ } @$arr;
516 }
517 push( @$arr, $wit ) unless $skip;
518}
519
5be0cdeb 520sub _useful_variant {
521 my( $group_readings, $graph, $acstr ) = @_;
522
523 # TODO Decide what to do with AC witnesses
524
525 # Sort by group size and return
526 my $is_useful = 0;
527 my( @readings, @groups ); # The sorted groups for our answer.
528 foreach my $rdg ( sort { @{$group_readings->{$b}} <=> @{$group_readings->{$a}} }
529 keys %$group_readings ) {
530 push( @readings, $rdg );
531 push( @groups, $group_readings->{$rdg} );
532 if( @{$group_readings->{$rdg}} > 1 ) {
533 $is_useful++;
534 } else {
535 my( $wit ) = @{$group_readings->{$rdg}};
536 $wit =~ s/^(.*)\Q$acstr\E$/$1/;
537 $is_useful++ unless( $graph->is_sink_vertex( $wit ) );
538 }
539 }
540 if( $is_useful > 1 ) {
541 return( \@readings, \@groups );
542 } else {
543 return( [], [] );
544 }
545}
546
7f52eac8 547=head2 wit_stringify( $groups )
548
549Takes an array of witness groupings and produces a string like
550['A','B'] / ['C','D','E'] / ['F']
d71100ed 551
7f52eac8 552=cut
d71100ed 553
554sub wit_stringify {
555 my $groups = shift;
556 my @gst;
557 # If we were passed an array of witnesses instead of an array of
558 # groupings, then "group" the witnesses first.
559 unless( ref( $groups->[0] ) ) {
560 my $mkgrp = [ $groups ];
561 $groups = $mkgrp;
562 }
563 foreach my $g ( @$groups ) {
564 push( @gst, '[' . join( ',', map { "'$_'" } @$g ) . ']' );
565 }
566 return join( ' / ', @gst );
567}
7f52eac8 568
5be0cdeb 569# Helper function to ensure that X and X a.c. never appear in the same list.
570sub _add_to_witlist {
571 my( $wit, $list, $acstr ) = @_;
572 my %inlist;
573 my $idx = 0;
574 map { $inlist{$_} = $idx++ } @$list;
575 if( $wit =~ /^(.*)\Q$acstr\E$/ ) {
576 my $acwit = $1;
577 unless( exists $inlist{$acwit} ) {
578 push( @$list, $acwit.$acstr );
579 }
580 } else {
581 if( exists( $inlist{$wit.$acstr} ) ) {
582 # Replace the a.c. version with the main witness
583 my $i = $inlist{$wit.$acstr};
584 $list->[$i] = $wit;
585 } else {
586 push( @$list, $wit );
587 }
588 }
589}
590
bebec0e9 591sub _symmdiff {
592 my( $lista, $listb ) = @_;
7f52eac8 593 my %union;
594 my %scalars;
595 map { $union{$_} = 1; $scalars{$_} = $_ } @$lista;
596 map { $union{$_} += 1; $scalars{$_} = $_ } @$listb;
bebec0e9 597 my @set = grep { $union{$_} == 1 } keys %union;
7f52eac8 598 return map { $scalars{$_} } @set;
599}
600
6011;
602
603=head1 LICENSE
604
605This package is free software and is provided "as is" without express
606or implied warranty. You can redistribute it and/or modify it under
607the same terms as Perl itself.
608
609=head1 AUTHOR
610
611Tara L Andrews E<lt>aurum@cpan.orgE<gt>