binmode STDERR, ':utf8';
my( $dsn, $dbuser, $dbpass );
-my $filename = 'analysis.csv';
+my $filename = 'exclusions.csv';
GetOptions(
'dsn=s' => \$dsn,
'u|user=s' => \$dbuser,
## analysis with basic set of exclusions", i.e. orth/spelling/punct, and exclude
## the variants in question later. True means "explicitly exclude this type too
## at analysis time."
-my %relation_types = (
- sameword => undef,
- grammatical => 1,
- lexical => 1,
- uncertain => 1,
- other => 1,
- addition => undef,
- deletion => undef
-);
+my @relation_types = qw/ sameword grammatical lexical uncertain other
+ addition deletion transposition /;
# Set up the things we want to calculate for each text
my @calcs = qw/ total genealogical excoincidental reverted exgenealogical coincidental /;
my @resultfields = ( 'text_name' );
-foreach my $rt ( keys %relation_types ) {
+foreach my $rt ( @relation_types ) {
foreach my $cc ( @calcs ) {
push( @resultfields, sprintf( "%s_ex_%s", $cc, $rt ) );
}
$datahash{text_id} = $tinfo->{'id'};
$datahash{text_name} = $tradition->name;
- # Run the analysis for each row in @rows
- my $vanilla; # Store the run with no extra exclusions
my $result;
- foreach my $rtype ( keys %relation_types ) {
+ try {
+ $result = run_analysis( $tradition, exclude_type1 => 1,
+ merge_types => [ qw/ orthographic spelling punctuation / ] );
+ } catch {
+ say "Analysis run failed on tradition " . $tradition->name . ": @_";
+ return;
+ }
+ foreach my $rtype ( @relation_types ) {
say "...calculating on exclusion of $rtype";
- if( $relation_types{$rtype} ) {
- $result = run_exclude( $tradition, $rtype );
- } elsif( !$vanilla ) {
- $result = run_exclude( $tradition );
- $vanilla = $result;
- } else {
- $result = $vanilla;
- }
# Get the totals by location and by variant as we go.
my $totalvariant = 0;
my $phash = $type eq 'reverted'
? $rdghash->{'reversion_parents'} : $rdghash->{'source_parents'};
- my $rel = _get_reading_relations( $rdghash->{'readingid'}, $loc->{'id'},
+ my @rels = _get_reading_relations( $rdghash->{'readingid'}, $loc->{'id'},
$rdg, $type, $phash, \%datahash, \@unknown );
# If this is one of our exclusions, take it out of the total.
- if( $rel eq $rtype ) {
- $totalvariant--;
- next;
- }
+ foreach my $rel ( @rels ) {
+ $DB::single = 1 unless $rel;
+ if( $rel eq $rtype ) {
+ $totalvariant--;
+ next;
+ } else {
# Otherwise add the variant type to our count.
- $conflictvariant++ if $type eq 'conflict';
- $revertvariant++ if $type eq 'reverted';
- $genvariant++ if $type eq 'genealogical';
+ $conflictvariant++ if $type eq 'conflict';
+ $revertvariant++ if $type eq 'reverted';
+ $genvariant++ if $type eq 'genealogical';
+ }
+ }
}
}
# Add in the sums for the whole location
- $datahash{"total_$rtype"} = $totalvariant - $singleton;
+ $datahash{"total_ex_$rtype"} = $totalvariant - $singleton;
$datahash{"genealogical_ex_$rtype"} = $genvariant;
$datahash{"reverted_ex_$rtype"} = $revertvariant;
$datahash{"coincidental_ex_$rtype"} = $conflictvariant;
$datahash{"excoincidental_ex_$rtype"} = $genvariant + $revertvariant;
- $datahash{"exgenealogical_ex_type"} = $conflictvariant + $revertvariant;
+ $datahash{"exgenealogical_ex_$rtype"} = $conflictvariant + $revertvariant;
}
# Write them out to CSV.
sub _get_reading_relations {
my( $rid, $rank, $robj, $type, $parenthash, $datahash, $unknown ) = @_;
my @kp = keys ( %$parenthash );
- unless( @kp ) {
- _increment_typekey( $datahash, $type, 'source' );
- return;
- }
- if( @kp > 1 ) {
- $datahash->{multiparent} = @kp - 1;
- }
+ my @rels;
foreach my $p ( @kp ) {
my $pdata = $parenthash->{$p};
my $relation;
push( @$unknown, [ $pdata->{label}, $robj->id, $robj->text, $type ] );
}
}
- return $relation;
+ push( @rels, $relation );
}
+ return @rels;
}
-sub run_exclude {
- my( $tradition, $type ) = @_;
- my $merge = [ qw/ orthographic spelling punctuation / ];
- if( $type && $relation_types{$type} ) {
- push( @$merge, $type );
- }
-
- my $result;
- try {
- $result = run_analysis( $tradition, exclude_type1 => 1,
- merge_types => $merge );
- } catch {
- say "Analysis run failed on tradition " . $tradition->name . ": @_";
- return;
- }
- return $result;
-}