9 use Text::Tradition::Analysis qw/ run_analysis /;
10 use Text::Tradition::Directory;
13 binmode STDOUT, ':utf8';
14 binmode STDERR, ':utf8';
16 my( $dsn, $dbuser, $dbpass );
17 my $filename = 'analysis.csv';
20 'u|user=s' => \$dbuser,
21 'p|pass=s' => \$dbpass,
22 'f|file=s' => \$filename
25 my %dbopts = ( dsn => $dsn );
26 if( $dbuser || $dbpass ) {
27 $dbopts{extra_args} = { user => $dbuser, password => $dbpass }
30 my $dir = Text::Tradition::Directory->new( %dbopts );
31 my $scope = $dir->new_scope();
32 my $lookfor = shift @ARGV || '';
35 say "Merging relationship types @ARGV";
36 map { $collapse{$_} = 1 } @ARGV;
39 ## Set up the relationship types we will exclude in turn. False means "run
40 ## analysis with basic set of exclusions", i.e. orth/spelling/punct, and exclude
41 ## the variants in question later. True means "explicitly exclude this type too
43 my %relation_types = (
53 # Set up the things we want to calculate for each text
54 my @calcs = qw/ total genealogical excoincidental reverted exgenealogical coincidental /;
55 my @resultfields = ( 'text_name' );
56 foreach my $rt ( keys %relation_types ) {
57 foreach my $cc ( @calcs ) {
58 push( @resultfields, sprintf( "%s_ex_%s", $cc, $rt ) );
62 my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
63 open my $fh, ">:encoding(UTF-8)", $filename or die "$filename: $!";
64 if( $csv->combine( @resultfields ) ) {
67 say "combine() failed on argument: " . $csv->error_input;
70 foreach my $tinfo( $dir->traditionlist ) {
71 next if $tinfo->{'name'} eq 'xxxxx';
72 next if $tinfo->{'name'} =~ /158/;
73 next if $tinfo->{'name'} =~ /Heinrichi part/;
75 next unless $tinfo->{'id'} eq $lookfor
76 || $tinfo->{'name'} =~ /$lookfor/;
78 my $tradition = $dir->lookup( $tinfo->{'id'} );
79 next unless $tradition->stemma_count;
80 say "Analyzing tradition " . $tradition->name;
83 if( $tradition->name =~ /Chronicle/ ) {
84 $MAXRANK = $tradition->collation->reading('L1545')->rank;
87 # Initialize everything with zeroes
88 map { $datahash{$_} = 0 } @resultfields;
89 # Put in the real text ID and name
90 $datahash{text_id} = $tinfo->{'id'};
91 $datahash{text_name} = $tradition->name;
93 # Run the analysis for each row in @rows
94 my $vanilla; # Store the run with no extra exclusions
96 foreach my $rtype ( keys %relation_types ) {
97 say "...calculating on exclusion of $rtype";
98 if( $relation_types{$rtype} ) {
99 $result = run_exclude( $tradition, $rtype );
100 } elsif( !$vanilla ) {
101 $result = run_exclude( $tradition );
107 # Get the totals by location and by variant as we go.
108 my $totalvariant = 0;
111 my $conflictvariant = 0;
112 my $revertvariant = 0;
113 my $msgd; # for the HACK
115 foreach my $loc ( @{$result->{variants}} ) {
116 # A transition is the relationship type between parent and child.
117 # Find each genealogical transition
118 # Find each non-genealogical transition
119 if( exists $loc->{unsolved} ) {
120 # Not solved; remove it from the total.
121 say "Skipping unsolved location at " . $loc->{id};
123 } elsif( $MAXRANK && $loc->{id} > $MAXRANK ) {
124 # HACK until Chronicle tagging is done
125 say "Skipping ranks above $MAXRANK"
130 foreach my $rdghash( @{$loc->{readings}} ) {
131 # Weed out singletons
133 my @roots = @{$rdghash->{independent_occurrence}};
134 if( @roots == 1 && !$rdghash->{'followed'} && !$rdghash->{'not_followed'}
135 && !$rdghash->{'follow_unknown'} ) {
140 if( $rdghash->{'is_conflict'} ) {
142 } elsif( $rdghash->{'is_reverted'} ) {
144 } elsif( @roots == 1 ) {
145 $type = 'genealogical';
147 warn 'Reading ' . $rdghash->{readingid} . ' neither conflict, genealogical, nor reverted. What?';
150 # Get the relationship type stats for reading parents.
151 my $rdg = $tradition->collation->reading( $rdghash->{readingid} );
153 my $phash = $type eq 'reverted'
154 ? $rdghash->{'reversion_parents'} : $rdghash->{'source_parents'};
155 my $rel = _get_reading_relations( $rdghash->{'readingid'}, $loc->{'id'},
156 $rdg, $type, $phash, \%datahash, \@unknown );
157 # If this is one of our exclusions, take it out of the total.
158 if( $rel eq $rtype ) {
162 # Otherwise add the variant type to our count.
163 $conflictvariant++ if $type eq 'conflict';
164 $revertvariant++ if $type eq 'reverted';
165 $genvariant++ if $type eq 'genealogical';
169 # Add in the sums for the whole location
170 $datahash{"total_$rtype"} = $totalvariant - $singleton;
171 $datahash{"genealogical_ex_$rtype"} = $genvariant;
172 $datahash{"reverted_ex_$rtype"} = $revertvariant;
173 $datahash{"coincidental_ex_$rtype"} = $conflictvariant;
174 $datahash{"excoincidental_ex_$rtype"} = $genvariant + $revertvariant;
175 $datahash{"exgenealogical_ex_type"} = $conflictvariant + $revertvariant;
178 # Write them out to CSV.
179 my @csvalues = map { $datahash{$_} } @resultfields;
180 if( $csv->combine( @csvalues ) ) {
181 say $fh $csv->string;
183 say "combine() failed on argument: " . $csv->error_input;
189 sub _get_reading_relations {
190 my( $rid, $rank, $robj, $type, $parenthash, $datahash, $unknown ) = @_;
191 my @kp = keys ( %$parenthash );
193 _increment_typekey( $datahash, $type, 'source' );
197 $datahash->{multiparent} = @kp - 1;
199 foreach my $p ( @kp ) {
200 my $pdata = $parenthash->{$p};
202 if( $pdata->{relation} ) {
203 $relation = $pdata->{relation}->{transposed}
204 ? 'transposition' : $pdata->{relation}->{type};
206 $relation = 'unknown';
208 say "Unknown relation on missing reading object $rid at rank $rank";
210 say "Unknown relation on missing parent object for $rid at rank $rank";
212 push( @$unknown, [ $pdata->{label}, $robj->id, $robj->text, $type ] );
220 my( $tradition, $type ) = @_;
221 my $merge = [ qw/ orthographic spelling punctuation / ];
222 if( $type && $relation_types{$type} ) {
223 push( @$merge, $type );
228 $result = run_analysis( $tradition, exclude_type1 => 1,
229 merge_types => $merge );
231 say "Analysis run failed on tradition " . $tradition->name . ": @_";