fix some subgraph rendering issues
[scpubgit/stemmatology.git] / script / statistics.pl
CommitLineData
1d73ecad 1#!/usr/bin/env perl
2
3use strict;
4use warnings;
5use lib 'lib';
6use feature 'say';
7use Text::Tradition::Directory;
8use Text::Tradition::Analysis qw/ run_analysis /;
9
10binmode STDOUT, ':utf8';
11binmode STDERR, ':utf8';
12eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
13
14my $args;
15my $db = 'SQLite';
16if( $ARGV[0] && $ARGV[0] eq 'mysql' ) {
17 $db = shift @ARGV;
18}
19if( $db eq 'mysql' ) {
20 $args = { 'dsn' => 'dbi:mysql:dbname=stemmaweb',
21 'extra_args' => { 'user' => 'stemmaweb', 'password' => 'l@chmann' } };
22} else {
23 $args = { 'dsn' => 'dbi:SQLite:dbname=stemmaweb/db/traditions.db' };
24}
25# the rest of @ARGV is tradition names
26
27my $dir = Text::Tradition::Directory->new( $args );
28my @traditions;
29my @tlist = $dir->traditionlist;
30if( @ARGV ) {
31 # Get only the traditions named.
32 foreach my $tid ( @tlist ) {
33 push( @traditions, $tid->{'id'} )
34 if grep { $tid->{'name'} =~ /\Q$_\E/ } @ARGV;
35 }
36} else {
37 @traditions = map { $_->{'id'} } @tlist;
38}
39
40# Initialize our stats collection
41my $omit = '(omitted)';
42
43# Run the analysis of each tradition
44# Look through the results
45foreach my $tid ( @traditions ) {
46 my $scope = $dir->new_scope();
47 my $tradition = $dir->lookup( $tid );
48 printf( "\n**** TRADITION %s ****\n", $tradition->name );
49 my $c = $tradition->collation;
50 my $results = run_analysis( $tradition );
51 my %stats;
52 my %rels_found;
53 foreach my $row ( @{$results->{'variants'}} ) {
54 # say sprintf( "=== Looking at rank %d (%s) ===", $row->{'id'},
55 # $row->{'genealogical'} ? 'genealogical' : 'not genealogical' );
56 my $rdgdir = {};
57 map { $rdgdir->{$_->{'readingid'}} = $_ } @{$row->{'readings'}};
58 # Look for reading parents and the relationships between them.
59 my %seen_rel;
60 foreach my $rdg ( keys %$rdgdir ) {
61 my $rhash = $rdgdir->{$rdg};
62 my $parents = $rhash->{'reading_parents'};
63 if( $parents && @$parents ) {
64 say sprintf( " - reading %s ( %s ) has %d possible origins",
65 $rdg, $rhash->{'text'}, scalar @$parents )
66 unless @$parents == 1;
67 foreach my $p ( @$parents ) {
68 # Is there a relationship here?
69 my $rel = $c->get_relationship( $rdg, $p );
70 my $type;
71 if( $rel ) {
72 # Yes there is - get its type and figure stuff out.
73 $type = $rel->type;
74 } elsif( $rdg eq $omit ) {
75 $type = 'deletion';
76 } elsif( $p eq $omit ) {
77 $type = 'addition';
78 } # TODO need to manage transposition
79 if( $type ) {
80 # Note that there was an instability of this type
81 $seen_rel{$type} = 1;
82 $stats{$type} = {} unless exists $stats{$type};
83 # Calculate, in this spot, how often the form shifted
84 # vs. how often it stayed the same.
85 # Add the number of times this form appeared
86 add_to_hash( $stats{$type}, 'shifts',
87 $rhash->{'independent_occurrence'} );
88 # Add the number of times this form was followed
89 add_to_hash( $stats{$type}, 'follows', $rhash->{'followed'} );
90 # TODO work out whether not_followed gets included after iteration
91 }
92 } # foreach parent
93
94 } # if parents
95 foreach my $k ( keys %seen_rel ) {
96 add_to_hash( \%rels_found, $k, 1 );
97 }
98 } # foreach rdg
99 }
100 # Print out the stats
101 # First see how stable the text was
102 my $total = $c->end->rank - 1;
103 say sprintf( "Total locations %d, total variant locations %d",
104 $total, $results->{'variant_count'} );
105 say $results->{'genealogical_count'}
106 . " variant locations entirely followed the stemma";
107 say $results->{'conflict_count'}
108 . " variant readings conflicted with the stemma";
109 foreach my $k ( keys %rels_found ) {
110 my $shifts = $rels_found{$k};
111 say "Had $shifts total $k-type shifts in $total locations";
112 }
113 foreach my $k ( keys %stats ) {
114 say "\tType $k:";
115 say sprintf( "\tUnstable readings shifted %d times, and were followed %d times",
116 $stats{$k}->{'shifts'}, $stats{$k}->{'follows'} );
117 }
118}
119
120sub add_to_hash {
121 my( $hash, $key, $num ) = @_;
122 unless( exists $hash->{$key} ) {
123 $hash->{$key} = 0;
124 }
125 $hash->{$key} += $num;
126}