Commit | Line | Data |
1d73ecad |
1 | #!/usr/bin/env perl |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use lib 'lib'; |
6 | use feature 'say'; |
7 | use Text::Tradition::Directory; |
8 | use Text::Tradition::Analysis qw/ run_analysis /; |
9 | |
10 | binmode STDOUT, ':utf8'; |
11 | binmode STDERR, ':utf8'; |
12 | eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 }; |
13 | |
14 | my $args; |
15 | my $db = 'SQLite'; |
16 | if( $ARGV[0] && $ARGV[0] eq 'mysql' ) { |
17 | $db = shift @ARGV; |
18 | } |
19 | if( $db eq 'mysql' ) { |
20 | $args = { 'dsn' => 'dbi:mysql:dbname=stemmaweb', |
21 | 'extra_args' => { 'user' => 'stemmaweb', 'password' => 'l@chmann' } }; |
22 | } else { |
23 | $args = { 'dsn' => 'dbi:SQLite:dbname=stemmaweb/db/traditions.db' }; |
24 | } |
25 | # the rest of @ARGV is tradition names |
26 | |
27 | my $dir = Text::Tradition::Directory->new( $args ); |
28 | my @traditions; |
29 | my @tlist = $dir->traditionlist; |
30 | if( @ARGV ) { |
31 | # Get only the traditions named. |
32 | foreach my $tid ( @tlist ) { |
33 | push( @traditions, $tid->{'id'} ) |
34 | if grep { $tid->{'name'} =~ /\Q$_\E/ } @ARGV; |
35 | } |
36 | } else { |
37 | @traditions = map { $_->{'id'} } @tlist; |
38 | } |
39 | |
40 | # Initialize our stats collection |
41 | my $omit = '(omitted)'; |
42 | |
43 | # Run the analysis of each tradition |
44 | # Look through the results |
45 | foreach my $tid ( @traditions ) { |
46 | my $scope = $dir->new_scope(); |
47 | my $tradition = $dir->lookup( $tid ); |
48 | printf( "\n**** TRADITION %s ****\n", $tradition->name ); |
49 | my $c = $tradition->collation; |
50 | my $results = run_analysis( $tradition ); |
51 | my %stats; |
52 | my %rels_found; |
53 | foreach my $row ( @{$results->{'variants'}} ) { |
54 | # say sprintf( "=== Looking at rank %d (%s) ===", $row->{'id'}, |
55 | # $row->{'genealogical'} ? 'genealogical' : 'not genealogical' ); |
56 | my $rdgdir = {}; |
57 | map { $rdgdir->{$_->{'readingid'}} = $_ } @{$row->{'readings'}}; |
58 | # Look for reading parents and the relationships between them. |
59 | my %seen_rel; |
60 | foreach my $rdg ( keys %$rdgdir ) { |
61 | my $rhash = $rdgdir->{$rdg}; |
62 | my $parents = $rhash->{'reading_parents'}; |
63 | if( $parents && @$parents ) { |
64 | say sprintf( " - reading %s ( %s ) has %d possible origins", |
65 | $rdg, $rhash->{'text'}, scalar @$parents ) |
66 | unless @$parents == 1; |
67 | foreach my $p ( @$parents ) { |
68 | # Is there a relationship here? |
69 | my $rel = $c->get_relationship( $rdg, $p ); |
70 | my $type; |
71 | if( $rel ) { |
72 | # Yes there is - get its type and figure stuff out. |
73 | $type = $rel->type; |
74 | } elsif( $rdg eq $omit ) { |
75 | $type = 'deletion'; |
76 | } elsif( $p eq $omit ) { |
77 | $type = 'addition'; |
78 | } # TODO need to manage transposition |
79 | if( $type ) { |
80 | # Note that there was an instability of this type |
81 | $seen_rel{$type} = 1; |
82 | $stats{$type} = {} unless exists $stats{$type}; |
83 | # Calculate, in this spot, how often the form shifted |
84 | # vs. how often it stayed the same. |
85 | # Add the number of times this form appeared |
86 | add_to_hash( $stats{$type}, 'shifts', |
87 | $rhash->{'independent_occurrence'} ); |
88 | # Add the number of times this form was followed |
89 | add_to_hash( $stats{$type}, 'follows', $rhash->{'followed'} ); |
90 | # TODO work out whether not_followed gets included after iteration |
91 | } |
92 | } # foreach parent |
93 | |
94 | } # if parents |
95 | foreach my $k ( keys %seen_rel ) { |
96 | add_to_hash( \%rels_found, $k, 1 ); |
97 | } |
98 | } # foreach rdg |
99 | } |
100 | # Print out the stats |
101 | # First see how stable the text was |
102 | my $total = $c->end->rank - 1; |
103 | say sprintf( "Total locations %d, total variant locations %d", |
104 | $total, $results->{'variant_count'} ); |
105 | say $results->{'genealogical_count'} |
106 | . " variant locations entirely followed the stemma"; |
107 | say $results->{'conflict_count'} |
108 | . " variant readings conflicted with the stemma"; |
109 | foreach my $k ( keys %rels_found ) { |
110 | my $shifts = $rels_found{$k}; |
111 | say "Had $shifts total $k-type shifts in $total locations"; |
112 | } |
113 | foreach my $k ( keys %stats ) { |
114 | say "\tType $k:"; |
115 | say sprintf( "\tUnstable readings shifted %d times, and were followed %d times", |
116 | $stats{$k}->{'shifts'}, $stats{$k}->{'follows'} ); |
117 | } |
118 | } |
119 | |
120 | sub add_to_hash { |
121 | my( $hash, $key, $num ) = @_; |
122 | unless( exists $hash->{$key} ) { |
123 | $hash->{$key} = 0; |
124 | } |
125 | $hash->{$key} += $num; |
126 | } |