Merge branch 'master' of https://github.com/tla/stemmatology
[scpubgit/stemmatology.git] / analysis / script / magnitude.pl
CommitLineData
3bce8f64 1#!/usr/bin/env perl
2
3use feature 'say';
4use lib 'lib';
5use strict;
6use warnings;
7use Getopt::Long;
8use Set::Scalar;
9use Text::CSV_XS;
10use Text::Tradition::Analysis qw/ group_variants /;
11use Text::Tradition::Directory;
12use TryCatch;
13
14binmode STDOUT, ':utf8';
15binmode STDERR, ':utf8';
16
17my( $dsn, $dbuser, $dbpass );
18my $filename = 'magnitude.csv';
19GetOptions(
20 'dsn=s' => \$dsn,
21 'u|user=s' => \$dbuser,
22 'p|pass=s' => \$dbpass,
23 'f|file=s' => \$filename
24);
25
26my %dbopts = ( dsn => $dsn );
27if( $dbuser || $dbpass ) {
28 $dbopts{extra_args} = { user => $dbuser, password => $dbpass }
29}
30
31my $dir = Text::Tradition::Directory->new( %dbopts );
32my $scope = $dir->new_scope();
33my $lookfor = shift @ARGV || '';
34my $collapse = Set::Scalar->new();
35if( @ARGV ) {
36 say "Merging relationship types @ARGV";
37 map { $collapse->insert($_) } @ARGV;
38}
39
40my $csv = Text::CSV_XS->new( { binary => 1, quote_null => 0 } );
41open my $fh, ">:encoding(UTF-8)", $filename or die "$filename: $!";
42
43foreach my $tinfo( $dir->traditionlist ) {
44 next if $tinfo->{'name'} eq 'xxxxx';
45 next if $tinfo->{'name'} =~ /158/;
46 next if $tinfo->{'name'} =~ /Heinrichi part/;
47 if( $lookfor ) {
48 next unless $tinfo->{'id'} eq $lookfor
49 || $tinfo->{'name'} =~ /$lookfor/;
50 }
51 my $tradition = $dir->lookup( $tinfo->{'id'} );
52 say "Counting variation in tradition " . $tradition->name;
53
54 # Group the variants for each rank, and count the number of
55 # reading groupings.
56 my $lcph = Set::Scalar->new(); # placeholder for lacunae
57 my $moved = {};
58 my %magnitudes;
59 my $max = 1;
60 foreach my $rk ( 1 .. $tradition->collation->end->rank ) {
61 my $missing = $lcph->clone();
62 my $rankgroup = group_variants( $tradition, $rk, $missing, $moved, $collapse );
63 my $numr = scalar keys %$rankgroup;
64 $numr++ if $missing->size;
65 $max = $numr if $numr > $max;
66 if( exists $magnitudes{$numr} ) {
67 $magnitudes{$numr}++
68 } else {
69 $magnitudes{$numr} = 1;
70 }
71 }
72
73 # Write them out to CSV.
74 my @csvalues = map { $magnitudes{$_} || 0 } 2..$max;
75 if( $csv->combine( $tradition->name, @csvalues ) ) {
76 say $fh $csv->string;
77 } else {
78 say "combine() failed on argument: " . $csv->error_input;
79 }
80}
81
82close $fh;