initial commit of IDP scripts for stemma analysis
[scpubgit/stemmatology.git] / base / script / majority_text.pl
CommitLineData
c23749b5 1use lib 'lib';
2use feature 'say';
3use strict;
4use warnings;
5use Getopt::Long;
6use Text::Tradition::Directory;
7use TryCatch;
8
9binmode STDOUT, ':utf8';
10binmode STDERR, ':utf8';
11eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
12
13my( $dbuser, $dbpass );
14my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
15
16GetOptions(
17 'dsn=s' => \$dsn,
18 'u|user=s' => \$dbuser,
19 'p|pass=s' => \$dbpass,
20 );
21
22my $dbopts = { dsn => $dsn };
23$dbopts->{extra_args}->{user} = $dbuser if $dbuser;
24$dbopts->{extra_args}->{password} = $dbpass if $dbpass;
25
26my $dir = Text::Tradition::Directory->new( $dbopts );
27
28my $scope = $dir->new_scope();
29my $lookfor = $ARGV[0] || '';
30foreach my $tinfo ( $dir->traditionlist() ) {
31 next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
32 my $tradition = $dir->lookup( $tinfo->{'id'} );
33 # Try to piece together the majority text.
34 my $c = $tradition->collation;
35 my $curr = $c->start;
36 my @text;
37 while( $curr ne $c->end ) {
38 my %witnumbers;
39 foreach my $candidate( $c->sequence->successors( $curr ) ) {
40 my $cobj = $c->reading( $candidate );
41 next if $cobj->is_lacuna;
42 my $witnum = scalar keys %{$c->sequence->get_edge_attributes(
43 $curr->id, $candidate )};
44 $witnumbers{$witnum} = $cobj;
45 }
46 my @numbers = sort { $a <=> $b } keys %witnumbers;
47 if( @numbers ) {
48 $curr = $witnumbers{ pop @numbers };
49 push( @text, $curr ) unless $curr->is_meta;
50 } else {
51 warn "Did not find non-lacuna successor to $curr";
52 last;
53 }
54 }
55 my $pathtext = '';
56 my $last;
57 foreach my $r ( @text ) {
58 unless ( $r->join_prior || !$last || $last->join_next ) {
59 $pathtext .= ' ';
60 }
61 $pathtext .= $r->text;
62 $last = $r;
63 }
64 say $pathtext;
65}