add options for stexaminer re-analysis
[scpubgit/stemmatology.git] / script / join_readings.pl
CommitLineData
869a1ada 1#!/usr/bin/env perl
2
3use lib 'lib';
4use feature 'say';
5use strict;
6use warnings;
7use Getopt::Long;
8use Lingua::Features::Structure;
9use Text::Tradition::Directory;
10use XML::Easy::Syntax qw/ $xml10_name_rx $xml10_namestartchar_rx /;
11use TryCatch;
12
13binmode STDOUT, ':utf8';
14binmode STDERR, ':utf8';
15eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
16
17my( $dbuser, $dbpass );
18my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
19my $testrun;
20
21GetOptions(
22 'dsn=s' => \$dsn,
23 'u|user=s' => \$dbuser,
24 'p|pass=s' => \$dbpass,
25 'n|test' => \$testrun,
26 );
27
28my $dbopts = { dsn => $dsn };
29$dbopts->{extra_args}->{user} = $dbuser if $dbuser;
30$dbopts->{extra_args}->{password} = $dbpass if $dbpass;
31
32my $dir = Text::Tradition::Directory->new( $dbopts );
33
34my $scope = $dir->new_scope();
35my $lookfor = $ARGV[0] || '';
36foreach my $tinfo ( $dir->traditionlist() ) {
37 next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
38 my $tradition = $dir->lookup( $tinfo->{'id'} );
39 my $c = $tradition->collation;
40
41 # Anywhere in the graph that there is a reading that joins only to a single
42 # successor, and neither of these have any relationships, just join the two
43 # readings.
44 my %gobbled;
45 foreach my $rdg ( sort { $a->rank <=> $b->rank } $c->readings ) {
46 next if $rdg->is_meta;
47 next if $gobbled{$rdg->id};
48 next if $rdg->grammar_invalid || $rdg->is_nonsense;
49 next if $rdg->related_readings();
50 my %seen;
51 while( $c->sequence->successors( $rdg ) == 1 ) {
52 my( $next ) = $c->reading( $c->sequence->successors( $rdg ) );
53 die "Infinite loop" if $seen{$next->id};
54 $seen{$next->id} = 1;
55 last if $c->sequence->predecessors( $next ) > 1;
56 last if $next->is_meta;
57 last if $next->grammar_invalid || $next->is_nonsense;
58 last if $next->related_readings();
59 say "Joining readings $rdg and $next";
60 $c->merge_readings( $rdg, $next, 1 );
61 }
62 }
63 # Make sure we haven't screwed anything up
64 foreach my $wit ( $tradition->witnesses ) {
65 my $pathtext = $c->path_text( $wit->sigil );
66 my $origtext = join( ' ', @{$wit->text} );
67 die "Text differs for witness " . $wit->sigil
68 unless $pathtext eq $origtext;
69 if( $wit->is_layered ) {
70 $pathtext = $c->path_text( $wit->sigil.$c->ac_label );
71 $origtext = join( ' ', @{$wit->layertext} );
72 die "Ante-corr text differs for witness " . $wit->sigil
73 unless $pathtext eq $origtext;
74 }
75 }
76
77 $c->relations->rebuild_equivalence();
78 $c->calculate_ranks();
79 $dir->save( $tradition );
80}