8 use Lingua::Features::Structure;
9 use Text::Tradition::Directory;
10 use XML::Easy::Syntax qw/ $xml10_name_rx $xml10_namestartchar_rx /;
13 binmode STDOUT, ':utf8';
14 binmode STDERR, ':utf8';
15 eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
17 my( $dbuser, $dbpass );
18 my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
23 'u|user=s' => \$dbuser,
24 'p|pass=s' => \$dbpass,
25 'n|test' => \$testrun,
28 my $dbopts = { dsn => $dsn };
29 $dbopts->{extra_args}->{user} = $dbuser if $dbuser;
30 $dbopts->{extra_args}->{password} = $dbpass if $dbpass;
32 my $dir = Text::Tradition::Directory->new( $dbopts );
34 my $scope = $dir->new_scope();
35 my $lookfor = $ARGV[0] || '';
36 foreach my $tinfo ( $dir->traditionlist() ) {
37 next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
38 my $tradition = $dir->lookup( $tinfo->{'id'} );
39 my $c = $tradition->collation;
41 # Anywhere in the graph that there is a reading that joins only to a single
42 # successor, and neither of these have any relationships, just join the two
45 foreach my $rdg ( sort { $a->rank <=> $b->rank } $c->readings ) {
46 next if $rdg->is_meta;
47 next if $gobbled{$rdg->id};
48 next if $rdg->grammar_invalid || $rdg->is_nonsense;
49 next if $rdg->related_readings();
51 while( $c->sequence->successors( $rdg ) == 1 ) {
52 my( $next ) = $c->reading( $c->sequence->successors( $rdg ) );
53 die "Infinite loop" if $seen{$next->id};
55 last if $c->sequence->predecessors( $next ) > 1;
56 last if $next->is_meta;
57 last if $next->grammar_invalid || $next->is_nonsense;
58 last if $next->related_readings();
59 say "Joining readings $rdg and $next";
60 $c->merge_readings( $rdg, $next, 1 );
63 # Make sure we haven't screwed anything up
64 foreach my $wit ( $tradition->witnesses ) {
65 my $pathtext = $c->path_text( $wit->sigil );
66 my $origtext = join( ' ', @{$wit->text} );
67 die "Text differs for witness " . $wit->sigil
68 unless $pathtext eq $origtext;
69 if( $wit->is_layered ) {
70 $pathtext = $c->path_text( $wit->sigil.$c->ac_label );
71 $origtext = join( ' ', @{$wit->layertext} );
72 die "Ante-corr text differs for witness " . $wit->sigil
73 unless $pathtext eq $origtext;
77 $c->relations->rebuild_equivalence();
78 $c->calculate_ranks();
79 $dir->save( $tradition );