regularize some script libs and interface
[scpubgit/stemmatology.git] / script / propagate_transitive.pl
CommitLineData
898b1310 1#!/usr/bin/env perl
2
3use lib 'lib';
4use feature 'say';
5use strict;
6use warnings;
7use Getopt::Long;
898b1310 8use Text::Tradition::Directory;
898b1310 9use TryCatch;
10
11binmode STDOUT, ':utf8';
12binmode STDERR, ':utf8';
13eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
14
15my %TYPEVALUES = (
16 orthographic => 1,
17 spelling => 2,
18 grammatical => 3,
19 lexical => 3,
20 collated => 50,
21 );
22
23my( $dbuser, $dbpass );
24my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
25my $testrun;
26
27GetOptions(
28 'dsn=s' => \$dsn,
29 'u|user=s' => \$dbuser,
30 'p|pass=s' => \$dbpass,
31 'n|test' => \$testrun,
32 );
33
34my $dbopts = { dsn => $dsn };
35$dbopts->{extra_args}->{user} = $dbuser if $dbuser;
36$dbopts->{extra_args}->{password} = $dbpass if $dbpass;
37
38my $dir = Text::Tradition::Directory->new( $dbopts );
39
40my $scope = $dir->new_scope();
41my $lookfor = $ARGV[0] || '';
42foreach my $tinfo ( $dir->traditionlist() ) {
43 next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
44 my $tradition = $dir->lookup( $tinfo->{'id'} );
45 my $c = $tradition->collation;
46
47 my $represented_by = {};
48 my $representative = {};
49 # For each set of ranked relationships, make all the implied links
50 # explicit. Start with orthographic readings
51 push_rel_type( $c, 'orthographic', $representative, $represented_by );
52 # then move on to spelling readings
53 push_rel_type( $c, 'spelling', $representative, $represented_by );
54
55 # Now all orth/spelling linked words are the same word for the purposes of
56 # other colocated links, and in our representation hashes.
57 # Go through the other relationships and propagate them to all words that are
58 # the same word.
59 foreach my $rel ( $c->relationships ) {
60 my $relobj = $c->get_relationship( $rel );
61 next unless $relobj->type =~ /^(grammatical|lexical)$/;
62 my $r1pool = $represented_by->{$representative->{$rel->[0]}};
63 my $r2pool = $represented_by->{$representative->{$rel->[1]}};
64 # Error check
65 if( check_distinct( $r1pool, $r2pool ) ) {
66 map { propagate_rel( $c, $relobj->type, $_, @$r2pool ) } @$r1pool;
67 } else {
68 warn "Pools not distinct for " . join( ' and ', @$rel );
69 }
70 }
71 $dir->save( $tradition ) unless $testrun;
72}
73
74sub propagate_rel {
75 my( $c, $type, @list ) = @_;
76 my $curr = shift @list;
77 while( @list ) {
78 foreach my $r ( @list ) {
79 next if $curr eq $r;
80 my $hasrel = $c->get_relationship( $curr, $r );
81 if( !$hasrel || $hasrel->type eq 'collated' ) {
82 say STDERR "Propagating $type relationship $curr -> $r";
83 $c->add_relationship( $curr, $r, { type => $type } );
84 } elsif( $hasrel->type ne $type ) {
85 warn "Found relationship conflict at $curr / $r: "
86 . $hasrel->type . " instead of $type"
87 unless( $TYPEVALUES{$hasrel->type} < $TYPEVALUES{$type} );
88 }
89 }
90 $curr = shift @list;
91 }
92}
93
94sub push_rel_type {
95 my( $c, $type, $r2rep, $rep2r ) = @_;
96 my %handled;
97 foreach my $rdg ( $c->readings ) {
98 next if $rdg->is_meta;
99 next if $handled{"$rdg"};
100 if( exists $r2rep->{"$rdg"} ) {
101 $rdg = $r2rep->{"$rdg"};
102 }
103 # Get the specified relationships
104 my @set = $rdg->related_readings( sub {
105 $_[0]->colocated && ( $_[0]->type eq $type ||
106 $TYPEVALUES{$_[0]->type} < $TYPEVALUES{$type} ) } );
107 push( @set, $rdg );
108 propagate_rel( $c, $type, @set ) if @set > 2;
109 # Set up the representatives
110 map { $r2rep->{"$_"} = $rdg } @set;
111 $rep2r->{"$rdg"} = \@set;
112 map { $handled{"$_"} = 1 } @set;
113 }
114}
115
116sub check_distinct {
117 my( $l1, $l2 ) = @_;
118 my %seen;
119 map { $seen{"$_"} = 1 } @$l1;
120 map { return 0 if $seen{"$_"} } @$l2;
121 return 1;
122}
123