Commit | Line | Data |
898b1310 |
1 | #!/usr/bin/env perl |
2 | |
3 | use lib 'lib'; |
4 | use feature 'say'; |
5 | use strict; |
6 | use warnings; |
7 | use Getopt::Long; |
898b1310 |
8 | use Text::Tradition::Directory; |
898b1310 |
9 | use TryCatch; |
10 | |
11 | binmode STDOUT, ':utf8'; |
12 | binmode STDERR, ':utf8'; |
13 | eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 }; |
14 | |
15 | my %TYPEVALUES = ( |
16 | orthographic => 1, |
17 | spelling => 2, |
18 | grammatical => 3, |
19 | lexical => 3, |
ee978519 |
20 | punctuation => 4, |
898b1310 |
21 | collated => 50, |
22 | ); |
23 | |
24 | my( $dbuser, $dbpass ); |
25 | my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db'; |
26 | my $testrun; |
27 | |
28 | GetOptions( |
29 | 'dsn=s' => \$dsn, |
30 | 'u|user=s' => \$dbuser, |
31 | 'p|pass=s' => \$dbpass, |
32 | 'n|test' => \$testrun, |
33 | ); |
34 | |
35 | my $dbopts = { dsn => $dsn }; |
36 | $dbopts->{extra_args}->{user} = $dbuser if $dbuser; |
37 | $dbopts->{extra_args}->{password} = $dbpass if $dbpass; |
38 | |
39 | my $dir = Text::Tradition::Directory->new( $dbopts ); |
40 | |
41 | my $scope = $dir->new_scope(); |
42 | my $lookfor = $ARGV[0] || ''; |
43 | foreach my $tinfo ( $dir->traditionlist() ) { |
428bcf0b |
44 | next if $tinfo->{'name'} eq 'xxxxx'; |
898b1310 |
45 | next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor; |
46 | my $tradition = $dir->lookup( $tinfo->{'id'} ); |
428bcf0b |
47 | say "Looking at tradition " . $tradition->name; |
898b1310 |
48 | my $c = $tradition->collation; |
49 | |
50 | my $represented_by = {}; |
51 | my $representative = {}; |
52 | # For each set of ranked relationships, make all the implied links |
53 | # explicit. Start with orthographic readings |
54 | push_rel_type( $c, 'orthographic', $representative, $represented_by ); |
55 | # then move on to spelling readings |
56 | push_rel_type( $c, 'spelling', $representative, $represented_by ); |
57 | |
58 | # Now all orth/spelling linked words are the same word for the purposes of |
59 | # other colocated links, and in our representation hashes. |
60 | # Go through the other relationships and propagate them to all words that are |
61 | # the same word. |
62 | foreach my $rel ( $c->relationships ) { |
63 | my $relobj = $c->get_relationship( $rel ); |
428bcf0b |
64 | if( $relobj->type =~ /^(grammatical|lexical)$/ ) { |
65 | my $r1pool = $represented_by->{$representative->{$rel->[0]}}; |
66 | my $r2pool = $represented_by->{$representative->{$rel->[1]}}; |
67 | # Error check |
68 | if( check_distinct( $r1pool, $r2pool ) ) { |
69 | map { propagate_rel( $c, $relobj->type, $_, @$r2pool ) } @$r1pool; |
70 | } else { |
71 | warn "Pools not distinct for " . join( ' and ', @$rel ); |
72 | } |
73 | } elsif( $relobj->type eq 'transposition' ) { |
74 | # We also need to propagate transposition links, but rather more strictly. |
75 | propagate_rel( $c, 'transposition', map { $c->reading( $_ ) } @$rel ); |
898b1310 |
76 | } |
77 | } |
428bcf0b |
78 | |
79 | |
898b1310 |
80 | $dir->save( $tradition ) unless $testrun; |
81 | } |
82 | |
83 | sub propagate_rel { |
84 | my( $c, $type, @list ) = @_; |
85 | my $curr = shift @list; |
ee978519 |
86 | push( @list, $curr ); # make sure we close the A -> B -> C -> A loop |
898b1310 |
87 | while( @list ) { |
88 | foreach my $r ( @list ) { |
89 | next if $curr eq $r; |
428bcf0b |
90 | # Check that the given relationship type exists between $curr and $r. |
91 | # Also check that the given relationship type exists between $curr and |
92 | # the same-type relationships of $r. |
93 | my @candidates = ( $r ); |
94 | foreach my $rrel ( $r->related_readings() ) { |
95 | next if $curr eq $rrel; |
96 | my $rrelobj = $c->get_relationship( $r, $rrel ); |
97 | if( $rrelobj && $rrelobj->type eq $type ) { |
98 | my $hasrrel = $c->get_relationship( $curr, $rrel ); |
99 | push( @candidates, $rrel ) unless $hasrrel; |
100 | } |
101 | } |
102 | foreach my $cand ( @candidates ) { |
103 | my $hasrel = $c->get_relationship( $curr, $cand ); |
104 | if( !$hasrel || $hasrel->type eq 'collated' ) { |
105 | say STDERR "Propagating $type relationship $curr -> $cand"; |
106 | $c->add_relationship( $curr, $cand, { type => $type } ); |
107 | } elsif( $hasrel->type ne $type ) { |
108 | warn "Found relationship conflict at $curr / $cand: " |
109 | . $hasrel->type . " instead of $type" |
110 | unless( $TYPEVALUES{$hasrel->type} < $TYPEVALUES{$type} ); |
111 | } |
898b1310 |
112 | } |
113 | } |
114 | $curr = shift @list; |
115 | } |
116 | } |
117 | |
118 | sub push_rel_type { |
119 | my( $c, $type, $r2rep, $rep2r ) = @_; |
120 | my %handled; |
121 | foreach my $rdg ( $c->readings ) { |
122 | next if $rdg->is_meta; |
123 | next if $handled{"$rdg"}; |
124 | if( exists $r2rep->{"$rdg"} ) { |
125 | $rdg = $r2rep->{"$rdg"}; |
126 | } |
127 | # Get the specified relationships |
128 | my @set = $rdg->related_readings( sub { |
129 | $_[0]->colocated && ( $_[0]->type eq $type || |
130 | $TYPEVALUES{$_[0]->type} < $TYPEVALUES{$type} ) } ); |
131 | push( @set, $rdg ); |
132 | propagate_rel( $c, $type, @set ) if @set > 2; |
133 | # Set up the representatives |
134 | map { $r2rep->{"$_"} = $rdg } @set; |
135 | $rep2r->{"$rdg"} = \@set; |
136 | map { $handled{"$_"} = 1 } @set; |
137 | } |
138 | } |
139 | |
140 | sub check_distinct { |
141 | my( $l1, $l2 ) = @_; |
142 | my %seen; |
143 | map { $seen{"$_"} = 1 } @$l1; |
144 | map { return 0 if $seen{"$_"} } @$l2; |
145 | return 1; |
146 | } |
147 | |