Commit | Line | Data |
a0a550ef |
1 | #!/usr/bin/env perl |
2 | |
3 | use lib 'lib'; |
446a7389 |
4 | use feature 'say'; |
a0a550ef |
5 | use strict; |
6 | use warnings; |
446a7389 |
7 | use Getopt::Long; |
a0a550ef |
8 | use Text::Tradition::Directory; |
446a7389 |
9 | use TryCatch; |
a0a550ef |
10 | |
a0a550ef |
11 | binmode STDOUT, ':utf8'; |
446a7389 |
12 | binmode STDERR, ':utf8'; |
13 | eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 }; |
14 | |
15 | my( $dbuser, $dbpass ); |
16 | my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db'; |
17 | my $testrun; |
18 | |
19 | GetOptions( |
20 | 'dsn=s' => \$dsn, |
21 | 'u|user=s' => \$dbuser, |
22 | 'p|pass=s' => \$dbpass, |
23 | 'n|test' => \$testrun, |
24 | ); |
a0a550ef |
25 | |
446a7389 |
26 | my $dbopts = { dsn => $dsn }; |
27 | $dbopts->{extra_args}->{user} = $dbuser if $dbuser; |
28 | $dbopts->{extra_args}->{password} = $dbpass if $dbpass; |
a0a550ef |
29 | |
446a7389 |
30 | my $dir = Text::Tradition::Directory->new( $dbopts ); |
a0a550ef |
31 | |
446a7389 |
32 | my $scope = $dir->new_scope(); |
33 | my $lookfor = $ARGV[0] || ''; |
34 | foreach my $tinfo ( $dir->traditionlist() ) { |
35 | next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor; |
36 | my $tradition = $dir->lookup( $tinfo->{id} ); |
37 | say STDERR "Processing tradition " . $tradition->name; |
a0a550ef |
38 | my $c = $tradition->collation; |
7f45b1dd |
39 | $c->flatten_ranks(); # just in case |
a0a550ef |
40 | foreach my $rank ( 1 .. $c->end->rank - 1 ) { |
41 | my @readings = $c->readings_at_rank( $rank ); |
7f45b1dd |
42 | my %merged; |
a0a550ef |
43 | while( @readings ) { |
44 | my $r = pop @readings; |
45 | next if $r->is_meta; |
7f45b1dd |
46 | next if $merged{$r->id}; |
a0a550ef |
47 | my @orthmatch = grep { lc( $r->text ) eq lc( $_->text ) } @readings; |
48 | foreach my $om ( @orthmatch ) { |
7f45b1dd |
49 | if( $r->text eq $om->text ) { |
446a7389 |
50 | say STDERR "Merging identical readings $r and $om (" |
51 | . $r->text . ")"; |
7f45b1dd |
52 | $merged{$om->id} = 1; |
53 | $c->merge_readings( $r, $om ); |
9f86af4b |
54 | } else { |
446a7389 |
55 | say STDERR sprintf( "Adding orthographic link for %s and %s (%s / %s)", |
7f45b1dd |
56 | $r->id, $om->id, $r->text, $om->text ); |
446a7389 |
57 | try { |
58 | $c->add_relationship( $r, $om, |
59 | { 'type' => 'orthographic', 'scope' => 'global' } ); }; |
60 | } catch ( Text::Tradition::Error $e ) { |
61 | say STDERR "Relationship skipped: " . $e->message; |
62 | } |
a0a550ef |
63 | } |
64 | } |
65 | } |
66 | } |
67 | $dir->save( $tradition ); |
68 | } |
69 | |
70 | print STDERR "Done\n"; |