Merge branch 'master' of github.com:tla/stemmatology
[scpubgit/stemmatology.git] / script / orth_case_links.pl
CommitLineData
a0a550ef 1#!/usr/bin/env perl
2
3use lib 'lib';
446a7389 4use feature 'say';
a0a550ef 5use strict;
6use warnings;
446a7389 7use Getopt::Long;
a0a550ef 8use Text::Tradition::Directory;
446a7389 9use TryCatch;
a0a550ef 10
a0a550ef 11binmode STDOUT, ':utf8';
446a7389 12binmode STDERR, ':utf8';
13eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
14
15my( $dbuser, $dbpass );
16my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
17my $testrun;
18
19GetOptions(
20 'dsn=s' => \$dsn,
21 'u|user=s' => \$dbuser,
22 'p|pass=s' => \$dbpass,
23 'n|test' => \$testrun,
24 );
a0a550ef 25
446a7389 26my $dbopts = { dsn => $dsn };
27$dbopts->{extra_args}->{user} = $dbuser if $dbuser;
28$dbopts->{extra_args}->{password} = $dbpass if $dbpass;
a0a550ef 29
446a7389 30my $dir = Text::Tradition::Directory->new( $dbopts );
a0a550ef 31
446a7389 32my $scope = $dir->new_scope();
33my $lookfor = $ARGV[0] || '';
34foreach my $tinfo ( $dir->traditionlist() ) {
35 next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
36 my $tradition = $dir->lookup( $tinfo->{id} );
37 say STDERR "Processing tradition " . $tradition->name;
a0a550ef 38 my $c = $tradition->collation;
7f45b1dd 39 $c->flatten_ranks(); # just in case
a0a550ef 40 foreach my $rank ( 1 .. $c->end->rank - 1 ) {
41 my @readings = $c->readings_at_rank( $rank );
7f45b1dd 42 my %merged;
a0a550ef 43 while( @readings ) {
44 my $r = pop @readings;
45 next if $r->is_meta;
7f45b1dd 46 next if $merged{$r->id};
a0a550ef 47 my @orthmatch = grep { lc( $r->text ) eq lc( $_->text ) } @readings;
48 foreach my $om ( @orthmatch ) {
7f45b1dd 49 if( $r->text eq $om->text ) {
446a7389 50 say STDERR "Merging identical readings $r and $om ("
51 . $r->text . ")";
7f45b1dd 52 $merged{$om->id} = 1;
53 $c->merge_readings( $r, $om );
9f86af4b 54 } else {
446a7389 55 say STDERR sprintf( "Adding orthographic link for %s and %s (%s / %s)",
7f45b1dd 56 $r->id, $om->id, $r->text, $om->text );
446a7389 57 try {
58 $c->add_relationship( $r, $om,
59 { 'type' => 'orthographic', 'scope' => 'global' } ); };
60 } catch ( Text::Tradition::Error $e ) {
61 say STDERR "Relationship skipped: " . $e->message;
62 }
a0a550ef 63 }
64 }
65 }
66 }
67 $dir->save( $tradition );
68}
69
70print STDERR "Done\n";