start implementing morphology on readings
[scpubgit/stemmatology.git] / script / orth_case_links.pl
CommitLineData
a0a550ef 1#!/usr/bin/env perl
2
3use lib 'lib';
4use strict;
5use warnings;
6use Text::Tradition::Directory;
7
8binmode STDERR, ':utf8';
9binmode STDOUT, ':utf8';
10eval { no warnings; binmode $DB::OUT, ':utf8' };
11
12my( $dsn, $user, $pass ) = @ARGV;
13
14my $connect_args = { dsn => $dsn };
15$connect_args->{'extra_args'} = { user => $user, password => $pass }
16 if $user && $pass;
17my $dir = Text::Tradition::Directory->new( $connect_args );
18
19foreach my $text ( $dir->traditionlist ) {
20 my $id = $text->{'id'};
7f45b1dd 21 next unless $text->{'name'} =~ /Virtutes/;
a0a550ef 22 my $scope = $dir->new_scope;
23 my $tradition = $dir->lookup( $id );
24 print STDERR "Processing tradition " . $tradition->name . "\n";
25 my $c = $tradition->collation;
7f45b1dd 26 $c->flatten_ranks(); # just in case
a0a550ef 27 foreach my $rank ( 1 .. $c->end->rank - 1 ) {
28 my @readings = $c->readings_at_rank( $rank );
7f45b1dd 29 my %merged;
a0a550ef 30 while( @readings ) {
31 my $r = pop @readings;
32 next if $r->is_meta;
7f45b1dd 33 next if $merged{$r->id};
a0a550ef 34 my @orthmatch = grep { lc( $r->text ) eq lc( $_->text ) } @readings;
35 foreach my $om ( @orthmatch ) {
7f45b1dd 36 if( $r->text eq $om->text ) {
37 print STDERR "Merging identical readings $r and $om ("
38 . $r->text . ")\n";
39 $merged{$om->id} = 1;
40 $c->merge_readings( $r, $om );
41 } elsif ( $c->get_relationship( $r, $om ) ) {
42 print STDERR sprintf( "Adding orthographic link for %s and %s (%s / %s)\n",
43 $r->id, $om->id, $r->text, $om->text );
a0a550ef 44 $c->add_relationship( $r, $om,
45 { 'type' => 'orthographic', 'scope' => 'global' } );
46 }
47 }
48 }
49 }
50 $dir->save( $tradition );
51}
52
53print STDERR "Done\n";