$joinstr = $self->wordsep unless defined $joinstr;
}
$kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
+ # Change this reading to a joining one if necessary
+ $kept_obj->_set_join_next( $del_obj->join_next );
$kept_obj->normal_form(
join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
# Combine the lexemes present in the readings
is => 'ro',
isa => 'Bool',
default => undef,
+ writer => '_set_join_prior',
);
has 'join_next' => (
is => 'ro',
isa => 'Bool',
default => undef,
+ writer => '_set_join_next',
);
$is_layer ? $wit->layertext( \@words ) : $wit->text( \@words );
}
+ my $nocollate = ( scalar( @witnesses ) * scalar @$alignment_table ) > 150000;
+ print STDERR "Tradition too big for row collation\n" if $nocollate;
+
# Now for the next rows, make nodes as necessary, assign their ranks, and
# add them to the witness paths.
foreach my $idx ( 1 .. $#{$alignment_table} ) {
my $row = $alignment_table->[$idx];
- my $nodes = _make_nodes( $c, $row, $idx );
+ my $nodes = _make_nodes( $c, $row, $idx, $nocollate );
foreach my $w ( 0 .. $#{$row} ) {
# push the appropriate node onto the appropriate witness path
my $word = $row->[$w];
# Note that our ranks and common readings are set.
$c->_graphcalc_done(1);
# Remove redundant collation relationships.
- $c->relations->filter_collations();
+ $c->relations->filter_collations() unless $nocollate;
}
sub _make_nodes {
- my( $collation, $row, $index ) = @_;
+ my( $collation, $row, $index, $nocollate ) = @_;
my %unique;
my $commonctr = 0; # Holds the number of unique readings + gaps, ex. lacunae.
foreach my $w ( @$row ) {
$ctr++;
}
# Collate this sequence of readings via a single 'collation' relationship.
- my @rankrdgs = values %unique;
- my $collation_rel;
- while( @rankrdgs ) {
- my $r = shift @rankrdgs;
- next if $r->is_meta;
- foreach my $nr ( @rankrdgs ) {
- next if $nr->is_meta;
- if( $collation_rel ) {
- $collation->add_relationship( $r, $nr, $collation_rel );
- } else {
- $collation->add_relationship( $r, $nr,
- { 'type' => 'collated',
- 'annotation' => "Parsed together for rank $index" } );
- $collation_rel = $collation->get_relationship( $r, $nr );
- }
- }
- }
-
+ unless( $nocollate ) {
+ my @rankrdgs = values %unique;
+ my $collation_rel;
+ while( @rankrdgs ) {
+ my $r = shift @rankrdgs;
+ next if $r->is_meta;
+ foreach my $nr ( @rankrdgs ) {
+ next if $nr->is_meta;
+ if( $collation_rel ) {
+ $collation->add_relationship( $r, $nr, $collation_rel );
+ } else {
+ $collation->add_relationship( $r, $nr,
+ { 'type' => 'collated',
+ 'annotation' => "Parsed together for rank $index" } );
+ $collation_rel = $collation->get_relationship( $r, $nr );
+ }
+ }
+ }
+ }
return \%unique;
}
use strict;
use warnings;
use Getopt::Long;
-use Lingua::Features::Structure;
use Text::Tradition::Directory;
-use XML::Easy::Syntax qw/ $xml10_name_rx $xml10_namestartchar_rx /;
use TryCatch;
binmode STDOUT, ':utf8';
#!/usr/bin/env perl
use lib 'lib';
+use feature 'say';
use strict;
use warnings;
+use Getopt::Long;
use Text::Tradition::Directory;
+use TryCatch;
-binmode STDERR, ':utf8';
binmode STDOUT, ':utf8';
-eval { no warnings; binmode $DB::OUT, ':utf8' };
+binmode STDERR, ':utf8';
+eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
+
+my( $dbuser, $dbpass );
+my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
+my $testrun;
+
+GetOptions(
+ 'dsn=s' => \$dsn,
+ 'u|user=s' => \$dbuser,
+ 'p|pass=s' => \$dbpass,
+ 'n|test' => \$testrun,
+ );
-my( $dsn, $user, $pass ) = @ARGV;
+my $dbopts = { dsn => $dsn };
+$dbopts->{extra_args}->{user} = $dbuser if $dbuser;
+$dbopts->{extra_args}->{password} = $dbpass if $dbpass;
-my $connect_args = { dsn => $dsn };
-$connect_args->{'extra_args'} = { user => $user, password => $pass }
- if $user && $pass;
-my $dir = Text::Tradition::Directory->new( $connect_args );
+my $dir = Text::Tradition::Directory->new( $dbopts );
-foreach my $text ( $dir->traditionlist ) {
- my $id = $text->{'id'};
- next unless $text->{'name'} =~ /Heinrichi/;
- my $scope = $dir->new_scope;
- my $tradition = $dir->lookup( $id );
- print STDERR "Processing tradition " . $tradition->name . "\n";
+my $scope = $dir->new_scope();
+my $lookfor = $ARGV[0] || '';
+foreach my $tinfo ( $dir->traditionlist() ) {
+ next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
+ my $tradition = $dir->lookup( $tinfo->{id} );
+ say STDERR "Processing tradition " . $tradition->name;
my $c = $tradition->collation;
$c->flatten_ranks(); # just in case
foreach my $rank ( 1 .. $c->end->rank - 1 ) {
my @orthmatch = grep { lc( $r->text ) eq lc( $_->text ) } @readings;
foreach my $om ( @orthmatch ) {
if( $r->text eq $om->text ) {
- print STDERR "Merging identical readings $r and $om ("
- . $r->text . ")\n";
+ say STDERR "Merging identical readings $r and $om ("
+ . $r->text . ")";
$merged{$om->id} = 1;
$c->merge_readings( $r, $om );
} else {
- print STDERR sprintf( "Adding orthographic link for %s and %s (%s / %s)\n",
+ say STDERR sprintf( "Adding orthographic link for %s and %s (%s / %s)",
$r->id, $om->id, $r->text, $om->text );
- eval { $c->add_relationship( $r, $om,
- { 'type' => 'orthographic', 'scope' => 'global' } ); };
- print STDERR $@ if $@;
+ try {
+ $c->add_relationship( $r, $om,
+ { 'type' => 'orthographic', 'scope' => 'global' } ); };
+ } catch ( Text::Tradition::Error $e ) {
+ say STDERR "Relationship skipped: " . $e->message;
+ }
}
}
}
use strict;
use warnings;
use Getopt::Long;
-use Lingua::Features::Structure;
use Text::Tradition::Directory;
-use XML::Easy::Syntax qw/ $xml10_name_rx $xml10_namestartchar_rx /;
use TryCatch;
binmode STDOUT, ':utf8';