package Text::Tradition::Collation;
+use feature 'say';
use Encode qw( decode_utf8 );
use File::Temp;
use File::Which;
# Do the deletion deed.
if( $combine ) {
+ # Combine the text of the readings
my $joinstr = $combine_char;
unless( defined $joinstr ) {
$joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
$joinstr = $self->wordsep unless defined $joinstr;
}
$kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
+ $kept_obj->normal_form(
+ join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
+ # Combine the lexemes present in the readings
+ if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
+ $kept_obj->add_lexeme( $del_obj->lexemes );
+ }
}
$self->del_reading( $deleted );
}
+=head2 compress_readings
+
+Where possible in the graph, compresses plain sequences of readings into a
+single reading. The sequences must consist of readings with no
+relationships to other readings, with only a single witness path between
+them and no other witness paths from either that would skip the other. The
+readings must also not be marked as nonsense or bad grammar.
+
+WARNING: This operation cannot be undone.
+
+=cut
+
+sub compress_readings {
+ my $self = shift;
+ # Anywhere in the graph that there is a reading that joins only to a single
+ # successor, and neither of these have any relationships, just join the two
+ # readings.
+ my %gobbled;
+ foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
+ next if $rdg->is_meta;
+ next if $gobbled{$rdg->id};
+ next if $rdg->grammar_invalid || $rdg->is_nonsense;
+ next if $rdg->related_readings();
+ my %seen;
+ while( $self->sequence->successors( $rdg ) == 1 ) {
+ my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
+ throw( "Infinite loop" ) if $seen{$next->id};
+ $seen{$next->id} = 1;
+ last if $self->sequence->predecessors( $next ) > 1;
+ last if $next->is_meta;
+ last if $next->grammar_invalid || $next->is_nonsense;
+ last if $next->related_readings();
+ say "Joining readings $rdg and $next";
+ $self->merge_readings( $rdg, $next, 1 );
+ }
+ }
+ # Make sure we haven't screwed anything up
+ foreach my $wit ( $self->tradition->witnesses ) {
+ my $pathtext = $self->path_text( $wit->sigil );
+ my $origtext = join( ' ', @{$wit->text} );
+ throw( "Text differs for witness " . $wit->sigil )
+ unless $pathtext eq $origtext;
+ if( $wit->is_layered ) {
+ $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
+ $origtext = join( ' ', @{$wit->layertext} );
+ throw( "Ante-corr text differs for witness " . $wit->sigil )
+ unless $pathtext eq $origtext;
+ }
+ }
+
+ $self->relations->rebuild_equivalence();
+ $self->calculate_ranks();
+}
# Helper function for manipulating the graph.
sub _stringify_args {
my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
my @all_pos = ( 1 .. $self->end->rank - 1 );
foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
- # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
+ # say STDERR "Making witness row(s) for " . $wit->sigil;
my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
my @row = _make_witness_row( \@wit_path, \@all_pos );
push( @{$table->{'alignment'}},
foreach my $rdg ( @$path ) {
my $rtext = $rdg->text;
$rtext = '#LACUNA#' if $rdg->is_lacuna;
- print STDERR "rank " . $rdg->rank . "\n" if $debug;
- # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
+ say STDERR "rank " . $rdg->rank if $debug;
+ # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
$char_hash{$rdg->rank} = { 't' => $rdg };
}
my @row = map { $char_hash{$_} } @$positions;
sub make_witness_paths {
my( $self ) = @_;
foreach my $wit ( $self->tradition->witnesses ) {
- # print STDERR "Making path for " . $wit->sigil . "\n";
+ # say STDERR "Making path for " . $wit->sigil;
$self->make_witness_path( $wit );
}
}
next;
}
# Combine!
- #print STDERR "Combining readings at same rank: $key\n";
+ #say STDERR "Combining readings at same rank: $key";
$changed = 1;
$self->merge_readings( $unique_rank_rdg{$key}, $rdg );
# TODO see if this now makes a common point.
my @last_r2 = ( $r2 );
# my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
my %all_seen;
- # print STDERR "Finding common $dir for $r1, $r2\n";
+ # say STDERR "Finding common $dir for $r1, $r2";
while( !@candidates ) {
last unless $iter--; # Avoid looping infinitely
# Iterate separately down the graph from r1 and r2
foreach my $lc ( @last_r1 ) {
foreach my $p ( $lc->$dir ) {
if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
- # print STDERR "Path candidate $p from $lc\n";
+ # say STDERR "Path candidate $p from $lc";
push( @candidates, $p );
} elsif( !$all_seen{$p->id} ) {
$all_seen{$p->id} = 'r1';
foreach my $lc ( @last_r2 ) {
foreach my $p ( $lc->$dir ) {
if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
- # print STDERR "Path candidate $p from $lc\n";
+ # say STDERR "Path candidate $p from $lc";
push( @candidates, $p );
} elsif( !$all_seen{$p->id} ) {
$all_seen{$p->id} = 'r2';
}
}
- # Check to see if a nonlocal relationship is defined for the two readings
- $rel = $self->scoped_relationship( $options->{'reading_a'},
- $options->{'reading_b'} );
- if( $rel && $rel->type eq $options->{'type'} ) {
- return $rel;
- } elsif( $rel ) {
- throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
- } else {
- $rel = Text::Tradition::Collation::Relationship->new( $options );
- $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
- return $rel;
- }
+ $rel = Text::Tradition::Collation::Relationship->new( $options );
+ $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
+ return $rel;
}
=head2 add_scoped_relationship( $rel )
my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
if( $otherrel && $otherrel->type eq $options->{type}
&& $otherrel->scope eq $options->{scope} ) {
- warn "Applying existing scoped relationship";
+ warn "Applying existing scoped relationship for $rdga / $rdgb";
$relationship = $otherrel;
+ } elsif( $otherrel ) {
+ throw( "Conflicting scoped relationship for $rdga / $rdgb at $source / $target" );
}
}
$relationship = $self->create( $options ) unless $relationship; # Will throw on error
# Backwards compat
if( $filter eq 'colocated' ) {
$filter = sub { $_[0]->colocated };
+ } elsif( !ref( $filter ) ) {
+ my $type = $filter;
+ $filter = sub { $_[0]->type eq $type };
}
my %found = ( $reading => 1 );
my $check = [ $reading ];
--- /dev/null
+#!/usr/bin/env perl
+
+use lib 'lib';
+use feature 'say';
+use strict;
+use warnings;
+use Getopt::Long;
+use Lingua::Features::Structure;
+use Text::Tradition::Directory;
+use XML::Easy::Syntax qw/ $xml10_name_rx $xml10_namestartchar_rx /;
+use TryCatch;
+
+binmode STDOUT, ':utf8';
+binmode STDERR, ':utf8';
+eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
+
+my( $dbuser, $dbpass );
+my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
+my $testrun;
+
+GetOptions(
+ 'dsn=s' => \$dsn,
+ 'u|user=s' => \$dbuser,
+ 'p|pass=s' => \$dbpass,
+ 'n|test' => \$testrun,
+ );
+
+my $dbopts = { dsn => $dsn };
+$dbopts->{extra_args}->{user} = $dbuser if $dbuser;
+$dbopts->{extra_args}->{password} = $dbpass if $dbpass;
+
+my $dir = Text::Tradition::Directory->new( $dbopts );
+
+my $scope = $dir->new_scope();
+my $lookfor = $ARGV[0] || '';
+foreach my $tinfo ( $dir->traditionlist() ) {
+ next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
+ my $tradition = $dir->lookup( $tinfo->{'id'} );
+ my $c = $tradition->collation;
+
+ # Anywhere in the graph that there is a reading that joins only to a single
+ # successor, and neither of these have any relationships, just join the two
+ # readings.
+
+ # Save/update the current path texts
+ foreach my $wit ( $tradition->witnesses ) {
+ my @pathtext = split( /\s+/, $c->path_text( $wit->sigil ) );
+ $wit->text( \@pathtext );
+ if( $wit->is_layered ) {
+ my @layertext = split( /\s+/, $c->path_text( $wit->sigil.$c->ac_label ) );
+ $wit->layertext( \@layertext );
+ }
+ }
+
+ # Do the deed
+ $c->compress_readings();
+ # ...and save it.
+ $dir->save( $tradition );
+}
\ No newline at end of file
--- /dev/null
+#!/usr/bin/env perl
+
+use lib 'lib';
+use feature 'say';
+use strict;
+use warnings;
+use Getopt::Long;
+use Lingua::Features::Structure;
+use Text::Tradition::Directory;
+use XML::Easy::Syntax qw/ $xml10_name_rx $xml10_namestartchar_rx /;
+use TryCatch;
+
+binmode STDOUT, ':utf8';
+binmode STDERR, ':utf8';
+eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
+
+my %TYPEVALUES = (
+ orthographic => 1,
+ spelling => 2,
+ grammatical => 3,
+ lexical => 3,
+ collated => 50,
+ );
+
+my( $dbuser, $dbpass );
+my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
+my $testrun;
+
+GetOptions(
+ 'dsn=s' => \$dsn,
+ 'u|user=s' => \$dbuser,
+ 'p|pass=s' => \$dbpass,
+ 'n|test' => \$testrun,
+ );
+
+my $dbopts = { dsn => $dsn };
+$dbopts->{extra_args}->{user} = $dbuser if $dbuser;
+$dbopts->{extra_args}->{password} = $dbpass if $dbpass;
+
+my $dir = Text::Tradition::Directory->new( $dbopts );
+
+my $scope = $dir->new_scope();
+my $lookfor = $ARGV[0] || '';
+foreach my $tinfo ( $dir->traditionlist() ) {
+ next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
+ my $tradition = $dir->lookup( $tinfo->{'id'} );
+ my $c = $tradition->collation;
+
+ my $represented_by = {};
+ my $representative = {};
+ # For each set of ranked relationships, make all the implied links
+ # explicit. Start with orthographic readings
+ push_rel_type( $c, 'orthographic', $representative, $represented_by );
+ # then move on to spelling readings
+ push_rel_type( $c, 'spelling', $representative, $represented_by );
+
+ # Now all orth/spelling linked words are the same word for the purposes of
+ # other colocated links, and in our representation hashes.
+ # Go through the other relationships and propagate them to all words that are
+ # the same word.
+ foreach my $rel ( $c->relationships ) {
+ my $relobj = $c->get_relationship( $rel );
+ next unless $relobj->type =~ /^(grammatical|lexical)$/;
+ my $r1pool = $represented_by->{$representative->{$rel->[0]}};
+ my $r2pool = $represented_by->{$representative->{$rel->[1]}};
+ # Error check
+ if( check_distinct( $r1pool, $r2pool ) ) {
+ map { propagate_rel( $c, $relobj->type, $_, @$r2pool ) } @$r1pool;
+ } else {
+ warn "Pools not distinct for " . join( ' and ', @$rel );
+ }
+ }
+ $dir->save( $tradition ) unless $testrun;
+}
+
+sub propagate_rel {
+ my( $c, $type, @list ) = @_;
+ my $curr = shift @list;
+ while( @list ) {
+ foreach my $r ( @list ) {
+ next if $curr eq $r;
+ my $hasrel = $c->get_relationship( $curr, $r );
+ if( !$hasrel || $hasrel->type eq 'collated' ) {
+ say STDERR "Propagating $type relationship $curr -> $r";
+ $c->add_relationship( $curr, $r, { type => $type } );
+ } elsif( $hasrel->type ne $type ) {
+ warn "Found relationship conflict at $curr / $r: "
+ . $hasrel->type . " instead of $type"
+ unless( $TYPEVALUES{$hasrel->type} < $TYPEVALUES{$type} );
+ }
+ }
+ $curr = shift @list;
+ }
+}
+
+sub push_rel_type {
+ my( $c, $type, $r2rep, $rep2r ) = @_;
+ my %handled;
+ foreach my $rdg ( $c->readings ) {
+ next if $rdg->is_meta;
+ next if $handled{"$rdg"};
+ if( exists $r2rep->{"$rdg"} ) {
+ $rdg = $r2rep->{"$rdg"};
+ }
+ # Get the specified relationships
+ my @set = $rdg->related_readings( sub {
+ $_[0]->colocated && ( $_[0]->type eq $type ||
+ $TYPEVALUES{$_[0]->type} < $TYPEVALUES{$type} ) } );
+ push( @set, $rdg );
+ propagate_rel( $c, $type, @set ) if @set > 2;
+ # Set up the representatives
+ map { $r2rep->{"$_"} = $rdg } @set;
+ $rep2r->{"$rdg"} = \@set;
+ map { $handled{"$_"} = 1 } @set;
+ }
+}
+
+sub check_distinct {
+ my( $l1, $l2 ) = @_;
+ my %seen;
+ map { $seen{"$_"} = 1 } @$l1;
+ map { return 0 if $seen{"$_"} } @$l2;
+ return 1;
+}
+
$c->stash->{graphdot} = $stemma->editable({ linesep => ' ' });
$c->stash->{text_title} = $tradition->name;
$c->stash->{template} = 'stexaminer.tt';
+
+ # Get the analysis options
+ my( $use_type1, $ignore_sort ) = ( 0, 'none' );
+ if( $c->req->method eq 'POST' ) {
+ $use_type1 = $c->req->param( 'show_type1' ) eq 'on' ? 1 : 0;
+ $ignore_sort = $c->req->param( 'ignore_variant' );
+ }
+ $c->stash->{'show_type1'} = $use_type1;
+ $c->stash->{'ignore_variant'} = $ignore_sort;
# TODO Run the analysis as AJAX from the loaded page.
- my $t = run_analysis( $tradition, 'exclude_type1' => 1 );
+ my %analysis_options;
+ $analysis_options{'exclude_type1'} = !$use_type1;
+ if( $ignore_sort eq 'spelling' ) {
+ $analysis_options{'collapse'} = [ qw/ spelling orthographic / ];
+ } elsif( $ignore_sort eq 'orthographic' ) {
+ $analysis_options{'collapse'} = 'orthographic';
+ }
+
+ my $t = run_analysis( $tradition, %analysis_options );
# Stringify the reading groups
foreach my $loc ( @{$t->{'variants'}} ) {
my $mst = wit_stringify( $loc->{'missing'} );
+#options {
+ position: relative;
+ border: 1px #c6dcf1 solid;
+ margin-left: 20px;
+ margin-bottom: 20px;
+ padding: 10px;
+ width: 500px;
+}
+.optionformelement {
+ float: left;
+ padding-left: 15px;
+}
+#options_button {
+ position: absolute;
+ bottom: 20px;
+ right: 20px;
+}
#variants_table {
- float: left;
+ clear: both;
width: 90%;
height: 190px;
border: 1px #c6dcf1 solid;
.cellb7 {
border-right: 20px solid #ffd5e5;
}
+
+/* Clearfix hack to make div container height work */
+.clearfix:after {
+ content: ".";
+ display: block;
+ clear: both;
+ visibility: hidden;
+ line-height: 0;
+ height: 0;
+}
+
+.clearfix {
+ display: inline-block;
+}
+
+html[xmlns] .clearfix {
+ display: block;
+}
+
+* html .clearfix {
+ height: 1%;
+}
\ No newline at end of file
[% END -%]
<h1>Stexaminer</h1>
<h2>[% text_title %]</h2>
+ <div id="options">
+ <h3>Analysis options:</h3>
+ <form id="use_variants_form" name="use_variants_form" class="clearfix" method="POST">
+ <div class="optionformelement">
+ <input type="radio" name="ignore_variant" value="none" [% 'checked="true"' IF ignore_variant == 'none' %]>Analyze all variation</input><br/>
+ <input type="radio" name="ignore_variant" value="orthographic" [% 'checked="true"' IF ignore_variant == 'orthographic' %]>Ignore orthographic variation</input><br/>
+ <input type="radio" name="ignore_variant" value="spelling" [% 'checked="true"' IF ignore_variant == 'spelling' %]>Ignore orthographic and spelling variation</input>
+ </div>
+ <div class="optionformelement">
+ <input type="checkbox" name="show_type1" [% 'checked="true"' IF show_type1 %]>Include type-1 variation</input>
+ </div>
+ <div id="options_button" class="button optionformelement" onclick="$('#use_variants_form').submit()">
+ <span>Re-analyze</span>
+ </div>
+ </form>
+ </div>
<div id="variants_table">
<table>
[% FOREACH row IN variants -%]