Merge branch 'master' of github.com:tla/stemmatology
Tara L Andrews [Wed, 11 Jul 2012 19:34:02 +0000 (21:34 +0200)]
lib/Text/Tradition/Collation.pm
lib/Text/Tradition/Collation/RelationshipStore.pm
script/join_readings.pl [new file with mode: 0755]
script/propagate_transitive.pl [new file with mode: 0755]
stemmaweb/lib/stemmaweb/Controller/Stexaminer.pm
stemmaweb/root/css/stexaminer.css
stemmaweb/root/src/stexaminer.tt

index d3a8567..bd3b58d 100644 (file)
@@ -1,5 +1,6 @@
 package Text::Tradition::Collation;
 
+use feature 'say';
 use Encode qw( decode_utf8 );
 use File::Temp;
 use File::Which;
@@ -409,16 +410,76 @@ sub merge_readings {
        
        # Do the deletion deed.
        if( $combine ) {
+               # Combine the text of the readings
                my $joinstr = $combine_char;
                unless( defined $joinstr ) {
                        $joinstr = '' if $kept_obj->join_next || $del_obj->join_prior;
                        $joinstr = $self->wordsep unless defined $joinstr;
                }
                $kept_obj->alter_text( join( $joinstr, $kept_obj->text, $del_obj->text ) );
+               $kept_obj->normal_form( 
+                       join( $joinstr, $kept_obj->normal_form, $del_obj->normal_form ) );
+               # Combine the lexemes present in the readings
+               if( $kept_obj->has_lexemes && $del_obj->has_lexemes ) {
+                       $kept_obj->add_lexeme( $del_obj->lexemes );
+               }
        }
        $self->del_reading( $deleted );
 }
 
+=head2 compress_readings
+
+Where possible in the graph, compresses plain sequences of readings into a
+single reading. The sequences must consist of readings with no
+relationships to other readings, with only a single witness path between
+them and no other witness paths from either that would skip the other. The
+readings must also not be marked as nonsense or bad grammar.
+
+WARNING: This operation cannot be undone.
+
+=cut
+
+sub compress_readings {
+       my $self = shift;
+       # Anywhere in the graph that there is a reading that joins only to a single
+       # successor, and neither of these have any relationships, just join the two
+       # readings.
+       my %gobbled;
+       foreach my $rdg ( sort { $a->rank <=> $b->rank } $self->readings ) {
+               next if $rdg->is_meta;
+               next if $gobbled{$rdg->id};
+               next if $rdg->grammar_invalid || $rdg->is_nonsense;
+               next if $rdg->related_readings();
+               my %seen;
+               while( $self->sequence->successors( $rdg ) == 1 ) {
+                       my( $next ) = $self->reading( $self->sequence->successors( $rdg ) );
+                       throw( "Infinite loop" ) if $seen{$next->id};
+                       $seen{$next->id} = 1;
+                       last if $self->sequence->predecessors( $next ) > 1;
+                       last if $next->is_meta;
+                       last if $next->grammar_invalid || $next->is_nonsense;
+                       last if $next->related_readings();
+                       say "Joining readings $rdg and $next";
+                       $self->merge_readings( $rdg, $next, 1 );
+               }
+       }
+       # Make sure we haven't screwed anything up
+       foreach my $wit ( $self->tradition->witnesses ) {
+               my $pathtext = $self->path_text( $wit->sigil );
+               my $origtext = join( ' ', @{$wit->text} );
+               throw( "Text differs for witness " . $wit->sigil )
+                       unless $pathtext eq $origtext;
+               if( $wit->is_layered ) {
+                       $pathtext = $self->path_text( $wit->sigil.$self->ac_label );
+                       $origtext = join( ' ', @{$wit->layertext} );
+                       throw( "Ante-corr text differs for witness " . $wit->sigil )
+                               unless $pathtext eq $origtext;
+               }
+       }
+
+       $self->relations->rebuild_equivalence();
+       $self->calculate_ranks();
+}
 
 # Helper function for manipulating the graph.
 sub _stringify_args {
@@ -1227,7 +1288,7 @@ sub alignment_table {
     my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
     my @all_pos = ( 1 .. $self->end->rank - 1 );
     foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
-        # print STDERR "Making witness row(s) for " . $wit->sigil . "\n";
+        # say STDERR "Making witness row(s) for " . $wit->sigil;
         my @wit_path = $self->reading_sequence( $self->start, $self->end, $wit->sigil );
         my @row = _make_witness_row( \@wit_path, \@all_pos );
         push( @{$table->{'alignment'}}, 
@@ -1252,8 +1313,8 @@ sub _make_witness_row {
     foreach my $rdg ( @$path ) {
         my $rtext = $rdg->text;
         $rtext = '#LACUNA#' if $rdg->is_lacuna;
-        print STDERR "rank " . $rdg->rank . "\n" if $debug;
-        # print STDERR "No rank for " . $rdg->id . "\n" unless defined $rdg->rank;
+        say STDERR "rank " . $rdg->rank if $debug;
+        # say STDERR "No rank for " . $rdg->id unless defined $rdg->rank;
         $char_hash{$rdg->rank} = { 't' => $rdg };
     }
     my @row = map { $char_hash{$_} } @$positions;
@@ -1477,7 +1538,7 @@ Call make_witness_path for all witnesses in the tradition.
 sub make_witness_paths {
     my( $self ) = @_;
     foreach my $wit ( $self->tradition->witnesses ) {
-        # print STDERR "Making path for " . $wit->sigil . "\n";
+        # say STDERR "Making path for " . $wit->sigil;
         $self->make_witness_path( $wit );
     }
 }
@@ -1608,7 +1669,7 @@ sub flatten_ranks {
                                next;
                        }
             # Combine!
-               #print STDERR "Combining readings at same rank: $key\n";
+               #say STDERR "Combining readings at same rank: $key";
                $changed = 1;
             $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
             # TODO see if this now makes a common point.
@@ -1770,7 +1831,7 @@ sub _common_in_path {
        my @last_r2 = ( $r2 );
        # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' );
        my %all_seen;
-       # print STDERR "Finding common $dir for $r1, $r2\n";
+       # say STDERR "Finding common $dir for $r1, $r2";
        while( !@candidates ) {
                last unless $iter--;  # Avoid looping infinitely
                # Iterate separately down the graph from r1 and r2
@@ -1778,7 +1839,7 @@ sub _common_in_path {
                foreach my $lc ( @last_r1 ) {
                        foreach my $p ( $lc->$dir ) {
                                if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) {
-                                       # print STDERR "Path candidate $p from $lc\n";
+                                       # say STDERR "Path candidate $p from $lc";
                                        push( @candidates, $p );
                                } elsif( !$all_seen{$p->id} ) {
                                        $all_seen{$p->id} = 'r1';
@@ -1789,7 +1850,7 @@ sub _common_in_path {
                foreach my $lc ( @last_r2 ) {
                        foreach my $p ( $lc->$dir ) {
                                if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) {
-                                       # print STDERR "Path candidate $p from $lc\n";
+                                       # say STDERR "Path candidate $p from $lc";
                                        push( @candidates, $p );
                                } elsif( !$all_seen{$p->id} ) {
                                        $all_seen{$p->id} = 'r2';
index aeb8b7f..d3ca9bf 100644 (file)
@@ -196,18 +196,9 @@ sub create {
                }
        }
        
-       # Check to see if a nonlocal relationship is defined for the two readings
-       $rel = $self->scoped_relationship( $options->{'reading_a'}, 
-               $options->{'reading_b'} );
-       if( $rel && $rel->type eq $options->{'type'} ) {
-               return $rel;
-       } elsif( $rel ) {
-               throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
-       } else {
-               $rel = Text::Tradition::Collation::Relationship->new( $options );
-               $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
-               return $rel;
-       }
+       $rel = Text::Tradition::Collation::Relationship->new( $options );
+       $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
+       return $rel;
 }
 
 =head2 add_scoped_relationship( $rel )
@@ -435,8 +426,10 @@ sub add_relationship {
                        my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
                        if( $otherrel && $otherrel->type eq $options->{type}
                                && $otherrel->scope eq $options->{scope} ) {
-                               warn "Applying existing scoped relationship";
+                               warn "Applying existing scoped relationship for $rdga / $rdgb";
                                $relationship = $otherrel;
+                       } elsif( $otherrel ) {
+                               throw( "Conflicting scoped relationship for $rdga / $rdgb at $source / $target" );
                        }
        }
                $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
@@ -717,6 +710,9 @@ sub related_readings {
                # Backwards compat
                if( $filter eq 'colocated' ) {
                        $filter = sub { $_[0]->colocated };
+               } elsif( !ref( $filter ) ) {
+                       my $type = $filter;
+                       $filter = sub { $_[0]->type eq $type };
                }
                my %found = ( $reading => 1 );
                my $check = [ $reading ];
diff --git a/script/join_readings.pl b/script/join_readings.pl
new file mode 100755 (executable)
index 0000000..0b803a8
--- /dev/null
@@ -0,0 +1,59 @@
+#!/usr/bin/env perl
+
+use lib 'lib';
+use feature 'say';
+use strict;
+use warnings;
+use Getopt::Long;
+use Lingua::Features::Structure;
+use Text::Tradition::Directory;
+use XML::Easy::Syntax qw/ $xml10_name_rx $xml10_namestartchar_rx /;
+use TryCatch;
+
+binmode STDOUT, ':utf8';
+binmode STDERR, ':utf8';
+eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
+
+my( $dbuser, $dbpass );
+my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
+my $testrun;
+
+GetOptions( 
+       'dsn=s'    => \$dsn,
+       'u|user=s' => \$dbuser,
+       'p|pass=s' => \$dbpass,
+       'n|test'   => \$testrun,
+       );
+
+my $dbopts = { dsn => $dsn };
+$dbopts->{extra_args}->{user} = $dbuser if $dbuser;
+$dbopts->{extra_args}->{password} = $dbpass if $dbpass;
+
+my $dir = Text::Tradition::Directory->new( $dbopts );
+
+my $scope = $dir->new_scope();
+my $lookfor = $ARGV[0] || '';
+foreach my $tinfo ( $dir->traditionlist() ) {
+       next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
+       my $tradition = $dir->lookup( $tinfo->{'id'} );
+       my $c = $tradition->collation;
+
+       # Anywhere in the graph that there is a reading that joins only to a single
+       # successor, and neither of these have any relationships, just join the two
+       # readings.
+       
+       # Save/update the current path texts
+       foreach my $wit ( $tradition->witnesses ) {
+               my @pathtext = split( /\s+/, $c->path_text( $wit->sigil ) );
+               $wit->text( \@pathtext );
+               if( $wit->is_layered ) {
+                       my @layertext = split( /\s+/, $c->path_text( $wit->sigil.$c->ac_label ) );
+                       $wit->layertext( \@layertext );
+               }
+       }
+       
+       # Do the deed
+       $c->compress_readings();
+       # ...and save it.
+       $dir->save( $tradition );
+}
\ No newline at end of file
diff --git a/script/propagate_transitive.pl b/script/propagate_transitive.pl
new file mode 100755 (executable)
index 0000000..afe11d7
--- /dev/null
@@ -0,0 +1,125 @@
+#!/usr/bin/env perl
+
+use lib 'lib';
+use feature 'say';
+use strict;
+use warnings;
+use Getopt::Long;
+use Lingua::Features::Structure;
+use Text::Tradition::Directory;
+use XML::Easy::Syntax qw/ $xml10_name_rx $xml10_namestartchar_rx /;
+use TryCatch;
+
+binmode STDOUT, ':utf8';
+binmode STDERR, ':utf8';
+eval { no warnings; binmode $DB::OUT, ':utf8'; $DB::deep = 1000 };
+
+my %TYPEVALUES = (
+       orthographic => 1,
+       spelling => 2,
+       grammatical => 3,
+       lexical => 3,
+       collated => 50,
+       );
+
+my( $dbuser, $dbpass );
+my $dsn = 'dbi:SQLite:dbname=stemmaweb/db/traditions.db';
+my $testrun;
+
+GetOptions( 
+       'dsn=s'    => \$dsn,
+       'u|user=s' => \$dbuser,
+       'p|pass=s' => \$dbpass,
+       'n|test'   => \$testrun,
+       );
+
+my $dbopts = { dsn => $dsn };
+$dbopts->{extra_args}->{user} = $dbuser if $dbuser;
+$dbopts->{extra_args}->{password} = $dbpass if $dbpass;
+
+my $dir = Text::Tradition::Directory->new( $dbopts );
+
+my $scope = $dir->new_scope();
+my $lookfor = $ARGV[0] || '';
+foreach my $tinfo ( $dir->traditionlist() ) {
+       next unless $tinfo->{'name'} =~ /$lookfor/ || $tinfo->{'id'} eq $lookfor;
+       my $tradition = $dir->lookup( $tinfo->{'id'} );
+       my $c = $tradition->collation;
+
+       my $represented_by = {};
+       my $representative = {};
+       # For each set of ranked relationships, make all the implied links 
+       # explicit. Start with orthographic readings
+       push_rel_type( $c, 'orthographic', $representative, $represented_by );
+       # then move on to spelling readings
+       push_rel_type( $c, 'spelling', $representative, $represented_by );
+       
+       # Now all orth/spelling linked words are the same word for the purposes of
+       # other colocated links, and in our representation hashes.
+       # Go through the other relationships and propagate them to all words that are
+       # the same word.
+       foreach my $rel ( $c->relationships ) {
+               my $relobj = $c->get_relationship( $rel );
+               next unless $relobj->type =~ /^(grammatical|lexical)$/;
+               my $r1pool = $represented_by->{$representative->{$rel->[0]}};
+               my $r2pool = $represented_by->{$representative->{$rel->[1]}};
+               # Error check
+               if( check_distinct( $r1pool, $r2pool ) ) {
+                       map { propagate_rel( $c, $relobj->type, $_, @$r2pool ) } @$r1pool;
+               } else {
+                       warn "Pools not distinct for " . join( ' and ', @$rel );
+               }
+       }
+       $dir->save( $tradition ) unless $testrun;
+}
+
+sub propagate_rel {
+       my( $c, $type, @list ) = @_;
+       my $curr = shift @list;
+       while( @list ) {
+               foreach my $r ( @list ) {
+                       next if $curr eq $r;
+                       my $hasrel = $c->get_relationship( $curr, $r );
+                       if( !$hasrel || $hasrel->type eq 'collated' ) {
+                               say STDERR "Propagating $type relationship $curr -> $r";
+                               $c->add_relationship( $curr, $r, { type => $type } );
+                       } elsif( $hasrel->type ne $type ) {
+                               warn "Found relationship conflict at $curr / $r: "
+                                       . $hasrel->type . " instead of $type"
+                                       unless( $TYPEVALUES{$hasrel->type} < $TYPEVALUES{$type} );
+                       }
+               }
+               $curr = shift @list;
+       }
+}
+
+sub push_rel_type {
+       my( $c, $type, $r2rep, $rep2r ) = @_;
+       my %handled;
+       foreach my $rdg ( $c->readings ) {
+               next if $rdg->is_meta;
+               next if $handled{"$rdg"};
+               if( exists $r2rep->{"$rdg"} ) {
+                       $rdg = $r2rep->{"$rdg"};
+               }
+               # Get the specified relationships
+               my @set = $rdg->related_readings( sub {
+                       $_[0]->colocated && ( $_[0]->type eq $type ||
+                       $TYPEVALUES{$_[0]->type} < $TYPEVALUES{$type} ) } );
+               push( @set, $rdg );
+               propagate_rel( $c, $type, @set ) if @set > 2;
+               # Set up the representatives
+               map { $r2rep->{"$_"} = $rdg } @set;
+               $rep2r->{"$rdg"} = \@set;
+               map { $handled{"$_"} = 1 } @set;
+       }
+}
+
+sub check_distinct {
+       my( $l1, $l2 ) = @_;
+       my %seen;
+       map { $seen{"$_"} = 1 } @$l1;
+       map { return 0 if $seen{"$_"} } @$l2;
+       return 1;
+}
+       
index e556e32..e7eee1b 100644 (file)
@@ -39,8 +39,25 @@ sub index :Path :Args(1) {
                $c->stash->{graphdot} = $stemma->editable({ linesep => ' ' });
                $c->stash->{text_title} = $tradition->name;
                $c->stash->{template} = 'stexaminer.tt'; 
+               
+               # Get the analysis options
+               my( $use_type1, $ignore_sort ) = ( 0, 'none' );
+               if( $c->req->method eq 'POST' ) {
+                       $use_type1 = $c->req->param( 'show_type1' ) eq 'on' ? 1 : 0;
+                       $ignore_sort = $c->req->param( 'ignore_variant' );
+               }
+               $c->stash->{'show_type1'} = $use_type1;
+               $c->stash->{'ignore_variant'} = $ignore_sort;
                # TODO Run the analysis as AJAX from the loaded page.
-               my $t = run_analysis( $tradition, 'exclude_type1' => 1 );
+               my %analysis_options;
+               $analysis_options{'exclude_type1'} = !$use_type1;
+               if( $ignore_sort eq 'spelling' ) {
+                       $analysis_options{'collapse'} = [ qw/ spelling orthographic / ];
+               } elsif( $ignore_sort eq 'orthographic' ) {
+                       $analysis_options{'collapse'} = 'orthographic';
+               }
+                       
+               my $t = run_analysis( $tradition, %analysis_options );
                # Stringify the reading groups
                foreach my $loc ( @{$t->{'variants'}} ) {
                        my $mst = wit_stringify( $loc->{'missing'} );
index 760fbb2..5bf6401 100644 (file)
@@ -1,5 +1,22 @@
+#options {
+       position: relative;
+       border: 1px #c6dcf1 solid;
+       margin-left: 20px;
+       margin-bottom: 20px;
+       padding: 10px;
+       width: 500px;
+}
+.optionformelement {
+       float: left;
+       padding-left: 15px;
+}
+#options_button {
+       position: absolute;
+       bottom: 20px;
+       right: 20px;
+}
 #variants_table {
-    float: left;
+    clear: both;
     width: 90%;
     height: 190px;
     border: 1px #c6dcf1 solid;
 .cellb7 {
     border-right: 20px solid #ffd5e5;
 }
+
+/* Clearfix hack to make div container height work */
+.clearfix:after {
+       content: ".";
+       display: block;
+       clear: both;
+       visibility: hidden;
+       line-height: 0;
+       height: 0;
+}
+.clearfix {
+       display: inline-block;
+}
+html[xmlns] .clearfix {
+       display: block;
+}
+* html .clearfix {
+       height: 1%;
+}
\ No newline at end of file
index 25f1b32..de72766 100644 (file)
@@ -10,6 +10,22 @@ var graphdot = '[% graphdot %]';
 [% END -%]
     <h1>Stexaminer</h1>
     <h2>[% text_title %]</h2>
+    <div id="options">
+       <h3>Analysis options:</h3>
+       <form id="use_variants_form" name="use_variants_form" class="clearfix" method="POST">
+               <div class="optionformelement">
+                       <input type="radio" name="ignore_variant" value="none" [% 'checked="true"' IF ignore_variant == 'none' %]>Analyze all variation</input><br/>
+                       <input type="radio" name="ignore_variant" value="orthographic" [% 'checked="true"' IF ignore_variant == 'orthographic' %]>Ignore orthographic variation</input><br/>
+                       <input type="radio" name="ignore_variant" value="spelling" [% 'checked="true"' IF ignore_variant == 'spelling' %]>Ignore orthographic and spelling variation</input>
+               </div>
+               <div class="optionformelement">
+                       <input type="checkbox" name="show_type1" [% 'checked="true"' IF show_type1 %]>Include type-1 variation</input>
+               </div>
+               <div id="options_button" class="button optionformelement" onclick="$('#use_variants_form').submit()">
+                       <span>Re-analyze</span>
+               </div>
+       </form>
+    </div>
     <div id="variants_table">
       <table>
 [% FOREACH row IN variants -%]