From: Tara L Andrews Date: Mon, 6 Jun 2011 22:13:44 +0000 (+0200) Subject: new position logic for the lemmatizer and toggler; still need non-linear positions X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=4cdd82f11ff3566dcb09b89aa7bc3ba908a5e677;p=scpubgit%2Fstemmatology.git new position logic for the lemmatizer and toggler; still need non-linear positions --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index 3d43a84..aa0680a 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -3,6 +3,7 @@ package Text::Tradition::Collation; use Graph::Easy; use IPC::Run qw( run binary ); use Text::Tradition::Collation::Path; +use Text::Tradition::Collation::Position; use Text::Tradition::Collation::Reading; use Text::Tradition::Collation::Relationship; use Text::Tradition::Collation::Segment; @@ -195,7 +196,8 @@ sub add_segment { } ## Dealing with relationships between readings. This is a different -## sort of graph edge. +## sort of graph edge. Return a success/failure value and a list of +## node pairs that have been linked. sub add_relationship { my( $self, $source, $target, $options ) = @_; @@ -206,30 +208,46 @@ sub add_relationship { unless ref( $source ) && $source->isa( 'Graph::Easy::Node' ); $target = $self->reading( $target ) unless ref( $target ) && $target->isa( 'Graph::Easy::Node' ); - foreach my $rel ( $source->edges_to( $target ) ) { - if( $rel->label eq $options->{'type'} && $rel->class eq 'edge.relationship' ) { - return; + foreach my $rel ( $source->edges_to( $target ), $target->edges_to( $source ) ) { + if( $rel->class eq 'edge.relationship' ) { + return ( undef, "Relationship already exists between these readings" ); + } else { + return ( undef, "There is a witness path between these readings" ); + } + } + + if( $source->has_position && $target->has_position ) { + unless( grep { $_ eq $target } $self->same_position_as( $source ) ) { + return( undef, "Cannot set relationship at different positions" ); } } - $options->{'orig_relation'} = [ $source, $target ]; + my @joined = ( [ $source->name, $target->name ] ); # Keep track of the nodes we join. + + $options->{'this_relation'} = [ $source, $target ]; my $rel = Text::Tradition::Collation::Relationship->new( %$options ); $self->graph->add_edge( $source, $target, $rel ); if( $options->{'global'} ) { # Look for all readings with the source label, and if there are # colocated readings with the target label, join them too. - foreach my $r ( $self->readings() ) { - next unless $r->label eq $source->label; + foreach my $r ( grep { $_->label eq $source->label } $self->readings() ) { + next if $r->name eq $source->name; my @colocated = grep { $_->label eq $target->label } $self->same_position_as( $r ); if( @colocated ) { warn "Multiple readings with same label at same position!" if @colocated > 1; + my $colo = $colocated[0]; + next if $colo->edges_to( $r ) || $r->edges_to( $colo ); + $options->{'primary_relation'} = $options->{'this_relation'}; + $options->{'this_relation'} = [ $r, $colocated[0] ]; my $dup_rel = Text::Tradition::Collation::Relationship->new( %$options ); $self->graph->add_edge( $r, $colocated[0], $dup_rel ); + push( @joined, [ $r->name, $colocated[0]->name ] ); } } } + return( 1, @joined ); } =head2 Output method(s) @@ -751,7 +769,7 @@ sub make_witness_path { sub common_readings { my $self = shift; my @common = grep { $_->is_common } $self->readings(); - return sort { _cmp_position( $a->position, $b->position ) } @common; + return sort { $a->position->cmp_with( $b->position ) } @common; } # Calculate the relative positions of nodes in the graph, if they @@ -759,103 +777,102 @@ sub common_readings { sub calculate_positions { my( $self, @ordered_common ) = @_; - # We have to calculate the position identifiers for each word, - # keyed on the common nodes. This will be 'fun'. The end result - # is a hash per witness, whose key is the word node and whose - # value is its position in the text. Common nodes are always N,1 - # so have identical positions in each text. - - my $node_pos = {}; - foreach my $wit ( @{$self->tradition->witnesses} ) { - print STDERR "Calculating positions in " . $wit->sigil . "\n"; - _update_positions_from_path( $wit->path, @ordered_common ); - _update_positions_from_path( $wit->uncorrected_path, @ordered_common ) - if $wit->has_ante_corr; - } - - # DEBUG - foreach my $r ( $self->readings() ) { - print STDERR "Reading " . $r->name . "/" . $r->label . " has no position\n" - unless( $r->has_position ); + # First assign positions to all the common nodes. + my $l = 1; + foreach my $oc ( @ordered_common ) { + $oc->position( $l++, 1 ); } - $self->init_lemmata(); -} - -sub _update_positions_from_path { - my( $path, @ordered_common ) = @_; - - # First we walk the given path, making a matrix for the witness - # that corresponds to its eventual position identifier. Common - # nodes always start a new row, and are thus always in the first - # column. - my $wit_matrix = []; - my $cn = 0; # We should hit the common readings in order. - my $row = []; - foreach my $wn ( @{$path} ) { - if( $wn eq $ordered_common[$cn] ) { - # Set up to look for the next common node, and - # start a new row of words. - $cn++; - push( @$wit_matrix, $row ) if scalar( @$row ); - $row = []; - } - push( @$row, $wn ); - } - push( @$wit_matrix, $row ); # Push the last row onto the matrix - - # Now we have a matrix per witness, so that each row in the - # matrix begins with a common node, and continues with all the - # variant words that appear in the witness. We turn this into - # real positions in row,cell format. But we need some - # trickery in order to make sure that each node gets assigned - # to only one position. - - foreach my $li ( 1..scalar(@$wit_matrix) ) { - foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) { - my $reading = $wit_matrix->[$li-1]->[$di-1]; - my $position = "$li,$di"; - - # If we have seen this node before, we need to compare - # its position with what went before. - unless( $reading->has_position && - _cmp_position( $position, $reading->position ) < 1 ) { - # The new position ID replaces the old one. - $reading->position( $position ); - } # otherwise, the old position needs to stay. + if( $self->linear ) { + # For the space between each common node, we have to find all the chains + # from all the witnesses. The longest chain gives us our max, and the + # others get min/max ranges to fit. + my $first = shift @ordered_common; + while( @ordered_common ) { + my %paths; + my $next = shift @ordered_common; + my $longest = 0; + foreach my $wit ( @{$self->tradition->witnesses} ) { + # Key to the path is not important; we just have to get + # all unique paths. + my $length = $self->_track_paths( \%paths, $first, $next, $wit->sigil ); + $longest = $length unless $longest > $length; + if( $wit->has_ante_corr ) { + my $length = $self->_track_paths( \%paths, $first, $next, + $wit->sigil.$self->ac_label, $wit->sigil ); + $longest = $length unless $longest > $length; + } + } + + # Transform the path values from unique strings to arrays. + foreach my $k ( keys %paths ) { + my @v = split( /\s+/, $paths{$k} ); + $paths{$k} = \@v; + } + + # Now %paths has all the unique paths, and we know how long the + # longest of these is. Assign positions, starting with the + # longest. All non-common positions start at 2. + foreach my $path ( sort { scalar @$b <=> scalar @$a } values %paths ) { + my $range = $longest - scalar @$path; + foreach my $i ( 0 .. $#{$path} ) { + my $min = $i+2; + my $rdg = $self->reading( $path->[$i] ); + unless( $rdg->has_position ) { + $rdg->position( $first->position->common, $min, $min+$range ); + } + } + } + + $first = $next; } - } -} + } else { + + # Non-linear positions are pretty much impossible to pin down. + # Any reading might appear anywhere in the graph. I guess we + # can do positions where there aren't transpositions... -sub _cmp_position { - my( $a, $b ) = @_; - if ( $a && $b ) { - my @pos_a = split(/,/, $a ); - my @pos_b = split(/,/, $b ); - - my $big_cmp = $pos_a[0] <=> $pos_b[0]; - return $big_cmp if $big_cmp; - # else - return $pos_a[1] <=> $pos_b[1]; - } elsif ( $b ) { # a is undefined - return -1; - } elsif ( $a ) { # b is undefined - return 1; } - return 0; # they are both undefined + + $self->init_lemmata(); } -sub all_positions { +# Helper function for the guts of calculate_positions. +sub _track_paths { + my $self = shift; + my $track_hash = shift; + # Args are first, last, wit, backup + my @path = $self->reading_sequence( @_ ); + # Top and tail the array + shift @path; + pop @path; + $track_hash->{$_[2]} = join( ' ', map { $_->name } @path ) + if @path; + return @path; +} + +sub possible_positions { my $self = shift; + my @answer; my %positions = (); - map { $positions{$_->position} = 1 } $self->readings; - my @answer = sort { _cmp_position( $a, $b ) } keys( %positions ); + foreach my $r ( $self->readings ) { + next unless $r->has_position; + $positions{$r->position->maxref} = 1; + } + @answer = keys %positions; return @answer; } +# TODO think about indexing this. sub readings_at_position { - my( $self, $pos ) = @_; - my @answer = grep { $_->position eq $pos } $self->readings; + my( $self, $position, $strict ) = @_; + unless( ref( $position ) eq 'Text::Tradition::Collation::Position' ) { + $position = Text::Tradition::Collation::Position->new( $position ); + } + my @answer; + foreach my $r ( $self->readings ) { + push( @answer, $r ) if $r->is_at_position( $position, $strict ); + } return @answer; } @@ -863,13 +880,13 @@ sub readings_at_position { sub init_lemmata { my $self = shift; - - foreach my $position ( $self->all_positions ) { + + foreach my $position ( $self->possible_positions ) { $self->lemmata->{$position} = undef; } foreach my $cr ( $self->common_readings ) { - $self->lemmata->{$cr->position} = $cr->name; + $self->lemmata->{$cr->position->maxref} = $cr->name; } } @@ -906,16 +923,25 @@ sub lemma_readings { # First get the positions of those nodes which have been # toggled off. my $positions_off = {}; - map { $positions_off->{ $_->position } = $_->name } @toggled_off_nodes; + map { $positions_off->{ $_->position->reference } = $_->name } + @toggled_off_nodes; # Now for each position, we have to see if a node is on, and we - # have to see if a node has been turned off. + # have to see if a node has been turned off. The lemmata hash + # should contain fixed positions, range positions whose node was + # just turned off, and range positions whose node is on. my @answer; - foreach my $pos ( $self->all_positions() ) { + my %fixed_positions; + # TODO One of these is probably redundant. + map { $fixed_positions{$_} = 0 } keys %{$self->lemmata}; + map { $fixed_positions{$_} = 0 } keys %{$positions_off}; + map { $fixed_positions{$_} = 1 } $self->possible_positions; + foreach my $pos ( sort { Text::Tradition::Collation::Position::str_cmp( $a, $b ) } keys %fixed_positions ) { # Find the state of this position. If there is an active node, # its name will be the state; otherwise the state will be 0 # (nothing at this position) or undef (ellipsis at this position) - my $active = $self->lemmata->{$pos}; + my $active = undef; + $active = $self->lemmata->{$pos} if exists $self->lemmata->{$pos}; # Is there a formerly active node that was toggled off? if( exists( $positions_off->{$pos} ) ) { @@ -923,6 +949,10 @@ sub lemma_readings { if( $active && $active ne $off_node) { push( @answer, [ $off_node, 0 ], [ $active, 1 ] ); } else { + unless( $fixed_positions{$pos} ) { + $active = 0; + delete $self->lemmata->{$pos}; + } push( @answer, [ $off_node, $active ] ); } @@ -936,9 +966,10 @@ sub lemma_readings { # at that position. my @pos_nodes = $self->readings_at_position( $pos ); push( @answer, [ $pos_nodes[0]->name, $self->lemmata->{$pos} ] ); + delete $self->lemmata->{$pos} unless $fixed_positions{$pos}; } } - + return @answer; } @@ -963,37 +994,44 @@ sub toggle_reading { } my $pos = $reading->position; - my $old_state = $self->lemmata->{$pos}; + my $fixed = $reading->position->fixed; + my $old_state = $self->lemmata->{$pos->reference}; + my @readings_off; if( $old_state && $old_state eq $rname ) { # Turn off the node. We turn on no others by default. push( @readings_off, $reading ); } else { # Turn on the node. - $self->lemmata->{$pos} = $rname; - # Any other 'on' readings in the same position should be off. - push( @readings_off, $self->same_position_as( $reading ) ); + $self->lemmata->{$pos->reference} = $rname; + # Any other 'on' readings in the same position should be off + # if we have a fixed position. + push( @readings_off, $self->same_position_as( $reading, 1 ) ) + if $pos->fixed; # Any node that is an identical transposed one should be off. push( @readings_off, $reading->identical_readings ); } @readings_off = unique_list( @readings_off ); - + # Turn off the readings that need to be turned off. my @readings_delemmatized; foreach my $n ( @readings_off ) { - my $state = $self->lemmata->{$n->position}; + my $npos = $n->position; + my $state = undef; + $state = $self->lemmata->{$npos->reference} + if defined $self->lemmata->{$npos->reference}; if( $state && $state eq $n->name ) { # this reading is still on, so turn it off push( @readings_delemmatized, $n ); my $new_state = undef; - if( $n eq $reading ) { + if( $npos->fixed && $n eq $reading ) { # This is the reading that was clicked, so if there are no - # other readings there, turn off the position. In all other - # cases, restore the ellipsis. - my @other_n = $self->same_position_as( $n ); + # other readings there and this is a fixed position, turn off + # the position. In all other cases, restore the ellipsis. + my @other_n = $self->same_position_as( $n ); # TODO do we need strict? $new_state = 0 unless @other_n; } - $self->lemmata->{$n->position} = $new_state; + $self->lemmata->{$npos->reference} = $new_state; } elsif( $old_state && $old_state eq $n->name ) { # another reading has already been turned on here push( @readings_delemmatized, $n ); @@ -1003,9 +1041,15 @@ sub toggle_reading { } sub same_position_as { - my( $self, $reading ) = @_; + my( $self, $reading, $strict ) = @_; my $pos = $reading->position; - my @same = grep { $_ ne $reading } $self->readings_at_position( $reading->position ); + my %onpath = ( $reading->name => 1 ); + # TODO This might not always be sufficient. We really want to + # exclude all readings on this one's path between its two + # common points. + map { $onpath{$_->name} = 1 } $reading->neighbor_readings; + my @same = grep { !$onpath{$_->name} } + $self->readings_at_position( $reading->position, $strict ); return @same; } diff --git a/lib/Text/Tradition/Collation/Position.pm b/lib/Text/Tradition/Collation/Position.pm index f180046..f226e2f 100644 --- a/lib/Text/Tradition/Collation/Position.pm +++ b/lib/Text/Tradition/Collation/Position.pm @@ -20,6 +20,13 @@ has 'max' => ( required => 1, ); +# This gets set if we are tracking a more specifically-positioned +# reading. +has 'matched' => ( + is => 'rw', + isa => 'Bool', + ); + around BUILDARGS => sub { my $orig = shift; my $class = shift; diff --git a/lib/Text/Tradition/Collation/Reading.pm b/lib/Text/Tradition/Collation/Reading.pm index e16818c..a2fff66 100644 --- a/lib/Text/Tradition/Collation/Reading.pm +++ b/lib/Text/Tradition/Collation/Reading.pm @@ -1,20 +1,14 @@ package Text::Tradition::Collation::Reading; use Moose; -use Moose::Util::TypeConstraints; use MooseX::NonMoose; -use Text::Tradition::Collation::Relationship; +use Text::Tradition::Collation::Position; extends 'Graph::Easy::Node'; -subtype 'Position' - => as 'Str', - => where { $_ =~ /^\d+\,\d+$/ }, - message { 'Position must be of the form x,y' }; - has 'position' => ( is => 'rw', - isa => 'Position', + isa => 'Text::Tradition::Collation::Position', predicate => 'has_position', ); @@ -39,6 +33,20 @@ around BUILDARGS => sub { } }; +# Take constructor args as well as a Position argument. +around position => sub { + my $orig = shift; + my $self = shift; + return $self->$orig() unless @_; + + my @args = @_; + unless( @_ == 1 && ref( $_[0] ) eq 'Text::Tradition::Collation::Position' ) { + # We have constructor arguments; pass them to Position. + @args = ( Text::Tradition::Collation::Position->new( @_ ) ); + } + $self->$orig( @args ); +}; + # Initialize the identity pool. sub BUILD { my( $self, $args ) = @_; @@ -109,6 +117,94 @@ sub primary { return $self->same_as->[0]; } +sub is_at_position { + my $self = shift; + return undef unless $self->has_position; + return $self->position->is_colocated( @_ ); +} + +# Returns all readings that adjoin this one on any path. +sub neighbor_readings { + my( $self, $direction ) = @_; + $direction = 'both' unless $direction; + my @paths = grep { $_->isa( 'Text::Tradition::Collation::Path' ) } $self->edges; + my %connected; + foreach my $p ( @paths ) { + if( $p->to eq $self ) { + next if $direction eq 'forward'; + $connected{$p->from->name} = $p->from; + } else { # $p->from eq $self + next if $direction =~ /^back/; + $connected{$p->to->name} = $p->to; + } + } + return values( %connected ); +} + +sub adjust_neighbor_position { + my $self = shift; + return unless $self->position->fixed; + + # TODO This is a naive and repetitive implementation and + # I don't like it. + foreach my $neighbor ( $self->neighbor_readings('forward') ) { + next unless !$neighbor->is_common && + $neighbor->position->common == $self->position->common; + if( $neighbor->position->fixed && + $neighbor->position->min == $self->position->min ) { + warn sprintf( "Readings %s and %s are at the same position!", + $neighbor->name, $self->name ); + } + next if $neighbor->position->fixed || $neighbor->position->matched; + $neighbor->position->min( $self->position->min + 1 ); + # Recurse if necessary. + $neighbor->adjust_neighbor_position() + unless $neighbor->position->fixed; + } + foreach my $neighbor ( $self->neighbor_readings('back') ) { + next unless !$neighbor->is_common && + $neighbor->position->common == $self->position->common; + if( $neighbor->position->fixed && + $neighbor->position->min == $self->position->min ) { + warn sprintf( "Readings %s and %s are at the same position!", + $neighbor->name, $self->name ); + } + next if $neighbor->position->fixed || $neighbor->position->matched; + $neighbor->position->max( $self->position->max - 1 ); + # Recurse if necessary. + $neighbor->adjust_neighbor_position() + unless $neighbor->position->fixed; + } + return; +} + +sub match_position { + my( $self, $other ) = @_; + $DB::single = 1; + # Adjust the position of both these nodes to be as restrictive as possible. + unless( $self->position->is_colocated( $other->position ) ) { + warn "Cannot match positions of non-colocated readings"; + return; + } + my $sp = $self->position; + my $op = $other->position; + my $newmin = $sp->min > $op->min ? $sp->min : $op->min; + my $newmax = $sp->max < $op->max ? $sp->max : $op->max; + my $newpos = Text::Tradition::Collation::Position->new( + 'common' => $sp->common, + 'min' => $newmin, + 'max' => $newmax, + 'matched' => 1, + ); + # We are setting the positions to be the same object. I don't + # think that actually matters. We may eventually want unique + # objects for each position. + $self->position( $newpos ); + $other->position( $newpos ); + $self->adjust_neighbor_position(); + $other->adjust_neighbor_position(); +} + ## Keep track of which readings are unchanged across witnesses. sub is_common { diff --git a/lib/Text/Tradition/Collation/Relationship.pm b/lib/Text/Tradition/Collation/Relationship.pm index 49ac1db..4efab5e 100644 --- a/lib/Text/Tradition/Collation/Relationship.pm +++ b/lib/Text/Tradition/Collation/Relationship.pm @@ -27,19 +27,19 @@ subtype 'RelationshipTokenVector', no Moose::Util::TypeConstraints; ## see comment above -has 'sort' => ( +has 'type' => ( is => 'rw', isa => 'RelationshipType', required => 1, ); -has 'orig_relation' => ( +has 'this_relation' => ( is => 'rw', isa => 'RelationshipVector', required => 1, ); -has 'related_readings' => ( +has 'primary_relation' => ( is => 'rw', isa => 'RelationshipTokenVector', ); @@ -64,10 +64,10 @@ sub FOREIGNBUILDARGS { my $class = shift; my %args = @_; - # Make the label match our 'sort' attribute. + # Make the label match our 'type' attribute. my @superclass_args; - if( exists $args{'sort'} ) { - push( @superclass_args, 'label', $args{'sort'} ); + if( exists $args{'type'} ) { + push( @superclass_args, 'label', $args{'type'} ); } return @superclass_args; } @@ -77,14 +77,14 @@ sub BUILD { $self->set_attribute( 'class', 'relationship' ); - my( $source, $target ) = @{$self->orig_relation}; - if( $source->has_position && $target->has_position - && $source->position ne $target->position ) { - die "Cannot set relationship between readings in different positions"; + my( $source, $target ) = @{$self->this_relation}; + if( $source->has_position && $target->has_position ) { + # Harmonize the positions. + $source->match_position( $target ); } - unless( $self->related_readings ) { - $self->related_readings( [ $self->orig_relation->[0]->label, - $self->orig_relation->[1]->label ] ); + unless( $self->primary_relation ) { + $self->primary_relation( [ $self->this_relation->[0]->label, + $self->this_relation->[1]->label ] ); } } diff --git a/lib/Text/Tradition/Parser/BaseText.pm b/lib/Text/Tradition/Parser/BaseText.pm index ad963cf..eedaed9 100644 --- a/lib/Text/Tradition/Parser/BaseText.pm +++ b/lib/Text/Tradition/Parser/BaseText.pm @@ -285,28 +285,38 @@ sub merge_base { ### HACKY HACKY Do some one-off path corrections here. if( $collation->linear ) { - # What? + my $c = $collation; + my $end = $SHORTEND ? $SHORTEND : 155; + my $path = $c->tradition->witness('Vb11')->path; + if( $end > 16 ) { + $c->merge_readings( $c->reading('rdg_1/16.3.0'), $c->reading('rdg_1/16.2.1') ); + splice( @$path, 209, 2, $c->reading( 'rdg_1/16.3.0' ), $c->reading( 'rdg_1/16.2.2' ) ); + } + # What else? } else { my $c = $collation; + my $end = $SHORTEND ? $SHORTEND : 155; # Vb5: my $path = $c->tradition->witness('Vb5')->path; - splice( @$path, 1436, 0, $c->reading('106,14') ); + splice( @$path, 1436, 0, $c->reading('106,14') ) if $end > 106; # Vb11: $path = $c->tradition->witness('Vb11')->path; - $c->merge_readings( $c->reading('rdg_1/16.3.0'), $c->reading('rdg_1/16.2.1') ); - splice( @$path, 209, 2, $c->reading( 'rdg_1/16.3.0' ), $c->reading( '16,1' ) ); + if( $end > 16 ) { + $c->merge_readings( $c->reading('rdg_1/16.3.0'), $c->reading('rdg_1/16.2.1') ); + splice( @$path, 209, 2, $c->reading( 'rdg_1/16.3.0' ), $c->reading( '16,1' ) ); + } # Vb12 a.c.: $path = $c->tradition->witness('Vb12')->uncorrected_path; - splice( @$path, 1828, 1, $c->reading('rdg_2/137.5.0') ); + splice( @$path, 1828, 1, $c->reading('rdg_2/137.5.0') ) if $end > 137; # Vb13: $path = $c->tradition->witness('Vb13')->path; - splice( @$path, 782, 0, $c->reading( '58,5' ) ); + splice( @$path, 782, 0, $c->reading( '58,5' ) ) if $end > 58; # Vb20 a.c.: $path = $c->tradition->witness('Vb20')->uncorrected_path; - splice( @$path, 1251, 1, $c->reading( '94,6' ) ); + splice( @$path, 1251, 1, $c->reading( '94,6' ) ) if $end > 94; # Vb26: $path = $c->tradition->witness('Vb26')->path; - splice( @$path, 618, 0, $c->reading('46,2') ) + splice( @$path, 618, 0, $c->reading('46,2') ) if $end > 46; } # Now walk paths and calculate positions. @@ -518,7 +528,7 @@ sub set_relationships { # Transposition or repetition: look for nodes with the # same label but different IDs and mark them. $type = 'repetition' if $type =~ /^rep/i; - $rel_options{'sort'} = $type; + $rel_options{'type'} = $type; my %labels; foreach my $r ( @$lemma ) { $labels{cmp_str( $r )} = $r; @@ -545,7 +555,7 @@ sub set_relationships { $type = 'spelling' if $type =~ /sp/i; $type = 'repetition' if $type =~ /rep/i; $type = 'lexical' if $type =~ /lex/i; - $rel_options{'sort'} = $type; + $rel_options{'type'} = $type; if( @$lemma == @$var ) { foreach my $i ( 0 .. $#{$lemma} ) { $collation->add_relationship( $var->[$i], $lemma->[$i], diff --git a/t/graph.t b/t/graph.t index b6844f9..ff6f0e7 100644 --- a/t/graph.t +++ b/t/graph.t @@ -119,7 +119,6 @@ $string = '# when ... ... ... showers sweet with ... fruit ... ... of ... has pi is( make_text( @active_nodes ), $string, "Got the right text" ); # Test the toggling effects of transposition - @off = $collation->toggle_reading( 'n14' ); # Add the turned on node $expected_nodes[ 11 ] = [ "n14", 1 ]; @@ -210,11 +209,11 @@ my %expected_colocations = ( 'n18' => [ 'n17' ], # drought -> march 'n17' => [ 'n18' ], # march -> drought 'n15' => [ 'n14' ], # march -> drought - 'n21' => [ 'n9', 'n22' ], # unto -> to, teh - 'n22' => [ 'n9', 'n21' ], # to -> unto, teh + 'n21' => [ 'n22', 'n9' ], # unto -> to, teh + 'n22' => [ 'n21', 'n9' ], # to -> unto, teh 'n9' => [ 'n21', 'n22', 'n23' ], # teh -> unto, to, the - 'n23' => [ 'n9', 'n25' ], # the -> teh, rood - 'n25' => [ 'n9', 'n26' ], # rood -> the, root + 'n23' => [ 'n25', 'n9' ], # the -> teh, rood + 'n25' => [ 'n23', 'n26' ], # rood -> the, root 'n26' => [ 'n25' ], # root -> rood ); @@ -224,4 +223,80 @@ foreach my $n ( keys %expected_colocations ) { is_deeply( \@colocated, $expected_colocations{$n}, "Colocated nodes for $n correct" ); } +# Test strict colocations +$expected_colocations{'n9'} = []; +$expected_colocations{'n21'} = ['n22']; +$expected_colocations{'n22'} = ['n21']; +$expected_colocations{'n23'} = []; +$expected_colocations{'n25'} = []; +$expected_colocations{'n26'} = []; + +foreach my $n ( keys %expected_colocations ) { + my $nr = $collation->reading( $n ); + my @colocated = sort( map { $_->name } $collation->same_position_as( $nr, 1 ) ); + is_deeply( \@colocated, $expected_colocations{$n}, "Strictly colocated nodes for $n correct" ); +} + +# Test turning on, then off, an annoyingly overlapping node + +@off = $collation->toggle_reading( 'n9' ); +# Remove the old toggle-off +splice( @expected_nodes, 16, 1 ); +splice( @expected_nodes, 17, 0, [ "n9", 1 ] ); +@active_nodes = $collation->lemma_readings( @off ); +subtest 'Turned on a node without fixed position' => \&compare_active; +$string = '# when ... ... showers sweet with ... fruit ... march of ... has pierced unto teh ... ... #'; +is( make_text( @active_nodes ), $string, "Got the right text" ); + +@off = $collation->toggle_reading( 'n23' ); +splice( @expected_nodes, 18, 1, [ "n23", 1 ] ); +@active_nodes = $collation->lemma_readings( @off ); +subtest 'Turned on a node colocated to one without fixed position' => \&compare_active; +$string = '# when ... ... showers sweet with ... fruit ... march of ... has pierced unto teh the ... #'; +is( make_text( @active_nodes ), $string, "Got the right text" ); + +@off = $collation->toggle_reading( 'n9' ); +splice( @expected_nodes, 17, 1, [ "n9", 0 ] ); +@active_nodes = $collation->lemma_readings( @off ); +subtest 'Turned on a node colocated to one without fixed position' => \&compare_active; +$string = '# when ... ... showers sweet with ... fruit ... march of ... has pierced unto the ... #'; +is( make_text( @active_nodes ), $string, "Got the right text" ); + +### Now test relationship madness. + +my( $result, @relations ) = $collation->add_relationship( 'n25', 'n23', {'type' => 'lexical'} ); # rood -> the +ok( $result, "Added relationship between nodes" ); +is( scalar @relations, 1, "Returned only the one collapse" ); +is_deeply( $relations[0], [ 'n25', 'n23' ], "Returned the correct collapse" ); +is( $collation->reading( 'n25' )->position->reference, '9,3', "Harmonized position for n25 correct" ); +is( $collation->reading( 'n23' )->position->reference, '9,3', "Harmonized position for n23 correct" ); +is( $collation->reading( 'n9' )->position->reference, '9,2', "Adjusted position for n9 correct" ); + +# Do some yucky hardcoded cleanup to undo this relationship. +$collation->reading('n25')->position->max( 4 ); +$collation->reading('n9')->position->max( 3 ); +$collation->graph->del_edge( $collation->reading('n25')->edges_to( $collation->reading('n23')) ); + +( $result, @relations ) = $collation->add_relationship( 'n26', 'n25', {'type' => 'spelling'} ); # root -> rood +ok( $result, "Added relationship between nodes" ); +is( scalar @relations, 1, "Returned only the one collapse" ); +is_deeply( $relations[0], [ 'n26', 'n25' ], "Returned the correct collapse" ); +is( $collation->reading( 'n26' )->position->reference, '9,4', "Harmonized position for n26 correct" ); +is( $collation->reading( 'n25' )->position->reference, '9,4', "Harmonized position for n25 correct" ); +is( $collation->reading( 'n9' )->position->reference, '9,2-3', "Adjusted position for n9 correct" ); + +( $result, @relations ) = $collation->add_relationship( 'n15', 'n9', {'type' => 'lexical'} ); # bogus march -> teh +ok( !$result, "Refused to add skewed relationship: " . $relations[0] ); + +( $result, @relations ) = $collation->add_relationship( 'n25', 'n26', {'type' => 'spelling'} ); # root -> rood +ok( !$result, "Refused to add dupe relationship: " . $relations[0] ); + +( $result, @relations ) = $collation->add_relationship( 'n8', 'n13', {'type' => 'spelling', 'global' => 1 } ); # teh -> the +ok( $result, "Added global relationship between nodes" ); +is( scalar @relations, 2, "Returned two relationship creations" ); +is_deeply( $relations[0], [ 'n8', 'n13' ], "Returned the original collapse" ); +is_deeply( $relations[1], [ 'n9', 'n23' ], "Returned the other collapse" ); +is( $collation->reading( 'n8' )->position->reference, '6,2', "Harmonized position for n8 correct" ); +is( $collation->reading( 'n9' )->position->reference, '9,3', "Harmonized position for n9 correct" ); + done_testing();