replace local with global rel if they are equivalent anyway
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
index 93c4699..3d98174 100644 (file)
@@ -23,9 +23,34 @@ general) between readings.
 =begin testing
 
 use Text::Tradition;
+use TryCatch;
 
 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
 
+# Add some relationships, and delete them
+
+my $cxfile = 't/data/Collatex-16.xml';
+my $t = Text::Tradition->new( 
+    'name'  => 'inline', 
+    'input' => 'CollateX',
+    'file'  => $cxfile,
+    );
+my $c = $t->collation;
+
+my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
+is( scalar @v1, 1, "Added a single relationship" );
+is( $v1[0]->[0], 'n21', "Got correct node 1" );
+is( $v1[0]->[1], 'n22', "Got correct node 2" );
+my @v2 = $c->add_relationship( 'n24', 'n23', 
+       { 'type' => 'spelling', 'scope' => 'global' } );
+is( scalar @v2, 2, "Added a global relationship with two instances" );
+@v1 = $c->del_relationship( 'n22', 'n21' );
+is( scalar @v1, 1, "Deleted first relationship" );
+@v2 = $c->del_relationship( 'n12', 'n13' );
+is( scalar @v2, 2, "Deleted second global relationship" );
+my @v3 = $c->del_relationship( 'n1', 'n2' );
+is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
+
 =end testing
 
 =head1 METHODS
@@ -67,11 +92,19 @@ Return the relationship object, if any, that exists between two readings.
 =cut
 
 sub get_relationship {
-       my( $self, @vector ) = @_;
+       my $self = shift;
+       my @vector;
+       if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
+               # Dereference the edge arrayref that was passed.
+               my $edge = shift;
+               @vector = @$edge;
+       } else {
+               @vector = @_;
+       }
        my $relationship;
        if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
                $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
-       }
+       } 
        return $relationship;
 }
 
@@ -81,11 +114,6 @@ sub _set_relationship {
        $self->graph->set_edge_attribute( @vector, 'object', $relationship );
 }
 
-sub _remove_relationship {
-       my( $self, @vector ) = @_;
-       $self->graph->delete_edge( @vector );
-}
-       
 =head2 create
 
 Create a new relationship with the given options and return it.
@@ -100,7 +128,11 @@ sub create {
        my $target = delete $options->{'orig_b'};
        my $rel = $self->get_relationship( $source, $target );
        if( $rel ) {
-               if( $rel->type ne $options->{'type'} ) {
+               if( $rel->type eq 'collated' ) {
+                       # Always replace a 'collated' relationship with a more descriptive
+                       # one, if asked.
+                       $self->del_relationship( $source, $target );
+               } elsif( $rel->type ne $options->{'type'} ) {
                        throw( "Another relationship of type " . $rel->type 
                                . " already exists between $source and $target" );
                } else {
@@ -131,13 +163,16 @@ non-locally.  Key on whichever reading occurs first alphabetically.
 
 sub add_scoped_relationship {
        my( $self, $rel ) = @_;
-       my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
+       my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
+       my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );      
+       my $r = $self->scoped_relationship( $rdga, $rdgb );
        if( $r ) {
                warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
-                       $r->type, $rel->reading_a, $rel->reading_b );
+                       $r->type, $rdga, $rdgb );
                return;
        }
-       $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
+       my( $first, $second ) = sort ( $rdga, $rdgb );
+       $self->scopedrels->{$first}->{$second} = $rel;
 }
 
 =head2 scoped_relationship( $reading_a, $reading_b )
@@ -172,57 +207,72 @@ add_relationship.
 sub add_relationship {
        my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
 
-       # Check the options
-       $options->{'scope'} = 'local' unless $options->{'scope'};
-       
-       my( $is_valid, $reason ) = 
-               $self->relationship_valid( $source, $target, $options->{'type'} );
-    unless( $is_valid ) {
-        throw( "Invalid relationship: $reason" );
+       my $relationship;
+       my $thispaironly;
+       if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
+               $relationship = $options;
+               $thispaironly = 1;  # If existing rel, set only where asked.
+       } else {
+               # Check the options
+               $options->{'scope'} = 'local' unless $options->{'scope'};
+               $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
+               $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
+               
+               my( $is_valid, $reason ) = 
+                       $self->relationship_valid( $source, $target, $options->{'type'} );
+               unless( $is_valid ) {
+                       throw( "Invalid relationship: $reason" );
+               }
+               
+               # Try to create the relationship object.
+               $options->{'reading_a'} = $source_rdg->text;
+               $options->{'reading_b'} = $target_rdg->text;
+               $options->{'orig_a'} = $source;
+               $options->{'orig_b'} = $target;
+       if( $options->{'scope'} ne 'local' ) {
+                       # Is there a relationship with this a & b already?
+                       # Case-insensitive for non-orthographics.
+                       my $rdga = $options->{'type'} eq 'orthographic' 
+                               ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
+                       my $rdgb = $options->{'type'} eq 'orthographic' 
+                               ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
+                       my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
+                       if( $otherrel && $otherrel->type eq $options->{type}
+                               && $otherrel->scope eq $options->{scope} ) {
+                               warn "Applying existing scoped relationship";
+                               $relationship = $otherrel;
+                       }
+       }
+               $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
     }
-    
-    # Try to create the relationship object.
-    $options->{'reading_a'} = $source_rdg->text;
-    $options->{'reading_b'} = $target_rdg->text;
-    $options->{'orig_a'} = $source;
-    $options->{'orig_b'} = $target;
-    my $relationship = $self->create( $options );  # Will throw on error
+
 
        # Find all the pairs for which we need to set the relationship.
-       my @vectors = ( [ $source, $target ] ); 
-    if( $relationship->colocated && $relationship->nonlocal ) {
-       my $c = $self->collation;
-       # Set the same relationship everywhere we can, throughout the graph.
-       my @identical_readings = grep { $_->text eq $relationship->reading_a }
-               $c->readings;
-       foreach my $ir ( @identical_readings ) {
-               next if $ir->id eq $source;
-               # Check to see if there is a target reading with the same text at
-               # the same rank.
-               my @itarget = grep 
-                       { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
-                       $c->readings;
-               if( @itarget ) {
-                       # We found a hit.
-                       warn "More than one reading with text " . $target_rdg->text
-                               . " at rank " . $ir->rank . "!" if @itarget > 1;
-                       push( @vectors, [ $ir->id, $itarget[0]->id ] );
-               }
-       }       
+       my @vectors = [ $source, $target ];
+    if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
+       push( @vectors, $self->_find_applicable( $relationship ) );
     }
-    
+        
     # Now set the relationship(s).
     my @pairs_set;
     foreach my $v ( @vectors ) {
                my $rel = $self->get_relationship( @$v );
-       if( $rel ) {
+       if( $rel && $rel ne $relationship ) {
                if( $rel->nonlocal ) {
                        throw( "Found conflicting relationship at @$v" );
-               } else {
-                       warn "Not overriding local relationship set at @$v";
+               } elsif( $rel->type ne 'collated' ) {
+                       # Replace a collation relationship; leave any other sort in place.
+                       my $r1ann = $rel->has_annotation ? $rel->annotation : '';
+                       my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
+                       unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
+                                       warn sprintf( "Not overriding local relationship %s with global %s " 
+                                               . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
+                                               @$v, $rel->reading_a, $rel->reading_b );
+                                       next;
+                               }
                }
-               next;
        }
+       map { $self->_drop_collations( $_ ) } @$v;
        $self->_set_relationship( $relationship, @$v );
        push( @pairs_set, $v );
     }
@@ -230,6 +280,92 @@ sub add_relationship {
     return @pairs_set;
 }
 
+=head2 del_scoped_relationship( $reading_a, $reading_b )
+
+Deletes the general (document-level or global) relationship that has been defined
+between the two reading strings, and returns it. Returns undef if there was no such relationship.
+
+=cut
+
+sub del_scoped_relationship {  # drop and return the stored scoped relationship
+       my( $self, @rdgs ) = @_;
+       my( $lo, $hi ) = sort @rdgs[ 0, 1 ];  # keyed on the alphabetically-first string
+       return delete $self->scopedrels->{$lo}->{$hi};
+}
+
+sub _find_applicable {
+       # Given a non-local relationship, find every place in the collation where
+       # it applies: a reading whose text matches reading_a that shares a rank
+       # with a reading whose text matches reading_b.  Returns a list of
+       # [ source_id, target_id ] array refs.
+       my( $self, $rel ) = @_;
+       my $c = $self->collation;
+
+       # Orthographic relationships compare text exactly; every other type
+       # compares case-insensitively.
+       # TODO Someday we might use a case sensitive language.
+       my $norm = $rel->type eq 'orthographic'
+               ? sub { $_[0] }
+               : sub { lc( $_[0] ) };
+       my $text_a = $norm->( $rel->reading_a );
+       my $text_b = $norm->( $rel->reading_b );
+
+       # Fetch the reading list once rather than once per candidate.
+       my @readings = $c->readings;
+       my @vectors;
+       foreach my $ir ( grep { $norm->( $_->text ) eq $text_a } @readings ) {
+               my @itarget = grep { $_->rank == $ir->rank
+                                    && $norm->( $_->text ) eq $text_b } @readings;
+               next unless @itarget;
+
+               # Use the first hit as the target for this position.
+               my $itmain = shift @itarget;
+               if( @itarget ) {
+                       # Warn if there is more than one hit with no orth link between them.
+                       my %all_targets = map { $_ => 1 } @itarget;
+                       # Discard the hits that are orthographically linked to the chosen one.
+                       delete @all_targets{ $self->related_readings( $itmain,
+                               sub { $_[0]->type eq 'orthographic' } ) };
+                       warn "More than one unrelated reading with text " . $itmain->text
+                               . " at rank " . $ir->rank . "!" if keys %all_targets;
+               }
+               push( @vectors, [ $ir->id, $itmain->id ] );
+       }
+       return @vectors;
+}
+
+=head2 del_relationship( $source, $target )
+
+Removes the relationship between the given readings. If the relationship is
+non-local, removes the relationship everywhere in the graph.
+
+=cut
+
+sub del_relationship {
+       my( $self, $source, $target ) = @_;
+       my $rel = $self->get_relationship( $source, $target );
+       return () unless $rel; # Nothing to delete; return an empty set.
+       my @vectors = ( [ $source, $target ] );  # every edge removed, as [ id, id ] pairs
+       $self->_remove_relationship( $source, $target );
+       if( $rel->nonlocal ) {
+               # Remove the relationship wherever it occurs (same object on other edges).
+               my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
+                       $self->relationships;
+               foreach my $re ( @rel_edges ) {
+                       $self->_remove_relationship( @$re );
+                       push( @vectors, $re );
+               }
+               # Scoped keys are lowercased unless orthographic; see add_scoped_relationship.
+               $self->del_scoped_relationship( map { $rel->type eq 'orthographic' ? $_ : lc( $_ ) } ( $rel->reading_a, $rel->reading_b ) );
+       }
+       return @vectors;
+}
+
+sub _remove_relationship {  # delete the graph edge for the given vector
+       my $self = shift;
+       $self->graph->delete_edge( @_ );
+}
+       
 =head2 relationship_valid( $source, $target, $type )
 
 Checks whether a relationship of type $type may exist between the readings given
@@ -244,6 +380,10 @@ sub relationship_valid {
     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
                # Check that the two readings do (for a repetition) or do not (for
                # a transposition) appear in the same witness.
+               # If we haven't made reading paths yet, take it on faith.
+               return( 1, "no paths yet" ) unless $c->sequence->successors( $c->start );
+               
+               # We have some paths, so carry on.
                my %seen_wits;
                map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
                foreach my $w ( $c->reading_witnesses( $target ) ) {
@@ -251,21 +391,52 @@ sub relationship_valid {
                                return ( 0, "Readings both occur in witness $w" ) 
                                        if $rel eq 'transposition';
                                return ( 1, "ok" ) if $rel eq 'repetition';
+                       }
+               }
+               # For transpositions, there should also be a path from one reading
+               # to the other.
+               if( $rel eq 'transposition' ) {
+                       my( %sourceseq, %targetseq );
+                       map { $sourceseq{$_} = 1 } $c->sequence->all_successors( $source );
+                       map { $targetseq{$_} = 1 } $c->sequence->all_successors( $target );
+                       return( 0, "Readings are parallel" )
+                               unless $sourceseq{$target} || $targetseq{$source};
                }
                return $rel eq 'transposition' ? ( 1, "ok" )
                        : ( 0, "Readings occur only in distinct witnesses" );
-               }
-       } else {
+       } 
+       if( $rel ne 'repetition' ) {
                # Check that linking the source and target in a relationship won't lead
-               # to a path loop for any witness.  If they have the same rank then fine.
-               return( 1, "ok" ) 
-                       if $c->reading( $source )->rank == $c->reading( $target )->rank;
+               # to a path loop for any witness.  If they have the same rank then
+               # they are parallel by definition.
+               # For transpositions, we want the opposite result: it is only valid if
+               # the readings cannot be parallel.
+               my $sourcerank = $c->reading( $source )->has_rank
+                       ? $c->reading( $source )->rank : undef;
+               my $targetrank = $c->reading( $target )->has_rank
+                       ? $c->reading( $target )->rank : undef;
+               if( $sourcerank && $targetrank && $sourcerank == $targetrank ) {
+                       return( 0, "Cannot transpose readings of same rank" )
+                               if $rel eq 'transposition';
+                       return( 1, "ok" );
+               }
                
                # Otherwise, first make a lookup table of all the
                # readings related to either the source or the target.
                my @proposed_related = ( $source, $target );
-               push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
-               push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
+               # Drop the collation links of source and target, unless we want to
+               # add a collation relationship.
+               my @dropped;
+               foreach my $r ( ( $source, $target ) ) {
+                       push( @dropped, $self->_drop_collations( $r ) )
+                               unless $rel eq 'collated';
+                       push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
+               }
+               # Also drop any collation links at intermediate ranks.
+               foreach my $rank ( $sourcerank+1 .. $targetrank-1 ) {
+                       map { push( @dropped, $self->_drop_collations( $_ ) ) }
+                               $c->readings_at_rank( $rank );
+               }
                my %pr_ids;
                map { $pr_ids{ $_ } = 1 } @proposed_related;
        
@@ -278,34 +449,71 @@ sub relationship_valid {
                        map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
                }
                foreach my $k ( keys %all_pred ) {
-                       return( 0, "Relationship would create witness loop" )
-                               if exists $all_succ{$k};
+                       if( exists $all_succ{$k} ) {
+                               $self->_restore_collations( @dropped );
+                               return( 1, "ok" ) if $rel eq 'transposition';
+                               return( 0, "Relationship would create witness loop" );
+                       }
                }
                foreach my $k ( keys %pr_ids ) {
-                       return( 0, "Relationship would create witness loop" )
-                               if exists $all_pred{$k} || exists $all_succ{$k};
+                       if( exists $all_pred{$k} || exists $all_succ{$k} ) {
+                               $self->_restore_collations( @dropped );
+                               return( 1, "ok" ) if $rel eq 'transposition';
+                               return( 0, "Relationship would create witness loop" );
+                       }
+               }
+               if( $rel eq 'transposition' ) {
+                       $self->_restore_collations( @dropped );
+                       return ( 0, "Cannot transpose parallel readings" );
                }
                return ( 1, "ok" );
        }
 }
 
-=head2 related_readings( $reading, $colocated_only )
+sub _drop_collations {
+       # Delete each 'collated' link on $reading; return the removed [ reading, neighbor ] pairs.
+       my( $self, $reading ) = @_;
+       my @dropped;
+       foreach my $nbr ( $self->graph->neighbors( $reading ) ) {
+               next unless $self->get_relationship( $reading, $nbr )->type eq 'collated';
+               $self->del_relationship( $reading, $nbr );
+               push( @dropped, [ $reading, $nbr ] );
+       }
+       return @dropped;
+}
+
+sub _restore_collations {  # re-add the [ id, id ] links removed by _drop_collations
+       my( $self, @vectors ) = @_;
+       foreach my $v ( @vectors ) {  # add_relationship needs ( id, reading, id, reading, opts )
+               try {
+                       $self->add_relationship( ( map { ( $_, $self->collation->reading( $_ ) ) } @$v ), { 'type' => 'collated' } );
+               } catch ( Text::Tradition::Error $e ) {
+                       warn "Could not restore collation " . join( ' -> ', @$v );
+               }
+       }
+}
+
+=head2 related_readings( $reading, $filter )
 
 Returns a list of readings that are connected via relationship links to $reading.
-If $colocated_only is true, restricts the list to those readings that are in the
-same logical location (and therefore have the same rank in the collation graph.)
+If $filter is set to a subroutine ref, returns only those related readings where
+$filter( $relationship ) returns a true value.
 
 =cut
 
 sub related_readings {
-       my( $self, $reading, $colocated ) = @_;
+       my( $self, $reading, $filter ) = @_;
        my $return_object;
        if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
                $reading = $reading->id;
                $return_object = 1;
        }
        my @answer;
-       if( $colocated ) {
+       if( $filter ) {
+               # Backwards compat
+               if( $filter eq 'colocated' ) {
+                       $filter = sub { $_[0]->colocated };
+               }
                my %found = ( $reading => 1 );
                my $check = [ $reading ];
                my $iter = 0;
@@ -313,7 +521,7 @@ sub related_readings {
                        my $more = [];
                        foreach my $r ( @$check ) {
                                foreach my $nr ( $self->graph->neighbors( $r ) ) {
-                                       if( $self->get_relationship( $r, $nr )->colocated ) {
+                                       if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
                                                push( @$more, $nr ) unless exists $found{$nr};
                                                $found{$nr} = 1;
                                        }
@@ -321,6 +529,7 @@ sub related_readings {
                        }
                        $check = $more;
                }
+               delete $found{$reading};
                @answer = keys %found;
        } else {
                @answer = $self->graph->all_reachable( $reading );
@@ -342,6 +551,8 @@ stops tracking the to-be-deleted reading.
 
 sub merge_readings {
        my( $self, $kept, $deleted, $combined ) = @_;
+       # Delete any relationship between kept and deleted
+       $self->del_relationship( $kept, $deleted );
        foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
                # Get the pair of kept / rel
                my @vector = ( $kept );
@@ -351,12 +562,9 @@ sub merge_readings {
                # If kept changes its text, drop the relationship.
                next if $combined;
                        
-               # If kept / rel already has a relationship, warn and keep the old
+               # If kept / rel already has a relationship, just keep the old
                my $rel = $self->get_relationship( @vector );
-               if( $rel ) {
-                       warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
-                       next;
-               }
+               next if $rel;
                
                # Otherwise, adopt the relationship that would be deleted.
                $rel = $self->get_relationship( @$edge );
@@ -395,12 +603,11 @@ sub _as_graphml {
                $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
 
                my $rel_obj = $self->get_relationship( @$e );
-               _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
-               _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
-               _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
-                       $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
-               _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
-                       $rel_obj->non_independent ) if $rel_obj->nonind_set;
+               foreach my $key ( keys %$edge_keys ) {
+                       my $value = $rel_obj->$key;
+                       _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) 
+                               if defined $value;
+               }
        }
 }