workaround for segfault; slightly poor form GraphML but who cares?
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
index 2d69b0c..ddad4db 100644 (file)
@@ -202,13 +202,76 @@ scoped non-locally.
 Returns a status boolean and a list of all reading pairs connected by the call to
 add_relationship.
 
+=begin testing
+
+use Text::Tradition;
+use TryCatch;
+
+my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
+# Test 1: try to equate nodes that are prevented with an intermediate collation
+ok( $t1, "Parsed test fragment file" );
+my $c1 = $t1->collation;
+## HACK
+$c1->calculate_ranks();
+my $trel = $c1->get_relationship( '9,2', '9,3' );
+is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
+       "Troublesome relationship exists" );
+is( $trel->type, 'collated', "Troublesome relationship is a collation" );
+
+# Try to make the link we want
+try {
+       $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
+       ok( 1, "Added cross-collation relationship as expected" );
+} catch {
+       ok( 0, "Existing collation blocked equivalence relationship" );
+}
+
+try {
+       $c1->calculate_ranks();
+       ok( 1, "Successfully calculated ranks" );
+} catch {
+       ok( 0, "Collation now has a cycle" );
+}
+
+# Test 2: try to equate nodes that are prevented with a real intermediate
+# equivalence
+
+my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
+# Test 2 setup: work on a freshly parsed copy of the same fragment
+my $c2 = $t2->collation;
+## HACK
+$c2->calculate_ranks();
+$c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
+my $trel2 = $c2->get_relationship( '9,2', '9,3' );
+is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
+       "Created blocking relationship" );
+is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
+# This time the link ought to fail
+try {
+       $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
+       ok( 0, "Added cross-equivalent bad relationship" );
+} catch {
+       ok( 1, "Existing equivalence blocked crossing relationship" );
+}
+
+try {
+       $c2->calculate_ranks();
+       ok( 1, "Successfully calculated ranks" );
+} catch {
+       ok( 0, "Collation now has a cycle" );
+}
+
+=end testing
+
 =cut
 
 sub add_relationship {
-       my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
+       my( $self, $source, $target, $options ) = @_;
+    my $c = $self->collation;
 
        my $relationship;
        my $thispaironly;
+       my $droppedcolls = [];
        if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
                $relationship = $options;
                $thispaironly = 1;  # If existing rel, set only where asked.
@@ -218,15 +281,15 @@ sub add_relationship {
                $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
                $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
                
-               my( $is_valid, $reason ) = 
-                       $self->relationship_valid( $source, $target, $options->{'type'} );
+               my( $is_valid, $reason ) = $self->relationship_valid( $source, $target, 
+                       $options->{'type'}, $droppedcolls );
                unless( $is_valid ) {
                        throw( "Invalid relationship: $reason" );
                }
                
                # Try to create the relationship object.
-               $options->{'reading_a'} = $source_rdg->text;
-               $options->{'reading_b'} = $target_rdg->text;
+               $options->{'reading_a'} = $c->reading( $source )->text;
+               $options->{'reading_b'} = $c->reading( $target )->text;
                $options->{'orig_a'} = $source;
                $options->{'orig_b'} = $target;
        if( $options->{'scope'} ne 'local' ) {
@@ -248,35 +311,42 @@ sub add_relationship {
 
 
        # Find all the pairs for which we need to set the relationship.
-       my @vectors = [ $source, $target ];
+       my @vectors;
     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
        push( @vectors, $self->_find_applicable( $relationship ) );
     }
         
     # Now set the relationship(s).
     my @pairs_set;
+       my $rel = $self->get_relationship( $source, $target );
+       if( $rel && $rel ne $relationship ) {
+               if( $rel->nonlocal ) {
+                       throw( "Found conflicting relationship at $source - $target" );
+               } elsif( $rel->type ne 'collated' ) {
+                       # Replace a collation relationship; leave any other sort in place.
+                       my $r1ann = $rel->has_annotation ? $rel->annotation : '';
+                       my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
+                       unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
+                               warn sprintf( "Not overriding local relationship %s with global %s " 
+                                       . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
+                                       $source, $target, $rel->reading_a, $rel->reading_b );
+                               next;
+                       }
+               }
+       }
+       $self->_set_relationship( $relationship, $source, $target );
+       push( @pairs_set, [ $source, $target ] );
+    
+    # Set any additional relationships that might be in @vectors.
     foreach my $v ( @vectors ) {
-               my $rel = $self->get_relationship( @$v );
-       if( $rel && $rel ne $relationship ) {
-               if( $rel->nonlocal ) {
-                       throw( "Found conflicting relationship at @$v" );
-               } elsif( $rel->type ne 'collated' ) {
-                       # Replace a collation relationship; leave any other sort in place.
-                       my $r1ann = $rel->has_annotation ? $rel->annotation : '';
-                       my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
-                       unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
-                                       warn sprintf( "Not overriding local relationship %s with global %s " 
-                                               . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
-                                               @$v, $rel->reading_a, $rel->reading_b );
-                                       next;
-                               }
-               }
-       }
-       map { $self->_drop_collations( $_ ) } @$v;
-       $self->_set_relationship( $relationship, @$v );
-       push( @pairs_set, $v );
+       next if $v->[0] eq $source && $v->[1] eq $target;
+       next if $v->[1] eq $source && $v->[0] eq $target;
+       my @added = $self->add_relationship( @$v, $relationship );
+       push( @pairs_set, @added );
     }
     
+    # Finally, restore whatever collations we can, and return.
+    $self->_restore_collations( @$droppedcolls );
     return @pairs_set;
 }
 
@@ -375,15 +445,13 @@ a yes/no boolean and, if the answer is no, message gives the reason why.
 =cut
 
 sub relationship_valid {
-    my( $self, $source, $target, $rel ) = @_;
+    my( $self, $source, $target, $rel, $mustdrop ) = @_;
+    $mustdrop = [] unless $mustdrop; # in case we were passed nothing
     my $c = $self->collation;
     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
                # Check that the two readings do (for a repetition) or do not (for
                # a transposition) appear in the same witness.
-               # If we haven't made reading paths yet, take it on faith.
-               return( 1, "no paths yet" ) unless $c->sequence->successors( $c->start );
-               
-               # We have some paths, so carry on.
+               # TODO this might be called before witness paths are set...
                my %seen_wits;
                map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
                foreach my $w ( $c->reading_witnesses( $target ) ) {
@@ -393,78 +461,39 @@ sub relationship_valid {
                                return ( 1, "ok" ) if $rel eq 'repetition';
                        }
                }
-               # For transpositions, there should also be a path from one reading
-               # to the other.
-               if( $rel eq 'transposition' ) {
-                       my( %sourceseq, %targetseq );
-                       map { $sourceseq{$_} = 1 } $c->sequence->all_successors( $source );
-                       map { $targetseq{$_} = 1 } $c->sequence->all_successors( $target );
-                       return( 0, "Readings are parallel" )
-                               unless $sourceseq{$target} || $targetseq{$source};
-               }
                return $rel eq 'transposition' ? ( 1, "ok" )
                        : ( 0, "Readings occur only in distinct witnesses" );
-       } 
-       if( $rel ne 'repetition' ) {
+       } else {
                # Check that linking the source and target in a relationship won't lead
-               # to a path loop for any witness.  If they have the same rank then
-               # they are parallel by definition.
-               # For transpositions, we want the opposite result: it is only valid if
-               # the readings cannot be parallel.
-               my $sourcerank = $c->reading( $source )->has_rank
-                       ? $c->reading( $source )->rank : undef;
-               my $targetrank = $c->reading( $target )->has_rank
-                       ? $c->reading( $target )->rank : undef;
-               if( $sourcerank && $targetrank && $sourcerank == $targetrank ) {
-                       return( 0, "Cannot transpose readings of same rank" )
-                               if $rel eq 'transposition';
-                       return( 1, "ok" );
-               }
-               
-               # Otherwise, first make a lookup table of all the
-               # readings related to either the source or the target.
-               my @proposed_related = ( $source, $target );
-               # Drop the collation links of source and target, unless we want to
-               # add a collation relationship.
-               my @dropped;
-               foreach my $r ( ( $source, $target ) ) {
-                       push( @dropped, $self->_drop_collations( $r ) )
-                               unless $rel eq 'collated';
-                       push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
+               # to a path loop for any witness. 
+               # First, drop/stash any collations that might interfere
+               my $sourceobj = $c->reading( $source );
+               my $targetobj = $c->reading( $target );
+               my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
+               my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
+               unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
+                       push( @$mustdrop, $self->_drop_collations( $source ) );
+                       push( @$mustdrop, $self->_drop_collations( $target ) );
                }
-               # Also drop any collation links at intermediate ranks.
-               foreach my $rank ( $sourcerank+1 .. $targetrank-1 ) {
-                       map { push( @dropped, $self->_drop_collations( $_ ) ) }
-                               $c->readings_at_rank( $rank );
-               }
-               my %pr_ids;
-               map { $pr_ids{ $_ } = 1 } @proposed_related;
-       
-               # The cumulative predecessors and successors of the proposed-related readings
-               # should not overlap.
-               my %all_pred;
-               my %all_succ;
-               foreach my $pr ( keys %pr_ids ) {
-                       map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
-                       map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
-               }
-               foreach my $k ( keys %all_pred ) {
-                       if( exists $all_succ{$k} ) {
-                               $self->_restore_collations( @dropped );
-                               return( 1, "ok" ) if $rel eq 'transposition';
-                               return( 0, "Relationship would create witness loop" );
-                       }
-               }
-               foreach my $k ( keys %pr_ids ) {
-                       if( exists $all_pred{$k} || exists $all_succ{$k} ) {
-                               $self->_restore_collations( @dropped );
-                               return( 1, "ok" ) if $rel eq 'transposition';
-                               return( 0, "Relationship would create witness loop" );
+               my $map = {};
+               my( $startrank, $endrank );
+               if( $c->end->has_rank ) {
+                       my $cpred = $c->common_predecessor( $source, $target );
+                       my $csucc = $c->common_successor( $source, $target );
+                       $startrank = $cpred->rank;
+                       $endrank = $csucc->rank;
+                       unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
+                               foreach my $rk ( $startrank+1 .. $endrank-1 ) {
+                                       map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
+                                               $c->readings_at_rank( $rk );
+                               }
                        }
                }
-               if( $rel eq 'transposition' ) {
-                       $self->_restore_collations( @dropped );
-                       return ( 0, "Cannot transpose parallel readings" );
+               my $eqgraph = $c->equivalence_graph( $map, $startrank, $endrank, 
+                       $source, $target );
+               if( $eqgraph->has_a_cycle ) {
+                       $self->_restore_collations( @$mustdrop );
+                       return( 0, "Relationship would create witness loop" );
                }
                return ( 1, "ok" );
        }
@@ -472,14 +501,14 @@ sub relationship_valid {
 
 sub _drop_collations {
        my( $self, $reading ) = @_;
-       my @deleted;
+       my @dropped;
        foreach my $n ( $self->graph->neighbors( $reading ) ) {
                if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
+                       push( @dropped, [ $reading, $n ] );
                        $self->del_relationship( $reading, $n );
-                       push( @deleted, [ $reading, $n ] );
                }
        }
-       return @deleted;
+       return @dropped;
 }
 
 sub _restore_collations {
@@ -487,8 +516,8 @@ sub _restore_collations {
        foreach my $v ( @vectors ) {
                try {
                        $self->add_relationship( @$v, { 'type' => 'collated' } );
-               } catch ( Text::Tradition::Error $e ) {
-                       warn "Could not restore collation " . join( ' -> ', @$v );
+               } catch {
+                       print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
                }
        }
 }
@@ -551,8 +580,6 @@ stops tracking the to-be-deleted reading.
 
 sub merge_readings {
        my( $self, $kept, $deleted, $combined ) = @_;
-       # Delete any relationship between kept and deleted
-       $self->del_relationship( $kept, $deleted );
        foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
                # Get the pair of kept / rel
                my @vector = ( $kept );
@@ -587,7 +614,8 @@ sub _as_graphml {
     
     # Add the vertices according to their XML IDs
     my %rdg_lookup = ( reverse %$node_hash );
-    foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
+    my @nlist = sort keys( %rdg_lookup );
+    foreach my $n ( @nlist ) {
        my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
        $n_el->setAttribute( 'id', $n );
        _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );