load XML::LibXML only when required; handle global relationships more correctly;...
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
index 21faa93..4342bd9 100644 (file)
@@ -31,29 +31,25 @@ use_ok( 'Text::Tradition::Collation::RelationshipStore' );
 
 my $cxfile = 't/data/Collatex-16.xml';
 my $t = Text::Tradition->new( 
-    'name'  => 'inline', 
-    'input' => 'CollateX',
-    'file'  => $cxfile,
-    );
+       'name'  => 'inline', 
+       'input' => 'CollateX',
+       'file'  => $cxfile,
+       );
 my $c = $t->collation;
 
-my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
+my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
 is( scalar @v1, 1, "Added a single relationship" );
 is( $v1[0]->[0], 'n21', "Got correct node 1" );
 is( $v1[0]->[1], 'n22', "Got correct node 2" );
-my @v2 = $c->add_relationship( 'n9', 'n23', 
+my @v2 = $c->add_relationship( 'n24', 'n23', 
        { 'type' => 'spelling', 'scope' => 'global' } );
 is( scalar @v2, 2, "Added a global relationship with two instances" );
 @v1 = $c->del_relationship( 'n22', 'n21' );
 is( scalar @v1, 1, "Deleted first relationship" );
-@v2 = $c->del_relationship( 'n8', 'n13' );
+@v2 = $c->del_relationship( 'n12', 'n13' );
 is( scalar @v2, 2, "Deleted second global relationship" );
-try {
-       my @v3 = $c->del_relationship( 'n1', 'n2' );
-       ok( 0, "Should have errored on non-existent relationship" );
-} catch( Text::Tradition::Error $e ) {
-       like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
-}
+my @v3 = $c->del_relationship( 'n1', 'n2' );
+is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
 
 =end testing
 
@@ -89,6 +85,61 @@ has 'graph' => (
     },
        );
        
+=head2 equivalence_graph()
+
+Returns an equivalence graph of the collation, in which all readings
+related via a 'colocated' relationship are transformed into a single
+vertex. Can be used to determine the validity of a new relationship. 
+
+=cut
+
+has 'equivalence_graph' => (
+       is => 'ro',
+       isa => 'Graph',
+       default => sub { Graph->new() },
+       writer => '_reset_equivalence',
+       );
+       
+has '_node_equivalences' => (
+       is => 'ro',
+       traits => ['Hash'],
+       handles => {
+               equivalence => 'get',
+               set_equivalence => 'set',
+               remove_equivalence => 'delete',
+               _clear_equivalence => 'clear',
+       },
+       );
+
+has '_equivalence_readings' => (
+       is => 'ro',
+       traits => ['Hash'],
+       handles => {
+               eqreadings => 'get',
+               set_eqreadings => 'set',
+               remove_eqreadings => 'delete',
+               _clear_eqreadings => 'clear',
+       },
+       );
+       
+around add_reading => sub {
+       my $orig = shift;
+       my $self = shift;
+       
+       $self->equivalence_graph->add_vertex( @_ );
+       $self->set_equivalence( $_[0], $_[0] );
+       $self->set_eqreadings( $_[0], [ $_[0] ] );
+       $self->$orig( @_ );
+};
+
+around delete_reading => sub {
+       my $orig = shift;
+       my $self = shift;
+       
+       $self->_remove_equivalence_node( @_ );
+       $self->$orig( @_ );
+};
+
 =head2 get_relationship
 
 Return the relationship object, if any, that exists between two readings.
@@ -116,6 +167,7 @@ sub _set_relationship {
        my( $self, $relationship, @vector ) = @_;
        $self->graph->add_edge( @vector );
        $self->graph->set_edge_attribute( @vector, 'object', $relationship );
+       $self->_make_equivalence( @vector ) if $relationship->colocated;
 }
 
 =head2 create
@@ -144,18 +196,9 @@ sub create {
                }
        }
        
-       # Check to see if a nonlocal relationship is defined for the two readings
-       $rel = $self->scoped_relationship( $options->{'reading_a'}, 
-               $options->{'reading_b'} );
-       if( $rel && $rel->type eq $options->{'type'} ) {
-               return $rel;
-       } elsif( $rel ) {
-               throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
-       } else {
-               $rel = Text::Tradition::Collation::Relationship->new( $options );
-               $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
-               return $rel;
-       }
+       $rel = Text::Tradition::Collation::Relationship->new( $options );
+       $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
+       return $rel;
 }
 
 =head2 add_scoped_relationship( $rel )
@@ -189,8 +232,12 @@ between the two reading strings. Returns undef if there is no general relationsh
 sub scoped_relationship {
        my( $self, $rdga, $rdgb ) = @_;
        my( $first, $second ) = sort( $rdga, $rdgb );
+       my( $lcfirst, $lcsecond ) = sort( lc( $rdga ), lc( $rdgb ) );
        if( exists $self->scopedrels->{$first}->{$second} ) {
                return $self->scopedrels->{$first}->{$second};
+       } elsif( exists $self->scopedrels->{$lcfirst}->{$lcsecond} ) {
+               my $rel = $self->scopedrels->{$lcfirst}->{$lcsecond};
+               return $rel->type ne 'orthographic' ? $rel : undef;
        } else {
                return undef;
        }
@@ -206,13 +253,153 @@ scoped non-locally.
 Returns a status boolean and a list of all reading pairs connected by the call to
 add_relationship.
 
+=begin testing
+
+use Test::Warn;
+use Text::Tradition;
+use TryCatch;
+
+my $t1;
+warning_is {
+       $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
+} 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
+       "Got expected relationship drop warning on parse";
+
+# Test 1.1: try to equate nodes that are prevented with an intermediate collation
+ok( $t1, "Parsed test fragment file" );
+my $c1 = $t1->collation;
+my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
+is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
+       "Troublesome relationship exists" );
+is( $trel->type, 'collated', "Troublesome relationship is a collation" );
+
+# Try to make the link we want
+try {
+       $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
+       ok( 1, "Added cross-collation relationship as expected" );
+} catch( Text::Tradition::Error $e ) {
+       ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
+}
+
+try {
+       $c1->calculate_ranks();
+       ok( 1, "Successfully calculated ranks" );
+} catch ( Text::Tradition::Error $e ) {
+       ok( 0, "Collation now has a cycle: " . $e->message );
+}
+
+# Test 1.2: attempt merge of an identical reading
+try {
+       $c1->merge_readings( 'r9.3', 'r11.5' );
+       ok( 1, "Successfully merged reading 'pontifex'" );
+} catch ( Text::Tradition::Error $e ) {
+       ok( 0, "Merge of mergeable readings failed: $e->message" );
+       
+}
+
+# Test 1.3: attempt relationship with a meta reading (should fail)
+try {
+       $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
+       ok( 0, "Allowed a meta-reading to be used in a relationship" );
+} catch ( Text::Tradition::Error $e ) {
+       is( $e->message, 'Cannot set relationship on a meta reading', 
+               "Relationship link prevented for a meta reading" );
+}
+
+# Test 1.4: try to break a relationship near a meta reading
+$c1->add_relationship( 'r7.6', 'r7.3', { type => 'orthographic' } );
+try {
+       $c1->del_relationship( 'r7.6', 'r7.7' );
+       $c1->del_relationship( 'r7.6', 'r7.3' );
+       ok( 1, "Relationship broken with a meta reading as neighbor" );
+} catch {
+       ok( 0, "Relationship deletion failed with a meta reading as neighbor" );
+}
+
+# Test 2.1: try to equate nodes that are prevented with a real intermediate
+# equivalence
+my $t2;
+warning_is {
+       $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
+} 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
+       "Got expected relationship drop warning on parse";
+my $c2 = $t2->collation;
+$c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
+my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
+is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
+       "Created blocking relationship" );
+is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
+# This time the link ought to fail
+try {
+       $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
+       ok( 0, "Added cross-equivalent bad relationship" );
+} catch ( Text::Tradition::Error $e ) {
+       like( $e->message, qr/witness loop/,
+               "Existing equivalence blocked crossing relationship" );
+}
+
+try {
+       $c2->calculate_ranks();
+       ok( 1, "Successfully calculated ranks" );
+} catch ( Text::Tradition::Error $e ) {
+       ok( 0, "Collation now has a cycle: " . $e->message );
+}
+
+# Test 3.1: make a straightforward pair of transpositions.
+my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
+# Test 1: try to equate nodes that are prevented with an intermediate collation
+my $c3 = $t3->collation;
+try {
+       $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
+       ok( 1, "Added straightforward transposition" );
+} catch ( Text::Tradition::Error $e ) {
+       ok( 0, "Failed to add normal transposition: " . $e->message );
+}
+try {
+       $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
+       ok( 1, "Added straightforward transposition complement" );
+} catch ( Text::Tradition::Error $e ) {
+       ok( 0, "Failed to add normal transposition complement: " . $e->message );
+}
+
+# Test 3.2: try to make a transposition that could be a parallel.
+try {
+       $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
+       ok( 0, "Added bad colocated transposition" );
+} catch ( Text::Tradition::Error $e ) {
+       like( $e->message, qr/Readings appear to be colocated/,
+               "Prevented bad colocated transposition" );
+}
+
+# Test 3.3: make the parallel, and then make the transposition again.
+try {
+       $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
+       ok( 1, "Equated identical readings for transposition" );
+} catch ( Text::Tradition::Error $e ) {
+       ok( 0, "Failed to equate identical readings: " . $e->message );
+}
+try {
+       $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
+       ok( 1, "Added straightforward transposition complement" );
+} catch ( Text::Tradition::Error $e ) {
+       ok( 0, "Failed to add normal transposition complement: " . $e->message );
+}
+
+=end testing
+
 =cut
 
 sub add_relationship {
-       my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
-
+       my( $self, $source, $target, $options ) = @_;
+    my $c = $self->collation;
+       my $sourceobj = $c->reading( $source );
+       my $targetobj = $c->reading( $target );
+       throw( "Adding self relationship at $source" ) if $source eq $target;
+       throw( "Cannot set relationship on a meta reading" )
+               if( $sourceobj->is_meta || $targetobj->is_meta );
        my $relationship;
        my $thispaironly;
+       my $droppedcolls = [];
        if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
                $relationship = $options;
                $thispaironly = 1;  # If existing rel, set only where asked.
@@ -220,60 +407,122 @@ sub add_relationship {
                # Check the options
                $options->{'scope'} = 'local' unless $options->{'scope'};
                $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
+               $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
                
-               my( $is_valid, $reason ) = 
-                       $self->relationship_valid( $source, $target, $options->{'type'} );
+               my( $is_valid, $reason ) = $self->relationship_valid( $source, $target, 
+                       $options->{'type'}, $droppedcolls );
                unless( $is_valid ) {
                        throw( "Invalid relationship: $reason" );
                }
                
                # Try to create the relationship object.
-               $options->{'reading_a'} = $source_rdg->text;
-               $options->{'reading_b'} = $target_rdg->text;
+               $options->{'reading_a'} = $sourceobj->text;
+               $options->{'reading_b'} = $targetobj->text;
                $options->{'orig_a'} = $source;
                $options->{'orig_b'} = $target;
        if( $options->{'scope'} ne 'local' ) {
                        # Is there a relationship with this a & b already?
                        # Case-insensitive for non-orthographics.
-                       my $rdga = $options->{'type'} eq 'orthographic' 
-                               ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
-                       my $rdgb = $options->{'type'} eq 'orthographic' 
-                               ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
+                       my $rdga = $options->{'reading_a'};
+                       my $rdgb = $options->{'reading_b'};
                        my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
                        if( $otherrel && $otherrel->type eq $options->{type}
                                && $otherrel->scope eq $options->{scope} ) {
-                               warn "Applying existing scoped relationship";
+                               warn "Applying existing scoped relationship for $rdga / $rdgb";
                                $relationship = $otherrel;
+                       } elsif( $otherrel ) {
+                               throw( "Conflicting scoped relationship for $rdga / $rdgb at $source / $target" );
                        }
        }
                $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
     }
 
 
+    # Now set the relationship(s).
+    my @pairs_set;
+       my $rel = $self->get_relationship( $source, $target );
+       my $skip;
+       if( $rel && $rel ne $relationship ) {
+               if( $rel->nonlocal ) {
+                       throw( "Found conflicting relationship at $source - $target" );
+               } elsif( $rel->type ne 'collated' ) {
+                       # Replace a collation relationship; leave any other sort in place.
+                       my $r1ann = $rel->has_annotation ? $rel->annotation : '';
+                       my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
+                       unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
+                               warn sprintf( "Not overriding local relationship %s with global %s " 
+                                       . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
+                                       $source, $target, $rel->reading_a, $rel->reading_b );
+                               $skip = 1;
+                       }
+               }
+       }
+       $self->_set_relationship( $relationship, $source, $target ) unless $skip;
+       push( @pairs_set, [ $source, $target ] );
+    
        # Find all the pairs for which we need to set the relationship.
-       my @vectors = [ $source, $target ];
     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
-       push( @vectors, $self->_find_applicable( $relationship ) );
+               push( @pairs_set, $self->add_global_relationship( $relationship ) );
     }
-    $DB::single = 1 if grep { $_->[0] eq 'w494' || $_->[1] eq 'w494' } @vectors;
-        
-    # Now set the relationship(s).
-    my @pairs_set;
-    foreach my $v ( @vectors ) {
-               my $rel = $self->get_relationship( @$v );
-       if( $rel && $rel ne $relationship ) {
-               if( $rel->nonlocal ) {
-                       throw( "Found conflicting relationship at @$v" );
-               } else {
-                       warn "Not overriding local relationship set at @$v";
-               }
-               next;
+    # Finally, restore whatever collations we can, and return.
+    $self->_restore_collations( @$droppedcolls );
+    return @pairs_set;
+}
+
+=head2 add_global_relationship( $options, $skipvector )
+
+Adds the relationship specified wherever the relevant readings appear together 
+in the graph.  Options as in add_relationship above. 
+
+=cut
+
+sub add_global_relationship {
+       my( $self, $options ) = @_;
+       # First see if we are dealing with a relationship object already
+       my $relationship;
+       if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
+               $relationship = $options;
+       } else {
+               # Then see if a scoped relationship already applies for the words.
+               my $scopedrel = $self->scoped_relationship( 
+                       $options->{reading_a}, $options->{reading_b} );
+               $relationship = $scopedrel ? $scopedrel 
+                       : $self->create( $options );
+       }
+       # Sanity checking
+       throw( "Relationship passed to add_global is not global" )
+               unless $relationship->nonlocal;
+       throw( "Relationship passed to add_global is not a valid global type" )
+               unless $relationship->colocated && $relationship->type ne 'collated';
+               
+       # Apply the relationship wherever it is valid
+       my @pairs_set;
+    foreach my $v ( $self->_find_applicable( $relationship ) ) {
+       my $exists = $self->get_relationship( @$v );
+       if( $exists && $exists->type ne 'collated' ) {
+               throw( "Found conflicting relationship at @$v" )
+                       unless $exists->type eq $relationship->type
+                               && $exists->scope eq $relationship->scope;
+       } else {
+               my @added = $self->add_relationship( @$v, $relationship );
+               push( @pairs_set, @added );
        }
-       $self->_set_relationship( $relationship, @$v );
-       push( @pairs_set, $v );
     }
-    
-    return @pairs_set;
+       return @pairs_set;      
+}
+
+
+=head2 del_scoped_relationship( $reading_a, $reading_b )
+
+Returns the general (document-level or global) relationship that has been defined 
+between the two reading strings. Returns undef if there is no general relationship.
+
+=cut
+
+sub del_scoped_relationship {
+       my( $self, $rdga, $rdgb ) = @_;
+       my( $first, $second ) = sort( $rdga, $rdgb );
+       return delete $self->scopedrels->{$first}->{$second};
 }
 
 sub _find_applicable {
@@ -327,24 +576,28 @@ non-local, removes the relationship everywhere in the graph.
 sub del_relationship {
        my( $self, $source, $target ) = @_;
        my $rel = $self->get_relationship( $source, $target );
-       throw( "No relationship defined between $source and $target" ) unless $rel;
+       return () unless $rel; # Nothing to delete; return an empty set.
+       my $colo = $rel->colocated;
        my @vectors = ( [ $source, $target ] );
-       $self->_remove_relationship( $source, $target );
+       $self->_remove_relationship( $colo, $source, $target );
        if( $rel->nonlocal ) {
                # Remove the relationship wherever it occurs.
+               # Remove the relationship wherever it occurs.
                my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
                        $self->relationships;
                foreach my $re ( @rel_edges ) {
-                       $self->_remove_relationship( @$re );
+                       $self->_remove_relationship( $colo, @$re );
                        push( @vectors, $re );
                }
+               $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
        }
        return @vectors;
 }
 
 sub _remove_relationship {
-       my( $self, @vector ) = @_;
+       my( $self, $equiv, @vector ) = @_;
        $self->graph->delete_edge( @vector );
+       $self->_break_equivalence( @vector ) if $equiv;
 }
        
 =head2 relationship_valid( $source, $target, $type )
@@ -356,9 +609,12 @@ a yes/no boolean and, if the answer is no, message gives the reason why.
 =cut
 
 sub relationship_valid {
-    my( $self, $source, $target, $rel ) = @_;
+    my( $self, $source, $target, $rel, $mustdrop ) = @_;
+    $mustdrop = [] unless $mustdrop; # in case we were passed nothing
     my $c = $self->collation;
-    if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
+    ## Assume validity is okay if we are initializing from scratch.
+    return ( 1, "initializing" ) unless $c->tradition->_initialized;
+        if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
                # Check that the two readings do (for a repetition) or do not (for
                # a transposition) appear in the same witness.
                # TODO this might be called before witness paths are set...
@@ -369,45 +625,42 @@ sub relationship_valid {
                                return ( 0, "Readings both occur in witness $w" ) 
                                        if $rel eq 'transposition';
                                return ( 1, "ok" ) if $rel eq 'repetition';
+                       }
                }
-               return $rel eq 'transposition' ? ( 1, "ok" )
-                       : ( 0, "Readings occur only in distinct witnesses" );
+               return ( 0, "Readings occur only in distinct witnesses" )
+                       if $rel eq 'repetition';
+       } 
+       if ( $rel eq 'transposition' ) {
+               # We also need to check both that the readings occur in distinct
+               # witnesses, and that they are not in the same place. That is,
+               # proposing to link them should cause a witness loop.
+               if( $self->test_equivalence( $source, $target ) ) {
+                       return ( 0, "Readings appear to be colocated, not transposed" );
+               } else {
+                       return ( 1, "ok" );
                }
-       } else {
-               # Check that linking the source and target in a relationship won't lead
-               # to a path loop for any witness.  If they have the same rank then fine.
-               return( 1, "ok" ) 
-                       if $c->reading( $source )->has_rank
-                               && $c->reading( $target )->has_rank
-                               && $c->reading( $source )->rank == $c->reading( $target )->rank;
                
-               # Otherwise, first make a lookup table of all the
-               # readings related to either the source or the target.
-               my @proposed_related = ( $source, $target );
-               # Drop the collation links of source and target, unless we want to
-               # add a collation relationship.
-               foreach my $r ( ( $source, $target ) ) {
-                       $self->_drop_collations( $r ) unless $rel eq 'collated';
-                       push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
-               }
-               my %pr_ids;
-               map { $pr_ids{ $_ } = 1 } @proposed_related;
-       
-               # The cumulative predecessors and successors of the proposed-related readings
-               # should not overlap.
-               my %all_pred;
-               my %all_succ;
-               foreach my $pr ( keys %pr_ids ) {
-                       map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
-                       map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
-               }
-               foreach my $k ( keys %all_pred ) {
-                       return( 0, "Relationship would create witness loop" )
-                               if exists $all_succ{$k};
+       } elsif( $rel ne 'repetition' ) {
+               # Check that linking the source and target in a relationship won't lead
+               # to a path loop for any witness. 
+               # First, drop/stash any collations that might interfere
+               my $sourceobj = $c->reading( $source );
+               my $targetobj = $c->reading( $target );
+               my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
+               my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
+               unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
+                       push( @$mustdrop, $self->_drop_collations( $source ) );
+                       push( @$mustdrop, $self->_drop_collations( $target ) );
+                       if( $c->end->has_rank ) {
+                               foreach my $rk ( $sourcerank .. $targetrank ) {
+                                       map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
+                                               $c->readings_at_rank( $rk );
+                               }
+                       }
                }
-               foreach my $k ( keys %pr_ids ) {
-                       return( 0, "Relationship would create witness loop" )
-                               if exists $all_pred{$k} || exists $all_succ{$k};
+               unless( $self->test_equivalence( $source, $target ) ) {
+                       $self->_restore_collations( @$mustdrop );
+                       return( 0, "Relationship would create witness loop" );
                }
                return ( 1, "ok" );
        }
@@ -415,10 +668,60 @@ sub relationship_valid {
 
 sub _drop_collations {
        my( $self, $reading ) = @_;
+       my @dropped;
        foreach my $n ( $self->graph->neighbors( $reading ) ) {
                if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
+                       push( @dropped, [ $reading, $n ] );
                        $self->del_relationship( $reading, $n );
+                       #print STDERR "Dropped collation $reading -> $n\n";
+               }
+       }
+       return @dropped;
+}
+
+sub _restore_collations {
+       my( $self, @vectors ) = @_;
+       foreach my $v ( @vectors ) {
+               try {
+                       $self->add_relationship( @$v, { 'type' => 'collated' } );
+                       #print STDERR "Restored collation @$v\n";
+               } catch {
+                       print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
+               }
+       }
+}
+
+=head2 filter_collations()
+
+Utility function. Removes any redundant 'collated' relationships from the graph.
+A collated relationship is redundant if the readings in question would occupy
+the same rank regardless of the existence of the relationship.
+
+=cut
+
+sub filter_collations {
+       my $self = shift;
+       my $c = $self->collation;
+       foreach my $r ( 1 .. $c->end->rank - 1 ) {
+               my $anchor;
+               my @need_collations;
+               foreach my $rdg ( $c->readings_at_rank( $r ) ) {
+                       next if $rdg->is_meta;
+                       my $ip = 0;
+                       foreach my $pred ( $rdg->predecessors ) {
+                               if( $pred->rank == $r - 1 ) {
+                                       $ip = 1;
+                                       $anchor = $rdg unless( $anchor );
+                                       last;
+                               }
+                       }
+                       push( @need_collations, $rdg ) unless $ip;
+                       $c->relations->_drop_collations( "$rdg" );
                }
+               $anchor
+                       ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
+                                               unless $c->get_relationship( $anchor, $_ ) } @need_collations
+                       : warn "No anchor found at $r";
        }
 }
 
@@ -442,6 +745,9 @@ sub related_readings {
                # Backwards compat
                if( $filter eq 'colocated' ) {
                        $filter = sub { $_[0]->colocated };
+               } elsif( !ref( $filter ) ) {
+                       my $type = $filter;
+                       $filter = sub { $_[0]->type eq $type };
                }
                my %found = ( $reading => 1 );
                my $check = [ $reading ];
@@ -497,9 +803,348 @@ sub merge_readings {
                $rel = $self->get_relationship( @$edge );
                $self->_set_relationship( $rel, @vector );
        }
-       $self->delete_reading( $deleted );
+       $self->_make_equivalence( $deleted, $kept );
+}
+
+### Equivalence logic
+
+sub _remove_equivalence_node {
+       my( $self, $node ) = @_;
+       my $group = $self->equivalence( $node );
+       my $nodelist = $self->eqreadings( $group );
+       if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
+               $self->equivalence_graph->delete_vertex( $group );
+               $self->remove_eqreadings( $group );
+               $self->remove_equivalence( $group );
+       } elsif( @$nodelist == 1 ) {
+               throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
+                       " in group that should have only $node" );
+       } else {
+               my @newlist = grep { $_ ne $node } @$nodelist;
+               $self->set_eqreadings( $group, \@newlist );
+               $self->remove_equivalence( $node );
+       }
 }
 
+=head2 add_equivalence_edge
+
+Add an edge in the equivalence graph corresponding to $source -> $target in the
+collation. Should only be called by Collation.
+
+=cut
+
+sub add_equivalence_edge {
+       my( $self, $source, $target ) = @_;
+       my $seq = $self->equivalence( $source );
+       my $teq = $self->equivalence( $target );
+       $self->equivalence_graph->add_edge( $seq, $teq );
+}
+
+=head2 delete_equivalence_edge
+
+Remove an edge in the equivalence graph corresponding to $source -> $target in the
+collation. Should only be called by Collation.
+
+=cut
+
+sub delete_equivalence_edge {
+       my( $self, $source, $target ) = @_;
+       my $seq = $self->equivalence( $source );
+       my $teq = $self->equivalence( $target );
+       $self->equivalence_graph->delete_edge( $seq, $teq );
+}
+
+sub _is_disconnected {
+       my $self = shift;
+       return( scalar $self->equivalence_graph->predecessorless_vertices > 1
+               || scalar $self->equivalence_graph->successorless_vertices > 1 );
+}
+
+# Equate two readings in the equivalence graph
+sub _make_equivalence {
+       my( $self, $source, $target ) = @_;
+       # Get the source equivalent readings
+       my $seq = $self->equivalence( $source );
+       my $teq = $self->equivalence( $target );
+       # Nothing to do if they are already equivalent...
+       return if $seq eq $teq;
+       my $sourcepool = $self->eqreadings( $seq );
+       # and add them to the target readings.
+       push( @{$self->eqreadings( $teq )}, @$sourcepool );
+       map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
+       # Then merge the nodes in the equivalence graph.
+       foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
+               $self->equivalence_graph->add_edge( $pred, $teq );
+       }
+       foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
+               $self->equivalence_graph->add_edge( $teq, $succ );
+       }
+       $self->equivalence_graph->delete_vertex( $seq );
+       # TODO enable this after collation parsing is done
+       throw( "Graph got disconnected making $source / $target equivalence" )
+               if $self->_is_disconnected && $self->collation->tradition->_initialized;
+}
+
+=head2 test_equivalence
+
+Test whether, if two readings were equated with a 'colocated' relationship, 
+the graph would still be valid.
+
+=cut
+
+sub test_equivalence {
+       my( $self, $source, $target ) = @_;
+       # Try merging the nodes in the equivalence graph; return a true value if
+       # no cycle is introduced thereby. Restore the original graph first.
+       
+       # Keep track of edges we add
+       my %added_pred;
+       my %added_succ;
+       # Get the reading equivalents
+       my $seq = $self->equivalence( $source );
+       my $teq = $self->equivalence( $target );
+       # Maybe this is easy?
+       return 1 if $seq eq $teq;
+       
+       # Save the first graph
+       my $checkstr = $self->equivalence_graph->stringify();
+       # Add and save relevant edges
+       foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
+               if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
+                       $added_pred{$pred} = 0;
+               } else {
+                       $self->equivalence_graph->add_edge( $pred, $teq );
+                       $added_pred{$pred} = 1;
+               }
+       }
+       foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
+               if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
+                       $added_succ{$succ} = 0;
+               } else {
+                       $self->equivalence_graph->add_edge( $teq, $succ );
+                       $added_succ{$succ} = 1;
+               }
+       }
+       # Delete source equivalent and test
+       $self->equivalence_graph->delete_vertex( $seq );
+       my $ret = !$self->equivalence_graph->has_a_cycle;
+       
+       # Restore what we changed
+       $self->equivalence_graph->add_vertex( $seq );
+       foreach my $pred ( keys %added_pred ) {
+               $self->equivalence_graph->add_edge( $pred, $seq );
+               $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
+       }
+       foreach my $succ ( keys %added_succ ) {
+               $self->equivalence_graph->add_edge( $seq, $succ );
+               $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
+       }
+       unless( $self->equivalence_graph->eq( $checkstr ) ) {
+               warn "GRAPH CHANGED after testing";
+       }
+       # Return our answer
+       return $ret;
+}
+
+# Unmake an equivalence link between two readings. Should only be called internally.
+sub _break_equivalence {
+       my( $self, $source, $target ) = @_;
+       
+       # This is the hard one. Need to reconstruct the equivalence groups without
+       # the given link.
+       my( %sng, %tng );
+       map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
+       map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
+       # If these groups intersect, they are still connected; do nothing.
+       foreach my $el ( keys %tng ) {
+               return if( exists $sng{$el} );
+       }
+       # If they don't intersect, then we split the nodes in the graph and in
+       # the hashes. First figure out which group has which name
+       my $oldgroup = $self->equivalence( $source ); # same as $target
+       my $keepsource = $sng{$oldgroup};
+       my $newgroup = $keepsource ? $target : $source;
+       my( $oldmembers, $newmembers );
+       if( $keepsource ) {
+               $oldmembers = [ keys %sng ];
+               $newmembers = [ keys %tng ];
+       } else {
+               $oldmembers = [ keys %tng ];
+               $newmembers = [ keys %sng ];
+       }
+               
+       # First alter the old group in the hash
+       $self->set_eqreadings( $oldgroup, $oldmembers );
+       foreach my $el ( @$oldmembers ) {
+               $self->set_equivalence( $el, $oldgroup );
+       }
+       
+       # then add the new group back to the hash with its new key
+       $self->set_eqreadings( $newgroup, $newmembers );
+       foreach my $el ( @$newmembers ) {
+               $self->set_equivalence( $el, $newgroup );
+       }
+       
+       # Now add the new group back to the equivalence graph
+       $self->equivalence_graph->add_vertex( $newgroup );
+       # ...add the appropriate edges to the source group vertext
+       my $c = $self->collation;
+       foreach my $rdg ( @$newmembers ) {
+               foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
+                       next unless $self->equivalence( $rp );
+                       $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
+               }
+               foreach my $rs ( $c->sequence->successors( $rdg ) ) {
+                       next unless $self->equivalence( $rs );
+                       $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
+               }
+       }
+       
+       # ...and figure out which edges on the old group vertex to delete.
+       my( %old_pred, %old_succ );
+       foreach my $rdg ( @$oldmembers ) {
+               foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
+                       next unless $self->equivalence( $rp );
+                       $old_pred{$self->equivalence( $rp )} = 1;
+               }
+               foreach my $rs ( $c->sequence->successors( $rdg ) ) {
+                       next unless $self->equivalence( $rs );
+                       $old_succ{$self->equivalence( $rs )} = 1;
+               }
+       }
+       foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
+               unless( $old_pred{$p} ) {
+                       $self->equivalence_graph->delete_edge( $p, $oldgroup );
+               }
+       }
+       foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
+               unless( $old_succ{$s} ) {
+                       $self->equivalence_graph->delete_edge( $oldgroup, $s );
+               }
+       }
+       # TODO enable this after collation parsing is done
+       throw( "Graph got disconnected breaking $source / $target equivalence" )
+               if $self->_is_disconnected && $self->collation->tradition->_initialized;
+}
+
+sub _find_equiv_without {
+       my( $self, $first, $second ) = @_;
+       my %found = ( $first => 1 );
+       my $check = [ $first ];
+       my $iter = 0;
+       while( @$check ) {
+               my $more = [];
+               foreach my $r ( @$check ) {
+                       foreach my $nr ( $self->graph->neighbors( $r ) ) {
+                               next if $r eq $second;
+                               if( $self->get_relationship( $r, $nr )->colocated ) {
+                                       push( @$more, $nr ) unless exists $found{$nr};
+                                       $found{$nr} = 1;
+                               }
+                       }
+               }
+               $check = $more;
+       }
+       return keys %found;
+}
+
+=head2 rebuild_equivalence
+
+(Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
+adds all readings and edges, then makes an equivalence for all relationships.
+
+=cut
+
+sub rebuild_equivalence {
+       my $self = shift;
+       my $newgraph = Graph->new();
+       # Set this as the new equivalence graph
+       $self->_reset_equivalence( $newgraph );
+       # Clear out the data hashes
+       $self->_clear_equivalence;
+       $self->_clear_eqreadings;
+       
+       $self->collation->tradition->_init_done(0);
+       # Add the readings
+       foreach my $r ( $self->collation->readings ) {
+               my $rid = $r->id;
+               $newgraph->add_vertex( $rid );
+               $self->set_equivalence( $rid, $rid );
+               $self->set_eqreadings( $rid, [ $rid ] );
+       }
+
+       # Now add the edges
+       foreach my $e ( $self->collation->paths ) {
+               $self->add_equivalence_edge( @$e );
+       }
+
+       # Now equate the colocated readings. This does no testing; 
+       # it assumes that all preexisting relationships are valid.
+       foreach my $rel ( $self->relationships ) {
+               my $relobj = $self->get_relationship( $rel );
+               next unless $relobj && $relobj->colocated;
+               $self->_make_equivalence( @$rel );
+       }
+       $self->collation->tradition->_init_done(1);
+}
+
+=head2 equivalence_ranks 
+
+Rank all vertices in the equivalence graph, and return a hash reference with
+vertex => rank mapping.
+
+=cut
+
+sub equivalence_ranks {
+       my $self = shift;
+       my $eqstart = $self->equivalence( $self->collation->start );
+       my $eqranks = { $eqstart => 0 };
+       my $rankeqs = { 0 => [ $eqstart ] };
+       my @curr_origin = ( $eqstart );
+    # A little iterative function.
+    while( @curr_origin ) {
+        @curr_origin = $self->_assign_rank( $eqranks, $rankeqs, @curr_origin );
+    }
+       return( $eqranks, $rankeqs );
+}
+
+sub _assign_rank {
+    my( $self, $node_ranks, $rank_nodes, @current_nodes ) = @_;
+    my $graph = $self->equivalence_graph;
+    # Look at each of the children of @current_nodes.  If all the child's 
+    # parents have a rank, assign it the highest rank + 1 and add it to 
+    # @next_nodes.  Otherwise skip it; we will return when the highest-ranked
+    # parent gets a rank.
+    my @next_nodes;
+    foreach my $c ( @current_nodes ) {
+        warn "Current reading $c has no rank!"
+            unless exists $node_ranks->{$c};
+        foreach my $child ( $graph->successors( $c ) ) {
+            next if exists $node_ranks->{$child};
+            my $highest_rank = -1;
+            my $skip = 0;
+            foreach my $parent ( $graph->predecessors( $child ) ) {
+                if( exists $node_ranks->{$parent} ) {
+                    $highest_rank = $node_ranks->{$parent} 
+                        if $highest_rank <= $node_ranks->{$parent};
+                } else {
+                    $skip = 1;
+                    last;
+                }
+            }
+            next if $skip;
+            my $c_rank = $highest_rank + 1;
+            # print STDERR "Assigning rank $c_rank to node $child \n";
+            $node_ranks->{$child} = $c_rank if $node_ranks;
+            push( @{$rank_nodes->{$c_rank}}, $child ) if $rank_nodes;
+            push( @next_nodes, $child );
+        }
+    }
+    return @next_nodes;
+}
+
+### Output logic
+
 sub _as_graphml { 
        my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
        
@@ -507,23 +1152,27 @@ sub _as_graphml {
        $rgraph->setAttribute( 'edgedefault', 'directed' );
     $rgraph->setAttribute( 'id', 'relationships', );
     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
-    $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
+    $rgraph->setAttribute( 'parse.edges', 0 );
     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
-    $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
+    $rgraph->setAttribute( 'parse.nodes', 0 );
     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
     
     # Add the vertices according to their XML IDs
     my %rdg_lookup = ( reverse %$node_hash );
-    foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
+    # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
+    my @nlist = sort keys( %rdg_lookup );
+    foreach my $n ( @nlist ) {
        my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
        $n_el->setAttribute( 'id', $n );
        _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
     }
+       $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
     
     # Add the relationship edges, with their object information
     my $edge_ctr = 0;
     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
        # Add an edge and fill in its relationship info.
+       next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
                my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
                $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
                $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
@@ -536,6 +1185,7 @@ sub _as_graphml {
                                if defined $value;
                }
        }
+       $rgraph->setAttribute( 'parse.edges', $edge_ctr );
 }
 
 sub _by_xmlid {