abandon 'collated' relationship removal
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
index 39bbdc0..3328b96 100644 (file)
@@ -2,13 +2,16 @@ package Text::Tradition::Collation::RelationshipStore;
 
 use strict;
 use warnings;
+use Text::Tradition::Error;
 use Text::Tradition::Collation::Relationship;
+use TryCatch;
 
 use Moose;
 
 =head1 NAME
 
-Text::Tradition::Collation::Reading - represents a reading (usually a word) in a collation.
+Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
+between readings in a given collation
     
 =head1 DESCRIPTION
 
@@ -17,6 +20,43 @@ texts, particularly medieval ones.  The RelationshipStore is an internal object
 of the collation, to keep track of the defined relationships (both specific and
 general) between readings.
 
+=begin testing
+
+use Text::Tradition;
+use TryCatch;
+
+use_ok( 'Text::Tradition::Collation::RelationshipStore' );
+
+# Add some relationships, and delete them
+
+my $cxfile = 't/data/Collatex-16.xml';
+my $t = Text::Tradition->new( 
+    'name'  => 'inline', 
+    'input' => 'CollateX',
+    'file'  => $cxfile,
+    );
+my $c = $t->collation;
+
+my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
+is( scalar @v1, 1, "Added a single relationship" );
+is( $v1[0]->[0], 'n21', "Got correct node 1" );
+is( $v1[0]->[1], 'n22', "Got correct node 2" );
+my @v2 = $c->add_relationship( 'n9', 'n23', 
+       { 'type' => 'spelling', 'scope' => 'global' } );
+is( scalar @v2, 2, "Added a global relationship with two instances" );
+@v1 = $c->del_relationship( 'n22', 'n21' );
+is( scalar @v1, 1, "Deleted first relationship" );
+@v2 = $c->del_relationship( 'n8', 'n13' );
+is( scalar @v2, 2, "Deleted second global relationship" );
+try {
+       my @v3 = $c->del_relationship( 'n1', 'n2' );
+       ok( 0, "Should have errored on non-existent relationship" );
+} catch( Text::Tradition::Error $e ) {
+       like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
+}
+
+=end testing
+
 =head1 METHODS
 
 =head2 new( collation => $collation );
@@ -46,9 +86,58 @@ has 'graph' => (
        relationships => 'edges',
        add_reading => 'add_vertex',
        delete_reading => 'delete_vertex',
+       delete_relationship => 'delete_edge',
     },
        );
        
+# Wrap the delegated 'delete_relationship' so that the edge may be given either
+# as a flat list of readings or as a single arrayref holding that list.
+around 'delete_relationship' => sub {
+       my( $orig, $self, @args ) = @_;
+       # A lone arrayref is treated as an edge and flattened before delegating.
+       my @vector = ( @args == 1 && ref( $args[0] ) eq 'ARRAY' )
+               ? @{ $args[0] }
+               : @args;
+       return $self->$orig( @vector );
+};
+       
+=head2 get_relationship
+
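+Return the relationship object, if any, that exists between two readings.
+The readings may be passed as two separate arguments or as a single edge
+arrayref; returns undef when no relationship is recorded for that pair.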
+
+=cut
+
+sub get_relationship {
+       my( $self, @args ) = @_;
+       # Accept either a flat list of readings or a single edge arrayref.
+       my @vector = ( @args == 1 && ref( $args[0] ) eq 'ARRAY' )
+               ? @{ $args[0] }
+               : @args;
+       # The relationship lives in the edge's 'object' attribute; an edge
+       # without that attribute yields undef.
+       my $graph = $self->graph;
+       my $relationship = $graph->has_edge_attribute( @vector, 'object' )
+               ? $graph->get_edge_attribute( @vector, 'object' )
+               : undef;
+       return $relationship;
+}
+
+# Create the edge for the given readings and attach $relationship to it as the
+# edge's 'object' attribute.
+sub _set_relationship {
+       my $self = shift;
+       my $relationship = shift;
+       # Whatever remains in @_ is the vector of readings to connect.
+       my $graph = $self->graph;
+       $graph->add_edge( @_ );
+       $graph->set_edge_attribute( @_, 'object', $relationship );
+}
+
+# Drop the graph edge between the given readings.
+sub _remove_relationship {
+       my $self = shift;
+       $self->graph->delete_edge( @_ );
+}
+       
 =head2 create
 
 Create a new relationship with the given options and return it.
@@ -61,13 +150,15 @@ sub create {
        # Check to see if a relationship exists between the two given readings
        my $source = delete $options->{'orig_a'};
        my $target = delete $options->{'orig_b'};
-       my $rel;
-       if( $self->graph->has_edge( $source, $target ) ) {
-               $rel = $self->graph->get_edge_attribute( $source, $target, 'object' );
-               if( $rel->type ne $options->type ) {
-                       warn "Relationship of type " . $rel->type 
-                               . "already exists between $source and $target";
-                       return;
+       my $rel = $self->get_relationship( $source, $target );
+       if( $rel ) {
+               if( $rel->type eq 'collated' ) {
+                       # Always replace a 'collated' relationship with a more descriptive
+                       # one, if asked.
+                       $self->del_relationship( $source, $target );
+               } elsif( $rel->type ne $options->{'type'} ) {
+                       throw( "Another relationship of type " . $rel->type 
+                               . " already exists between $source and $target" );
                } else {
                        return $rel;
                }
@@ -79,8 +170,7 @@ sub create {
        if( $rel && $rel->type eq $options->{'type'} ) {
                return $rel;
        } elsif( $rel ) {
-               warn sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} );
-               return;
+               throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
        } else {
                $rel = Text::Tradition::Collation::Relationship->new( $options );
                $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
@@ -144,7 +234,7 @@ sub add_relationship {
        my( $is_valid, $reason ) = 
                $self->relationship_valid( $source, $target, $options->{'type'} );
     unless( $is_valid ) {
-        return ( undef, $reason );
+        throw( "Invalid relationship: $reason" );
     }
     
     # Try to create the relationship object.
@@ -152,8 +242,7 @@ sub add_relationship {
     $options->{'reading_b'} = $target_rdg->text;
     $options->{'orig_a'} = $source;
     $options->{'orig_b'} = $target;
-    my $relationship = $self->create( $options );
-       return( undef, "Relationship creation failed" ) unless $relationship;
+    my $relationship = $self->create( $options );  # Will throw on error
 
        # Find all the pairs for which we need to set the relationship.
        my @vectors = ( [ $source, $target ] ); 
@@ -181,23 +270,45 @@ sub add_relationship {
     # Now set the relationship(s).
     my @pairs_set;
     foreach my $v ( @vectors ) {
-       if( $self->graph->has_edge( @$v ) ) {
-               # Is it locally scoped?
-               my $rel = $self->graph->get_edge_attribute( @$v );
+               my $rel = $self->get_relationship( @$v );
+       if( $rel ) {
                if( $rel->nonlocal ) {
-                       # TODO I think we should not be able to get here.
-                       warn "Found conflicting relationship at @$v";
+                       throw( "Found conflicting relationship at @$v" );
                } else {
                        warn "Not overriding local relationship set at @$v";
-                       next;
                }
+               next;
        }
-       $self->graph->add_edge( @$v );
-       $self->graph->set_edge_attribute( @$v, 'object', $relationship );
+       $self->_set_relationship( $relationship, @$v );
        push( @pairs_set, $v );
     }
     
-    return( 1, @pairs_set );
+    return @pairs_set;
+}
+
+=head2 del_relationship( $source, $target )
+
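+Removes the relationship between the given readings. If the relationship is
+non-local, removes the relationship everywhere in the graph. Returns the list
+of reading pairs (arrayrefs) whose relationship was removed, and throws an
+error if no relationship is defined between $source and $target.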
+
+=cut
+
+sub del_relationship {
+       my $self = shift;
+       my( $source, $target ) = @_;
+       my $rel = $self->get_relationship( $source, $target );
+       unless( $rel ) {
+               throw( "No relationship defined between $source and $target" );
+       }
+
+       # The requested pair is always the first vector reported back.
+       my @vectors = ( [ $source, $target ] );
+       $self->_remove_relationship( $source, $target );
+       return @vectors unless $rel->nonlocal;
+
+       # A non-local relationship object is attached to several edges. Collect
+       # every remaining edge carrying this same object, then remove them all.
+       my @rel_edges;
+       foreach my $edge ( $self->relationships ) {
+               push( @rel_edges, $edge )
+                       if $self->get_relationship( @$edge ) == $rel;
+       }
+       foreach my $edge ( @rel_edges ) {
+               $self->_remove_relationship( @$edge );
+               push( @vectors, $edge );
+       }
+       return @vectors;
+}
 
 =head2 relationship_valid( $source, $target, $type )
@@ -214,6 +325,7 @@ sub relationship_valid {
     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
                # Check that the two readings do (for a repetition) or do not (for
                # a transposition) appear in the same witness.
+               # TODO this might be called before witness paths are set...
                my %seen_wits;
                map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
                foreach my $w ( $c->reading_witnesses( $target ) ) {
@@ -227,7 +339,13 @@ sub relationship_valid {
                }
        } else {
                # Check that linking the source and target in a relationship won't lead
-               # to a path loop for any witness.  First make a lookup table of all the
+               # to a path loop for any witness.  If they have the same rank then fine.
+               return( 1, "ok" ) 
+                       if $c->reading( $source )->has_rank
+                               && $c->reading( $target )->has_rank
+                               && $c->reading( $source )->rank == $c->reading( $target )->rank;
+               
+               # Otherwise, first make a lookup table of all the
                # readings related to either the source or the target.
                my @proposed_related = ( $source, $target );
                push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
@@ -235,47 +353,72 @@ sub relationship_valid {
                my %pr_ids;
                map { $pr_ids{ $_ } = 1 } @proposed_related;
        
-               # None of these proposed related readings should have a neighbor that
-               # is also in proposed_related.
+               # The cumulative predecessors and successors of the proposed-related readings
+               # should not overlap.
+               my %all_pred;
+               my %all_succ;
                foreach my $pr ( keys %pr_ids ) {
-                       foreach my $neighbor( $c->sequence->neighbors( $pr ) ) {
-                               return( 0, "Would relate neighboring readings $pr and $neighbor" )
-                                       if exists $pr_ids{$neighbor};
-                       }
-               }               
+                       map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
+                       map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
+               }
+               foreach my $k ( keys %all_pred ) {
+                       return( 0, "Relationship would create witness loop" )
+                               if exists $all_succ{$k};
+               }
+               foreach my $k ( keys %pr_ids ) {
+                       return( 0, "Relationship would create witness loop" )
+                               if exists $all_pred{$k} || exists $all_succ{$k};
+               }
                return ( 1, "ok" );
        }
 }
 
-=head2 related_readings( $reading, $colocated_only )
+=head2 related_readings( $reading, $filter )
 
 Returns a list of readings that are connected via relationship links to $reading.
-If $colocated_only is true, restricts the list to those readings that are in the
-same logical location (and therefore have the same rank in the collation graph.)
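+If $filter is set to a subroutine ref, returns only those related readings where
+$filter( $relationship ) returns a true value. For backwards compatibility the
+string 'colocated' is also accepted, and selects relationships whose
+colocated() method returns true.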
 
 =cut
 
 sub related_readings {
-       my( $self, $reading, $colocated ) = @_;
+       my( $self, $reading, $filter ) = @_;
        my $return_object;
        if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
                $reading = $reading->id;
                $return_object = 1;
        }
-       my @related = $self->graph->all_reachable( $reading );
-       if( $colocated ) {
-               my @colo;
-               foreach my $r ( @related ) {
-                       my $obj = $self->graph->get_edge_attribute( $reading, $r, 'object' );
-                       push( @colo, $r ) if $obj->colocated;
+       my @answer;
+       if( $filter ) {
+               # Backwards compat
+               if( $filter eq 'colocated' ) {
+                       $filter = sub { $_[0]->colocated };
+               }
+               my %found = ( $reading => 1 );
+               my $check = [ $reading ];
+               my $iter = 0;
+               while( @$check ) {
+                       my $more = [];
+                       foreach my $r ( @$check ) {
+                               foreach my $nr ( $self->graph->neighbors( $r ) ) {
+                                       if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
+                                               push( @$more, $nr ) unless exists $found{$nr};
+                                               $found{$nr} = 1;
+                                       }
+                               }
+                       }
+                       $check = $more;
                }
-               @related = @colo;
+               delete $found{$reading};
+               @answer = keys %found;
+       } else {
+               @answer = $self->graph->all_reachable( $reading );
        }
        if( $return_object ) {
                my $c = $self->collation;
-               return map { $c->reading( $_ ) } @related;
+               return map { $c->reading( $_ ) } @answer;
        } else {
-               return @related;
+               return @answer;
        }
 }
 
@@ -298,19 +441,82 @@ sub merge_readings {
                next if $combined;
                        
                # If kept / rel already has a relationship, warn and keep the old
-               if( $self->graph->has_edge( @vector ) ) {
+               my $rel = $self->get_relationship( @vector );
+               if( $rel ) {
                        warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
                        next;
                }
                
                # Otherwise, adopt the relationship that would be deleted.
-               my $rel = $self->graph->get_edge_attribute( @$edge, 'object' );
-               $self->graph->add_edge( @vector );
-               $self->graph->set_edge_attribute( @vector, 'object', $rel );
+               $rel = $self->get_relationship( @$edge );
+               $self->_set_relationship( $rel, @vector );
        }
        $self->delete_reading( $deleted );
 }
 
+# Serialise the relationship graph as a GraphML 'graph' element under $xmlroot.
+#   $graphml_ns - namespace URI used for the elements created here
+#   $xmlroot    - element that receives the new 'graph' child
+#   $node_hash  - hashref mapping reading IDs to XML node IDs
+#   $nodeid_key - data key under which each node's reading ID is written
+#   $edge_keys  - hashref mapping relationship attribute names to data keys
+sub _as_graphml { 
+    my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
+
+    my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
+    $rgraph->setAttribute( 'edgedefault', 'directed' );
+    $rgraph->setAttribute( 'id', 'relationships', );
+    $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
+    $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
+    $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
+    $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
+    $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
+
+    # Add the vertices according to their XML IDs
+    my %rdg_lookup = ( reverse %$node_hash );
+    foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
+        my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
+        $n_el->setAttribute( 'id', $n );
+        _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
+    }
+
+    # Add the relationship edges, with their object information.  Sort on the
+    # source ID and then on the target ID, so that edges sharing a source get
+    # the same order (and therefore the same edge IDs) on every run.
+    my @edges = sort { $a->[0] cmp $b->[0] || $a->[1] cmp $b->[1] }
+        $self->graph->edges;
+    my $edge_ctr = 0;
+    foreach my $e ( @edges ) {
+        # Add an edge and fill in its relationship info.
+        my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
+        $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
+        $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
+        $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
+
+        my $rel_obj = $self->get_relationship( @$e );
+        _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
+        _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
+        _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation );
+        # The two boolean flags are written only when they were explicitly set.
+        _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
+            $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
+        _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
+            $rel_obj->non_independent ) if $rel_obj->nonind_set;
+    }
+}
+
+# Sort comparator: order XML IDs numerically by the digits they contain.
+sub _by_xmlid {
+       # Strip the non-digits from copies; $a and $b alias the sorted elements
+       # and must not be modified.
+       ( my $left = $a ) =~ s/\D//g;
+       ( my $right = $b ) =~ s/\D//g;
+       return $left <=> $right;
+}
+
+# Append a 'data' child carrying $value under $key to the given element;
+# does nothing when $value is undefined.
+sub _add_graphml_data {
+    my( $parent, $key, $value ) = @_;
+    defined $value or return;
+    # The new element is created in the same namespace as its parent.
+    my $ns = $parent->namespaceURI;
+    my $data_el = $parent->addNewChild( $ns, 'data' );
+    $data_el->setAttribute( 'key', $key );
+    $data_el->appendText( $value );
+}
+
+# Raise a Text::Tradition::Error carrying the given message, identified as a
+# relationship error.
+sub throw {
+       my @fields = (
+               'ident' => 'Relationship error',
+               'message' => $_[0],
+               );
+       Text::Tradition::Error->throw( @fields );
+}
+
 no Moose;
 __PACKAGE__->meta->make_immutable;