X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FCollation%2FRelationshipStore.pm;h=2f8784c208c176e9db3991f20235ece854395aaf;hb=0a90079324146b05a46fadc49999d423e7d93db3;hp=39bbdc003e32e6fc841a6a2f17e2bb2859246509;hpb=cf6c01be953b7740c93a9403e0bd1dc57606e2dd;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Collation/RelationshipStore.pm b/lib/Text/Tradition/Collation/RelationshipStore.pm index 39bbdc0..2f8784c 100644 --- a/lib/Text/Tradition/Collation/RelationshipStore.pm +++ b/lib/Text/Tradition/Collation/RelationshipStore.pm @@ -2,13 +2,16 @@ package Text::Tradition::Collation::RelationshipStore; use strict; use warnings; +use Text::Tradition::Error; use Text::Tradition::Collation::Relationship; +use TryCatch; use Moose; =head1 NAME -Text::Tradition::Collation::Reading - represents a reading (usually a word) in a collation. +Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships +between readings in a given collation =head1 DESCRIPTION @@ -17,6 +20,43 @@ texts, particularly medieval ones. The RelationshipStore is an internal object of the collation, to keep track of the defined relationships (both specific and general) between readings. +=begin testing + +use Text::Tradition; +use TryCatch; + +use_ok( 'Text::Tradition::Collation::RelationshipStore' ); + +# Add some relationships, and delete them + +my $cxfile = 't/data/Collatex-16.xml'; +my $t = Text::Tradition->new( + 'name' => 'inline', + 'input' => 'CollateX', + 'file' => $cxfile, + ); +my $c = $t->collation; + +my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } ); +is( scalar @v1, 1, "Added a single relationship" ); +is( $v1[0]->[0], 'n21', "Got correct node 1" ); +is( $v1[0]->[1], 'n22', "Got correct node 2" ); +my @v2 = $c->add_relationship( 'n9', 'n23', + { 'type' => 'spelling', 'scope' => 'global' } ); +is( scalar @v2, 2, "Added a global relationship with two instances" ); +@v1 = $c->del_relationship( 'n22', 'n21' ); +is( scalar @v1, 1, "Deleted first relationship" ); +@v2 = $c->del_relationship( 'n8', 'n13' ); +is( scalar @v2, 2, "Deleted second global relationship" ); +try { + my @v3 = $c->del_relationship( 'n1', 'n2' ); + ok( 0, "Should have errored on non-existent relationship" ); +} catch( Text::Tradition::Error $e ) { + like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" ); +} + +=end testing + =head1 METHODS =head2 new( collation => $collation ); @@ -49,6 +89,35 @@ has 'graph' => ( }, ); +=head2 get_relationship + +Return the relationship object, if any, that exists between two readings. + +=cut + +sub get_relationship { + my $self = shift; + my @vector; + if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) { + # Dereference the edge arrayref that was passed. + my $edge = shift; + @vector = @$edge; + } else { + @vector = @_; + } + my $relationship; + if( $self->graph->has_edge_attribute( @vector, 'object' ) ) { + $relationship = $self->graph->get_edge_attribute( @vector, 'object' ); + } + return $relationship; +} + +sub _set_relationship { + my( $self, $relationship, @vector ) = @_; + $self->graph->add_edge( @vector ); + $self->graph->set_edge_attribute( @vector, 'object', $relationship ); +} + =head2 create Create a new relationship with the given options and return it. @@ -61,13 +130,15 @@ sub create { # Check to see if a relationship exists between the two given readings my $source = delete $options->{'orig_a'}; my $target = delete $options->{'orig_b'}; - my $rel; - if( $self->graph->has_edge( $source, $target ) ) { - $rel = $self->graph->get_edge_attribute( $source, $target, 'object' ); - if( $rel->type ne $options->type ) { - warn "Relationship of type " . $rel->type - . "already exists between $source and $target"; - return; + my $rel = $self->get_relationship( $source, $target ); + if( $rel ) { + if( $rel->type eq 'collated' ) { + # Always replace a 'collated' relationship with a more descriptive + # one, if asked. + $self->del_relationship( $source, $target ); + } elsif( $rel->type ne $options->{'type'} ) { + throw( "Another relationship of type " . $rel->type + . " already exists between $source and $target" ); } else { return $rel; } @@ -79,8 +150,7 @@ sub create { if( $rel && $rel->type eq $options->{'type'} ) { return $rel; } elsif( $rel ) { - warn sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ); - return; + throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) ); } else { $rel = Text::Tradition::Collation::Relationship->new( $options ); $self->add_scoped_relationship( $rel ) if $rel->nonlocal; @@ -138,26 +208,43 @@ add_relationship. sub add_relationship { my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_; - # Check the options - $options->{'scope'} = 'local' unless $options->{'scope'}; - - my( $is_valid, $reason ) = - $self->relationship_valid( $source, $target, $options->{'type'} ); - unless( $is_valid ) { - return ( undef, $reason ); + my $relationship; + my $thispaironly; + if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) { + $relationship = $options; + $thispaironly = 1; # If existing rel, set only where asked. + } else { + # Check the options + $options->{'scope'} = 'local' unless $options->{'scope'}; + + my( $is_valid, $reason ) = + $self->relationship_valid( $source, $target, $options->{'type'} ); + unless( $is_valid ) { + throw( "Invalid relationship: $reason" ); + } + + # Try to create the relationship object. + $options->{'reading_a'} = $source_rdg->text; + $options->{'reading_b'} = $target_rdg->text; + $options->{'orig_a'} = $source; + $options->{'orig_b'} = $target; + if( $options->{'scope'} ne 'local' ) { + # Is there a relationship with this a & b already? + my $otherrel = $self->scoped_relationship( $options->{reading_a}, + $options->{reading_b} ); + if( $otherrel && $otherrel->type eq $options->{type} + && $otherrel->scope eq $options->{scope} ) { + warn "Applying existing scoped relationship"; + $relationship = $otherrel; + } + } + $relationship = $self->create( $options ) unless $relationship; # Will throw on error } - - # Try to create the relationship object. - $options->{'reading_a'} = $source_rdg->text; - $options->{'reading_b'} = $target_rdg->text; - $options->{'orig_a'} = $source; - $options->{'orig_b'} = $target; - my $relationship = $self->create( $options ); - return( undef, "Relationship creation failed" ) unless $relationship; + # Find all the pairs for which we need to set the relationship. my @vectors = ( [ $source, $target ] ); - if( $relationship->colocated && $relationship->nonlocal ) { + if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) { my $c = $self->collation; # Set the same relationship everywhere we can, throughout the graph. my @identical_readings = grep { $_->text eq $relationship->reading_a } @@ -181,25 +268,52 @@ sub add_relationship { # Now set the relationship(s). my @pairs_set; foreach my $v ( @vectors ) { - if( $self->graph->has_edge( @$v ) ) { - # Is it locally scoped? - my $rel = $self->graph->get_edge_attribute( @$v ); + my $rel = $self->get_relationship( @$v ); + if( $rel && $rel ne $relationship ) { if( $rel->nonlocal ) { - # TODO I think we should not be able to get here. - warn "Found conflicting relationship at @$v"; + throw( "Found conflicting relationship at @$v" ); } else { warn "Not overriding local relationship set at @$v"; - next; } + next; } - $self->graph->add_edge( @$v ); - $self->graph->set_edge_attribute( @$v, 'object', $relationship ); + $self->_set_relationship( $relationship, @$v ); push( @pairs_set, $v ); } - return( 1, @pairs_set ); + return @pairs_set; +} + +=head2 del_relationship( $source, $target ) + +Removes the relationship between the given readings. If the relationship is +non-local, removes the relationship everywhere in the graph. + +=cut + +sub del_relationship { + my( $self, $source, $target ) = @_; + my $rel = $self->get_relationship( $source, $target ); + throw( "No relationship defined between $source and $target" ) unless $rel; + my @vectors = ( [ $source, $target ] ); + $self->_remove_relationship( $source, $target ); + if( $rel->nonlocal ) { + # Remove the relationship wherever it occurs. + my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel } + $self->relationships; + foreach my $re ( @rel_edges ) { + $self->_remove_relationship( @$re ); + push( @vectors, $re ); + } + } + return @vectors; } +sub _remove_relationship { + my( $self, @vector ) = @_; + $self->graph->delete_edge( @vector ); +} + =head2 relationship_valid( $source, $target, $type ) Checks whether a relationship of type $type may exist between the readings given @@ -214,6 +328,7 @@ sub relationship_valid { if ( $rel eq 'transposition' || $rel eq 'repetition' ) { # Check that the two readings do (for a repetition) or do not (for # a transposition) appear in the same witness. + # TODO this might be called before witness paths are set... my %seen_wits; map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source ); foreach my $w ( $c->reading_witnesses( $target ) ) { @@ -227,7 +342,13 @@ sub relationship_valid { } } else { # Check that linking the source and target in a relationship won't lead - # to a path loop for any witness. First make a lookup table of all the + # to a path loop for any witness. If they have the same rank then fine. + return( 1, "ok" ) + if $c->reading( $source )->has_rank + && $c->reading( $target )->has_rank + && $c->reading( $source )->rank == $c->reading( $target )->rank; + + # Otherwise, first make a lookup table of all the # readings related to either the source or the target. my @proposed_related = ( $source, $target ); push( @proposed_related, $self->related_readings( $source, 'colocated' ) ); @@ -235,47 +356,72 @@ sub relationship_valid { my %pr_ids; map { $pr_ids{ $_ } = 1 } @proposed_related; - # None of these proposed related readings should have a neighbor that - # is also in proposed_related. + # The cumulative predecessors and successors of the proposed-related readings + # should not overlap. + my %all_pred; + my %all_succ; foreach my $pr ( keys %pr_ids ) { - foreach my $neighbor( $c->sequence->neighbors( $pr ) ) { - return( 0, "Would relate neighboring readings $pr and $neighbor" ) - if exists $pr_ids{$neighbor}; - } - } + map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr ); + map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr ); + } + foreach my $k ( keys %all_pred ) { + return( 0, "Relationship would create witness loop" ) + if exists $all_succ{$k}; + } + foreach my $k ( keys %pr_ids ) { + return( 0, "Relationship would create witness loop" ) + if exists $all_pred{$k} || exists $all_succ{$k}; + } return ( 1, "ok" ); } } -=head2 related_readings( $reading, $colocated_only ) +=head2 related_readings( $reading, $filter ) Returns a list of readings that are connected via relationship links to $reading. -If $colocated_only is true, restricts the list to those readings that are in the -same logical location (and therefore have the same rank in the collation graph.) +If $filter is set to a subroutine ref, returns only those related readings where +$filter( $relationship ) returns a true value. =cut sub related_readings { - my( $self, $reading, $colocated ) = @_; + my( $self, $reading, $filter ) = @_; my $return_object; if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) { $reading = $reading->id; $return_object = 1; } - my @related = $self->graph->all_reachable( $reading ); - if( $colocated ) { - my @colo; - foreach my $r ( @related ) { - my $obj = $self->graph->get_edge_attribute( $reading, $r, 'object' ); - push( @colo, $r ) if $obj->colocated; + my @answer; + if( $filter ) { + # Backwards compat + if( $filter eq 'colocated' ) { + $filter = sub { $_[0]->colocated }; } - @related = @colo; + my %found = ( $reading => 1 ); + my $check = [ $reading ]; + my $iter = 0; + while( @$check ) { + my $more = []; + foreach my $r ( @$check ) { + foreach my $nr ( $self->graph->neighbors( $r ) ) { + if( &$filter( $self->get_relationship( $r, $nr ) ) ) { + push( @$more, $nr ) unless exists $found{$nr}; + $found{$nr} = 1; + } + } + } + $check = $more; + } + delete $found{$reading}; + @answer = keys %found; + } else { + @answer = $self->graph->all_reachable( $reading ); } if( $return_object ) { my $c = $self->collation; - return map { $c->reading( $_ ) } @related; + return map { $c->reading( $_ ) } @answer; } else { - return @related; + return @answer; } } @@ -298,19 +444,82 @@ sub merge_readings { next if $combined; # If kept / rel already has a relationship, warn and keep the old - if( $self->graph->has_edge( @vector ) ) { + my $rel = $self->get_relationship( @vector ); + if( $rel ) { warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted ); next; } # Otherwise, adopt the relationship that would be deleted. - my $rel = $self->graph->get_edge_attribute( @$edge, 'object' ); - $self->graph->add_edge( @vector ); - $self->graph->set_edge_attribute( @vector, 'object', $rel ); + $rel = $self->get_relationship( @$edge ); + $self->_set_relationship( $rel, @vector ); } $self->delete_reading( $deleted ); } +sub _as_graphml { + my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_; + + my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' ); + $rgraph->setAttribute( 'edgedefault', 'directed' ); + $rgraph->setAttribute( 'id', 'relationships', ); + $rgraph->setAttribute( 'parse.edgeids', 'canonical' ); + $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) ); + $rgraph->setAttribute( 'parse.nodeids', 'canonical' ); + $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) ); + $rgraph->setAttribute( 'parse.order', 'nodesfirst' ); + + # Add the vertices according to their XML IDs + my %rdg_lookup = ( reverse %$node_hash ); + foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) { + my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' ); + $n_el->setAttribute( 'id', $n ); + _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} ); + } + + # Add the relationship edges, with their object information + my $edge_ctr = 0; + foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) { + # Add an edge and fill in its relationship info. + my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' ); + $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} ); + $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} ); + $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ ); + + my $rel_obj = $self->get_relationship( @$e ); + _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type ); + _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope ); + _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation ); + _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, + $rel_obj->non_correctable ) if $rel_obj->noncorr_set; + _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, + $rel_obj->non_independent ) if $rel_obj->nonind_set; + } +} + +sub _by_xmlid { + my $tmp_a = $a; + my $tmp_b = $b; + $tmp_a =~ s/\D//g; + $tmp_b =~ s/\D//g; + return $tmp_a <=> $tmp_b; +} + +sub _add_graphml_data { + my( $el, $key, $value ) = @_; + return unless defined $value; + my $data_el = $el->addNewChild( $el->namespaceURI, 'data' ); + $data_el->setAttribute( 'key', $key ); + $data_el->appendText( $value ); +} + +sub throw { + Text::Tradition::Error->throw( + 'ident' => 'Relationship error', + 'message' => $_[0], + ); +} + no Moose; __PACKAGE__->meta->make_immutable;