X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FCollation%2FRelationshipStore.pm;h=55d69431927c78ae1ba62f887a89cd24411ef9aa;hb=679f17e1a60a81370df8cbb49b94a2b5d19e3a98;hp=9e9b718b14abee5bce5bb3742bd8e8630eef6b05;hpb=ee801e17d007001be15c1b17d4942ffb234aa14f;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Collation/RelationshipStore.pm b/lib/Text/Tradition/Collation/RelationshipStore.pm index 9e9b718..55d6943 100644 --- a/lib/Text/Tradition/Collation/RelationshipStore.pm +++ b/lib/Text/Tradition/Collation/RelationshipStore.pm @@ -41,19 +41,15 @@ my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } ); is( scalar @v1, 1, "Added a single relationship" ); is( $v1[0]->[0], 'n21', "Got correct node 1" ); is( $v1[0]->[1], 'n22', "Got correct node 2" ); -my @v2 = $c->add_relationship( 'n9', 'n23', +my @v2 = $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling', 'scope' => 'global' } ); is( scalar @v2, 2, "Added a global relationship with two instances" ); @v1 = $c->del_relationship( 'n22', 'n21' ); is( scalar @v1, 1, "Deleted first relationship" ); -@v2 = $c->del_relationship( 'n8', 'n13' ); +@v2 = $c->del_relationship( 'n12', 'n13' ); is( scalar @v2, 2, "Deleted second global relationship" ); -try { - my @v3 = $c->del_relationship( 'n1', 'n2' ); - ok( 0, "Should have errored on non-existent relationship" ); -} catch( Text::Tradition::Error $e ) { - like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" ); -} +my @v3 = $c->del_relationship( 'n1', 'n2' ); +is( scalar @v3, 0, "Nothing deleted on non-existent relationship" ); =end testing @@ -96,11 +92,19 @@ Return the relationship object, if any, that exists between two readings. =cut sub get_relationship { - my( $self, @vector ) = @_; + my $self = shift; + my @vector; + if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) { + # Dereference the edge arrayref that was passed. + my $edge = shift; + @vector = @$edge; + } else { + @vector = @_; + } my $relationship; if( $self->graph->has_edge_attribute( @vector, 'object' ) ) { $relationship = $self->graph->get_edge_attribute( @vector, 'object' ); - } + } return $relationship; } @@ -110,11 +114,6 @@ sub _set_relationship { $self->graph->set_edge_attribute( @vector, 'object', $relationship ); } -sub _remove_relationship { - my( $self, @vector ) = @_; - $self->graph->delete_edge( @vector ); -} - =head2 create Create a new relationship with the given options and return it. @@ -129,7 +128,11 @@ sub create { my $target = delete $options->{'orig_b'}; my $rel = $self->get_relationship( $source, $target ); if( $rel ) { - if( $rel->type ne $options->{'type'} ) { + if( $rel->type eq 'collated' ) { + # Always replace a 'collated' relationship with a more descriptive + # one, if asked. + $self->del_relationship( $source, $target ); + } elsif( $rel->type ne $options->{'type'} ) { throw( "Another relationship of type " . $rel->type . " already exists between $source and $target" ); } else { @@ -160,13 +163,16 @@ non-locally. Key on whichever reading occurs first alphabetically. sub add_scoped_relationship { my( $self, $rel ) = @_; - my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b ); + my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a ); + my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b ); + my $r = $self->scoped_relationship( $rdga, $rdgb ); if( $r ) { warn sprintf( "Scoped relationship of type %s already exists between %s and %s", - $r->type, $rel->reading_a, $rel->reading_b ); + $r->type, $rdga, $rdgb ); return; } - $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel; + my( $first, $second ) = sort ( $rdga, $rdgb ); + $self->scopedrels->{$first}->{$second} = $rel; } =head2 scoped_relationship( $reading_a, $reading_b ) @@ -201,56 +207,63 @@ add_relationship. sub add_relationship { my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_; - # Check the options - $options->{'scope'} = 'local' unless $options->{'scope'}; - - my( $is_valid, $reason ) = - $self->relationship_valid( $source, $target, $options->{'type'} ); - unless( $is_valid ) { - throw( "Invalid relationship: $reason" ); + my $relationship; + my $thispaironly; + if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) { + $relationship = $options; + $thispaironly = 1; # If existing rel, set only where asked. + } else { + # Check the options + $options->{'scope'} = 'local' unless $options->{'scope'}; + $options->{'scope'} = 'local' if $options->{'type'} eq 'collated'; + + my( $is_valid, $reason ) = + $self->relationship_valid( $source, $target, $options->{'type'} ); + unless( $is_valid ) { + throw( "Invalid relationship: $reason" ); + } + + # Try to create the relationship object. + $options->{'reading_a'} = $source_rdg->text; + $options->{'reading_b'} = $target_rdg->text; + $options->{'orig_a'} = $source; + $options->{'orig_b'} = $target; + if( $options->{'scope'} ne 'local' ) { + # Is there a relationship with this a & b already? + # Case-insensitive for non-orthographics. + my $rdga = $options->{'type'} eq 'orthographic' + ? $options->{'reading_a'} : lc( $options->{'reading_a'} ); + my $rdgb = $options->{'type'} eq 'orthographic' + ? $options->{'reading_b'} : lc( $options->{'reading_b'} ); + my $otherrel = $self->scoped_relationship( $rdga, $rdgb ); + if( $otherrel && $otherrel->type eq $options->{type} + && $otherrel->scope eq $options->{scope} ) { + warn "Applying existing scoped relationship"; + $relationship = $otherrel; + } + } + $relationship = $self->create( $options ) unless $relationship; # Will throw on error } - - # Try to create the relationship object. - $options->{'reading_a'} = $source_rdg->text; - $options->{'reading_b'} = $target_rdg->text; - $options->{'orig_a'} = $source; - $options->{'orig_b'} = $target; - my $relationship = $self->create( $options ); # Will throw on error + # Find all the pairs for which we need to set the relationship. - my @vectors = ( [ $source, $target ] ); - if( $relationship->colocated && $relationship->nonlocal ) { - my $c = $self->collation; - # Set the same relationship everywhere we can, throughout the graph. - my @identical_readings = grep { $_->text eq $relationship->reading_a } - $c->readings; - foreach my $ir ( @identical_readings ) { - next if $ir->id eq $source; - # Check to see if there is a target reading with the same text at - # the same rank. - my @itarget = grep - { $_->rank == $ir->rank && $_->text eq $relationship->reading_b } - $c->readings; - if( @itarget ) { - # We found a hit. - warn "More than one reading with text " . $target_rdg->text - . " at rank " . $ir->rank . "!" if @itarget > 1; - push( @vectors, [ $ir->id, $itarget[0]->id ] ); - } - } + my @vectors = [ $source, $target ]; + if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) { + push( @vectors, $self->_find_applicable( $relationship ) ); } - + # Now set the relationship(s). my @pairs_set; foreach my $v ( @vectors ) { my $rel = $self->get_relationship( @$v ); - if( $rel ) { + if( $rel && $rel ne $relationship ) { if( $rel->nonlocal ) { throw( "Found conflicting relationship at @$v" ); - } else { + } elsif( $rel->type ne 'collated' ) { + # Replace a collation relationship; leave any other sort in place. warn "Not overriding local relationship set at @$v"; + next; } - next; } $self->_set_relationship( $relationship, @$v ); push( @pairs_set, $v ); @@ -259,6 +272,47 @@ sub add_relationship { return @pairs_set; } +sub _find_applicable { + my( $self, $rel ) = @_; + my $c = $self->collation; + # TODO Someday we might use a case sensitive language. + my $lang = $c->tradition->language; + my @vectors; + my @identical_readings; + if( $rel->type eq 'orthographic' ) { + @identical_readings = grep { $_->text eq $rel->reading_a } + $c->readings; + } else { + @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) } + $c->readings; + } + foreach my $ir ( @identical_readings ) { + my @itarget; + if( $rel->type eq 'orthographic' ) { + @itarget = grep { $_->rank == $ir->rank + && $_->text eq $rel->reading_b } $c->readings; + } else { + @itarget = grep { $_->rank == $ir->rank + && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings; + } + if( @itarget ) { + # Warn if there is more than one hit with no orth link between them. + my $itmain = shift @itarget; + if( @itarget ) { + my %all_targets; + map { $all_targets{$_} = 1 } @itarget; + map { delete $all_targets{$_} } + $self->related_readings( $itmain, + sub { $_[0]->type eq 'orthographic' } ); + warn "More than one unrelated reading with text " . $itmain->text + . " at rank " . $ir->rank . "!" if keys %all_targets; + } + push( @vectors, [ $ir->id, $itmain->id ] ); + } + } + return @vectors; +} + =head2 del_relationship( $source, $target ) Removes the relationship between the given readings. If the relationship is @@ -269,7 +323,7 @@ non-local, removes the relationship everywhere in the graph. sub del_relationship { my( $self, $source, $target ) = @_; my $rel = $self->get_relationship( $source, $target ); - throw( "No relationship defined between $source and $target" ) unless $rel; + return () unless $rel; # Nothing to delete; return an empty set. my @vectors = ( [ $source, $target ] ); $self->_remove_relationship( $source, $target ); if( $rel->nonlocal ) { @@ -284,6 +338,11 @@ sub del_relationship { return @vectors; } +sub _remove_relationship { + my( $self, @vector ) = @_; + $self->graph->delete_edge( @vector ); +} + =head2 relationship_valid( $source, $target, $type ) Checks whether a relationship of type $type may exist between the readings given @@ -298,6 +357,7 @@ sub relationship_valid { if ( $rel eq 'transposition' || $rel eq 'repetition' ) { # Check that the two readings do (for a repetition) or do not (for # a transposition) appear in the same witness. + # TODO this might be called before witness paths are set... my %seen_wits; map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source ); foreach my $w ( $c->reading_witnesses( $target ) ) { @@ -313,13 +373,19 @@ sub relationship_valid { # Check that linking the source and target in a relationship won't lead # to a path loop for any witness. If they have the same rank then fine. return( 1, "ok" ) - if $c->reading( $source )->rank == $c->reading( $target )->rank; + if $c->reading( $source )->has_rank + && $c->reading( $target )->has_rank + && $c->reading( $source )->rank == $c->reading( $target )->rank; # Otherwise, first make a lookup table of all the # readings related to either the source or the target. my @proposed_related = ( $source, $target ); - push( @proposed_related, $self->related_readings( $source, 'colocated' ) ); - push( @proposed_related, $self->related_readings( $target, 'colocated' ) ); + # Drop the collation links of source and target, unless we want to + # add a collation relationship. + foreach my $r ( ( $source, $target ) ) { + $self->_drop_collations( $r ) unless $rel eq 'collated'; + push( @proposed_related, $self->related_readings( $r, 'colocated' ) ); + } my %pr_ids; map { $pr_ids{ $_ } = 1 } @proposed_related; @@ -343,23 +409,36 @@ sub relationship_valid { } } -=head2 related_readings( $reading, $colocated_only ) +sub _drop_collations { + my( $self, $reading ) = @_; + foreach my $n ( $self->graph->neighbors( $reading ) ) { + if( $self->get_relationship( $reading, $n )->type eq 'collated' ) { + $self->del_relationship( $reading, $n ); + } + } +} + +=head2 related_readings( $reading, $filter ) Returns a list of readings that are connected via relationship links to $reading. -If $colocated_only is true, restricts the list to those readings that are in the -same logical location (and therefore have the same rank in the collation graph.) +If $filter is set to a subroutine ref, returns only those related readings where +$filter( $relationship ) returns a true value. =cut sub related_readings { - my( $self, $reading, $colocated ) = @_; + my( $self, $reading, $filter ) = @_; my $return_object; if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) { $reading = $reading->id; $return_object = 1; } my @answer; - if( $colocated ) { + if( $filter ) { + # Backwards compat + if( $filter eq 'colocated' ) { + $filter = sub { $_[0]->colocated }; + } my %found = ( $reading => 1 ); my $check = [ $reading ]; my $iter = 0; @@ -367,7 +446,7 @@ sub related_readings { my $more = []; foreach my $r ( @$check ) { foreach my $nr ( $self->graph->neighbors( $r ) ) { - if( $self->get_relationship( $r, $nr )->colocated ) { + if( &$filter( $self->get_relationship( $r, $nr ) ) ) { push( @$more, $nr ) unless exists $found{$nr}; $found{$nr} = 1; } @@ -375,6 +454,7 @@ sub related_readings { } $check = $more; } + delete $found{$reading}; @answer = keys %found; } else { @answer = $self->graph->all_reachable( $reading ); @@ -405,12 +485,9 @@ sub merge_readings { # If kept changes its text, drop the relationship. next if $combined; - # If kept / rel already has a relationship, warn and keep the old + # If kept / rel already has a relationship, just keep the old my $rel = $self->get_relationship( @vector ); - if( $rel ) { - warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted ); - next; - } + next if $rel; # Otherwise, adopt the relationship that would be deleted. $rel = $self->get_relationship( @$edge ); @@ -449,12 +526,11 @@ sub _as_graphml { $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ ); my $rel_obj = $self->get_relationship( @$e ); - _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type ); - _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope ); - _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, - $rel_obj->non_correctable ) if $rel_obj->noncorr_set; - _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, - $rel_obj->non_independent ) if $rel_obj->nonind_set; + foreach my $key ( keys %$edge_keys ) { + my $value = $rel_obj->$key; + _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) + if defined $value; + } } }