X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FText%2FTradition%2FCollation%2FRelationshipStore.pm;h=ddad4db2752805704326b6901d256eda6bad624a;hb=826d8773c081da08f139d1e48fa5ca1abb725f8e;hp=2f8784c208c176e9db3991f20235ece854395aaf;hpb=0a90079324146b05a46fadc49999d423e7d93db3;p=scpubgit%2Fstemmatology.git diff --git a/lib/Text/Tradition/Collation/RelationshipStore.pm b/lib/Text/Tradition/Collation/RelationshipStore.pm index 2f8784c..ddad4db 100644 --- a/lib/Text/Tradition/Collation/RelationshipStore.pm +++ b/lib/Text/Tradition/Collation/RelationshipStore.pm @@ -37,23 +37,19 @@ my $t = Text::Tradition->new( ); my $c = $t->collation; -my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } ); +my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } ); is( scalar @v1, 1, "Added a single relationship" ); is( $v1[0]->[0], 'n21', "Got correct node 1" ); is( $v1[0]->[1], 'n22', "Got correct node 2" ); -my @v2 = $c->add_relationship( 'n9', 'n23', +my @v2 = $c->add_relationship( 'n24', 'n23', { 'type' => 'spelling', 'scope' => 'global' } ); is( scalar @v2, 2, "Added a global relationship with two instances" ); @v1 = $c->del_relationship( 'n22', 'n21' ); is( scalar @v1, 1, "Deleted first relationship" ); -@v2 = $c->del_relationship( 'n8', 'n13' ); +@v2 = $c->del_relationship( 'n12', 'n13' ); is( scalar @v2, 2, "Deleted second global relationship" ); -try { - my @v3 = $c->del_relationship( 'n1', 'n2' ); - ok( 0, "Should have errored on non-existent relationship" ); -} catch( Text::Tradition::Error $e ) { - like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" ); -} +my @v3 = $c->del_relationship( 'n1', 'n2' ); +is( scalar @v3, 0, "Nothing deleted on non-existent relationship" ); =end testing @@ -167,13 +163,16 @@ non-locally. Key on whichever reading occurs first alphabetically. sub add_scoped_relationship { my( $self, $rel ) = @_; - my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b ); + my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a ); + my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b ); + my $r = $self->scoped_relationship( $rdga, $rdgb ); if( $r ) { warn sprintf( "Scoped relationship of type %s already exists between %s and %s", - $r->type, $rel->reading_a, $rel->reading_b ); + $r->type, $rdga, $rdgb ); return; } - $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel; + my( $first, $second ) = sort ( $rdga, $rdgb ); + $self->scopedrels->{$first}->{$second} = $rel; } =head2 scoped_relationship( $reading_a, $reading_b ) @@ -203,35 +202,104 @@ scoped non-locally. Returns a status boolean and a list of all reading pairs connected by the call to add_relationship. +=begin testing + +use Text::Tradition; +use TryCatch; + +my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' ); +# Test 1: try to equate nodes that are prevented with an intermediate collation +ok( $t1, "Parsed test fragment file" ); +my $c1 = $t1->collation; +## HACK +$c1->calculate_ranks(); +my $trel = $c1->get_relationship( '9,2', '9,3' ); +is( ref( $trel ), 'Text::Tradition::Collation::Relationship', + "Troublesome relationship exists" ); +is( $trel->type, 'collated', "Troublesome relationship is a collation" ); + +# Try to make the link we want +try { + $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } ); + ok( 1, "Added cross-collation relationship as expected" ); +} catch { + ok( 0, "Existing collation blocked equivalence relationship" ); +} + +try { + $c1->calculate_ranks(); + ok( 1, "Successfully calculated ranks" ); +} catch { + ok( 0, "Collation now has a cycle" ); +} + +# Test 2: try to equate nodes that are prevented with a real intermediate +# equivalence + +my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' ); +# Test 1: try to equate nodes that are prevented with an intermediate collation +my $c2 = $t2->collation; +## HACK +$c2->calculate_ranks(); +$c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } ); +my $trel2 = $c2->get_relationship( '9,2', '9,3' ); +is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship', + "Created blocking relationship" ); +is( $trel2->type, 'lexical', "Blocking relationship is not a collation" ); +# This time the link ought to fail +try { + $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } ); + ok( 0, "Added cross-equivalent bad relationship" ); +} catch { + ok( 1, "Existing equivalence blocked crossing relationship" ); +} + +try { + $c2->calculate_ranks(); + ok( 1, "Successfully calculated ranks" ); +} catch { + ok( 0, "Collation now has a cycle" ); +} + +=end testing + =cut sub add_relationship { - my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_; + my( $self, $source, $target, $options ) = @_; + my $c = $self->collation; my $relationship; my $thispaironly; + my $droppedcolls = []; if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) { $relationship = $options; $thispaironly = 1; # If existing rel, set only where asked. } else { # Check the options $options->{'scope'} = 'local' unless $options->{'scope'}; + $options->{'scope'} = 'local' if $options->{'type'} eq 'collated'; + $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition'; - my( $is_valid, $reason ) = - $self->relationship_valid( $source, $target, $options->{'type'} ); + my( $is_valid, $reason ) = $self->relationship_valid( $source, $target, + $options->{'type'}, $droppedcolls ); unless( $is_valid ) { throw( "Invalid relationship: $reason" ); } # Try to create the relationship object. - $options->{'reading_a'} = $source_rdg->text; - $options->{'reading_b'} = $target_rdg->text; + $options->{'reading_a'} = $c->reading( $source )->text; + $options->{'reading_b'} = $c->reading( $target )->text; $options->{'orig_a'} = $source; $options->{'orig_b'} = $target; if( $options->{'scope'} ne 'local' ) { # Is there a relationship with this a & b already? - my $otherrel = $self->scoped_relationship( $options->{reading_a}, - $options->{reading_b} ); + # Case-insensitive for non-orthographics. + my $rdga = $options->{'type'} eq 'orthographic' + ? $options->{'reading_a'} : lc( $options->{'reading_a'} ); + my $rdgb = $options->{'type'} eq 'orthographic' + ? $options->{'reading_b'} : lc( $options->{'reading_b'} ); + my $otherrel = $self->scoped_relationship( $rdga, $rdgb ); if( $otherrel && $otherrel->type eq $options->{type} && $otherrel->scope eq $options->{scope} ) { warn "Applying existing scoped relationship"; @@ -243,47 +311,99 @@ sub add_relationship { # Find all the pairs for which we need to set the relationship. - my @vectors = ( [ $source, $target ] ); + my @vectors; if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) { - my $c = $self->collation; - # Set the same relationship everywhere we can, throughout the graph. - my @identical_readings = grep { $_->text eq $relationship->reading_a } - $c->readings; - foreach my $ir ( @identical_readings ) { - next if $ir->id eq $source; - # Check to see if there is a target reading with the same text at - # the same rank. - my @itarget = grep - { $_->rank == $ir->rank && $_->text eq $relationship->reading_b } - $c->readings; - if( @itarget ) { - # We found a hit. - warn "More than one reading with text " . $target_rdg->text - . " at rank " . $ir->rank . "!" if @itarget > 1; - push( @vectors, [ $ir->id, $itarget[0]->id ] ); - } - } + push( @vectors, $self->_find_applicable( $relationship ) ); } - + # Now set the relationship(s). my @pairs_set; + my $rel = $self->get_relationship( $source, $target ); + if( $rel && $rel ne $relationship ) { + if( $rel->nonlocal ) { + throw( "Found conflicting relationship at $source - $target" ); + } elsif( $rel->type ne 'collated' ) { + # Replace a collation relationship; leave any other sort in place. + my $r1ann = $rel->has_annotation ? $rel->annotation : ''; + my $r2ann = $relationship->has_annotation ? $relationship->annotation : ''; + unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) { + warn sprintf( "Not overriding local relationship %s with global %s " + . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type, + $source, $target, $rel->reading_a, $rel->reading_b ); + next; + } + } + } + $self->_set_relationship( $relationship, $source, $target ); + push( @pairs_set, [ $source, $target ] ); + + # Set any additional relationships that might be in @vectors. foreach my $v ( @vectors ) { - my $rel = $self->get_relationship( @$v ); - if( $rel && $rel ne $relationship ) { - if( $rel->nonlocal ) { - throw( "Found conflicting relationship at @$v" ); - } else { - warn "Not overriding local relationship set at @$v"; - } - next; - } - $self->_set_relationship( $relationship, @$v ); - push( @pairs_set, $v ); + next if $v->[0] eq $source && $v->[1] eq $target; + next if $v->[1] eq $source && $v->[0] eq $target; + my @added = $self->add_relationship( @$v, $relationship ); + push( @pairs_set, @added ); } + # Finally, restore whatever collations we can, and return. + $self->_restore_collations( @$droppedcolls ); return @pairs_set; } +=head2 del_scoped_relationship( $reading_a, $reading_b ) + +Returns the general (document-level or global) relationship that has been defined +between the two reading strings. Returns undef if there is no general relationship. + +=cut + +sub del_scoped_relationship { + my( $self, $rdga, $rdgb ) = @_; + my( $first, $second ) = sort( $rdga, $rdgb ); + return delete $self->scopedrels->{$first}->{$second}; +} + +sub _find_applicable { + my( $self, $rel ) = @_; + my $c = $self->collation; + # TODO Someday we might use a case sensitive language. + my $lang = $c->tradition->language; + my @vectors; + my @identical_readings; + if( $rel->type eq 'orthographic' ) { + @identical_readings = grep { $_->text eq $rel->reading_a } + $c->readings; + } else { + @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) } + $c->readings; + } + foreach my $ir ( @identical_readings ) { + my @itarget; + if( $rel->type eq 'orthographic' ) { + @itarget = grep { $_->rank == $ir->rank + && $_->text eq $rel->reading_b } $c->readings; + } else { + @itarget = grep { $_->rank == $ir->rank + && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings; + } + if( @itarget ) { + # Warn if there is more than one hit with no orth link between them. + my $itmain = shift @itarget; + if( @itarget ) { + my %all_targets; + map { $all_targets{$_} = 1 } @itarget; + map { delete $all_targets{$_} } + $self->related_readings( $itmain, + sub { $_[0]->type eq 'orthographic' } ); + warn "More than one unrelated reading with text " . $itmain->text + . " at rank " . $ir->rank . "!" if keys %all_targets; + } + push( @vectors, [ $ir->id, $itmain->id ] ); + } + } + return @vectors; +} + =head2 del_relationship( $source, $target ) Removes the relationship between the given readings. If the relationship is @@ -294,17 +414,19 @@ non-local, removes the relationship everywhere in the graph. sub del_relationship { my( $self, $source, $target ) = @_; my $rel = $self->get_relationship( $source, $target ); - throw( "No relationship defined between $source and $target" ) unless $rel; + return () unless $rel; # Nothing to delete; return an empty set. my @vectors = ( [ $source, $target ] ); $self->_remove_relationship( $source, $target ); if( $rel->nonlocal ) { # Remove the relationship wherever it occurs. + # Remove the relationship wherever it occurs. my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel } $self->relationships; foreach my $re ( @rel_edges ) { $self->_remove_relationship( @$re ); push( @vectors, $re ); } + $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b ); } return @vectors; } @@ -323,7 +445,8 @@ a yes/no boolean and, if the answer is no, message gives the reason why. =cut sub relationship_valid { - my( $self, $source, $target, $rel ) = @_; + my( $self, $source, $target, $rel, $mustdrop ) = @_; + $mustdrop = [] unless $mustdrop; # in case we were passed nothing my $c = $self->collation; if ( $rel eq 'transposition' || $rel eq 'repetition' ) { # Check that the two readings do (for a repetition) or do not (for @@ -336,46 +459,69 @@ sub relationship_valid { return ( 0, "Readings both occur in witness $w" ) if $rel eq 'transposition'; return ( 1, "ok" ) if $rel eq 'repetition'; + } } return $rel eq 'transposition' ? ( 1, "ok" ) : ( 0, "Readings occur only in distinct witnesses" ); - } } else { # Check that linking the source and target in a relationship won't lead - # to a path loop for any witness. If they have the same rank then fine. - return( 1, "ok" ) - if $c->reading( $source )->has_rank - && $c->reading( $target )->has_rank - && $c->reading( $source )->rank == $c->reading( $target )->rank; - - # Otherwise, first make a lookup table of all the - # readings related to either the source or the target. - my @proposed_related = ( $source, $target ); - push( @proposed_related, $self->related_readings( $source, 'colocated' ) ); - push( @proposed_related, $self->related_readings( $target, 'colocated' ) ); - my %pr_ids; - map { $pr_ids{ $_ } = 1 } @proposed_related; - - # The cumulative predecessors and successors of the proposed-related readings - # should not overlap. - my %all_pred; - my %all_succ; - foreach my $pr ( keys %pr_ids ) { - map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr ); - map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr ); + # to a path loop for any witness. + # First, drop/stash any collations that might interfere + my $sourceobj = $c->reading( $source ); + my $targetobj = $c->reading( $target ); + my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1; + my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1; + unless( $rel eq 'collated' || $sourcerank == $targetrank ) { + push( @$mustdrop, $self->_drop_collations( $source ) ); + push( @$mustdrop, $self->_drop_collations( $target ) ); } - foreach my $k ( keys %all_pred ) { - return( 0, "Relationship would create witness loop" ) - if exists $all_succ{$k}; + my $map = {}; + my( $startrank, $endrank ); + if( $c->end->has_rank ) { + my $cpred = $c->common_predecessor( $source, $target ); + my $csucc = $c->common_successor( $source, $target ); + $startrank = $cpred->rank; + $endrank = $csucc->rank; + unless( $rel eq 'collated' || $sourcerank == $targetrank ) { + foreach my $rk ( $startrank+1 .. $endrank-1 ) { + map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) } + $c->readings_at_rank( $rk ); + } + } } - foreach my $k ( keys %pr_ids ) { - return( 0, "Relationship would create witness loop" ) - if exists $all_pred{$k} || exists $all_succ{$k}; + my $eqgraph = $c->equivalence_graph( $map, $startrank, $endrank, + $source, $target ); + if( $eqgraph->has_a_cycle ) { + $self->_restore_collations( @$mustdrop ); + return( 0, "Relationship would create witness loop" ); } return ( 1, "ok" ); } } +sub _drop_collations { + my( $self, $reading ) = @_; + my @dropped; + foreach my $n ( $self->graph->neighbors( $reading ) ) { + if( $self->get_relationship( $reading, $n )->type eq 'collated' ) { + push( @dropped, [ $reading, $n ] ); + $self->del_relationship( $reading, $n ); + } + } + return @dropped; +} + +sub _restore_collations { + my( $self, @vectors ) = @_; + foreach my $v ( @vectors ) { + try { + $self->add_relationship( @$v, { 'type' => 'collated' } ); + } catch { + print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n"; + } + } +} + =head2 related_readings( $reading, $filter ) Returns a list of readings that are connected via relationship links to $reading. @@ -443,12 +589,9 @@ sub merge_readings { # If kept changes its text, drop the relationship. next if $combined; - # If kept / rel already has a relationship, warn and keep the old + # If kept / rel already has a relationship, just keep the old my $rel = $self->get_relationship( @vector ); - if( $rel ) { - warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted ); - next; - } + next if $rel; # Otherwise, adopt the relationship that would be deleted. $rel = $self->get_relationship( @$edge ); @@ -471,7 +614,8 @@ sub _as_graphml { # Add the vertices according to their XML IDs my %rdg_lookup = ( reverse %$node_hash ); - foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) { + my @nlist = sort keys( %rdg_lookup ); + foreach my $n ( @nlist ) { my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' ); $n_el->setAttribute( 'id', $n ); _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} ); @@ -481,19 +625,18 @@ sub _as_graphml { my $edge_ctr = 0; foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) { # Add an edge and fill in its relationship info. + next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} ); my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' ); $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} ); $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} ); $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ ); my $rel_obj = $self->get_relationship( @$e ); - _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type ); - _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope ); - _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation ); - _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, - $rel_obj->non_correctable ) if $rel_obj->noncorr_set; - _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, - $rel_obj->non_independent ) if $rel_obj->nonind_set; + foreach my $key ( keys %$edge_keys ) { + my $value = $rel_obj->$key; + _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) + if defined $value; + } } }