From: Tara L Andrews Date: Thu, 19 Apr 2012 22:56:49 +0000 (+0200) Subject: overhaul of colocated-relationship validation, still segfaulting X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=414cc046fb54e81ac48d607b5e05c11623934533;p=scpubgit%2Fstemmatology.git overhaul of colocated-relationship validation, still segfaulting --- diff --git a/lib/Text/Tradition/Collation.pm b/lib/Text/Tradition/Collation.pm index f0901a6..e854128 100644 --- a/lib/Text/Tradition/Collation.pm +++ b/lib/Text/Tradition/Collation.pm @@ -495,8 +495,7 @@ sub clear_witness { sub add_relationship { my $self = shift; my( $source, $target, $opts ) = $self->_stringify_args( @_ ); - my( @vectors ) = $self->relations->add_relationship( $source, - $self->reading( $source ), $target, $self->reading( $target ), $opts ); + my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts ); $self->_graphcalc_done(0); return @vectors; } @@ -895,6 +894,10 @@ sub as_graphml { # The readings need to be ranked for this to work. $start = $self->start unless $start->has_rank; $end = $self->end unless $end->has_rank; + my $rankoffset = 0; + unless( $start eq $self->start ) { + $rankoffset = $start->rank - 1; + } my %use_readings; # Some namespaces @@ -1034,6 +1037,10 @@ sub as_graphml { $node_el->setAttribute( 'id', $node_xmlid ); foreach my $d ( keys %reading_attributes ) { my $nval = $n->$d; + if( $rankoffset && $d eq 'rank' ) { + # Adjust the ranks within the subgraph. + $nval = $n eq $self->end ? $end->rank + 1 : $nval - $rankoffset; + } _add_graphml_data( $node_el, $node_data_keys{$d}, $nval ) if defined $nval; } @@ -1073,8 +1080,8 @@ sub as_graphml { # Add the relationship graph to the XML map { delete $edge_data_keys{$_} } @path_attributes; - $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash, - $node_data_keys{'id'}, \%edge_data_keys ); + # $self->relations->_as_graphml( $graphml_ns, $root, \%node_hash, + # $node_data_keys{'id'}, \%edge_data_keys ); # Save and return the thing my $result = decode_utf8( $graphml->toString(1) ); @@ -1428,7 +1435,7 @@ sub make_witness_path { $wit->clear_uncorrected_path; } -=head2 equivalence_graph( \%readingmap, $startrank, $endrank ) +=head2 equivalence_graph( \%readingmap, $startrank, $endrank, @testvector ) Returns an equivalence graph of the collation, in which all readings related via a 'colocated' relationship are transformed into a single @@ -1438,13 +1445,15 @@ hash whose reference is passed as readingmap. For a subset of the graph, pass in a start and/or an ending rank (this only works if L has been called at least once.) +It is also possible to pass in a test relationship in @testvector, and get +the resulting equivalence graph before the relationship has been made. + =cut sub equivalence_graph { - my( $self, $map, $start, $end ) = @_; + my( $self, $map, $start, $end, @newvector ) = @_; $start = undef unless $self->end->has_rank; $end = undef unless $self->end->has_rank; - my $eqgraph = Graph->new(); my $rel_ctr = 0; # Add the nodes @@ -1454,21 +1463,16 @@ sub equivalence_graph { next if $end && $r->rank > $end; } next if exists $map->{$r->id}; - my @rels = $r->related_readings( 'colocated' ); - if( @rels ) { - # Make an equivalence vertex - my $rn = 'equivalence_' . $rel_ctr++; - $eqgraph->add_vertex( $rn ); - # Note which readings belong to this vertex. - push( @rels, $r ); - foreach( @rels ) { - $map->{$_->id} = $rn; - } - } else { - # Add a new node to mirror the old node. - $map->{$r->id} = $r->id; - $eqgraph->add_vertex( $r->id ); - } + my @rels = $self->related_readings( $r->id, 'colocated' ); + push( @rels, $r->id ); + # Make an equivalence vertex + my $rn = 'equivalence_' . $rel_ctr++; + $eqgraph->add_vertex( $rn ); + # Note which readings belong to this vertex. + push( @rels, $r->id ); + foreach( @rels ) { + $map->{$_} = $rn; + } } # Add the edges. @@ -1479,6 +1483,18 @@ sub equivalence_graph { : $map->{$self->end->id}; $eqgraph->add_edge( $efrom, $eto ); } + + # Collapse the vertices in @newvector if applicable. + if( @newvector ) { + my( $eqs, $eqt ) = map { $map->{$_} } @newvector; + $DB::single = 1 unless $eqs && $eqt; + unless( $eqs eq $eqt ) { + # Combine the vertices. + map { $eqgraph->add_edge( $eqs, $_ ) } $eqgraph->successors( $eqt ); + map { $eqgraph->add_edge( $_, $eqs ) } $eqgraph->predecessors( $eqt ); + $eqgraph->delete_vertex( $eqt ); + } + } return $eqgraph; } @@ -1724,10 +1740,12 @@ sub text_from_paths { =head2 common_predecessor( $reading_a, $reading_b ) Find the last reading that occurs in sequence before both the given readings. +At the very least this should be $self->start. =head2 common_successor( $reading_a, $reading_b ) Find the first reading that occurs in sequence after both the given readings. +At the very least this should be $self->end. =begin testing @@ -1768,26 +1786,45 @@ sub common_successor { return $self->_common_in_path( $r1, $r2, 'successors' ); } + +# TODO think about how to do this without ranks... sub _common_in_path { my( $self, $r1, $r2, $dir ) = @_; - my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank; - $iter = $self->end->rank - $iter if $dir eq 'successors'; + my $iter = $self->end->rank; my @candidates; - my @last_checked = ( $r1, $r2 ); + my @last_r1 = ( $r1 ); + my @last_r2 = ( $r2 ); + # my %all_seen = ( $r1 => 'r1', $r2 => 'r2' ); my %all_seen; + # print STDERR "Finding common $dir for $r1, $r2\n"; while( !@candidates ) { - my @new_lc; - foreach my $lc ( @last_checked ) { + last unless $iter--; # Avoid looping infinitely + # Iterate separately down the graph from r1 and r2 + my( @new_lc1, @new_lc2 ); + foreach my $lc ( @last_r1 ) { + foreach my $p ( $lc->$dir ) { + if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r1' ) { + # print STDERR "Path candidate $p from $lc\n"; + push( @candidates, $p ); + } else { + $all_seen{$p->id} = 'r1'; + push( @new_lc1, $p ); + } + } + } + foreach my $lc ( @last_r2 ) { foreach my $p ( $lc->$dir ) { - if( $all_seen{$p->id} ) { + if( $all_seen{$p->id} && $all_seen{$p->id} ne 'r2' ) { + # print STDERR "Path candidate $p from $lc\n"; push( @candidates, $p ); } else { - $all_seen{$p->id} = 1; - push( @new_lc, $p ); + $all_seen{$p->id} = 'r2'; + push( @new_lc2, $p ); } } } - @last_checked = @new_lc; + @last_r1 = @new_lc1; + @last_r2 = @new_lc2; } my @answer = sort { $a->rank <=> $b->rank } @candidates; return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer ); diff --git a/lib/Text/Tradition/Collation/RelationshipStore.pm b/lib/Text/Tradition/Collation/RelationshipStore.pm index d894568..f2d3359 100644 --- a/lib/Text/Tradition/Collation/RelationshipStore.pm +++ b/lib/Text/Tradition/Collation/RelationshipStore.pm @@ -211,6 +211,8 @@ my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.x # Test 1: try to equate nodes that are prevented with an intermediate collation ok( $t1, "Parsed test fragment file" ); my $c1 = $t1->collation; +## HACK +$c1->calculate_ranks(); my $trel = $c1->get_relationship( '9,2', '9,3' ); is( ref( $trel ), 'Text::Tradition::Collation::Relationship', "Troublesome relationship exists" ); @@ -237,6 +239,8 @@ try { my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' ); # Test 1: try to equate nodes that are prevented with an intermediate collation my $c2 = $t2->collation; +## HACK +$c2->calculate_ranks(); $c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } ); my $trel2 = $c2->get_relationship( '9,2', '9,3' ); is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship', @@ -245,9 +249,9 @@ is( $trel2->type, 'lexical', "Blocking relationship is not a collation" ); # This time the link ought to fail try { $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } ); - ok( 0, "Existing equivalence blocked crossing relationship" ); + ok( 0, "Added cross-equivalent bad relationship" ); } catch { - ok( 1, "Added cross-equivalent bad relationship" ); + ok( 1, "Existing equivalence blocked crossing relationship" ); } try { @@ -262,10 +266,12 @@ try { =cut sub add_relationship { - my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_; + my( $self, $source, $target, $options ) = @_; + my $c = $self->collation; my $relationship; my $thispaironly; + my $droppedcolls = []; if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) { $relationship = $options; $thispaironly = 1; # If existing rel, set only where asked. @@ -275,15 +281,15 @@ sub add_relationship { $options->{'scope'} = 'local' if $options->{'type'} eq 'collated'; $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition'; - my( $is_valid, $reason ) = - $self->relationship_valid( $source, $target, $options->{'type'} ); + my( $is_valid, $reason ) = $self->relationship_valid( $source, $target, + $options->{'type'}, $droppedcolls ); unless( $is_valid ) { throw( "Invalid relationship: $reason" ); } # Try to create the relationship object. - $options->{'reading_a'} = $source_rdg->text; - $options->{'reading_b'} = $target_rdg->text; + $options->{'reading_a'} = $c->reading( $source )->text; + $options->{'reading_b'} = $c->reading( $target )->text; $options->{'orig_a'} = $source; $options->{'orig_b'} = $target; if( $options->{'scope'} ne 'local' ) { @@ -305,35 +311,42 @@ sub add_relationship { # Find all the pairs for which we need to set the relationship. - my @vectors = [ $source, $target ]; + my @vectors; if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) { push( @vectors, $self->_find_applicable( $relationship ) ); } # Now set the relationship(s). my @pairs_set; + my $rel = $self->get_relationship( $source, $target ); + if( $rel && $rel ne $relationship ) { + if( $rel->nonlocal ) { + throw( "Found conflicting relationship at $source - $target" ); + } elsif( $rel->type ne 'collated' ) { + # Replace a collation relationship; leave any other sort in place. + my $r1ann = $rel->has_annotation ? $rel->annotation : ''; + my $r2ann = $relationship->has_annotation ? $relationship->annotation : ''; + unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) { + warn sprintf( "Not overriding local relationship %s with global %s " + . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type, + $source, $target, $rel->reading_a, $rel->reading_b ); + next; + } + } + } + $self->_set_relationship( $relationship, $source, $target ); + push( @pairs_set, [ $source, $target ] ); + + # Set any additional relationships that might be in @vectors. foreach my $v ( @vectors ) { - my $rel = $self->get_relationship( @$v ); - if( $rel && $rel ne $relationship ) { - if( $rel->nonlocal ) { - throw( "Found conflicting relationship at @$v" ); - } elsif( $rel->type ne 'collated' ) { - # Replace a collation relationship; leave any other sort in place. - my $r1ann = $rel->has_annotation ? $rel->annotation : ''; - my $r2ann = $relationship->has_annotation ? $relationship->annotation : ''; - unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) { - warn sprintf( "Not overriding local relationship %s with global %s " - . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type, - @$v, $rel->reading_a, $rel->reading_b ); - next; - } - } - } - map { $self->_drop_collations( $_ ) } @$v; - $self->_set_relationship( $relationship, @$v ); - push( @pairs_set, $v ); + next if $v->[0] eq $source && $v->[1] eq $target; + next if $v->[1] eq $source && $v->[0] eq $target; + my @added = $self->add_relationship( @$v, $relationship ); + push( @pairs_set, @added ); } + # Finally, restore whatever collations we can, and return. + $self->_restore_collations( @$droppedcolls ); return @pairs_set; } @@ -432,7 +445,8 @@ a yes/no boolean and, if the answer is no, message gives the reason why. =cut sub relationship_valid { - my( $self, $source, $target, $rel ) = @_; + my( $self, $source, $target, $rel, $mustdrop ) = @_; + $mustdrop = [] unless $mustdrop; # in case we were passed nothing my $c = $self->collation; if ( $rel eq 'transposition' || $rel eq 'repetition' ) { # Check that the two readings do (for a repetition) or do not (for @@ -451,39 +465,35 @@ sub relationship_valid { : ( 0, "Readings occur only in distinct witnesses" ); } else { # Check that linking the source and target in a relationship won't lead - # to a path loop for any witness. If they have the same rank then fine. - return( 1, "ok" ) - if $c->reading( $source )->has_rank - && $c->reading( $target )->has_rank - && $c->reading( $source )->rank == $c->reading( $target )->rank; - - # Otherwise, first make a lookup table of all the - # readings related to either the source or the target. - my @proposed_related = ( $source, $target ); - # Drop the collation links of source and target, unless we want to - # add a collation relationship. - foreach my $r ( ( $source, $target ) ) { - $self->_drop_collations( $r ) unless $rel eq 'collated'; - push( @proposed_related, $self->related_readings( $r, 'colocated' ) ); - } - my %pr_ids; - map { $pr_ids{ $_ } = 1 } @proposed_related; - - # The cumulative predecessors and successors of the proposed-related readings - # should not overlap. - my %all_pred; - my %all_succ; - foreach my $pr ( keys %pr_ids ) { - map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr ); - map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr ); + # to a path loop for any witness. + # First, drop/stash any collations that might interfere + my $sourceobj = $c->reading( $source ); + my $targetobj = $c->reading( $target ); + my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1; + my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1; + unless( $rel eq 'collated' || $sourcerank == $targetrank ) { + push( @$mustdrop, $self->_drop_collations( $source ) ); + push( @$mustdrop, $self->_drop_collations( $target ) ); } - foreach my $k ( keys %all_pred ) { - return( 0, "Relationship would create witness loop" ) - if exists $all_succ{$k}; + my $map = {}; + my( $startrank, $endrank ); + if( $c->end->has_rank ) { + my $cpred = $c->common_predecessor( $source, $target ); + my $csucc = $c->common_successor( $source, $target ); + $startrank = $cpred->rank; + $endrank = $csucc->rank; + unless( $rel eq 'collated' || $sourcerank == $targetrank ) { + foreach my $rk ( $startrank+1 .. $endrank-1 ) { + map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) } + $c->readings_at_rank( $rk ); + } + } } - foreach my $k ( keys %pr_ids ) { - return( 0, "Relationship would create witness loop" ) - if exists $all_pred{$k} || exists $all_succ{$k}; + my $eqgraph = $c->equivalence_graph( $map, $startrank, $endrank, + $source, $target ); + if( $eqgraph->has_a_cycle ) { + $self->_restore_collations( @$mustdrop ); + return( 0, "Relationship would create witness loop" ); } return ( 1, "ok" ); } @@ -491,11 +501,25 @@ sub relationship_valid { sub _drop_collations { my( $self, $reading ) = @_; + my @dropped; foreach my $n ( $self->graph->neighbors( $reading ) ) { if( $self->get_relationship( $reading, $n )->type eq 'collated' ) { + push( @dropped, [ $reading, $n ] ); $self->del_relationship( $reading, $n ); } } + return @dropped; +} + +sub _restore_collations { + my( $self, @vectors ) = @_; + foreach my $v ( @vectors ) { + try { + $self->add_relationship( @$v, { 'type' => 'collated' } ); + } catch { + print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n"; + } + } } =head2 related_readings( $reading, $filter ) @@ -590,7 +614,8 @@ sub _as_graphml { # Add the vertices according to their XML IDs my %rdg_lookup = ( reverse %$node_hash ); - foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) { + my @nlist = sort _by_xmlid keys( %rdg_lookup ); + foreach my $n ( @nlist ) { my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' ); $n_el->setAttribute( 'id', $n ); _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} ); diff --git a/t/text_tradition_collation_relationshipstore.t b/t/text_tradition_collation_relationshipstore.t index 320832e..48b6423 100644 --- a/t/text_tradition_collation_relationshipstore.t +++ b/t/text_tradition_collation_relationshipstore.t @@ -49,6 +49,8 @@ my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.x # Test 1: try to equate nodes that are prevented with an intermediate collation ok( $t1, "Parsed test fragment file" ); my $c1 = $t1->collation; +## HACK +$c1->calculate_ranks(); my $trel = $c1->get_relationship( '9,2', '9,3' ); is( ref( $trel ), 'Text::Tradition::Collation::Relationship', "Troublesome relationship exists" ); @@ -75,6 +77,8 @@ try { my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' ); # Test 1: try to equate nodes that are prevented with an intermediate collation my $c2 = $t2->collation; +## HACK +$c2->calculate_ranks(); $c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } ); my $trel2 = $c2->get_relationship( '9,2', '9,3' ); is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',