1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 equivalence_graph()
90 Returns an equivalence graph of the collation, in which all readings
91 related via a 'colocated' relationship are transformed into a single
92 vertex. Can be used to determine the validity of a new relationship.
96 has 'equivalence_graph' => (
99 default => sub { Graph->new() },
102 has '_node_equivalences' => (
106 equivalence => 'get',
107 set_equivalence => 'set',
108 remove_equivalence => 'delete',
112 has '_equivalence_readings' => (
117 set_eqreadings => 'set',
118 remove_eqreadings => 'delete',
122 around add_reading => sub {
126 $self->equivalence_graph->add_vertex( @_ );
127 $self->set_equivalence( $_[0], $_[0] );
128 $self->set_eqreadings( $_[0], [ $_[0] ] );
132 around delete_reading => sub {
136 $self->_remove_equivalence_node( @_ );
140 =head2 get_relationship
142 Return the relationship object, if any, that exists between two readings.
146 sub get_relationship {
149 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
150 # Dereference the edge arrayref that was passed.
157 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
158 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
160 return $relationship;
163 sub _set_relationship {
164 my( $self, $relationship, @vector ) = @_;
165 $self->graph->add_edge( @vector );
166 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
167 $self->_make_equivalence( @vector ) if $relationship->colocated;
172 Create a new relationship with the given options and return it.
173 Warn and return undef if the relationship cannot be created.
178 my( $self, $options ) = @_;
179 # Check to see if a relationship exists between the two given readings
180 my $source = delete $options->{'orig_a'};
181 my $target = delete $options->{'orig_b'};
182 my $rel = $self->get_relationship( $source, $target );
184 if( $rel->type eq 'collated' ) {
185 # Always replace a 'collated' relationship with a more descriptive
187 $self->del_relationship( $source, $target );
188 } elsif( $rel->type ne $options->{'type'} ) {
189 throw( "Another relationship of type " . $rel->type
190 . " already exists between $source and $target" );
196 # Check to see if a nonlocal relationship is defined for the two readings
197 $rel = $self->scoped_relationship( $options->{'reading_a'},
198 $options->{'reading_b'} );
199 if( $rel && $rel->type eq $options->{'type'} ) {
202 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
204 $rel = Text::Tradition::Collation::Relationship->new( $options );
205 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
210 =head2 add_scoped_relationship( $rel )
212 Keep track of relationships defined between specific readings that are scoped
213 non-locally. Key on whichever reading occurs first alphabetically.
217 sub add_scoped_relationship {
218 my( $self, $rel ) = @_;
219 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
220 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
221 my $r = $self->scoped_relationship( $rdga, $rdgb );
223 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
224 $r->type, $rdga, $rdgb );
227 my( $first, $second ) = sort ( $rdga, $rdgb );
228 $self->scopedrels->{$first}->{$second} = $rel;
231 =head2 scoped_relationship( $reading_a, $reading_b )
233 Returns the general (document-level or global) relationship that has been defined
234 between the two reading strings. Returns undef if there is no general relationship.
238 sub scoped_relationship {
239 my( $self, $rdga, $rdgb ) = @_;
240 my( $first, $second ) = sort( $rdga, $rdgb );
241 if( exists $self->scopedrels->{$first}->{$second} ) {
242 return $self->scopedrels->{$first}->{$second};
248 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
250 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
251 for the possible options) between the readings given in $source and $target. Sets
252 up a scoped relationship between $sourcetext and $targettext if the relationship is
255 Returns a status boolean and a list of all reading pairs connected by the call to
263 my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
264 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
265 ok( $t1, "Parsed test fragment file" );
266 my $c1 = $t1->collation;
267 my $trel = $c1->get_relationship( '9,2', '9,3' );
268 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
269 "Troublesome relationship exists" );
270 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
272 # Try to make the link we want
274 $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
275 ok( 1, "Added cross-collation relationship as expected" );
276 } catch( Text::Tradition::Error $e ) {
277 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
281 $c1->calculate_ranks();
282 ok( 1, "Successfully calculated ranks" );
283 } catch ( Text::Tradition::Error $e ) {
284 ok( 0, "Collation now has a cycle: " . $e->message );
287 # Test 1.2: attempt merge of an identical reading
289 $c1->merge_readings( '9,3', '11,5' );
290 ok( 1, "Successfully merged reading 'pontifex'" );
291 } catch ( Text::Tradition::Error $e ) {
292 ok( 0, "Merge of mergeable readings failed: $e->message" );
296 # Test 1.3: attempt relationship with a meta reading (should fail)
298 $c1->add_relationship( '8,1', '9,2', { 'type' => 'collated' } );
299 ok( 0, "Allowed a meta-reading to be used in a relationship" );
300 } catch ( Text::Tradition::Error $e ) {
301 is( $e->message, 'Cannot set relationship on a meta reading',
302 "Relationship link prevented for a meta reading" );
305 # Test 2.1: try to equate nodes that are prevented with a real intermediate
307 my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
308 my $c2 = $t2->collation;
309 $c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
310 my $trel2 = $c2->get_relationship( '9,2', '9,3' );
311 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
312 "Created blocking relationship" );
313 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
314 # This time the link ought to fail
316 $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
317 ok( 0, "Added cross-equivalent bad relationship" );
318 } catch ( Text::Tradition::Error $e ) {
319 like( $e->message, qr/witness loop/,
320 "Existing equivalence blocked crossing relationship" );
324 $c2->calculate_ranks();
325 ok( 1, "Successfully calculated ranks" );
326 } catch ( Text::Tradition::Error $e ) {
327 ok( 0, "Collation now has a cycle: " . $e->message );
330 # Test 3.1: make a straightforward pair of transpositions.
331 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
332 # Test 1: try to equate nodes that are prevented with an intermediate collation
333 my $c3 = $t3->collation;
335 $c3->add_relationship( '36,4', '38,3', { 'type' => 'transposition' } );
336 ok( 1, "Added straightforward transposition" );
337 } catch ( Text::Tradition::Error $e ) {
338 ok( 0, "Failed to add normal transposition: " . $e->message );
341 $c3->add_relationship( '36,3', '38,2', { 'type' => 'transposition' } );
342 ok( 1, "Added straightforward transposition complement" );
343 } catch ( Text::Tradition::Error $e ) {
344 ok( 0, "Failed to add normal transposition complement: " . $e->message );
347 # Test 3.2: try to make a transposition that could be a parallel.
349 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
350 ok( 0, "Added bad colocated transposition" );
351 } catch ( Text::Tradition::Error $e ) {
352 like( $e->message, qr/Readings appear to be colocated/,
353 "Prevented bad colocated transposition" );
356 # Test 3.3: make the parallel, and then make the transposition again.
358 $c3->add_relationship( '28,3', '29,3', { 'type' => 'orthographic' } );
359 ok( 1, "Equated identical readings for transposition" );
360 } catch ( Text::Tradition::Error $e ) {
361 ok( 0, "Failed to equate identical readings: " . $e->message );
364 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
365 ok( 1, "Added straightforward transposition complement" );
366 } catch ( Text::Tradition::Error $e ) {
367 ok( 0, "Failed to add normal transposition complement: " . $e->message );
374 sub add_relationship {
375 my( $self, $source, $target, $options ) = @_;
376 my $c = $self->collation;
377 my $sourceobj = $c->reading( $source );
378 my $targetobj = $c->reading( $target );
379 throw( "Adding self relationship at $source" ) if $source eq $target;
380 throw( "Cannot set relationship on a meta reading" )
381 if( $sourceobj->is_meta || $targetobj->is_meta );
384 my $droppedcolls = [];
385 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
386 $relationship = $options;
387 $thispaironly = 1; # If existing rel, set only where asked.
390 $options->{'scope'} = 'local' unless $options->{'scope'};
391 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
392 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
394 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
395 $options->{'type'}, $droppedcolls );
396 unless( $is_valid ) {
397 throw( "Invalid relationship: $reason" );
400 # Try to create the relationship object.
401 $options->{'reading_a'} = $sourceobj->text;
402 $options->{'reading_b'} = $targetobj->text;
403 $options->{'orig_a'} = $source;
404 $options->{'orig_b'} = $target;
405 if( $options->{'scope'} ne 'local' ) {
406 # Is there a relationship with this a & b already?
407 # Case-insensitive for non-orthographics.
408 my $rdga = $options->{'type'} eq 'orthographic'
409 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
410 my $rdgb = $options->{'type'} eq 'orthographic'
411 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
412 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
413 if( $otherrel && $otherrel->type eq $options->{type}
414 && $otherrel->scope eq $options->{scope} ) {
415 warn "Applying existing scoped relationship";
416 $relationship = $otherrel;
419 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
423 # Find all the pairs for which we need to set the relationship.
425 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
426 push( @vectors, $self->_find_applicable( $relationship ) );
429 # Now set the relationship(s).
431 my $rel = $self->get_relationship( $source, $target );
433 if( $rel && $rel ne $relationship ) {
434 if( $rel->nonlocal ) {
435 throw( "Found conflicting relationship at $source - $target" );
436 } elsif( $rel->type ne 'collated' ) {
437 # Replace a collation relationship; leave any other sort in place.
438 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
439 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
440 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
441 warn sprintf( "Not overriding local relationship %s with global %s "
442 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
443 $source, $target, $rel->reading_a, $rel->reading_b );
448 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
449 push( @pairs_set, [ $source, $target ] );
451 # Set any additional relationships that might be in @vectors.
452 foreach my $v ( @vectors ) {
453 next if $v->[0] eq $source && $v->[1] eq $target;
454 next if $v->[1] eq $source && $v->[0] eq $target;
455 my @added = $self->add_relationship( @$v, $relationship );
456 push( @pairs_set, @added );
459 # Finally, restore whatever collations we can, and return.
460 $self->_restore_collations( @$droppedcolls );
464 =head2 del_scoped_relationship( $reading_a, $reading_b )
466 Returns the general (document-level or global) relationship that has been defined
467 between the two reading strings. Returns undef if there is no general relationship.
471 sub del_scoped_relationship {
472 my( $self, $rdga, $rdgb ) = @_;
473 my( $first, $second ) = sort( $rdga, $rdgb );
474 return delete $self->scopedrels->{$first}->{$second};
477 sub _find_applicable {
478 my( $self, $rel ) = @_;
479 my $c = $self->collation;
480 # TODO Someday we might use a case sensitive language.
481 my $lang = $c->tradition->language;
483 my @identical_readings;
484 if( $rel->type eq 'orthographic' ) {
485 @identical_readings = grep { $_->text eq $rel->reading_a }
488 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
491 foreach my $ir ( @identical_readings ) {
493 if( $rel->type eq 'orthographic' ) {
494 @itarget = grep { $_->rank == $ir->rank
495 && $_->text eq $rel->reading_b } $c->readings;
497 @itarget = grep { $_->rank == $ir->rank
498 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
501 # Warn if there is more than one hit with no orth link between them.
502 my $itmain = shift @itarget;
505 map { $all_targets{$_} = 1 } @itarget;
506 map { delete $all_targets{$_} }
507 $self->related_readings( $itmain,
508 sub { $_[0]->type eq 'orthographic' } );
509 warn "More than one unrelated reading with text " . $itmain->text
510 . " at rank " . $ir->rank . "!" if keys %all_targets;
512 push( @vectors, [ $ir->id, $itmain->id ] );
518 =head2 del_relationship( $source, $target )
520 Removes the relationship between the given readings. If the relationship is
521 non-local, removes the relationship everywhere in the graph.
525 sub del_relationship {
526 my( $self, $source, $target ) = @_;
527 my $rel = $self->get_relationship( $source, $target );
528 return () unless $rel; # Nothing to delete; return an empty set.
529 my $colo = $rel->colocated;
530 my @vectors = ( [ $source, $target ] );
531 $self->_remove_relationship( $colo, $source, $target );
532 if( $rel->nonlocal ) {
533 # Remove the relationship wherever it occurs.
534 # Remove the relationship wherever it occurs.
535 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
536 $self->relationships;
537 foreach my $re ( @rel_edges ) {
538 $self->_remove_relationship( $colo, @$re );
539 push( @vectors, $re );
541 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
546 sub _remove_relationship {
547 my( $self, $equiv, @vector ) = @_;
548 $self->graph->delete_edge( @vector );
549 $self->_break_equivalence( @vector ) if $equiv;
552 =head2 relationship_valid( $source, $target, $type )
554 Checks whether a relationship of type $type may exist between the readings given
555 in $source and $target. Returns a tuple of ( status, message ) where status is
556 a yes/no boolean and, if the answer is no, message gives the reason why.
560 sub relationship_valid {
561 my( $self, $source, $target, $rel, $mustdrop ) = @_;
562 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
563 my $c = $self->collation;
564 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
565 # Check that the two readings do (for a repetition) or do not (for
566 # a transposition) appear in the same witness.
567 # TODO this might be called before witness paths are set...
569 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
570 foreach my $w ( $c->reading_witnesses( $target ) ) {
571 if( $seen_wits{$w} ) {
572 return ( 0, "Readings both occur in witness $w" )
573 if $rel eq 'transposition';
574 return ( 1, "ok" ) if $rel eq 'repetition';
577 return ( 0, "Readings occur only in distinct witnesses" )
578 if $rel eq 'repetition';
580 if ( $rel eq 'transposition' ) {
581 # We also need to check both that the readings occur in distinct
582 # witnesses, and that they are not in the same place. That is,
583 # proposing to link them should cause a witness loop.
584 if( $self->test_equivalence( $source, $target ) ) {
585 return ( 0, "Readings appear to be colocated, not transposed" );
590 } elsif( $rel ne 'repetition' ) {
591 # Check that linking the source and target in a relationship won't lead
592 # to a path loop for any witness.
593 # First, drop/stash any collations that might interfere
594 my $sourceobj = $c->reading( $source );
595 my $targetobj = $c->reading( $target );
596 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
597 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
598 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
599 push( @$mustdrop, $self->_drop_collations( $source ) );
600 push( @$mustdrop, $self->_drop_collations( $target ) );
601 if( $c->end->has_rank ) {
602 foreach my $rk ( $sourcerank .. $targetrank ) {
603 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
604 $c->readings_at_rank( $rk );
608 unless( $self->test_equivalence( $source, $target ) ) {
609 $self->_restore_collations( @$mustdrop );
610 return( 0, "Relationship would create witness loop" );
616 sub _drop_collations {
617 my( $self, $reading ) = @_;
619 foreach my $n ( $self->graph->neighbors( $reading ) ) {
620 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
621 push( @dropped, [ $reading, $n ] );
622 $self->del_relationship( $reading, $n );
623 #print STDERR "Dropped collation $reading -> $n\n";
629 sub _restore_collations {
630 my( $self, @vectors ) = @_;
631 foreach my $v ( @vectors ) {
633 $self->add_relationship( @$v, { 'type' => 'collated' } );
634 #print STDERR "Restored collation @$v\n";
636 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
641 =head2 filter_collations()
643 Utility function. Removes any redundant 'collated' relationships from the graph.
644 A collated relationship is redundant if the readings in question would occupy
645 the same rank regardless of the existence of the relationship.
649 sub filter_collations {
651 my $c = $self->collation;
652 foreach my $r ( 1 .. $c->end->rank - 1 ) {
655 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
656 next if $rdg->is_meta;
658 foreach my $pred ( $rdg->predecessors ) {
659 if( $pred->rank == $r - 1 ) {
661 $anchor = $rdg unless( $anchor );
665 push( @need_collations, $rdg ) unless $ip;
666 $c->relations->_drop_collations( "$rdg" );
669 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
670 unless $c->get_relationship( $anchor, $_ ) } @need_collations
671 : warn "No anchor found at $r";
675 =head2 related_readings( $reading, $filter )
677 Returns a list of readings that are connected via relationship links to $reading.
678 If $filter is set to a subroutine ref, returns only those related readings where
679 $filter( $relationship ) returns a true value.
683 sub related_readings {
684 my( $self, $reading, $filter ) = @_;
686 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
687 $reading = $reading->id;
693 if( $filter eq 'colocated' ) {
694 $filter = sub { $_[0]->colocated };
696 my %found = ( $reading => 1 );
697 my $check = [ $reading ];
701 foreach my $r ( @$check ) {
702 foreach my $nr ( $self->graph->neighbors( $r ) ) {
703 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
704 push( @$more, $nr ) unless exists $found{$nr};
711 delete $found{$reading};
712 @answer = keys %found;
714 @answer = $self->graph->all_reachable( $reading );
716 if( $return_object ) {
717 my $c = $self->collation;
718 return map { $c->reading( $_ ) } @answer;
724 =head2 merge_readings( $kept, $deleted );
726 Makes a best-effort merge of the relationship links between the given readings, and
727 stops tracking the to-be-deleted reading.
732 my( $self, $kept, $deleted, $combined ) = @_;
733 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
734 # Get the pair of kept / rel
735 my @vector = ( $kept );
736 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
737 next if $vector[0] eq $vector[1]; # Don't add a self loop
739 # If kept changes its text, drop the relationship.
742 # If kept / rel already has a relationship, just keep the old
743 my $rel = $self->get_relationship( @vector );
746 # Otherwise, adopt the relationship that would be deleted.
747 $rel = $self->get_relationship( @$edge );
748 $self->_set_relationship( $rel, @vector );
750 $self->_make_equivalence( $deleted, $kept );
753 ### Equivalence logic
755 sub _remove_equivalence_node {
756 my( $self, $node ) = @_;
757 my $group = $self->equivalence( $node );
758 my $nodelist = $self->eqreadings( $group );
759 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
760 print STDERR "Removing equivalence $group for $node\n" if $node eq '451,2';
761 $self->remove_eqreadings( $group );
762 } elsif( @$nodelist == 1 ) {
763 warn "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
764 " in group that should have only $node";
766 print STDERR "Removing $node from equivalence $group\n" if $node eq '451,2';
767 my @newlist = grep { $_ ne $node } @$nodelist;
768 $self->set_eqreadings( $group, \@newlist );
769 $self->remove_equivalence( $node );
773 =head2 add_equivalence_edge
775 Add an edge in the equivalence graph corresponding to $source -> $target in the
776 collation. Should only be called by Collation.
780 sub add_equivalence_edge {
781 my( $self, $source, $target ) = @_;
782 my $seq = $self->equivalence( $source );
783 my $teq = $self->equivalence( $target );
784 print STDERR "Adding equivalence edge $seq -> $teq for $source -> $target\n"
785 if grep { $_ eq '451,2' } @_;
786 $self->equivalence_graph->add_edge( $seq, $teq );
789 =head2 delete_equivalence_edge
791 Remove an edge in the equivalence graph corresponding to $source -> $target in the
792 collation. Should only be called by Collation.
796 sub delete_equivalence_edge {
797 my( $self, $source, $target ) = @_;
798 my $seq = $self->equivalence( $source );
799 my $teq = $self->equivalence( $target );
800 print STDERR "Deleting equivalence edge $seq -> $teq for $source -> $target\n"
801 if grep { $_ eq '451,2' } @_;
802 $self->equivalence_graph->delete_edge( $seq, $teq );
805 sub _is_disconnected {
807 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
808 || scalar $self->equivalence_graph->successorless_vertices > 1 );
811 # Equate two readings in the equivalence graph
812 sub _make_equivalence {
813 my( $self, $source, $target ) = @_;
814 # Get the source equivalent readings
815 my $seq = $self->equivalence( $source );
816 my $teq = $self->equivalence( $target );
817 # Nothing to do if they are already equivalent...
818 return if $seq eq $teq;
819 print STDERR "Making equivalence for $source -> $target\n"
820 if grep { $_ eq '451,2' } @_;
821 my $sourcepool = $self->eqreadings( $seq );
822 # and add them to the target readings.
823 print STDERR "Moving readings '@$sourcepool' from group $seq to $teq\n"
824 if grep { $_ eq '451,2' } @_;
825 push( @{$self->eqreadings( $teq )}, @$sourcepool );
826 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
827 # Then merge the nodes in the equivalence graph.
828 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
829 $self->equivalence_graph->add_edge( $pred, $teq );
831 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
832 $self->equivalence_graph->add_edge( $teq, $succ );
834 $self->equivalence_graph->delete_vertex( $seq );
835 # TODO enable this after collation parsing is done
836 # throw( "Graph got disconnected making $source / $target equivalence" )
837 # if $self->_is_disconnected;
840 =head2 test_equivalence
842 Test whether, if two readings were equated with a 'colocated' relationship,
843 the graph would still be valid.
847 sub test_equivalence {
848 my( $self, $source, $target ) = @_;
849 # Try merging the nodes in the equivalence graph; return a true value if
850 # no cycle is introduced thereby. Restore the original graph first.
852 # Keep track of edges we add
855 # Get the reading equivalents
856 my $seq = $self->equivalence( $source );
857 my $teq = $self->equivalence( $target );
858 # Maybe this is easy?
859 return 1 if $seq eq $teq;
861 # Save the first graph
862 my $checkstr = $self->equivalence_graph->stringify();
863 # Add and save relevant edges
864 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
865 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
866 $added_pred{$pred} = 0;
868 $self->equivalence_graph->add_edge( $pred, $teq );
869 $added_pred{$pred} = 1;
872 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
873 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
874 $added_succ{$succ} = 0;
876 $self->equivalence_graph->add_edge( $teq, $succ );
877 $added_succ{$succ} = 1;
880 # Delete source equivalent and test
881 $self->equivalence_graph->delete_vertex( $seq );
882 my $ret = !$self->equivalence_graph->has_a_cycle;
884 # Restore what we changed
885 $self->equivalence_graph->add_vertex( $seq );
886 foreach my $pred ( keys %added_pred ) {
887 $self->equivalence_graph->add_edge( $pred, $seq );
888 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
890 foreach my $succ ( keys %added_succ ) {
891 $self->equivalence_graph->add_edge( $seq, $succ );
892 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
894 unless( $self->equivalence_graph->eq( $checkstr ) ) {
895 warn "GRAPH CHANGED after testing";
901 # Unmake an equivalence link between two readings. Should only be called internally.
902 sub _break_equivalence {
903 my( $self, $source, $target ) = @_;
905 # This is the hard one. Need to reconstruct the equivalence groups without
908 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
909 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
910 # If these groups intersect, they are still connected; do nothing.
911 foreach my $el ( keys %tng ) {
912 if( exists $sng{$el} ) {
913 print STDERR "Equivalence break $source / $target is a noop\n"
914 if grep { $_ eq '451,2' } @_;
918 print STDERR "Breaking equivalence $source / $target\n"
919 if grep { $_ eq '451,2' } @_;
920 # If they don't intersect, then we split the nodes in the graph and in
921 # the hashes. First figure out which group has which name
922 my $oldgroup = $self->equivalence( $source ); # same as $target
923 my $keepsource = $sng{$oldgroup};
924 my $newgroup = $keepsource ? $target : $source;
925 my( $oldmembers, $newmembers );
927 $oldmembers = [ keys %sng ];
928 $newmembers = [ keys %tng ];
930 $oldmembers = [ keys %tng ];
931 $newmembers = [ keys %sng ];
934 # First alter the old group in the hash
935 $self->set_eqreadings( $oldgroup, $oldmembers );
936 foreach my $el ( @$oldmembers ) {
937 $self->set_equivalence( $el, $oldgroup );
940 # then add the new group back to the hash with its new key
941 $self->set_eqreadings( $newgroup, $newmembers );
942 foreach my $el ( @$newmembers ) {
943 $self->set_equivalence( $el, $newgroup );
946 # Now add the new group back to the equivalence graph
947 $self->equivalence_graph->add_vertex( $newgroup );
948 # ...add the appropriate edges to the source group vertext
949 my $c = $self->collation;
950 foreach my $rdg ( @$newmembers ) {
951 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
952 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
954 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
955 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
959 # ...and figure out which edges on the old group vertex to delete.
960 my( %old_pred, %old_succ );
961 foreach my $rdg ( @$oldmembers ) {
962 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
963 $old_pred{$self->equivalence( $rp )} = 1;
965 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
966 $old_succ{$self->equivalence( $rs )} = 1;
969 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
970 unless( $old_pred{$p} ) {
971 $self->equivalence_graph->delete_edge( $p, $oldgroup );
974 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
975 unless( $old_succ{$s} ) {
976 $self->equivalence_graph->delete_edge( $oldgroup, $s );
979 # TODO enable this after collation parsing is done
980 # throw( "Graph got disconnected breaking $source / $target equivalence" )
981 # if $self->_is_disconnected;
984 sub _find_equiv_without {
985 my( $self, $first, $second ) = @_;
986 my %found = ( $first => 1 );
987 my $check = [ $first ];
991 foreach my $r ( @$check ) {
992 foreach my $nr ( $self->graph->neighbors( $r ) ) {
993 next if $r eq $second;
994 if( $self->get_relationship( $r, $nr )->colocated ) {
995 push( @$more, $nr ) unless exists $found{$nr};
1008 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1010 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1011 $rgraph->setAttribute( 'edgedefault', 'directed' );
1012 $rgraph->setAttribute( 'id', 'relationships', );
1013 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1014 $rgraph->setAttribute( 'parse.edges', 0 );
1015 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1016 $rgraph->setAttribute( 'parse.nodes', 0 );
1017 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1019 # Add the vertices according to their XML IDs
1020 my %rdg_lookup = ( reverse %$node_hash );
1021 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1022 my @nlist = sort keys( %rdg_lookup );
1023 foreach my $n ( @nlist ) {
1024 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1025 $n_el->setAttribute( 'id', $n );
1026 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1028 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1030 # Add the relationship edges, with their object information
1032 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1033 # Add an edge and fill in its relationship info.
1034 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1035 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1036 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1037 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1038 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1040 my $rel_obj = $self->get_relationship( @$e );
1041 foreach my $key ( keys %$edge_keys ) {
1042 my $value = $rel_obj->$key;
1043 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1047 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1055 return $tmp_a <=> $tmp_b;
1058 sub _add_graphml_data {
1059 my( $el, $key, $value ) = @_;
1060 return unless defined $value;
1061 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1062 $data_el->setAttribute( 'key', $key );
1063 $data_el->appendText( $value );
1067 Text::Tradition::Error->throw(
1068 'ident' => 'Relationship error',
1074 __PACKAGE__->meta->make_immutable;