1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 equivalence_graph()
90 Returns an equivalence graph of the collation, in which all readings
91 related via a 'colocated' relationship are transformed into a single
92 vertex. Can be used to determine the validity of a new relationship.
96 has 'equivalence_graph' => (
99 default => sub { Graph->new() },
100 writer => '_reset_equivalence',
103 has '_node_equivalences' => (
107 equivalence => 'get',
108 set_equivalence => 'set',
109 remove_equivalence => 'delete',
113 has '_equivalence_readings' => (
118 set_eqreadings => 'set',
119 remove_eqreadings => 'delete',
123 around add_reading => sub {
127 $self->equivalence_graph->add_vertex( @_ );
128 $self->set_equivalence( $_[0], $_[0] );
129 $self->set_eqreadings( $_[0], [ $_[0] ] );
133 around delete_reading => sub {
137 $self->_remove_equivalence_node( @_ );
141 =head2 get_relationship
143 Return the relationship object, if any, that exists between two readings.
147 sub get_relationship {
150 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
151 # Dereference the edge arrayref that was passed.
158 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
159 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
161 return $relationship;
164 sub _set_relationship {
165 my( $self, $relationship, @vector ) = @_;
166 $self->graph->add_edge( @vector );
167 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
168 $self->_make_equivalence( @vector ) if $relationship->colocated;
173 Create a new relationship with the given options and return it.
174 Warn and return undef if the relationship cannot be created.
179 my( $self, $options ) = @_;
180 # Check to see if a relationship exists between the two given readings
181 my $source = delete $options->{'orig_a'};
182 my $target = delete $options->{'orig_b'};
183 my $rel = $self->get_relationship( $source, $target );
185 if( $rel->type eq 'collated' ) {
186 # Always replace a 'collated' relationship with a more descriptive
188 $self->del_relationship( $source, $target );
189 } elsif( $rel->type ne $options->{'type'} ) {
190 throw( "Another relationship of type " . $rel->type
191 . " already exists between $source and $target" );
197 # Check to see if a nonlocal relationship is defined for the two readings
198 $rel = $self->scoped_relationship( $options->{'reading_a'},
199 $options->{'reading_b'} );
200 if( $rel && $rel->type eq $options->{'type'} ) {
203 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
205 $rel = Text::Tradition::Collation::Relationship->new( $options );
206 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
211 =head2 add_scoped_relationship( $rel )
213 Keep track of relationships defined between specific readings that are scoped
214 non-locally. Key on whichever reading occurs first alphabetically.
218 sub add_scoped_relationship {
219 my( $self, $rel ) = @_;
220 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
221 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
222 my $r = $self->scoped_relationship( $rdga, $rdgb );
224 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
225 $r->type, $rdga, $rdgb );
228 my( $first, $second ) = sort ( $rdga, $rdgb );
229 $self->scopedrels->{$first}->{$second} = $rel;
232 =head2 scoped_relationship( $reading_a, $reading_b )
234 Returns the general (document-level or global) relationship that has been defined
235 between the two reading strings. Returns undef if there is no general relationship.
239 sub scoped_relationship {
240 my( $self, $rdga, $rdgb ) = @_;
241 my( $first, $second ) = sort( $rdga, $rdgb );
242 if( exists $self->scopedrels->{$first}->{$second} ) {
243 return $self->scopedrels->{$first}->{$second};
249 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
251 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
252 for the possible options) between the readings given in $source and $target. Sets
253 up a scoped relationship between $sourcetext and $targettext if the relationship is
256 Returns a status boolean and a list of all reading pairs connected by the call to
264 my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
265 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
266 ok( $t1, "Parsed test fragment file" );
267 my $c1 = $t1->collation;
268 my $trel = $c1->get_relationship( '9,2', '9,3' );
269 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
270 "Troublesome relationship exists" );
271 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
273 # Try to make the link we want
275 $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
276 ok( 1, "Added cross-collation relationship as expected" );
277 } catch( Text::Tradition::Error $e ) {
278 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
282 $c1->calculate_ranks();
283 ok( 1, "Successfully calculated ranks" );
284 } catch ( Text::Tradition::Error $e ) {
285 ok( 0, "Collation now has a cycle: " . $e->message );
288 # Test 1.2: attempt merge of an identical reading
290 $c1->merge_readings( '9,3', '11,5' );
291 ok( 1, "Successfully merged reading 'pontifex'" );
292 } catch ( Text::Tradition::Error $e ) {
293 ok( 0, "Merge of mergeable readings failed: $e->message" );
297 # Test 1.3: attempt relationship with a meta reading (should fail)
299 $c1->add_relationship( '8,1', '9,2', { 'type' => 'collated' } );
300 ok( 0, "Allowed a meta-reading to be used in a relationship" );
301 } catch ( Text::Tradition::Error $e ) {
302 is( $e->message, 'Cannot set relationship on a meta reading',
303 "Relationship link prevented for a meta reading" );
306 # Test 2.1: try to equate nodes that are prevented with a real intermediate
308 my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
309 my $c2 = $t2->collation;
310 $c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
311 my $trel2 = $c2->get_relationship( '9,2', '9,3' );
312 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
313 "Created blocking relationship" );
314 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
315 # This time the link ought to fail
317 $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
318 ok( 0, "Added cross-equivalent bad relationship" );
319 } catch ( Text::Tradition::Error $e ) {
320 like( $e->message, qr/witness loop/,
321 "Existing equivalence blocked crossing relationship" );
325 $c2->calculate_ranks();
326 ok( 1, "Successfully calculated ranks" );
327 } catch ( Text::Tradition::Error $e ) {
328 ok( 0, "Collation now has a cycle: " . $e->message );
331 # Test 3.1: make a straightforward pair of transpositions.
332 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
333 # Test 1: try to equate nodes that are prevented with an intermediate collation
334 my $c3 = $t3->collation;
336 $c3->add_relationship( '36,4', '38,3', { 'type' => 'transposition' } );
337 ok( 1, "Added straightforward transposition" );
338 } catch ( Text::Tradition::Error $e ) {
339 ok( 0, "Failed to add normal transposition: " . $e->message );
342 $c3->add_relationship( '36,3', '38,2', { 'type' => 'transposition' } );
343 ok( 1, "Added straightforward transposition complement" );
344 } catch ( Text::Tradition::Error $e ) {
345 ok( 0, "Failed to add normal transposition complement: " . $e->message );
348 # Test 3.2: try to make a transposition that could be a parallel.
350 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
351 ok( 0, "Added bad colocated transposition" );
352 } catch ( Text::Tradition::Error $e ) {
353 like( $e->message, qr/Readings appear to be colocated/,
354 "Prevented bad colocated transposition" );
357 # Test 3.3: make the parallel, and then make the transposition again.
359 $c3->add_relationship( '28,3', '29,3', { 'type' => 'orthographic' } );
360 ok( 1, "Equated identical readings for transposition" );
361 } catch ( Text::Tradition::Error $e ) {
362 ok( 0, "Failed to equate identical readings: " . $e->message );
365 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
366 ok( 1, "Added straightforward transposition complement" );
367 } catch ( Text::Tradition::Error $e ) {
368 ok( 0, "Failed to add normal transposition complement: " . $e->message );
375 sub add_relationship {
376 my( $self, $source, $target, $options ) = @_;
377 my $c = $self->collation;
378 my $sourceobj = $c->reading( $source );
379 my $targetobj = $c->reading( $target );
380 throw( "Adding self relationship at $source" ) if $source eq $target;
381 throw( "Cannot set relationship on a meta reading" )
382 if( $sourceobj->is_meta || $targetobj->is_meta );
385 my $droppedcolls = [];
386 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
387 $relationship = $options;
388 $thispaironly = 1; # If existing rel, set only where asked.
391 $options->{'scope'} = 'local' unless $options->{'scope'};
392 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
393 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
395 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
396 $options->{'type'}, $droppedcolls );
397 unless( $is_valid ) {
398 throw( "Invalid relationship: $reason" );
401 # Try to create the relationship object.
402 $options->{'reading_a'} = $sourceobj->text;
403 $options->{'reading_b'} = $targetobj->text;
404 $options->{'orig_a'} = $source;
405 $options->{'orig_b'} = $target;
406 if( $options->{'scope'} ne 'local' ) {
407 # Is there a relationship with this a & b already?
408 # Case-insensitive for non-orthographics.
409 my $rdga = $options->{'type'} eq 'orthographic'
410 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
411 my $rdgb = $options->{'type'} eq 'orthographic'
412 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
413 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
414 if( $otherrel && $otherrel->type eq $options->{type}
415 && $otherrel->scope eq $options->{scope} ) {
416 warn "Applying existing scoped relationship";
417 $relationship = $otherrel;
420 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
424 # Find all the pairs for which we need to set the relationship.
426 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
427 push( @vectors, $self->_find_applicable( $relationship ) );
430 # Now set the relationship(s).
432 my $rel = $self->get_relationship( $source, $target );
434 if( $rel && $rel ne $relationship ) {
435 if( $rel->nonlocal ) {
436 throw( "Found conflicting relationship at $source - $target" );
437 } elsif( $rel->type ne 'collated' ) {
438 # Replace a collation relationship; leave any other sort in place.
439 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
440 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
441 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
442 warn sprintf( "Not overriding local relationship %s with global %s "
443 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
444 $source, $target, $rel->reading_a, $rel->reading_b );
449 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
450 push( @pairs_set, [ $source, $target ] );
452 # Set any additional relationships that might be in @vectors.
453 foreach my $v ( @vectors ) {
454 next if $v->[0] eq $source && $v->[1] eq $target;
455 next if $v->[1] eq $source && $v->[0] eq $target;
456 my @added = $self->add_relationship( @$v, $relationship );
457 push( @pairs_set, @added );
460 # Finally, restore whatever collations we can, and return.
461 $self->_restore_collations( @$droppedcolls );
465 =head2 del_scoped_relationship( $reading_a, $reading_b )
467 Returns the general (document-level or global) relationship that has been defined
468 between the two reading strings. Returns undef if there is no general relationship.
472 sub del_scoped_relationship {
473 my( $self, $rdga, $rdgb ) = @_;
474 my( $first, $second ) = sort( $rdga, $rdgb );
475 return delete $self->scopedrels->{$first}->{$second};
478 sub _find_applicable {
479 my( $self, $rel ) = @_;
480 my $c = $self->collation;
481 # TODO Someday we might use a case sensitive language.
482 my $lang = $c->tradition->language;
484 my @identical_readings;
485 if( $rel->type eq 'orthographic' ) {
486 @identical_readings = grep { $_->text eq $rel->reading_a }
489 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
492 foreach my $ir ( @identical_readings ) {
494 if( $rel->type eq 'orthographic' ) {
495 @itarget = grep { $_->rank == $ir->rank
496 && $_->text eq $rel->reading_b } $c->readings;
498 @itarget = grep { $_->rank == $ir->rank
499 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
502 # Warn if there is more than one hit with no orth link between them.
503 my $itmain = shift @itarget;
506 map { $all_targets{$_} = 1 } @itarget;
507 map { delete $all_targets{$_} }
508 $self->related_readings( $itmain,
509 sub { $_[0]->type eq 'orthographic' } );
510 warn "More than one unrelated reading with text " . $itmain->text
511 . " at rank " . $ir->rank . "!" if keys %all_targets;
513 push( @vectors, [ $ir->id, $itmain->id ] );
519 =head2 del_relationship( $source, $target )
521 Removes the relationship between the given readings. If the relationship is
522 non-local, removes the relationship everywhere in the graph.
526 sub del_relationship {
527 my( $self, $source, $target ) = @_;
528 my $rel = $self->get_relationship( $source, $target );
529 return () unless $rel; # Nothing to delete; return an empty set.
530 my $colo = $rel->colocated;
531 my @vectors = ( [ $source, $target ] );
532 $self->_remove_relationship( $colo, $source, $target );
533 if( $rel->nonlocal ) {
534 # Remove the relationship wherever it occurs.
535 # Remove the relationship wherever it occurs.
536 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
537 $self->relationships;
538 foreach my $re ( @rel_edges ) {
539 $self->_remove_relationship( $colo, @$re );
540 push( @vectors, $re );
542 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
547 sub _remove_relationship {
548 my( $self, $equiv, @vector ) = @_;
549 $self->graph->delete_edge( @vector );
550 $self->_break_equivalence( @vector ) if $equiv;
553 =head2 relationship_valid( $source, $target, $type )
555 Checks whether a relationship of type $type may exist between the readings given
556 in $source and $target. Returns a tuple of ( status, message ) where status is
557 a yes/no boolean and, if the answer is no, message gives the reason why.
561 sub relationship_valid {
562 my( $self, $source, $target, $rel, $mustdrop ) = @_;
563 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
564 my $c = $self->collation;
565 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
566 # Check that the two readings do (for a repetition) or do not (for
567 # a transposition) appear in the same witness.
568 # TODO this might be called before witness paths are set...
570 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
571 foreach my $w ( $c->reading_witnesses( $target ) ) {
572 if( $seen_wits{$w} ) {
573 return ( 0, "Readings both occur in witness $w" )
574 if $rel eq 'transposition';
575 return ( 1, "ok" ) if $rel eq 'repetition';
578 return ( 0, "Readings occur only in distinct witnesses" )
579 if $rel eq 'repetition';
581 if ( $rel eq 'transposition' ) {
582 # We also need to check both that the readings occur in distinct
583 # witnesses, and that they are not in the same place. That is,
584 # proposing to link them should cause a witness loop.
585 if( $self->test_equivalence( $source, $target ) ) {
586 return ( 0, "Readings appear to be colocated, not transposed" );
591 } elsif( $rel ne 'repetition' ) {
592 # Check that linking the source and target in a relationship won't lead
593 # to a path loop for any witness.
594 # First, drop/stash any collations that might interfere
595 my $sourceobj = $c->reading( $source );
596 my $targetobj = $c->reading( $target );
597 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
598 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
599 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
600 push( @$mustdrop, $self->_drop_collations( $source ) );
601 push( @$mustdrop, $self->_drop_collations( $target ) );
602 if( $c->end->has_rank ) {
603 foreach my $rk ( $sourcerank .. $targetrank ) {
604 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
605 $c->readings_at_rank( $rk );
609 unless( $self->test_equivalence( $source, $target ) ) {
610 $self->_restore_collations( @$mustdrop );
611 return( 0, "Relationship would create witness loop" );
617 sub _drop_collations {
618 my( $self, $reading ) = @_;
620 foreach my $n ( $self->graph->neighbors( $reading ) ) {
621 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
622 push( @dropped, [ $reading, $n ] );
623 $self->del_relationship( $reading, $n );
624 #print STDERR "Dropped collation $reading -> $n\n";
630 sub _restore_collations {
631 my( $self, @vectors ) = @_;
632 foreach my $v ( @vectors ) {
634 $self->add_relationship( @$v, { 'type' => 'collated' } );
635 #print STDERR "Restored collation @$v\n";
637 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
642 =head2 filter_collations()
644 Utility function. Removes any redundant 'collated' relationships from the graph.
645 A collated relationship is redundant if the readings in question would occupy
646 the same rank regardless of the existence of the relationship.
650 sub filter_collations {
652 my $c = $self->collation;
653 foreach my $r ( 1 .. $c->end->rank - 1 ) {
656 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
657 next if $rdg->is_meta;
659 foreach my $pred ( $rdg->predecessors ) {
660 if( $pred->rank == $r - 1 ) {
662 $anchor = $rdg unless( $anchor );
666 push( @need_collations, $rdg ) unless $ip;
667 $c->relations->_drop_collations( "$rdg" );
670 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
671 unless $c->get_relationship( $anchor, $_ ) } @need_collations
672 : warn "No anchor found at $r";
676 =head2 related_readings( $reading, $filter )
678 Returns a list of readings that are connected via relationship links to $reading.
679 If $filter is set to a subroutine ref, returns only those related readings where
680 $filter( $relationship ) returns a true value.
684 sub related_readings {
685 my( $self, $reading, $filter ) = @_;
687 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
688 $reading = $reading->id;
694 if( $filter eq 'colocated' ) {
695 $filter = sub { $_[0]->colocated };
697 my %found = ( $reading => 1 );
698 my $check = [ $reading ];
702 foreach my $r ( @$check ) {
703 foreach my $nr ( $self->graph->neighbors( $r ) ) {
704 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
705 push( @$more, $nr ) unless exists $found{$nr};
712 delete $found{$reading};
713 @answer = keys %found;
715 @answer = $self->graph->all_reachable( $reading );
717 if( $return_object ) {
718 my $c = $self->collation;
719 return map { $c->reading( $_ ) } @answer;
725 =head2 merge_readings( $kept, $deleted );
727 Makes a best-effort merge of the relationship links between the given readings, and
728 stops tracking the to-be-deleted reading.
733 my( $self, $kept, $deleted, $combined ) = @_;
734 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
735 # Get the pair of kept / rel
736 my @vector = ( $kept );
737 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
738 next if $vector[0] eq $vector[1]; # Don't add a self loop
740 # If kept changes its text, drop the relationship.
743 # If kept / rel already has a relationship, just keep the old
744 my $rel = $self->get_relationship( @vector );
747 # Otherwise, adopt the relationship that would be deleted.
748 $rel = $self->get_relationship( @$edge );
749 $self->_set_relationship( $rel, @vector );
751 $self->_make_equivalence( $deleted, $kept );
754 ### Equivalence logic
756 sub _remove_equivalence_node {
757 my( $self, $node ) = @_;
758 my $group = $self->equivalence( $node );
759 my $nodelist = $self->eqreadings( $group );
760 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
761 print STDERR "Removing equivalence $group for $node\n" if $node eq '451,2';
762 $self->remove_eqreadings( $group );
763 } elsif( @$nodelist == 1 ) {
764 warn "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
765 " in group that should have only $node";
767 print STDERR "Removing $node from equivalence $group\n" if $node eq '451,2';
768 my @newlist = grep { $_ ne $node } @$nodelist;
769 $self->set_eqreadings( $group, \@newlist );
770 $self->remove_equivalence( $node );
774 =head2 add_equivalence_edge
776 Add an edge in the equivalence graph corresponding to $source -> $target in the
777 collation. Should only be called by Collation.
781 sub add_equivalence_edge {
782 my( $self, $source, $target ) = @_;
783 my $seq = $self->equivalence( $source );
784 my $teq = $self->equivalence( $target );
785 print STDERR "Adding equivalence edge $seq -> $teq for $source -> $target\n"
786 if grep { $_ eq '451,2' } @_;
787 $self->equivalence_graph->add_edge( $seq, $teq );
790 =head2 delete_equivalence_edge
792 Remove an edge in the equivalence graph corresponding to $source -> $target in the
793 collation. Should only be called by Collation.
797 sub delete_equivalence_edge {
798 my( $self, $source, $target ) = @_;
799 my $seq = $self->equivalence( $source );
800 my $teq = $self->equivalence( $target );
801 print STDERR "Deleting equivalence edge $seq -> $teq for $source -> $target\n"
802 if grep { $_ eq '451,2' } @_;
803 $self->equivalence_graph->delete_edge( $seq, $teq );
806 sub _is_disconnected {
808 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
809 || scalar $self->equivalence_graph->successorless_vertices > 1 );
812 # Equate two readings in the equivalence graph
813 sub _make_equivalence {
814 my( $self, $source, $target ) = @_;
815 # Get the source equivalent readings
816 my $seq = $self->equivalence( $source );
817 my $teq = $self->equivalence( $target );
818 # Nothing to do if they are already equivalent...
819 return if $seq eq $teq;
820 print STDERR "Making equivalence for $source -> $target\n"
821 if grep { $_ eq '451,2' } @_;
822 my $sourcepool = $self->eqreadings( $seq );
823 # and add them to the target readings.
824 print STDERR "Moving readings '@$sourcepool' from group $seq to $teq\n"
825 if grep { $_ eq '451,2' } @_;
826 push( @{$self->eqreadings( $teq )}, @$sourcepool );
827 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
828 # Then merge the nodes in the equivalence graph.
829 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
830 $self->equivalence_graph->add_edge( $pred, $teq );
832 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
833 $self->equivalence_graph->add_edge( $teq, $succ );
835 $self->equivalence_graph->delete_vertex( $seq );
836 # TODO enable this after collation parsing is done
837 # throw( "Graph got disconnected making $source / $target equivalence" )
838 # if $self->_is_disconnected;
841 =head2 test_equivalence
843 Test whether, if two readings were equated with a 'colocated' relationship,
844 the graph would still be valid.
848 sub test_equivalence {
849 my( $self, $source, $target ) = @_;
850 # Try merging the nodes in the equivalence graph; return a true value if
851 # no cycle is introduced thereby. Restore the original graph first.
853 # Keep track of edges we add
856 # Get the reading equivalents
857 my $seq = $self->equivalence( $source );
858 my $teq = $self->equivalence( $target );
859 # Maybe this is easy?
860 return 1 if $seq eq $teq;
862 # Save the first graph
863 my $checkstr = $self->equivalence_graph->stringify();
864 # Add and save relevant edges
865 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
866 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
867 $added_pred{$pred} = 0;
869 $self->equivalence_graph->add_edge( $pred, $teq );
870 $added_pred{$pred} = 1;
873 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
874 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
875 $added_succ{$succ} = 0;
877 $self->equivalence_graph->add_edge( $teq, $succ );
878 $added_succ{$succ} = 1;
881 # Delete source equivalent and test
882 $self->equivalence_graph->delete_vertex( $seq );
883 my $ret = !$self->equivalence_graph->has_a_cycle;
885 # Restore what we changed
886 $self->equivalence_graph->add_vertex( $seq );
887 foreach my $pred ( keys %added_pred ) {
888 $self->equivalence_graph->add_edge( $pred, $seq );
889 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
891 foreach my $succ ( keys %added_succ ) {
892 $self->equivalence_graph->add_edge( $seq, $succ );
893 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
895 unless( $self->equivalence_graph->eq( $checkstr ) ) {
896 warn "GRAPH CHANGED after testing";
902 # Unmake an equivalence link between two readings. Should only be called internally.
903 sub _break_equivalence {
904 my( $self, $source, $target ) = @_;
906 # This is the hard one. Need to reconstruct the equivalence groups without
909 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
910 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
911 # If these groups intersect, they are still connected; do nothing.
912 foreach my $el ( keys %tng ) {
913 if( exists $sng{$el} ) {
914 print STDERR "Equivalence break $source / $target is a noop\n"
915 if grep { $_ eq '451,2' } @_;
919 print STDERR "Breaking equivalence $source / $target\n"
920 if grep { $_ eq '451,2' } @_;
921 # If they don't intersect, then we split the nodes in the graph and in
922 # the hashes. First figure out which group has which name
923 my $oldgroup = $self->equivalence( $source ); # same as $target
924 my $keepsource = $sng{$oldgroup};
925 my $newgroup = $keepsource ? $target : $source;
926 my( $oldmembers, $newmembers );
928 $oldmembers = [ keys %sng ];
929 $newmembers = [ keys %tng ];
931 $oldmembers = [ keys %tng ];
932 $newmembers = [ keys %sng ];
935 # First alter the old group in the hash
936 $self->set_eqreadings( $oldgroup, $oldmembers );
937 foreach my $el ( @$oldmembers ) {
938 $self->set_equivalence( $el, $oldgroup );
941 # then add the new group back to the hash with its new key
942 $self->set_eqreadings( $newgroup, $newmembers );
943 foreach my $el ( @$newmembers ) {
944 $self->set_equivalence( $el, $newgroup );
947 # Now add the new group back to the equivalence graph
948 $self->equivalence_graph->add_vertex( $newgroup );
949 # ...add the appropriate edges to the source group vertext
950 my $c = $self->collation;
951 foreach my $rdg ( @$newmembers ) {
952 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
953 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
955 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
956 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
960 # ...and figure out which edges on the old group vertex to delete.
961 my( %old_pred, %old_succ );
962 foreach my $rdg ( @$oldmembers ) {
963 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
964 $old_pred{$self->equivalence( $rp )} = 1;
966 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
967 $old_succ{$self->equivalence( $rs )} = 1;
970 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
971 unless( $old_pred{$p} ) {
972 $self->equivalence_graph->delete_edge( $p, $oldgroup );
975 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
976 unless( $old_succ{$s} ) {
977 $self->equivalence_graph->delete_edge( $oldgroup, $s );
980 # TODO enable this after collation parsing is done
981 # throw( "Graph got disconnected breaking $source / $target equivalence" )
982 # if $self->_is_disconnected;
985 sub _find_equiv_without {
986 my( $self, $first, $second ) = @_;
987 my %found = ( $first => 1 );
988 my $check = [ $first ];
992 foreach my $r ( @$check ) {
993 foreach my $nr ( $self->graph->neighbors( $r ) ) {
994 next if $r eq $second;
995 if( $self->get_relationship( $r, $nr )->colocated ) {
996 push( @$more, $nr ) unless exists $found{$nr};
1006 =head2 rebuild_equivalence
1008 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1009 adds all readings and edges, then makes an equivalence for all relationships.
1013 sub rebuild_equivalence {
1015 my $newgraph = Graph->new();
1016 foreach my $r ( $self->collation->readings ) {
1017 $newgraph->add_vertex( $r->id );
1019 foreach my $e ( $self->collation->paths ) {
1020 $newgraph->add_edge( @$e );
1022 # Set this as the new equivalence graph
1023 $self->_reset_equivalence( $newgraph );
1025 # Now collapse some nodes. This does no testing; it assumes that all
1026 # preexisting relationships are valid.
1027 foreach my $rel ( $self->relationships ) {
1028 my $relobj = $self->get_relationship( $rel );
1029 next unless $relobj && $relobj->colocated;
1030 $self->_make_equivalence( @$rel );
1037 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1039 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1040 $rgraph->setAttribute( 'edgedefault', 'directed' );
1041 $rgraph->setAttribute( 'id', 'relationships', );
1042 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1043 $rgraph->setAttribute( 'parse.edges', 0 );
1044 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1045 $rgraph->setAttribute( 'parse.nodes', 0 );
1046 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1048 # Add the vertices according to their XML IDs
1049 my %rdg_lookup = ( reverse %$node_hash );
1050 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1051 my @nlist = sort keys( %rdg_lookup );
1052 foreach my $n ( @nlist ) {
1053 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1054 $n_el->setAttribute( 'id', $n );
1055 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1057 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1059 # Add the relationship edges, with their object information
1061 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1062 # Add an edge and fill in its relationship info.
1063 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1064 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1065 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1066 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1067 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1069 my $rel_obj = $self->get_relationship( @$e );
1070 foreach my $key ( keys %$edge_keys ) {
1071 my $value = $rel_obj->$key;
1072 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1076 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1084 return $tmp_a <=> $tmp_b;
1087 sub _add_graphml_data {
1088 my( $el, $key, $value ) = @_;
1089 return unless defined $value;
1090 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1091 $data_el->setAttribute( 'key', $key );
1092 $data_el->appendText( $value );
1096 Text::Tradition::Error->throw(
1097 'ident' => 'Relationship error',
1103 __PACKAGE__->meta->make_immutable;