1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 equivalence_graph()
90 Returns an equivalence graph of the collation, in which all readings
91 related via a 'colocated' relationship are transformed into a single
92 vertex. Can be used to determine the validity of a new relationship.
96 has 'equivalence_graph' => (
99 default => sub { Graph->new() },
100 writer => '_reset_equivalence',
103 has '_node_equivalences' => (
107 equivalence => 'get',
108 set_equivalence => 'set',
109 remove_equivalence => 'delete',
110 _clear_equivalence => 'clear',
114 has '_equivalence_readings' => (
119 set_eqreadings => 'set',
120 remove_eqreadings => 'delete',
121 _clear_eqreadings => 'clear',
125 around add_reading => sub {
129 $self->equivalence_graph->add_vertex( @_ );
130 $self->set_equivalence( $_[0], $_[0] );
131 $self->set_eqreadings( $_[0], [ $_[0] ] );
135 around delete_reading => sub {
139 $self->_remove_equivalence_node( @_ );
143 =head2 get_relationship
145 Return the relationship object, if any, that exists between two readings.
149 sub get_relationship {
152 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
153 # Dereference the edge arrayref that was passed.
160 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
161 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
163 return $relationship;
166 sub _set_relationship {
167 my( $self, $relationship, @vector ) = @_;
168 $self->graph->add_edge( @vector );
169 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
170 $self->_make_equivalence( @vector ) if $relationship->colocated;
175 Create a new relationship with the given options and return it.
176 Warn and return undef if the relationship cannot be created.
181 my( $self, $options ) = @_;
182 # Check to see if a relationship exists between the two given readings
183 my $source = delete $options->{'orig_a'};
184 my $target = delete $options->{'orig_b'};
185 my $rel = $self->get_relationship( $source, $target );
187 if( $rel->type eq 'collated' ) {
188 # Always replace a 'collated' relationship with a more descriptive
190 $self->del_relationship( $source, $target );
191 } elsif( $rel->type ne $options->{'type'} ) {
192 throw( "Another relationship of type " . $rel->type
193 . " already exists between $source and $target" );
199 # Check to see if a nonlocal relationship is defined for the two readings
200 $rel = $self->scoped_relationship( $options->{'reading_a'},
201 $options->{'reading_b'} );
202 if( $rel && $rel->type eq $options->{'type'} ) {
205 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
207 $rel = Text::Tradition::Collation::Relationship->new( $options );
208 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
213 =head2 add_scoped_relationship( $rel )
215 Keep track of relationships defined between specific readings that are scoped
216 non-locally. Key on whichever reading occurs first alphabetically.
220 sub add_scoped_relationship {
221 my( $self, $rel ) = @_;
222 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
223 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
224 my $r = $self->scoped_relationship( $rdga, $rdgb );
226 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
227 $r->type, $rdga, $rdgb );
230 my( $first, $second ) = sort ( $rdga, $rdgb );
231 $self->scopedrels->{$first}->{$second} = $rel;
234 =head2 scoped_relationship( $reading_a, $reading_b )
236 Returns the general (document-level or global) relationship that has been defined
237 between the two reading strings. Returns undef if there is no general relationship.
241 sub scoped_relationship {
242 my( $self, $rdga, $rdgb ) = @_;
243 my( $first, $second ) = sort( $rdga, $rdgb );
244 if( exists $self->scopedrels->{$first}->{$second} ) {
245 return $self->scopedrels->{$first}->{$second};
251 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
253 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
254 for the possible options) between the readings given in $source and $target. Sets
255 up a scoped relationship between $sourcetext and $targettext if the relationship is
258 Returns a status boolean and a list of all reading pairs connected by the call to
266 my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
267 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
268 ok( $t1, "Parsed test fragment file" );
269 my $c1 = $t1->collation;
270 my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
271 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
272 "Troublesome relationship exists" );
273 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
275 # Try to make the link we want
277 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
278 ok( 1, "Added cross-collation relationship as expected" );
279 } catch( Text::Tradition::Error $e ) {
280 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
284 $c1->calculate_ranks();
285 ok( 1, "Successfully calculated ranks" );
286 } catch ( Text::Tradition::Error $e ) {
287 ok( 0, "Collation now has a cycle: " . $e->message );
290 # Test 1.2: attempt merge of an identical reading
292 $c1->merge_readings( 'r9.3', 'r11.5' );
293 ok( 1, "Successfully merged reading 'pontifex'" );
294 } catch ( Text::Tradition::Error $e ) {
295 ok( 0, "Merge of mergeable readings failed: $e->message" );
299 # Test 1.3: attempt relationship with a meta reading (should fail)
301 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
302 ok( 0, "Allowed a meta-reading to be used in a relationship" );
303 } catch ( Text::Tradition::Error $e ) {
304 is( $e->message, 'Cannot set relationship on a meta reading',
305 "Relationship link prevented for a meta reading" );
308 # Test 2.1: try to equate nodes that are prevented with a real intermediate
310 my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
311 my $c2 = $t2->collation;
312 $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
313 my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
314 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
315 "Created blocking relationship" );
316 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
317 # This time the link ought to fail
319 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
320 ok( 0, "Added cross-equivalent bad relationship" );
321 } catch ( Text::Tradition::Error $e ) {
322 like( $e->message, qr/witness loop/,
323 "Existing equivalence blocked crossing relationship" );
327 $c2->calculate_ranks();
328 ok( 1, "Successfully calculated ranks" );
329 } catch ( Text::Tradition::Error $e ) {
330 ok( 0, "Collation now has a cycle: " . $e->message );
333 # Test 3.1: make a straightforward pair of transpositions.
334 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
335 # Test 1: try to equate nodes that are prevented with an intermediate collation
336 my $c3 = $t3->collation;
338 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
339 ok( 1, "Added straightforward transposition" );
340 } catch ( Text::Tradition::Error $e ) {
341 ok( 0, "Failed to add normal transposition: " . $e->message );
344 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
345 ok( 1, "Added straightforward transposition complement" );
346 } catch ( Text::Tradition::Error $e ) {
347 ok( 0, "Failed to add normal transposition complement: " . $e->message );
350 # Test 3.2: try to make a transposition that could be a parallel.
352 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
353 ok( 0, "Added bad colocated transposition" );
354 } catch ( Text::Tradition::Error $e ) {
355 like( $e->message, qr/Readings appear to be colocated/,
356 "Prevented bad colocated transposition" );
359 # Test 3.3: make the parallel, and then make the transposition again.
361 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
362 ok( 1, "Equated identical readings for transposition" );
363 } catch ( Text::Tradition::Error $e ) {
364 ok( 0, "Failed to equate identical readings: " . $e->message );
367 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
368 ok( 1, "Added straightforward transposition complement" );
369 } catch ( Text::Tradition::Error $e ) {
370 ok( 0, "Failed to add normal transposition complement: " . $e->message );
377 sub add_relationship {
378 my( $self, $source, $target, $options ) = @_;
379 my $c = $self->collation;
380 my $sourceobj = $c->reading( $source );
381 my $targetobj = $c->reading( $target );
382 throw( "Adding self relationship at $source" ) if $source eq $target;
383 throw( "Cannot set relationship on a meta reading" )
384 if( $sourceobj->is_meta || $targetobj->is_meta );
387 my $droppedcolls = [];
388 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
389 $relationship = $options;
390 $thispaironly = 1; # If existing rel, set only where asked.
393 $options->{'scope'} = 'local' unless $options->{'scope'};
394 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
395 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
397 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
398 $options->{'type'}, $droppedcolls );
399 unless( $is_valid ) {
400 throw( "Invalid relationship: $reason" );
403 # Try to create the relationship object.
404 $options->{'reading_a'} = $sourceobj->text;
405 $options->{'reading_b'} = $targetobj->text;
406 $options->{'orig_a'} = $source;
407 $options->{'orig_b'} = $target;
408 if( $options->{'scope'} ne 'local' ) {
409 # Is there a relationship with this a & b already?
410 # Case-insensitive for non-orthographics.
411 my $rdga = $options->{'type'} eq 'orthographic'
412 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
413 my $rdgb = $options->{'type'} eq 'orthographic'
414 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
415 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
416 if( $otherrel && $otherrel->type eq $options->{type}
417 && $otherrel->scope eq $options->{scope} ) {
418 warn "Applying existing scoped relationship";
419 $relationship = $otherrel;
422 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
426 # Find all the pairs for which we need to set the relationship.
428 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
429 push( @vectors, $self->_find_applicable( $relationship ) );
432 # Now set the relationship(s).
434 my $rel = $self->get_relationship( $source, $target );
436 if( $rel && $rel ne $relationship ) {
437 if( $rel->nonlocal ) {
438 throw( "Found conflicting relationship at $source - $target" );
439 } elsif( $rel->type ne 'collated' ) {
440 # Replace a collation relationship; leave any other sort in place.
441 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
442 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
443 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
444 warn sprintf( "Not overriding local relationship %s with global %s "
445 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
446 $source, $target, $rel->reading_a, $rel->reading_b );
451 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
452 push( @pairs_set, [ $source, $target ] );
454 # Set any additional relationships that might be in @vectors.
455 foreach my $v ( @vectors ) {
456 next if $v->[0] eq $source && $v->[1] eq $target;
457 next if $v->[1] eq $source && $v->[0] eq $target;
458 my @added = $self->add_relationship( @$v, $relationship );
459 push( @pairs_set, @added );
462 # Finally, restore whatever collations we can, and return.
463 $self->_restore_collations( @$droppedcolls );
467 =head2 del_scoped_relationship( $reading_a, $reading_b )
469 Returns the general (document-level or global) relationship that has been defined
470 between the two reading strings. Returns undef if there is no general relationship.
474 sub del_scoped_relationship {
475 my( $self, $rdga, $rdgb ) = @_;
476 my( $first, $second ) = sort( $rdga, $rdgb );
477 return delete $self->scopedrels->{$first}->{$second};
480 sub _find_applicable {
481 my( $self, $rel ) = @_;
482 my $c = $self->collation;
483 # TODO Someday we might use a case sensitive language.
484 my $lang = $c->tradition->language;
486 my @identical_readings;
487 if( $rel->type eq 'orthographic' ) {
488 @identical_readings = grep { $_->text eq $rel->reading_a }
491 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
494 foreach my $ir ( @identical_readings ) {
496 if( $rel->type eq 'orthographic' ) {
497 @itarget = grep { $_->rank == $ir->rank
498 && $_->text eq $rel->reading_b } $c->readings;
500 @itarget = grep { $_->rank == $ir->rank
501 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
504 # Warn if there is more than one hit with no orth link between them.
505 my $itmain = shift @itarget;
508 map { $all_targets{$_} = 1 } @itarget;
509 map { delete $all_targets{$_} }
510 $self->related_readings( $itmain,
511 sub { $_[0]->type eq 'orthographic' } );
512 warn "More than one unrelated reading with text " . $itmain->text
513 . " at rank " . $ir->rank . "!" if keys %all_targets;
515 push( @vectors, [ $ir->id, $itmain->id ] );
521 =head2 del_relationship( $source, $target )
523 Removes the relationship between the given readings. If the relationship is
524 non-local, removes the relationship everywhere in the graph.
528 sub del_relationship {
529 my( $self, $source, $target ) = @_;
530 my $rel = $self->get_relationship( $source, $target );
531 return () unless $rel; # Nothing to delete; return an empty set.
532 my $colo = $rel->colocated;
533 my @vectors = ( [ $source, $target ] );
534 $self->_remove_relationship( $colo, $source, $target );
535 if( $rel->nonlocal ) {
536 # Remove the relationship wherever it occurs.
537 # Remove the relationship wherever it occurs.
538 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
539 $self->relationships;
540 foreach my $re ( @rel_edges ) {
541 $self->_remove_relationship( $colo, @$re );
542 push( @vectors, $re );
544 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
549 sub _remove_relationship {
550 my( $self, $equiv, @vector ) = @_;
551 $self->graph->delete_edge( @vector );
552 $self->_break_equivalence( @vector ) if $equiv;
555 =head2 relationship_valid( $source, $target, $type )
557 Checks whether a relationship of type $type may exist between the readings given
558 in $source and $target. Returns a tuple of ( status, message ) where status is
559 a yes/no boolean and, if the answer is no, message gives the reason why.
563 sub relationship_valid {
564 my( $self, $source, $target, $rel, $mustdrop ) = @_;
565 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
566 my $c = $self->collation;
567 ## Assume validity is okay if we are initializing from scratch.
568 return ( 1, "initializing" ) unless $c->tradition->_initialized;
570 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
571 # Check that the two readings do (for a repetition) or do not (for
572 # a transposition) appear in the same witness.
574 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
575 foreach my $w ( $c->reading_witnesses( $target ) ) {
576 if( $seen_wits{$w} ) {
577 return ( 0, "Readings both occur in witness $w" )
578 if $rel eq 'transposition';
579 return ( 1, "ok" ) if $rel eq 'repetition';
582 return ( 0, "Readings occur only in distinct witnesses" )
583 if $rel eq 'repetition';
585 if ( $rel eq 'transposition' ) {
586 # We also need to check both that the readings occur in distinct
587 # witnesses, and that they are not in the same place. That is,
588 # proposing to link them should cause a witness loop.
589 if( $self->test_equivalence( $source, $target ) ) {
590 return ( 0, "Readings appear to be colocated, not transposed" );
595 } elsif( $rel ne 'repetition' ) {
596 # Check that linking the source and target in a relationship won't lead
597 # to a path loop for any witness.
598 # First, drop/stash any collations that might interfere
599 my $sourceobj = $c->reading( $source );
600 my $targetobj = $c->reading( $target );
601 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
602 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
603 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
604 push( @$mustdrop, $self->_drop_collations( $source ) );
605 push( @$mustdrop, $self->_drop_collations( $target ) );
606 if( $c->end->has_rank ) {
607 foreach my $rk ( $sourcerank .. $targetrank ) {
608 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
609 $c->readings_at_rank( $rk );
613 unless( $self->test_equivalence( $source, $target ) ) {
614 $self->_restore_collations( @$mustdrop );
615 return( 0, "Relationship would create witness loop" );
621 sub _drop_collations {
622 my( $self, $reading ) = @_;
624 foreach my $n ( $self->graph->neighbors( $reading ) ) {
625 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
626 push( @dropped, [ $reading, $n ] );
627 $self->del_relationship( $reading, $n );
628 #print STDERR "Dropped collation $reading -> $n\n";
634 sub _restore_collations {
635 my( $self, @vectors ) = @_;
636 foreach my $v ( @vectors ) {
638 $self->add_relationship( @$v, { 'type' => 'collated' } );
639 #print STDERR "Restored collation @$v\n";
641 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
646 =head2 filter_collations()
648 Utility function. Removes any redundant 'collated' relationships from the graph.
649 A collated relationship is redundant if the readings in question would occupy
650 the same rank regardless of the existence of the relationship.
654 sub filter_collations {
656 my $c = $self->collation;
657 foreach my $r ( 1 .. $c->end->rank - 1 ) {
660 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
661 next if $rdg->is_meta;
663 foreach my $pred ( $rdg->predecessors ) {
664 if( $pred->rank == $r - 1 ) {
666 $anchor = $rdg unless( $anchor );
670 push( @need_collations, $rdg ) unless $ip;
671 $c->relations->_drop_collations( "$rdg" );
674 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
675 unless $c->get_relationship( $anchor, $_ ) } @need_collations
676 : warn "No anchor found at $r";
680 =head2 related_readings( $reading, $filter )
682 Returns a list of readings that are connected via relationship links to $reading.
683 If $filter is set to a subroutine ref, returns only those related readings where
684 $filter( $relationship ) returns a true value.
688 sub related_readings {
689 my( $self, $reading, $filter ) = @_;
691 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
692 $reading = $reading->id;
698 if( $filter eq 'colocated' ) {
699 $filter = sub { $_[0]->colocated };
701 my %found = ( $reading => 1 );
702 my $check = [ $reading ];
706 foreach my $r ( @$check ) {
707 foreach my $nr ( $self->graph->neighbors( $r ) ) {
708 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
709 push( @$more, $nr ) unless exists $found{$nr};
716 delete $found{$reading};
717 @answer = keys %found;
719 @answer = $self->graph->all_reachable( $reading );
721 if( $return_object ) {
722 my $c = $self->collation;
723 return map { $c->reading( $_ ) } @answer;
729 =head2 merge_readings( $kept, $deleted );
731 Makes a best-effort merge of the relationship links between the given readings, and
732 stops tracking the to-be-deleted reading.
737 my( $self, $kept, $deleted, $combined ) = @_;
738 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
739 # Get the pair of kept / rel
740 my @vector = ( $kept );
741 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
742 next if $vector[0] eq $vector[1]; # Don't add a self loop
744 # If kept changes its text, drop the relationship.
747 # If kept / rel already has a relationship, just keep the old
748 my $rel = $self->get_relationship( @vector );
751 # Otherwise, adopt the relationship that would be deleted.
752 $rel = $self->get_relationship( @$edge );
753 $self->_set_relationship( $rel, @vector );
755 $self->_make_equivalence( $deleted, $kept, 1 );
758 ### Equivalence logic
760 sub _remove_equivalence_node {
761 my( $self, $node ) = @_;
762 my $group = $self->equivalence( $node );
763 my $nodelist = $self->eqreadings( $group );
764 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
765 $self->equivalence_graph->delete_vertex( $group );
766 $self->remove_eqreadings( $group );
767 $self->remove_equivalence( $group );
768 } elsif( @$nodelist == 1 ) {
769 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
770 " in group that should have only $node" );
772 my @newlist = grep { $_ ne $node } @$nodelist;
773 $self->set_eqreadings( $group, \@newlist );
774 $self->remove_equivalence( $node );
778 =head2 add_equivalence_edge
780 Add an edge in the equivalence graph corresponding to $source -> $target in the
781 collation. Should only be called by Collation.
785 sub add_equivalence_edge {
786 my( $self, $source, $target ) = @_;
787 my $seq = $self->equivalence( $source );
788 my $teq = $self->equivalence( $target );
789 $self->equivalence_graph->add_edge( $seq, $teq );
792 =head2 delete_equivalence_edge
794 Remove an edge in the equivalence graph corresponding to $source -> $target in the
795 collation. Should only be called by Collation.
799 sub delete_equivalence_edge {
800 my( $self, $source, $target ) = @_;
801 my $seq = $self->equivalence( $source );
802 my $teq = $self->equivalence( $target );
803 $self->equivalence_graph->delete_edge( $seq, $teq );
806 sub _is_disconnected {
808 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
809 || scalar $self->equivalence_graph->successorless_vertices > 1 );
812 # Equate two readings in the equivalence graph
813 sub _make_equivalence {
814 my( $self, $source, $target, $removing ) = @_;
815 # Get the source equivalent readings
816 my $seq = $self->equivalence( $source );
817 my $teq = $self->equivalence( $target );
818 # Nothing to do if they are already equivalent...
819 return if $seq eq $teq;
820 # Get the readings equivalent to source
821 my @sourcepool = @{$self->eqreadings( $seq )};
822 # If we are removing the source reading entirely, don't push
823 # it into the target pool.
824 @sourcepool = grep { $_ ne $seq } @sourcepool if $removing;
825 # and add them to the target readings.
826 push( @{$self->eqreadings( $teq )}, @sourcepool );
827 map { $self->set_equivalence( $_, $teq ) } @sourcepool;
828 # Then merge the nodes in the equivalence graph.
829 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
830 $self->equivalence_graph->add_edge( $pred, $teq )
831 unless $teq eq $pred;
833 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
834 $self->equivalence_graph->add_edge( $teq, $succ )
835 unless $teq eq $succ;
837 $self->equivalence_graph->delete_vertex( $seq );
838 # TODO enable this after collation parsing is done
839 throw( "Graph got disconnected making $source / $target equivalence" )
840 if $self->_is_disconnected && $self->collation->tradition->_initialized;
843 =head2 test_equivalence
845 Test whether, if two readings were equated with a 'colocated' relationship,
846 the graph would still be valid.
850 sub test_equivalence {
851 my( $self, $source, $target ) = @_;
852 # Try merging the nodes in the equivalence graph; return a true value if
853 # no cycle is introduced thereby. Restore the original graph first.
855 # Keep track of edges we add
858 # Get the reading equivalents
859 my $seq = $self->equivalence( $source );
860 my $teq = $self->equivalence( $target );
861 # Maybe this is easy?
862 return 1 if $seq eq $teq;
864 # Save the first graph
865 my $checkstr = $self->equivalence_graph->stringify();
866 # Add and save relevant edges
867 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
868 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
869 $added_pred{$pred} = 0;
871 $self->equivalence_graph->add_edge( $pred, $teq );
872 $added_pred{$pred} = 1;
875 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
876 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
877 $added_succ{$succ} = 0;
879 $self->equivalence_graph->add_edge( $teq, $succ );
880 $added_succ{$succ} = 1;
883 # Delete source equivalent and test
884 $self->equivalence_graph->delete_vertex( $seq );
885 my $ret = !$self->equivalence_graph->has_a_cycle;
887 # Restore what we changed
888 $self->equivalence_graph->add_vertex( $seq );
889 foreach my $pred ( keys %added_pred ) {
890 $self->equivalence_graph->add_edge( $pred, $seq );
891 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
893 foreach my $succ ( keys %added_succ ) {
894 $self->equivalence_graph->add_edge( $seq, $succ );
895 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
897 unless( $self->equivalence_graph->eq( $checkstr ) ) {
898 warn "GRAPH CHANGED after testing";
904 # Unmake an equivalence link between two readings. Should only be called internally.
905 sub _break_equivalence {
906 my( $self, $source, $target ) = @_;
908 # This is the hard one. Need to reconstruct the equivalence groups without
911 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
912 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
913 # If these groups intersect, they are still connected; do nothing.
914 foreach my $el ( keys %tng ) {
915 return if( exists $sng{$el} );
917 # If they don't intersect, then we split the nodes in the graph and in
918 # the hashes. First figure out which group has which name
919 my $oldgroup = $self->equivalence( $source ); # same as $target
920 my $keepsource = $sng{$oldgroup};
921 my $newgroup = $keepsource ? $target : $source;
922 my( $oldmembers, $newmembers );
924 $oldmembers = [ keys %sng ];
925 $newmembers = [ keys %tng ];
927 $oldmembers = [ keys %tng ];
928 $newmembers = [ keys %sng ];
931 # First alter the old group in the hash
932 $self->set_eqreadings( $oldgroup, $oldmembers );
933 foreach my $el ( @$oldmembers ) {
934 $self->set_equivalence( $el, $oldgroup );
937 # then add the new group back to the hash with its new key
938 $self->set_eqreadings( $newgroup, $newmembers );
939 foreach my $el ( @$newmembers ) {
940 $self->set_equivalence( $el, $newgroup );
943 # Now add the new group back to the equivalence graph
944 $self->equivalence_graph->add_vertex( $newgroup );
945 # ...add the appropriate edges to the source group vertext
946 my $c = $self->collation;
947 foreach my $rdg ( @$newmembers ) {
948 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
949 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
951 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
952 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
956 # ...and figure out which edges on the old group vertex to delete.
957 my( %old_pred, %old_succ );
958 foreach my $rdg ( @$oldmembers ) {
959 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
960 $old_pred{$self->equivalence( $rp )} = 1;
962 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
963 $old_succ{$self->equivalence( $rs )} = 1;
966 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
967 unless( $old_pred{$p} ) {
968 $self->equivalence_graph->delete_edge( $p, $oldgroup );
971 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
972 unless( $old_succ{$s} ) {
973 $self->equivalence_graph->delete_edge( $oldgroup, $s );
976 # TODO enable this after collation parsing is done
977 throw( "Graph got disconnected breaking $source / $target equivalence" )
978 if $self->_is_disconnected && $self->collation->tradition->_initialized;
981 sub _find_equiv_without {
982 my( $self, $first, $second ) = @_;
983 my %found = ( $first => 1 );
984 my $check = [ $first ];
988 foreach my $r ( @$check ) {
989 foreach my $nr ( $self->graph->neighbors( $r ) ) {
990 next if $r eq $second;
991 if( $self->get_relationship( $r, $nr )->colocated ) {
992 push( @$more, $nr ) unless exists $found{$nr};
1002 =head2 rebuild_equivalence
1004 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1005 adds all readings and edges, then makes an equivalence for all relationships.
1009 sub rebuild_equivalence {
1011 my $newgraph = Graph->new();
1012 # Set this as the new equivalence graph
1013 $self->_reset_equivalence( $newgraph );
1014 # Clear out the data hashes
1015 $self->_clear_equivalence;
1016 $self->_clear_eqreadings;
1019 foreach my $r ( $self->collation->readings ) {
1021 $newgraph->add_vertex( $rid );
1022 $self->set_equivalence( $rid, $rid );
1023 $self->set_eqreadings( $rid, [ $rid ] );
1027 foreach my $e ( $self->collation->paths ) {
1028 $self->add_equivalence_edge( @$e );
1031 # Now equate the colocated readings. This does no testing;
1032 # it assumes that all preexisting relationships are valid.
1033 foreach my $rel ( $self->relationships ) {
1034 my $relobj = $self->get_relationship( $rel );
1035 next unless $relobj && $relobj->colocated;
1036 $self->_make_equivalence( @$rel );
1043 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1045 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1046 $rgraph->setAttribute( 'edgedefault', 'directed' );
1047 $rgraph->setAttribute( 'id', 'relationships', );
1048 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1049 $rgraph->setAttribute( 'parse.edges', 0 );
1050 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1051 $rgraph->setAttribute( 'parse.nodes', 0 );
1052 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1054 # Add the vertices according to their XML IDs
1055 my %rdg_lookup = ( reverse %$node_hash );
1056 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1057 my @nlist = sort keys( %rdg_lookup );
1058 foreach my $n ( @nlist ) {
1059 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1060 $n_el->setAttribute( 'id', $n );
1061 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1063 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1065 # Add the relationship edges, with their object information
1067 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1068 # Add an edge and fill in its relationship info.
1069 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1070 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1071 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1072 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1073 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1075 my $rel_obj = $self->get_relationship( @$e );
1076 foreach my $key ( keys %$edge_keys ) {
1077 my $value = $rel_obj->$key;
1078 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1082 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1090 return $tmp_a <=> $tmp_b;
1093 sub _add_graphml_data {
1094 my( $el, $key, $value ) = @_;
1095 return unless defined $value;
1096 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1097 $data_el->setAttribute( 'key', $key );
1098 $data_el->appendText( $value );
1102 Text::Tradition::Error->throw(
1103 'ident' => 'Relationship error',
1109 __PACKAGE__->meta->make_immutable;