1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 equivalence_graph()
90 Returns an equivalence graph of the collation, in which all readings
91 related via a 'colocated' relationship are transformed into a single
92 vertex. Can be used to determine the validity of a new relationship.
96 has 'equivalence_graph' => (
99 default => sub { Graph->new() },
100 writer => '_reset_equivalence',
103 has '_node_equivalences' => (
107 equivalence => 'get',
108 set_equivalence => 'set',
109 remove_equivalence => 'delete',
110 _clear_equivalence => 'clear',
114 has '_equivalence_readings' => (
119 set_eqreadings => 'set',
120 remove_eqreadings => 'delete',
121 _clear_eqreadings => 'clear',
125 around add_reading => sub {
129 $self->equivalence_graph->add_vertex( @_ );
130 $self->set_equivalence( $_[0], $_[0] );
131 $self->set_eqreadings( $_[0], [ $_[0] ] );
135 around delete_reading => sub {
139 $self->_remove_equivalence_node( @_ );
143 =head2 get_relationship
145 Return the relationship object, if any, that exists between two readings.
149 sub get_relationship {
152 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
153 # Dereference the edge arrayref that was passed.
160 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
161 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
163 return $relationship;
166 sub _set_relationship {
167 my( $self, $relationship, @vector ) = @_;
168 $self->graph->add_edge( @vector );
169 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
170 $self->_make_equivalence( @vector ) if $relationship->colocated;
175 Create a new relationship with the given options and return it.
176 Warn and return undef if the relationship cannot be created.
181 my( $self, $options ) = @_;
182 # Check to see if a relationship exists between the two given readings
183 my $source = delete $options->{'orig_a'};
184 my $target = delete $options->{'orig_b'};
185 my $rel = $self->get_relationship( $source, $target );
187 if( $rel->type eq 'collated' ) {
188 # Always replace a 'collated' relationship with a more descriptive
190 $self->del_relationship( $source, $target );
191 } elsif( $rel->type ne $options->{'type'} ) {
192 throw( "Another relationship of type " . $rel->type
193 . " already exists between $source and $target" );
199 # Check to see if a nonlocal relationship is defined for the two readings
200 $rel = $self->scoped_relationship( $options->{'reading_a'},
201 $options->{'reading_b'} );
202 if( $rel && $rel->type eq $options->{'type'} ) {
205 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
207 $rel = Text::Tradition::Collation::Relationship->new( $options );
208 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
213 =head2 add_scoped_relationship( $rel )
215 Keep track of relationships defined between specific readings that are scoped
216 non-locally. Key on whichever reading occurs first alphabetically.
220 sub add_scoped_relationship {
221 my( $self, $rel ) = @_;
222 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
223 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
224 my $r = $self->scoped_relationship( $rdga, $rdgb );
226 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
227 $r->type, $rdga, $rdgb );
230 my( $first, $second ) = sort ( $rdga, $rdgb );
231 $self->scopedrels->{$first}->{$second} = $rel;
234 =head2 scoped_relationship( $reading_a, $reading_b )
236 Returns the general (document-level or global) relationship that has been defined
237 between the two reading strings. Returns undef if there is no general relationship.
241 sub scoped_relationship {
242 my( $self, $rdga, $rdgb ) = @_;
243 my( $first, $second ) = sort( $rdga, $rdgb );
244 if( exists $self->scopedrels->{$first}->{$second} ) {
245 return $self->scopedrels->{$first}->{$second};
251 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
253 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
254 for the possible options) between the readings given in $source and $target. Sets
255 up a scoped relationship between $sourcetext and $targettext if the relationship is
258 Returns a status boolean and a list of all reading pairs connected by the call to
269 $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
270 } 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
271 "Got expected relationship drop warning on parse";
273 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
274 ok( $t1, "Parsed test fragment file" );
275 my $c1 = $t1->collation;
276 my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
277 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
278 "Troublesome relationship exists" );
279 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
281 # Try to make the link we want
283 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
284 ok( 1, "Added cross-collation relationship as expected" );
285 } catch( Text::Tradition::Error $e ) {
286 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
290 $c1->calculate_ranks();
291 ok( 1, "Successfully calculated ranks" );
292 } catch ( Text::Tradition::Error $e ) {
293 ok( 0, "Collation now has a cycle: " . $e->message );
296 # Test 1.2: attempt merge of an identical reading
298 $c1->merge_readings( 'r9.3', 'r11.5' );
299 ok( 1, "Successfully merged reading 'pontifex'" );
300 } catch ( Text::Tradition::Error $e ) {
301 ok( 0, "Merge of mergeable readings failed: $e->message" );
305 # Test 1.3: attempt relationship with a meta reading (should fail)
307 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
308 ok( 0, "Allowed a meta-reading to be used in a relationship" );
309 } catch ( Text::Tradition::Error $e ) {
310 is( $e->message, 'Cannot set relationship on a meta reading',
311 "Relationship link prevented for a meta reading" );
314 # Test 2.1: try to equate nodes that are prevented with a real intermediate
318 $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
319 } 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
320 "Got expected relationship drop warning on parse";
321 my $c2 = $t2->collation;
322 $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
323 my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
324 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
325 "Created blocking relationship" );
326 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
327 # This time the link ought to fail
329 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
330 ok( 0, "Added cross-equivalent bad relationship" );
331 } catch ( Text::Tradition::Error $e ) {
332 like( $e->message, qr/witness loop/,
333 "Existing equivalence blocked crossing relationship" );
337 $c2->calculate_ranks();
338 ok( 1, "Successfully calculated ranks" );
339 } catch ( Text::Tradition::Error $e ) {
340 ok( 0, "Collation now has a cycle: " . $e->message );
343 # Test 3.1: make a straightforward pair of transpositions.
344 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
345 # Test 1: try to equate nodes that are prevented with an intermediate collation
346 my $c3 = $t3->collation;
348 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
349 ok( 1, "Added straightforward transposition" );
350 } catch ( Text::Tradition::Error $e ) {
351 ok( 0, "Failed to add normal transposition: " . $e->message );
354 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
355 ok( 1, "Added straightforward transposition complement" );
356 } catch ( Text::Tradition::Error $e ) {
357 ok( 0, "Failed to add normal transposition complement: " . $e->message );
360 # Test 3.2: try to make a transposition that could be a parallel.
362 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
363 ok( 0, "Added bad colocated transposition" );
364 } catch ( Text::Tradition::Error $e ) {
365 like( $e->message, qr/Readings appear to be colocated/,
366 "Prevented bad colocated transposition" );
369 # Test 3.3: make the parallel, and then make the transposition again.
371 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
372 ok( 1, "Equated identical readings for transposition" );
373 } catch ( Text::Tradition::Error $e ) {
374 ok( 0, "Failed to equate identical readings: " . $e->message );
377 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
378 ok( 1, "Added straightforward transposition complement" );
379 } catch ( Text::Tradition::Error $e ) {
380 ok( 0, "Failed to add normal transposition complement: " . $e->message );
387 sub add_relationship {
388 my( $self, $source, $target, $options ) = @_;
389 my $c = $self->collation;
390 my $sourceobj = $c->reading( $source );
391 my $targetobj = $c->reading( $target );
392 throw( "Adding self relationship at $source" ) if $source eq $target;
393 throw( "Cannot set relationship on a meta reading" )
394 if( $sourceobj->is_meta || $targetobj->is_meta );
397 my $droppedcolls = [];
398 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
399 $relationship = $options;
400 $thispaironly = 1; # If existing rel, set only where asked.
403 $options->{'scope'} = 'local' unless $options->{'scope'};
404 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
405 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
407 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
408 $options->{'type'}, $droppedcolls );
409 unless( $is_valid ) {
410 throw( "Invalid relationship: $reason" );
413 # Try to create the relationship object.
414 $options->{'reading_a'} = $sourceobj->text;
415 $options->{'reading_b'} = $targetobj->text;
416 $options->{'orig_a'} = $source;
417 $options->{'orig_b'} = $target;
418 if( $options->{'scope'} ne 'local' ) {
419 # Is there a relationship with this a & b already?
420 # Case-insensitive for non-orthographics.
421 my $rdga = $options->{'type'} eq 'orthographic'
422 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
423 my $rdgb = $options->{'type'} eq 'orthographic'
424 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
425 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
426 if( $otherrel && $otherrel->type eq $options->{type}
427 && $otherrel->scope eq $options->{scope} ) {
428 warn "Applying existing scoped relationship";
429 $relationship = $otherrel;
432 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
436 # Find all the pairs for which we need to set the relationship.
438 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
439 push( @vectors, $self->_find_applicable( $relationship ) );
442 # Now set the relationship(s).
444 my $rel = $self->get_relationship( $source, $target );
446 if( $rel && $rel ne $relationship ) {
447 if( $rel->nonlocal ) {
448 throw( "Found conflicting relationship at $source - $target" );
449 } elsif( $rel->type ne 'collated' ) {
450 # Replace a collation relationship; leave any other sort in place.
451 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
452 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
453 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
454 warn sprintf( "Not overriding local relationship %s with global %s "
455 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
456 $source, $target, $rel->reading_a, $rel->reading_b );
461 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
462 push( @pairs_set, [ $source, $target ] );
464 # Set any additional relationships that might be in @vectors.
465 foreach my $v ( @vectors ) {
466 next if $v->[0] eq $source && $v->[1] eq $target;
467 next if $v->[1] eq $source && $v->[0] eq $target;
468 my @added = $self->add_relationship( @$v, $relationship );
469 push( @pairs_set, @added );
472 # Finally, restore whatever collations we can, and return.
473 $self->_restore_collations( @$droppedcolls );
477 =head2 del_scoped_relationship( $reading_a, $reading_b )
479 Returns the general (document-level or global) relationship that has been defined
480 between the two reading strings. Returns undef if there is no general relationship.
484 sub del_scoped_relationship {
485 my( $self, $rdga, $rdgb ) = @_;
486 my( $first, $second ) = sort( $rdga, $rdgb );
487 return delete $self->scopedrels->{$first}->{$second};
490 sub _find_applicable {
491 my( $self, $rel ) = @_;
492 my $c = $self->collation;
493 # TODO Someday we might use a case sensitive language.
494 my $lang = $c->tradition->language;
496 my @identical_readings;
497 if( $rel->type eq 'orthographic' ) {
498 @identical_readings = grep { $_->text eq $rel->reading_a }
501 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
504 foreach my $ir ( @identical_readings ) {
506 if( $rel->type eq 'orthographic' ) {
507 @itarget = grep { $_->rank == $ir->rank
508 && $_->text eq $rel->reading_b } $c->readings;
510 @itarget = grep { $_->rank == $ir->rank
511 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
514 # Warn if there is more than one hit with no orth link between them.
515 my $itmain = shift @itarget;
518 map { $all_targets{$_} = 1 } @itarget;
519 map { delete $all_targets{$_} }
520 $self->related_readings( $itmain,
521 sub { $_[0]->type eq 'orthographic' } );
522 warn "More than one unrelated reading with text " . $itmain->text
523 . " at rank " . $ir->rank . "!" if keys %all_targets;
525 push( @vectors, [ $ir->id, $itmain->id ] );
531 =head2 del_relationship( $source, $target )
533 Removes the relationship between the given readings. If the relationship is
534 non-local, removes the relationship everywhere in the graph.
538 sub del_relationship {
539 my( $self, $source, $target ) = @_;
540 my $rel = $self->get_relationship( $source, $target );
541 return () unless $rel; # Nothing to delete; return an empty set.
542 my $colo = $rel->colocated;
543 my @vectors = ( [ $source, $target ] );
544 $self->_remove_relationship( $colo, $source, $target );
545 if( $rel->nonlocal ) {
546 # Remove the relationship wherever it occurs.
547 # Remove the relationship wherever it occurs.
548 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
549 $self->relationships;
550 foreach my $re ( @rel_edges ) {
551 $self->_remove_relationship( $colo, @$re );
552 push( @vectors, $re );
554 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
559 sub _remove_relationship {
560 my( $self, $equiv, @vector ) = @_;
561 $self->graph->delete_edge( @vector );
562 $self->_break_equivalence( @vector ) if $equiv;
565 =head2 relationship_valid( $source, $target, $type )
567 Checks whether a relationship of type $type may exist between the readings given
568 in $source and $target. Returns a tuple of ( status, message ) where status is
569 a yes/no boolean and, if the answer is no, message gives the reason why.
573 sub relationship_valid {
574 my( $self, $source, $target, $rel, $mustdrop ) = @_;
575 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
576 my $c = $self->collation;
577 ## Assume validity is okay if we are initializing from scratch.
578 return ( 1, "initializing" ) unless $c->tradition->_initialized;
579 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
580 # Check that the two readings do (for a repetition) or do not (for
581 # a transposition) appear in the same witness.
582 # TODO this might be called before witness paths are set...
584 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
585 foreach my $w ( $c->reading_witnesses( $target ) ) {
586 if( $seen_wits{$w} ) {
587 return ( 0, "Readings both occur in witness $w" )
588 if $rel eq 'transposition';
589 return ( 1, "ok" ) if $rel eq 'repetition';
592 return ( 0, "Readings occur only in distinct witnesses" )
593 if $rel eq 'repetition';
595 if ( $rel eq 'transposition' ) {
596 # We also need to check both that the readings occur in distinct
597 # witnesses, and that they are not in the same place. That is,
598 # proposing to link them should cause a witness loop.
599 if( $self->test_equivalence( $source, $target ) ) {
600 return ( 0, "Readings appear to be colocated, not transposed" );
605 } elsif( $rel ne 'repetition' ) {
606 # Check that linking the source and target in a relationship won't lead
607 # to a path loop for any witness.
608 # First, drop/stash any collations that might interfere
609 my $sourceobj = $c->reading( $source );
610 my $targetobj = $c->reading( $target );
611 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
612 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
613 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
614 push( @$mustdrop, $self->_drop_collations( $source ) );
615 push( @$mustdrop, $self->_drop_collations( $target ) );
616 if( $c->end->has_rank ) {
617 foreach my $rk ( $sourcerank .. $targetrank ) {
618 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
619 $c->readings_at_rank( $rk );
623 unless( $self->test_equivalence( $source, $target ) ) {
624 $self->_restore_collations( @$mustdrop );
625 return( 0, "Relationship would create witness loop" );
631 sub _drop_collations {
632 my( $self, $reading ) = @_;
634 foreach my $n ( $self->graph->neighbors( $reading ) ) {
635 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
636 push( @dropped, [ $reading, $n ] );
637 $self->del_relationship( $reading, $n );
638 #print STDERR "Dropped collation $reading -> $n\n";
644 sub _restore_collations {
645 my( $self, @vectors ) = @_;
646 foreach my $v ( @vectors ) {
648 $self->add_relationship( @$v, { 'type' => 'collated' } );
649 #print STDERR "Restored collation @$v\n";
651 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
656 =head2 filter_collations()
658 Utility function. Removes any redundant 'collated' relationships from the graph.
659 A collated relationship is redundant if the readings in question would occupy
660 the same rank regardless of the existence of the relationship.
664 sub filter_collations {
666 my $c = $self->collation;
667 foreach my $r ( 1 .. $c->end->rank - 1 ) {
670 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
671 next if $rdg->is_meta;
673 foreach my $pred ( $rdg->predecessors ) {
674 if( $pred->rank == $r - 1 ) {
676 $anchor = $rdg unless( $anchor );
680 push( @need_collations, $rdg ) unless $ip;
681 $c->relations->_drop_collations( "$rdg" );
684 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
685 unless $c->get_relationship( $anchor, $_ ) } @need_collations
686 : warn "No anchor found at $r";
690 =head2 related_readings( $reading, $filter )
692 Returns a list of readings that are connected via relationship links to $reading.
693 If $filter is set to a subroutine ref, returns only those related readings where
694 $filter( $relationship ) returns a true value.
698 sub related_readings {
699 my( $self, $reading, $filter ) = @_;
701 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
702 $reading = $reading->id;
708 if( $filter eq 'colocated' ) {
709 $filter = sub { $_[0]->colocated };
711 my %found = ( $reading => 1 );
712 my $check = [ $reading ];
716 foreach my $r ( @$check ) {
717 foreach my $nr ( $self->graph->neighbors( $r ) ) {
718 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
719 push( @$more, $nr ) unless exists $found{$nr};
726 delete $found{$reading};
727 @answer = keys %found;
729 @answer = $self->graph->all_reachable( $reading );
731 if( $return_object ) {
732 my $c = $self->collation;
733 return map { $c->reading( $_ ) } @answer;
739 =head2 merge_readings( $kept, $deleted );
741 Makes a best-effort merge of the relationship links between the given readings, and
742 stops tracking the to-be-deleted reading.
747 my( $self, $kept, $deleted, $combined ) = @_;
748 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
749 # Get the pair of kept / rel
750 my @vector = ( $kept );
751 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
752 next if $vector[0] eq $vector[1]; # Don't add a self loop
754 # If kept changes its text, drop the relationship.
757 # If kept / rel already has a relationship, just keep the old
758 my $rel = $self->get_relationship( @vector );
761 # Otherwise, adopt the relationship that would be deleted.
762 $rel = $self->get_relationship( @$edge );
763 $self->_set_relationship( $rel, @vector );
765 $self->_make_equivalence( $deleted, $kept );
768 ### Equivalence logic
770 sub _remove_equivalence_node {
771 my( $self, $node ) = @_;
772 my $group = $self->equivalence( $node );
773 my $nodelist = $self->eqreadings( $group );
774 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
775 $self->equivalence_graph->delete_vertex( $group );
776 $self->remove_eqreadings( $group );
777 $self->remove_equivalence( $group );
778 } elsif( @$nodelist == 1 ) {
779 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
780 " in group that should have only $node" );
782 my @newlist = grep { $_ ne $node } @$nodelist;
783 $self->set_eqreadings( $group, \@newlist );
784 $self->remove_equivalence( $node );
788 =head2 add_equivalence_edge
790 Add an edge in the equivalence graph corresponding to $source -> $target in the
791 collation. Should only be called by Collation.
795 sub add_equivalence_edge {
796 my( $self, $source, $target ) = @_;
797 my $seq = $self->equivalence( $source );
798 my $teq = $self->equivalence( $target );
799 return unless $seq && $teq;
800 $self->equivalence_graph->add_edge( $seq, $teq );
803 =head2 delete_equivalence_edge
805 Remove an edge in the equivalence graph corresponding to $source -> $target in the
806 collation. Should only be called by Collation.
810 sub delete_equivalence_edge {
811 my( $self, $source, $target ) = @_;
812 my $seq = $self->equivalence( $source );
813 my $teq = $self->equivalence( $target );
814 $self->equivalence_graph->delete_edge( $seq, $teq );
817 sub _is_disconnected {
819 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
820 || scalar $self->equivalence_graph->successorless_vertices > 1 );
823 # Equate two readings in the equivalence graph
824 sub _make_equivalence {
825 my( $self, $source, $target ) = @_;
826 # Get the source equivalent readings
827 my $seq = $self->equivalence( $source );
828 my $teq = $self->equivalence( $target );
829 # Nothing to do if they are already equivalent...
830 return if $seq eq $teq;
831 my $sourcepool = $self->eqreadings( $seq );
832 # and add them to the target readings.
833 push( @{$self->eqreadings( $teq )}, @$sourcepool );
834 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
835 # Then merge the nodes in the equivalence graph.
836 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
837 $self->equivalence_graph->add_edge( $pred, $teq );
839 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
840 $self->equivalence_graph->add_edge( $teq, $succ );
842 $self->equivalence_graph->delete_vertex( $seq );
843 # TODO enable this after collation parsing is done
844 throw( "Graph got disconnected making $source / $target equivalence" )
845 if $self->_is_disconnected && $self->collation->tradition->_initialized;
848 =head2 test_equivalence
850 Test whether, if two readings were equated with a 'colocated' relationship,
851 the graph would still be valid.
855 sub test_equivalence {
856 my( $self, $source, $target ) = @_;
857 # Try merging the nodes in the equivalence graph; return a true value if
858 # no cycle is introduced thereby. Restore the original graph first.
860 # Keep track of edges we add
863 # Get the reading equivalents
864 my $seq = $self->equivalence( $source );
865 my $teq = $self->equivalence( $target );
866 # Maybe this is easy?
867 return 1 if $seq eq $teq;
869 # Save the first graph
870 my $checkstr = $self->equivalence_graph->stringify();
871 # Add and save relevant edges
872 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
873 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
874 $added_pred{$pred} = 0;
876 $self->equivalence_graph->add_edge( $pred, $teq );
877 $added_pred{$pred} = 1;
880 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
881 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
882 $added_succ{$succ} = 0;
884 $self->equivalence_graph->add_edge( $teq, $succ );
885 $added_succ{$succ} = 1;
888 # Delete source equivalent and test
889 $self->equivalence_graph->delete_vertex( $seq );
890 my $ret = !$self->equivalence_graph->has_a_cycle;
892 # Restore what we changed
893 $self->equivalence_graph->add_vertex( $seq );
894 foreach my $pred ( keys %added_pred ) {
895 $self->equivalence_graph->add_edge( $pred, $seq );
896 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
898 foreach my $succ ( keys %added_succ ) {
899 $self->equivalence_graph->add_edge( $seq, $succ );
900 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
902 unless( $self->equivalence_graph->eq( $checkstr ) ) {
903 warn "GRAPH CHANGED after testing";
909 # Unmake an equivalence link between two readings. Should only be called internally.
910 sub _break_equivalence {
911 my( $self, $source, $target ) = @_;
913 # This is the hard one. Need to reconstruct the equivalence groups without
916 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
917 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
918 # If these groups intersect, they are still connected; do nothing.
919 foreach my $el ( keys %tng ) {
920 return if( exists $sng{$el} );
922 # If they don't intersect, then we split the nodes in the graph and in
923 # the hashes. First figure out which group has which name
924 my $oldgroup = $self->equivalence( $source ); # same as $target
925 my $keepsource = $sng{$oldgroup};
926 my $newgroup = $keepsource ? $target : $source;
927 my( $oldmembers, $newmembers );
929 $oldmembers = [ keys %sng ];
930 $newmembers = [ keys %tng ];
932 $oldmembers = [ keys %tng ];
933 $newmembers = [ keys %sng ];
936 # First alter the old group in the hash
937 $self->set_eqreadings( $oldgroup, $oldmembers );
938 foreach my $el ( @$oldmembers ) {
939 $self->set_equivalence( $el, $oldgroup );
942 # then add the new group back to the hash with its new key
943 $self->set_eqreadings( $newgroup, $newmembers );
944 foreach my $el ( @$newmembers ) {
945 $self->set_equivalence( $el, $newgroup );
948 # Now add the new group back to the equivalence graph
949 $self->equivalence_graph->add_vertex( $newgroup );
950 # ...add the appropriate edges to the source group vertext
951 my $c = $self->collation;
952 foreach my $rdg ( @$newmembers ) {
953 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
954 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
956 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
957 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
961 # ...and figure out which edges on the old group vertex to delete.
962 my( %old_pred, %old_succ );
963 foreach my $rdg ( @$oldmembers ) {
964 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
965 $old_pred{$self->equivalence( $rp )} = 1;
967 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
968 $old_succ{$self->equivalence( $rs )} = 1;
971 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
972 unless( $old_pred{$p} ) {
973 $self->equivalence_graph->delete_edge( $p, $oldgroup );
976 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
977 unless( $old_succ{$s} ) {
978 $self->equivalence_graph->delete_edge( $oldgroup, $s );
981 # TODO enable this after collation parsing is done
982 throw( "Graph got disconnected breaking $source / $target equivalence" )
983 if $self->_is_disconnected && $self->collation->tradition->_initialized;
986 sub _find_equiv_without {
987 my( $self, $first, $second ) = @_;
988 my %found = ( $first => 1 );
989 my $check = [ $first ];
993 foreach my $r ( @$check ) {
994 foreach my $nr ( $self->graph->neighbors( $r ) ) {
995 next if $r eq $second;
996 if( $self->get_relationship( $r, $nr )->colocated ) {
997 push( @$more, $nr ) unless exists $found{$nr};
1007 =head2 rebuild_equivalence
1009 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1010 adds all readings and edges, then makes an equivalence for all relationships.
1014 sub rebuild_equivalence {
1016 my $newgraph = Graph->new();
1017 # Set this as the new equivalence graph
1018 $self->_reset_equivalence( $newgraph );
1019 # Clear out the data hashes
1020 $self->_clear_equivalence;
1021 $self->_clear_eqreadings;
1024 foreach my $r ( $self->collation->readings ) {
1026 $newgraph->add_vertex( $rid );
1027 $self->set_equivalence( $rid, $rid );
1028 $self->set_eqreadings( $rid, [ $rid ] );
1032 foreach my $e ( $self->collation->paths ) {
1033 $self->add_equivalence_edge( @$e );
1036 # Now equate the colocated readings. This does no testing;
1037 # it assumes that all preexisting relationships are valid.
1038 foreach my $rel ( $self->relationships ) {
1039 my $relobj = $self->get_relationship( $rel );
1040 next unless $relobj && $relobj->colocated;
1041 $self->_make_equivalence( @$rel );
1045 =head2 equivalence_ranks
1047 Rank all vertices in the equivalence graph, and return a hash reference with
1048 vertex => rank mapping.
1052 sub equivalence_ranks {
1054 my $eqstart = $self->equivalence( $self->collation->start );
1055 my $eqranks = { $eqstart => 0 };
1056 my $rankeqs = { 0 => [ $eqstart ] };
1057 my @curr_origin = ( $eqstart );
1058 # A little iterative function.
1059 while( @curr_origin ) {
1060 @curr_origin = $self->_assign_rank( $eqranks, $rankeqs, @curr_origin );
1062 return( $eqranks, $rankeqs );
1066 my( $self, $node_ranks, $rank_nodes, @current_nodes ) = @_;
1067 my $graph = $self->equivalence_graph;
1068 # Look at each of the children of @current_nodes. If all the child's
1069 # parents have a rank, assign it the highest rank + 1 and add it to
1070 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1071 # parent gets a rank.
1073 foreach my $c ( @current_nodes ) {
1074 warn "Current reading $c has no rank!"
1075 unless exists $node_ranks->{$c};
1076 foreach my $child ( $graph->successors( $c ) ) {
1077 next if exists $node_ranks->{$child};
1078 my $highest_rank = -1;
1080 foreach my $parent ( $graph->predecessors( $child ) ) {
1081 if( exists $node_ranks->{$parent} ) {
1082 $highest_rank = $node_ranks->{$parent}
1083 if $highest_rank <= $node_ranks->{$parent};
1090 my $c_rank = $highest_rank + 1;
1091 # print STDERR "Assigning rank $c_rank to node $child \n";
1092 $node_ranks->{$child} = $c_rank if $node_ranks;
1093 push( @{$rank_nodes->{$c_rank}}, $child ) if $rank_nodes;
1094 push( @next_nodes, $child );
1103 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1105 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1106 $rgraph->setAttribute( 'edgedefault', 'directed' );
1107 $rgraph->setAttribute( 'id', 'relationships', );
1108 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1109 $rgraph->setAttribute( 'parse.edges', 0 );
1110 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1111 $rgraph->setAttribute( 'parse.nodes', 0 );
1112 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1114 # Add the vertices according to their XML IDs
1115 my %rdg_lookup = ( reverse %$node_hash );
1116 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1117 my @nlist = sort keys( %rdg_lookup );
1118 foreach my $n ( @nlist ) {
1119 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1120 $n_el->setAttribute( 'id', $n );
1121 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1123 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1125 # Add the relationship edges, with their object information
1127 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1128 # Add an edge and fill in its relationship info.
1129 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1130 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1131 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1132 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1133 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1135 my $rel_obj = $self->get_relationship( @$e );
1136 foreach my $key ( keys %$edge_keys ) {
1137 my $value = $rel_obj->$key;
1138 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1142 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1150 return $tmp_a <=> $tmp_b;
1153 sub _add_graphml_data {
1154 my( $el, $key, $value ) = @_;
1155 return unless defined $value;
1156 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1157 $data_el->setAttribute( 'key', $key );
1158 $data_el->appendText( $value );
1162 Text::Tradition::Error->throw(
1163 'ident' => 'Relationship error',
1169 __PACKAGE__->meta->make_immutable;