1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 equivalence_graph()
90 Returns an equivalence graph of the collation, in which all readings
91 related via a 'colocated' relationship are transformed into a single
92 vertex. Can be used to determine the validity of a new relationship.
96 has 'equivalence_graph' => (
99 default => sub { Graph->new() },
100 writer => '_reset_equivalence',
103 has '_node_equivalences' => (
107 equivalence => 'get',
108 set_equivalence => 'set',
109 remove_equivalence => 'delete',
110 _clear_equivalence => 'clear',
114 has '_equivalence_readings' => (
119 set_eqreadings => 'set',
120 remove_eqreadings => 'delete',
121 _clear_eqreadings => 'clear',
125 around add_reading => sub {
129 $self->equivalence_graph->add_vertex( @_ );
130 $self->set_equivalence( $_[0], $_[0] );
131 $self->set_eqreadings( $_[0], [ $_[0] ] );
135 around delete_reading => sub {
139 $self->_remove_equivalence_node( @_ );
143 =head2 get_relationship
145 Return the relationship object, if any, that exists between two readings.
149 sub get_relationship {
152 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
153 # Dereference the edge arrayref that was passed.
160 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
161 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
163 return $relationship;
166 sub _set_relationship {
167 my( $self, $relationship, @vector ) = @_;
168 $self->graph->add_edge( @vector );
169 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
170 $self->_make_equivalence( @vector ) if $relationship->colocated;
175 Create a new relationship with the given options and return it.
176 Warn and return undef if the relationship cannot be created.
181 my( $self, $options ) = @_;
182 # Check to see if a relationship exists between the two given readings
183 my $source = delete $options->{'orig_a'};
184 my $target = delete $options->{'orig_b'};
185 my $rel = $self->get_relationship( $source, $target );
187 if( $rel->type eq 'collated' ) {
188 # Always replace a 'collated' relationship with a more descriptive
190 $self->del_relationship( $source, $target );
191 } elsif( $rel->type ne $options->{'type'} ) {
192 throw( "Another relationship of type " . $rel->type
193 . " already exists between $source and $target" );
199 $rel = Text::Tradition::Collation::Relationship->new( $options );
200 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
204 =head2 add_scoped_relationship( $rel )
206 Keep track of relationships defined between specific readings that are scoped
207 non-locally. Key on whichever reading occurs first alphabetically.
211 sub add_scoped_relationship {
212 my( $self, $rel ) = @_;
213 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
214 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
215 my $r = $self->scoped_relationship( $rdga, $rdgb );
217 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
218 $r->type, $rdga, $rdgb );
221 my( $first, $second ) = sort ( $rdga, $rdgb );
222 $self->scopedrels->{$first}->{$second} = $rel;
225 =head2 scoped_relationship( $reading_a, $reading_b )
227 Returns the general (document-level or global) relationship that has been defined
228 between the two reading strings. Returns undef if there is no general relationship.
232 sub scoped_relationship {
233 my( $self, $rdga, $rdgb ) = @_;
234 my( $first, $second ) = sort( $rdga, $rdgb );
235 if( exists $self->scopedrels->{$first}->{$second} ) {
236 return $self->scopedrels->{$first}->{$second};
242 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
244 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
245 for the possible options) between the readings given in $source and $target. Sets
246 up a scoped relationship between $sourcetext and $targettext if the relationship is
249 Returns a status boolean and a list of all reading pairs connected by the call to
260 $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
261 } 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
262 "Got expected relationship drop warning on parse";
264 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
265 ok( $t1, "Parsed test fragment file" );
266 my $c1 = $t1->collation;
267 my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
268 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
269 "Troublesome relationship exists" );
270 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
272 # Try to make the link we want
274 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
275 ok( 1, "Added cross-collation relationship as expected" );
276 } catch( Text::Tradition::Error $e ) {
277 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
281 $c1->calculate_ranks();
282 ok( 1, "Successfully calculated ranks" );
283 } catch ( Text::Tradition::Error $e ) {
284 ok( 0, "Collation now has a cycle: " . $e->message );
287 # Test 1.2: attempt merge of an identical reading
289 $c1->merge_readings( 'r9.3', 'r11.5' );
290 ok( 1, "Successfully merged reading 'pontifex'" );
291 } catch ( Text::Tradition::Error $e ) {
292 ok( 0, "Merge of mergeable readings failed: $e->message" );
296 # Test 1.3: attempt relationship with a meta reading (should fail)
298 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
299 ok( 0, "Allowed a meta-reading to be used in a relationship" );
300 } catch ( Text::Tradition::Error $e ) {
301 is( $e->message, 'Cannot set relationship on a meta reading',
302 "Relationship link prevented for a meta reading" );
305 # Test 1.4: try to break a relationship near a meta reading
306 $c1->add_relationship( 'r7.6', 'r7.3', { type => 'orthographic' } );
308 $c1->del_relationship( 'r7.6', 'r7.7' );
309 $c1->del_relationship( 'r7.6', 'r7.3' );
310 ok( 1, "Relationship broken with a meta reading as neighbor" );
312 ok( 0, "Relationship deletion failed with a meta reading as neighbor" );
315 # Test 2.1: try to equate nodes that are prevented with a real intermediate
319 $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
320 } 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
321 "Got expected relationship drop warning on parse";
322 my $c2 = $t2->collation;
323 $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
324 my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
325 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
326 "Created blocking relationship" );
327 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
328 # This time the link ought to fail
330 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
331 ok( 0, "Added cross-equivalent bad relationship" );
332 } catch ( Text::Tradition::Error $e ) {
333 like( $e->message, qr/witness loop/,
334 "Existing equivalence blocked crossing relationship" );
338 $c2->calculate_ranks();
339 ok( 1, "Successfully calculated ranks" );
340 } catch ( Text::Tradition::Error $e ) {
341 ok( 0, "Collation now has a cycle: " . $e->message );
344 # Test 3.1: make a straightforward pair of transpositions.
345 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
346 # Test 1: try to equate nodes that are prevented with an intermediate collation
347 my $c3 = $t3->collation;
349 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
350 ok( 1, "Added straightforward transposition" );
351 } catch ( Text::Tradition::Error $e ) {
352 ok( 0, "Failed to add normal transposition: " . $e->message );
355 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
356 ok( 1, "Added straightforward transposition complement" );
357 } catch ( Text::Tradition::Error $e ) {
358 ok( 0, "Failed to add normal transposition complement: " . $e->message );
361 # Test 3.2: try to make a transposition that could be a parallel.
363 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
364 ok( 0, "Added bad colocated transposition" );
365 } catch ( Text::Tradition::Error $e ) {
366 like( $e->message, qr/Readings appear to be colocated/,
367 "Prevented bad colocated transposition" );
370 # Test 3.3: make the parallel, and then make the transposition again.
372 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
373 ok( 1, "Equated identical readings for transposition" );
374 } catch ( Text::Tradition::Error $e ) {
375 ok( 0, "Failed to equate identical readings: " . $e->message );
378 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
379 ok( 1, "Added straightforward transposition complement" );
380 } catch ( Text::Tradition::Error $e ) {
381 ok( 0, "Failed to add normal transposition complement: " . $e->message );
388 sub add_relationship {
389 my( $self, $source, $target, $options ) = @_;
390 my $c = $self->collation;
391 my $sourceobj = $c->reading( $source );
392 my $targetobj = $c->reading( $target );
393 throw( "Adding self relationship at $source" ) if $source eq $target;
394 throw( "Cannot set relationship on a meta reading" )
395 if( $sourceobj->is_meta || $targetobj->is_meta );
398 my $droppedcolls = [];
399 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
400 $relationship = $options;
401 $thispaironly = 1; # If existing rel, set only where asked.
404 $options->{'scope'} = 'local' unless $options->{'scope'};
405 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
406 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
408 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
409 $options->{'type'}, $droppedcolls );
410 unless( $is_valid ) {
411 throw( "Invalid relationship: $reason" );
414 # Try to create the relationship object.
415 $options->{'reading_a'} = $sourceobj->text;
416 $options->{'reading_b'} = $targetobj->text;
417 $options->{'orig_a'} = $source;
418 $options->{'orig_b'} = $target;
419 if( $options->{'scope'} ne 'local' ) {
420 # Is there a relationship with this a & b already?
421 # Case-insensitive for non-orthographics.
422 my $rdga = $options->{'type'} eq 'orthographic'
423 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
424 my $rdgb = $options->{'type'} eq 'orthographic'
425 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
426 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
427 if( $otherrel && $otherrel->type eq $options->{type}
428 && $otherrel->scope eq $options->{scope} ) {
429 warn "Applying existing scoped relationship for $rdga / $rdgb";
430 $relationship = $otherrel;
431 } elsif( $otherrel ) {
432 throw( "Conflicting scoped relationship for $rdga / $rdgb at $source / $target" );
435 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
439 # Find all the pairs for which we need to set the relationship.
441 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
442 push( @vectors, $self->_find_applicable( $relationship ) );
445 # Now set the relationship(s).
447 my $rel = $self->get_relationship( $source, $target );
449 if( $rel && $rel ne $relationship ) {
450 if( $rel->nonlocal ) {
451 throw( "Found conflicting relationship at $source - $target" );
452 } elsif( $rel->type ne 'collated' ) {
453 # Replace a collation relationship; leave any other sort in place.
454 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
455 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
456 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
457 warn sprintf( "Not overriding local relationship %s with global %s "
458 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
459 $source, $target, $rel->reading_a, $rel->reading_b );
464 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
465 push( @pairs_set, [ $source, $target ] );
467 # Set any additional relationships that might be in @vectors.
468 foreach my $v ( @vectors ) {
469 next if $v->[0] eq $source && $v->[1] eq $target;
470 next if $v->[1] eq $source && $v->[0] eq $target;
471 my @added = $self->add_relationship( @$v, $relationship );
472 push( @pairs_set, @added );
475 # Finally, restore whatever collations we can, and return.
476 $self->_restore_collations( @$droppedcolls );
480 =head2 del_scoped_relationship( $reading_a, $reading_b )
482 Returns the general (document-level or global) relationship that has been defined
483 between the two reading strings. Returns undef if there is no general relationship.
487 sub del_scoped_relationship {
488 my( $self, $rdga, $rdgb ) = @_;
489 my( $first, $second ) = sort( $rdga, $rdgb );
490 return delete $self->scopedrels->{$first}->{$second};
493 sub _find_applicable {
494 my( $self, $rel ) = @_;
495 my $c = $self->collation;
496 # TODO Someday we might use a case sensitive language.
497 my $lang = $c->tradition->language;
499 my @identical_readings;
500 if( $rel->type eq 'orthographic' ) {
501 @identical_readings = grep { $_->text eq $rel->reading_a }
504 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
507 foreach my $ir ( @identical_readings ) {
509 if( $rel->type eq 'orthographic' ) {
510 @itarget = grep { $_->rank == $ir->rank
511 && $_->text eq $rel->reading_b } $c->readings;
513 @itarget = grep { $_->rank == $ir->rank
514 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
517 # Warn if there is more than one hit with no orth link between them.
518 my $itmain = shift @itarget;
521 map { $all_targets{$_} = 1 } @itarget;
522 map { delete $all_targets{$_} }
523 $self->related_readings( $itmain,
524 sub { $_[0]->type eq 'orthographic' } );
525 warn "More than one unrelated reading with text " . $itmain->text
526 . " at rank " . $ir->rank . "!" if keys %all_targets;
528 push( @vectors, [ $ir->id, $itmain->id ] );
534 =head2 del_relationship( $source, $target )
536 Removes the relationship between the given readings. If the relationship is
537 non-local, removes the relationship everywhere in the graph.
541 sub del_relationship {
542 my( $self, $source, $target ) = @_;
543 my $rel = $self->get_relationship( $source, $target );
544 return () unless $rel; # Nothing to delete; return an empty set.
545 my $colo = $rel->colocated;
546 my @vectors = ( [ $source, $target ] );
547 $self->_remove_relationship( $colo, $source, $target );
548 if( $rel->nonlocal ) {
549 # Remove the relationship wherever it occurs.
550 # Remove the relationship wherever it occurs.
551 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
552 $self->relationships;
553 foreach my $re ( @rel_edges ) {
554 $self->_remove_relationship( $colo, @$re );
555 push( @vectors, $re );
557 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
562 sub _remove_relationship {
563 my( $self, $equiv, @vector ) = @_;
564 $self->graph->delete_edge( @vector );
565 $self->_break_equivalence( @vector ) if $equiv;
568 =head2 relationship_valid( $source, $target, $type )
570 Checks whether a relationship of type $type may exist between the readings given
571 in $source and $target. Returns a tuple of ( status, message ) where status is
572 a yes/no boolean and, if the answer is no, message gives the reason why.
576 sub relationship_valid {
577 my( $self, $source, $target, $rel, $mustdrop ) = @_;
578 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
579 my $c = $self->collation;
580 ## Assume validity is okay if we are initializing from scratch.
581 return ( 1, "initializing" ) unless $c->tradition->_initialized;
582 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
583 # Check that the two readings do (for a repetition) or do not (for
584 # a transposition) appear in the same witness.
585 # TODO this might be called before witness paths are set...
587 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
588 foreach my $w ( $c->reading_witnesses( $target ) ) {
589 if( $seen_wits{$w} ) {
590 return ( 0, "Readings both occur in witness $w" )
591 if $rel eq 'transposition';
592 return ( 1, "ok" ) if $rel eq 'repetition';
595 return ( 0, "Readings occur only in distinct witnesses" )
596 if $rel eq 'repetition';
598 if ( $rel eq 'transposition' ) {
599 # We also need to check both that the readings occur in distinct
600 # witnesses, and that they are not in the same place. That is,
601 # proposing to link them should cause a witness loop.
602 if( $self->test_equivalence( $source, $target ) ) {
603 return ( 0, "Readings appear to be colocated, not transposed" );
608 } elsif( $rel ne 'repetition' ) {
609 # Check that linking the source and target in a relationship won't lead
610 # to a path loop for any witness.
611 # First, drop/stash any collations that might interfere
612 my $sourceobj = $c->reading( $source );
613 my $targetobj = $c->reading( $target );
614 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
615 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
616 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
617 push( @$mustdrop, $self->_drop_collations( $source ) );
618 push( @$mustdrop, $self->_drop_collations( $target ) );
619 if( $c->end->has_rank ) {
620 foreach my $rk ( $sourcerank .. $targetrank ) {
621 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
622 $c->readings_at_rank( $rk );
626 unless( $self->test_equivalence( $source, $target ) ) {
627 $self->_restore_collations( @$mustdrop );
628 return( 0, "Relationship would create witness loop" );
634 sub _drop_collations {
635 my( $self, $reading ) = @_;
637 foreach my $n ( $self->graph->neighbors( $reading ) ) {
638 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
639 push( @dropped, [ $reading, $n ] );
640 $self->del_relationship( $reading, $n );
641 #print STDERR "Dropped collation $reading -> $n\n";
647 sub _restore_collations {
648 my( $self, @vectors ) = @_;
649 foreach my $v ( @vectors ) {
651 $self->add_relationship( @$v, { 'type' => 'collated' } );
652 #print STDERR "Restored collation @$v\n";
654 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
659 =head2 filter_collations()
661 Utility function. Removes any redundant 'collated' relationships from the graph.
662 A collated relationship is redundant if the readings in question would occupy
663 the same rank regardless of the existence of the relationship.
667 sub filter_collations {
669 my $c = $self->collation;
670 foreach my $r ( 1 .. $c->end->rank - 1 ) {
673 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
674 next if $rdg->is_meta;
676 foreach my $pred ( $rdg->predecessors ) {
677 if( $pred->rank == $r - 1 ) {
679 $anchor = $rdg unless( $anchor );
683 push( @need_collations, $rdg ) unless $ip;
684 $c->relations->_drop_collations( "$rdg" );
687 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
688 unless $c->get_relationship( $anchor, $_ ) } @need_collations
689 : warn "No anchor found at $r";
693 =head2 related_readings( $reading, $filter )
695 Returns a list of readings that are connected via relationship links to $reading.
696 If $filter is set to a subroutine ref, returns only those related readings where
697 $filter( $relationship ) returns a true value.
701 sub related_readings {
702 my( $self, $reading, $filter ) = @_;
704 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
705 $reading = $reading->id;
711 if( $filter eq 'colocated' ) {
712 $filter = sub { $_[0]->colocated };
713 } elsif( !ref( $filter ) ) {
715 $filter = sub { $_[0]->type eq $type };
717 my %found = ( $reading => 1 );
718 my $check = [ $reading ];
722 foreach my $r ( @$check ) {
723 foreach my $nr ( $self->graph->neighbors( $r ) ) {
724 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
725 push( @$more, $nr ) unless exists $found{$nr};
732 delete $found{$reading};
733 @answer = keys %found;
735 @answer = $self->graph->all_reachable( $reading );
737 if( $return_object ) {
738 my $c = $self->collation;
739 return map { $c->reading( $_ ) } @answer;
745 =head2 merge_readings( $kept, $deleted );
747 Makes a best-effort merge of the relationship links between the given readings, and
748 stops tracking the to-be-deleted reading.
753 my( $self, $kept, $deleted, $combined ) = @_;
754 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
755 # Get the pair of kept / rel
756 my @vector = ( $kept );
757 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
758 next if $vector[0] eq $vector[1]; # Don't add a self loop
760 # If kept changes its text, drop the relationship.
763 # If kept / rel already has a relationship, just keep the old
764 my $rel = $self->get_relationship( @vector );
767 # Otherwise, adopt the relationship that would be deleted.
768 $rel = $self->get_relationship( @$edge );
769 $self->_set_relationship( $rel, @vector );
771 $self->_make_equivalence( $deleted, $kept );
774 ### Equivalence logic
776 sub _remove_equivalence_node {
777 my( $self, $node ) = @_;
778 my $group = $self->equivalence( $node );
779 my $nodelist = $self->eqreadings( $group );
780 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
781 $self->equivalence_graph->delete_vertex( $group );
782 $self->remove_eqreadings( $group );
783 $self->remove_equivalence( $group );
784 } elsif( @$nodelist == 1 ) {
785 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
786 " in group that should have only $node" );
788 my @newlist = grep { $_ ne $node } @$nodelist;
789 $self->set_eqreadings( $group, \@newlist );
790 $self->remove_equivalence( $node );
794 =head2 add_equivalence_edge
796 Add an edge in the equivalence graph corresponding to $source -> $target in the
797 collation. Should only be called by Collation.
801 sub add_equivalence_edge {
802 my( $self, $source, $target ) = @_;
803 my $seq = $self->equivalence( $source );
804 my $teq = $self->equivalence( $target );
805 $self->equivalence_graph->add_edge( $seq, $teq );
808 =head2 delete_equivalence_edge
810 Remove an edge in the equivalence graph corresponding to $source -> $target in the
811 collation. Should only be called by Collation.
815 sub delete_equivalence_edge {
816 my( $self, $source, $target ) = @_;
817 my $seq = $self->equivalence( $source );
818 my $teq = $self->equivalence( $target );
819 $self->equivalence_graph->delete_edge( $seq, $teq );
822 sub _is_disconnected {
824 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
825 || scalar $self->equivalence_graph->successorless_vertices > 1 );
828 # Equate two readings in the equivalence graph
829 sub _make_equivalence {
830 my( $self, $source, $target ) = @_;
831 # Get the source equivalent readings
832 my $seq = $self->equivalence( $source );
833 my $teq = $self->equivalence( $target );
834 # Nothing to do if they are already equivalent...
835 return if $seq eq $teq;
836 my $sourcepool = $self->eqreadings( $seq );
837 # and add them to the target readings.
838 push( @{$self->eqreadings( $teq )}, @$sourcepool );
839 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
840 # Then merge the nodes in the equivalence graph.
841 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
842 $self->equivalence_graph->add_edge( $pred, $teq );
844 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
845 $self->equivalence_graph->add_edge( $teq, $succ );
847 $self->equivalence_graph->delete_vertex( $seq );
848 # TODO enable this after collation parsing is done
849 throw( "Graph got disconnected making $source / $target equivalence" )
850 if $self->_is_disconnected && $self->collation->tradition->_initialized;
853 =head2 test_equivalence
855 Test whether, if two readings were equated with a 'colocated' relationship,
856 the graph would still be valid.
860 sub test_equivalence {
861 my( $self, $source, $target ) = @_;
862 # Try merging the nodes in the equivalence graph; return a true value if
863 # no cycle is introduced thereby. Restore the original graph first.
865 # Keep track of edges we add
868 # Get the reading equivalents
869 my $seq = $self->equivalence( $source );
870 my $teq = $self->equivalence( $target );
871 # Maybe this is easy?
872 return 1 if $seq eq $teq;
874 # Save the first graph
875 my $checkstr = $self->equivalence_graph->stringify();
876 # Add and save relevant edges
877 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
878 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
879 $added_pred{$pred} = 0;
881 $self->equivalence_graph->add_edge( $pred, $teq );
882 $added_pred{$pred} = 1;
885 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
886 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
887 $added_succ{$succ} = 0;
889 $self->equivalence_graph->add_edge( $teq, $succ );
890 $added_succ{$succ} = 1;
893 # Delete source equivalent and test
894 $self->equivalence_graph->delete_vertex( $seq );
895 my $ret = !$self->equivalence_graph->has_a_cycle;
897 # Restore what we changed
898 $self->equivalence_graph->add_vertex( $seq );
899 foreach my $pred ( keys %added_pred ) {
900 $self->equivalence_graph->add_edge( $pred, $seq );
901 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
903 foreach my $succ ( keys %added_succ ) {
904 $self->equivalence_graph->add_edge( $seq, $succ );
905 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
907 unless( $self->equivalence_graph->eq( $checkstr ) ) {
908 warn "GRAPH CHANGED after testing";
914 # Unmake an equivalence link between two readings. Should only be called internally.
915 sub _break_equivalence {
916 my( $self, $source, $target ) = @_;
918 # This is the hard one. Need to reconstruct the equivalence groups without
921 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
922 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
923 # If these groups intersect, they are still connected; do nothing.
924 foreach my $el ( keys %tng ) {
925 return if( exists $sng{$el} );
927 # If they don't intersect, then we split the nodes in the graph and in
928 # the hashes. First figure out which group has which name
929 my $oldgroup = $self->equivalence( $source ); # same as $target
930 my $keepsource = $sng{$oldgroup};
931 my $newgroup = $keepsource ? $target : $source;
932 my( $oldmembers, $newmembers );
934 $oldmembers = [ keys %sng ];
935 $newmembers = [ keys %tng ];
937 $oldmembers = [ keys %tng ];
938 $newmembers = [ keys %sng ];
941 # First alter the old group in the hash
942 $self->set_eqreadings( $oldgroup, $oldmembers );
943 foreach my $el ( @$oldmembers ) {
944 $self->set_equivalence( $el, $oldgroup );
947 # then add the new group back to the hash with its new key
948 $self->set_eqreadings( $newgroup, $newmembers );
949 foreach my $el ( @$newmembers ) {
950 $self->set_equivalence( $el, $newgroup );
953 # Now add the new group back to the equivalence graph
954 $self->equivalence_graph->add_vertex( $newgroup );
955 # ...add the appropriate edges to the source group vertext
956 my $c = $self->collation;
957 foreach my $rdg ( @$newmembers ) {
958 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
959 next unless $self->equivalence( $rp );
960 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
962 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
963 next unless $self->equivalence( $rs );
964 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
968 # ...and figure out which edges on the old group vertex to delete.
969 my( %old_pred, %old_succ );
970 foreach my $rdg ( @$oldmembers ) {
971 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
972 next unless $self->equivalence( $rp );
973 $old_pred{$self->equivalence( $rp )} = 1;
975 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
976 next unless $self->equivalence( $rs );
977 $old_succ{$self->equivalence( $rs )} = 1;
980 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
981 unless( $old_pred{$p} ) {
982 $self->equivalence_graph->delete_edge( $p, $oldgroup );
985 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
986 unless( $old_succ{$s} ) {
987 $self->equivalence_graph->delete_edge( $oldgroup, $s );
990 # TODO enable this after collation parsing is done
991 throw( "Graph got disconnected breaking $source / $target equivalence" )
992 if $self->_is_disconnected && $self->collation->tradition->_initialized;
995 sub _find_equiv_without {
996 my( $self, $first, $second ) = @_;
997 my %found = ( $first => 1 );
998 my $check = [ $first ];
1002 foreach my $r ( @$check ) {
1003 foreach my $nr ( $self->graph->neighbors( $r ) ) {
1004 next if $r eq $second;
1005 if( $self->get_relationship( $r, $nr )->colocated ) {
1006 push( @$more, $nr ) unless exists $found{$nr};
1016 =head2 rebuild_equivalence
1018 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1019 adds all readings and edges, then makes an equivalence for all relationships.
1023 sub rebuild_equivalence {
1025 my $newgraph = Graph->new();
1026 # Set this as the new equivalence graph
1027 $self->_reset_equivalence( $newgraph );
1028 # Clear out the data hashes
1029 $self->_clear_equivalence;
1030 $self->_clear_eqreadings;
1033 foreach my $r ( $self->collation->readings ) {
1035 $newgraph->add_vertex( $rid );
1036 $self->set_equivalence( $rid, $rid );
1037 $self->set_eqreadings( $rid, [ $rid ] );
1041 foreach my $e ( $self->collation->paths ) {
1042 $self->add_equivalence_edge( @$e );
1045 # Now equate the colocated readings. This does no testing;
1046 # it assumes that all preexisting relationships are valid.
1047 foreach my $rel ( $self->relationships ) {
1048 my $relobj = $self->get_relationship( $rel );
1049 next unless $relobj && $relobj->colocated;
1050 $self->_make_equivalence( @$rel );
1054 =head2 equivalence_ranks
1056 Rank all vertices in the equivalence graph, and return a hash reference with
1057 vertex => rank mapping.
1061 sub equivalence_ranks {
1063 my $eqstart = $self->equivalence( $self->collation->start );
1064 my $eqranks = { $eqstart => 0 };
1065 my $rankeqs = { 0 => [ $eqstart ] };
1066 my @curr_origin = ( $eqstart );
1067 # A little iterative function.
1068 while( @curr_origin ) {
1069 @curr_origin = $self->_assign_rank( $eqranks, $rankeqs, @curr_origin );
1071 return( $eqranks, $rankeqs );
1075 my( $self, $node_ranks, $rank_nodes, @current_nodes ) = @_;
1076 my $graph = $self->equivalence_graph;
1077 # Look at each of the children of @current_nodes. If all the child's
1078 # parents have a rank, assign it the highest rank + 1 and add it to
1079 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1080 # parent gets a rank.
1082 foreach my $c ( @current_nodes ) {
1083 warn "Current reading $c has no rank!"
1084 unless exists $node_ranks->{$c};
1085 foreach my $child ( $graph->successors( $c ) ) {
1086 next if exists $node_ranks->{$child};
1087 my $highest_rank = -1;
1089 foreach my $parent ( $graph->predecessors( $child ) ) {
1090 if( exists $node_ranks->{$parent} ) {
1091 $highest_rank = $node_ranks->{$parent}
1092 if $highest_rank <= $node_ranks->{$parent};
1099 my $c_rank = $highest_rank + 1;
1100 # print STDERR "Assigning rank $c_rank to node $child \n";
1101 $node_ranks->{$child} = $c_rank if $node_ranks;
1102 push( @{$rank_nodes->{$c_rank}}, $child ) if $rank_nodes;
1103 push( @next_nodes, $child );
1112 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1114 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1115 $rgraph->setAttribute( 'edgedefault', 'directed' );
1116 $rgraph->setAttribute( 'id', 'relationships', );
1117 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1118 $rgraph->setAttribute( 'parse.edges', 0 );
1119 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1120 $rgraph->setAttribute( 'parse.nodes', 0 );
1121 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1123 # Add the vertices according to their XML IDs
1124 my %rdg_lookup = ( reverse %$node_hash );
1125 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1126 my @nlist = sort keys( %rdg_lookup );
1127 foreach my $n ( @nlist ) {
1128 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1129 $n_el->setAttribute( 'id', $n );
1130 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1132 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1134 # Add the relationship edges, with their object information
1136 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1137 # Add an edge and fill in its relationship info.
1138 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1139 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1140 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1141 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1142 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1144 my $rel_obj = $self->get_relationship( @$e );
1145 foreach my $key ( keys %$edge_keys ) {
1146 my $value = $rel_obj->$key;
1147 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1151 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1159 return $tmp_a <=> $tmp_b;
1162 sub _add_graphml_data {
1163 my( $el, $key, $value ) = @_;
1164 return unless defined $value;
1165 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1166 $data_el->setAttribute( 'key', $key );
1167 $data_el->appendText( $value );
1171 Text::Tradition::Error->throw(
1172 'ident' => 'Relationship error',
1178 __PACKAGE__->meta->make_immutable;