1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 equivalence_graph()
90 Returns an equivalence graph of the collation, in which all readings
91 related via a 'colocated' relationship are transformed into a single
92 vertex. Can be used to determine the validity of a new relationship.
96 has 'equivalence_graph' => (
99 default => sub { Graph->new() },
100 writer => '_reset_equivalence',
103 has '_node_equivalences' => (
107 equivalence => 'get',
108 set_equivalence => 'set',
109 remove_equivalence => 'delete',
110 _clear_equivalence => 'clear',
114 has '_equivalence_readings' => (
119 set_eqreadings => 'set',
120 remove_eqreadings => 'delete',
121 _clear_eqreadings => 'clear',
125 around add_reading => sub {
129 $self->equivalence_graph->add_vertex( @_ );
130 $self->set_equivalence( $_[0], $_[0] );
131 $self->set_eqreadings( $_[0], [ $_[0] ] );
135 around delete_reading => sub {
139 $self->_remove_equivalence_node( @_ );
143 =head2 get_relationship
145 Return the relationship object, if any, that exists between two readings.
149 sub get_relationship {
152 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
153 # Dereference the edge arrayref that was passed.
160 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
161 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
163 return $relationship;
166 sub _set_relationship {
167 my( $self, $relationship, @vector ) = @_;
168 $self->graph->add_edge( @vector );
169 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
170 $self->_make_equivalence( @vector ) if $relationship->colocated;
175 Create a new relationship with the given options and return it.
176 Warn and return undef if the relationship cannot be created.
181 my( $self, $options ) = @_;
182 # Check to see if a relationship exists between the two given readings
183 my $source = delete $options->{'orig_a'};
184 my $target = delete $options->{'orig_b'};
185 my $rel = $self->get_relationship( $source, $target );
187 if( $rel->type eq 'collated' ) {
188 # Always replace a 'collated' relationship with a more descriptive
190 $self->del_relationship( $source, $target );
191 } elsif( $rel->type ne $options->{'type'} ) {
192 throw( "Another relationship of type " . $rel->type
193 . " already exists between $source and $target" );
199 # Check to see if a nonlocal relationship is defined for the two readings
200 $rel = $self->scoped_relationship( $options->{'reading_a'},
201 $options->{'reading_b'} );
202 if( $rel && $rel->type eq $options->{'type'} ) {
205 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
207 $rel = Text::Tradition::Collation::Relationship->new( $options );
208 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
213 =head2 add_scoped_relationship( $rel )
215 Keep track of relationships defined between specific readings that are scoped
216 non-locally. Key on whichever reading occurs first alphabetically.
220 sub add_scoped_relationship {
221 my( $self, $rel ) = @_;
222 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
223 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
224 my $r = $self->scoped_relationship( $rdga, $rdgb );
226 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
227 $r->type, $rdga, $rdgb );
230 my( $first, $second ) = sort ( $rdga, $rdgb );
231 $self->scopedrels->{$first}->{$second} = $rel;
234 =head2 scoped_relationship( $reading_a, $reading_b )
236 Returns the general (document-level or global) relationship that has been defined
237 between the two reading strings. Returns undef if there is no general relationship.
241 sub scoped_relationship {
242 my( $self, $rdga, $rdgb ) = @_;
243 my( $first, $second ) = sort( $rdga, $rdgb );
244 if( exists $self->scopedrels->{$first}->{$second} ) {
245 return $self->scopedrels->{$first}->{$second};
251 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
253 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
254 for the possible options) between the readings given in $source and $target. Sets
255 up a scoped relationship between $sourcetext and $targettext if the relationship is
258 Returns a status boolean and a list of all reading pairs connected by the call to
269 $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
270 } 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
271 "Got expected relationship drop warning on parse";
273 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
274 ok( $t1, "Parsed test fragment file" );
275 my $c1 = $t1->collation;
276 my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
277 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
278 "Troublesome relationship exists" );
279 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
281 # Try to make the link we want
283 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
284 ok( 1, "Added cross-collation relationship as expected" );
285 } catch( Text::Tradition::Error $e ) {
286 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
290 $c1->calculate_ranks();
291 ok( 1, "Successfully calculated ranks" );
292 } catch ( Text::Tradition::Error $e ) {
293 ok( 0, "Collation now has a cycle: " . $e->message );
296 # Test 1.2: attempt merge of an identical reading
298 $c1->merge_readings( 'r9.3', 'r11.5' );
299 ok( 1, "Successfully merged reading 'pontifex'" );
300 } catch ( Text::Tradition::Error $e ) {
301 ok( 0, "Merge of mergeable readings failed: $e->message" );
305 # Test 1.3: attempt relationship with a meta reading (should fail)
307 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
308 ok( 0, "Allowed a meta-reading to be used in a relationship" );
309 } catch ( Text::Tradition::Error $e ) {
310 is( $e->message, 'Cannot set relationship on a meta reading',
311 "Relationship link prevented for a meta reading" );
314 # Test 1.4: try to break a relationship near a meta reading
315 $c1->add_relationship( 'r7.6', 'r7.3', { type => 'orthographic' } );
317 $c1->del_relationship( 'r7.6', 'r7.7' );
318 $c1->del_relationship( 'r7.6', 'r7.3' );
319 ok( 1, "Relationship broken with a meta reading as neighbor" );
321 ok( 0, "Relationship deletion failed with a meta reading as neighbor" );
324 # Test 2.1: try to equate nodes that are prevented with a real intermediate
328 $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
329 } 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
330 "Got expected relationship drop warning on parse";
331 my $c2 = $t2->collation;
332 $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
333 my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
334 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
335 "Created blocking relationship" );
336 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
337 # This time the link ought to fail
339 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
340 ok( 0, "Added cross-equivalent bad relationship" );
341 } catch ( Text::Tradition::Error $e ) {
342 like( $e->message, qr/witness loop/,
343 "Existing equivalence blocked crossing relationship" );
347 $c2->calculate_ranks();
348 ok( 1, "Successfully calculated ranks" );
349 } catch ( Text::Tradition::Error $e ) {
350 ok( 0, "Collation now has a cycle: " . $e->message );
353 # Test 3.1: make a straightforward pair of transpositions.
354 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
355 # Test 1: try to equate nodes that are prevented with an intermediate collation
356 my $c3 = $t3->collation;
358 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
359 ok( 1, "Added straightforward transposition" );
360 } catch ( Text::Tradition::Error $e ) {
361 ok( 0, "Failed to add normal transposition: " . $e->message );
364 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
365 ok( 1, "Added straightforward transposition complement" );
366 } catch ( Text::Tradition::Error $e ) {
367 ok( 0, "Failed to add normal transposition complement: " . $e->message );
370 # Test 3.2: try to make a transposition that could be a parallel.
372 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
373 ok( 0, "Added bad colocated transposition" );
374 } catch ( Text::Tradition::Error $e ) {
375 like( $e->message, qr/Readings appear to be colocated/,
376 "Prevented bad colocated transposition" );
379 # Test 3.3: make the parallel, and then make the transposition again.
381 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
382 ok( 1, "Equated identical readings for transposition" );
383 } catch ( Text::Tradition::Error $e ) {
384 ok( 0, "Failed to equate identical readings: " . $e->message );
387 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
388 ok( 1, "Added straightforward transposition complement" );
389 } catch ( Text::Tradition::Error $e ) {
390 ok( 0, "Failed to add normal transposition complement: " . $e->message );
397 sub add_relationship {
398 my( $self, $source, $target, $options ) = @_;
399 my $c = $self->collation;
400 my $sourceobj = $c->reading( $source );
401 my $targetobj = $c->reading( $target );
402 throw( "Adding self relationship at $source" ) if $source eq $target;
403 throw( "Cannot set relationship on a meta reading" )
404 if( $sourceobj->is_meta || $targetobj->is_meta );
407 my $droppedcolls = [];
408 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
409 $relationship = $options;
410 $thispaironly = 1; # If existing rel, set only where asked.
413 $options->{'scope'} = 'local' unless $options->{'scope'};
414 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
415 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
417 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
418 $options->{'type'}, $droppedcolls );
419 unless( $is_valid ) {
420 throw( "Invalid relationship: $reason" );
423 # Try to create the relationship object.
424 $options->{'reading_a'} = $sourceobj->text;
425 $options->{'reading_b'} = $targetobj->text;
426 $options->{'orig_a'} = $source;
427 $options->{'orig_b'} = $target;
428 if( $options->{'scope'} ne 'local' ) {
429 # Is there a relationship with this a & b already?
430 # Case-insensitive for non-orthographics.
431 my $rdga = $options->{'type'} eq 'orthographic'
432 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
433 my $rdgb = $options->{'type'} eq 'orthographic'
434 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
435 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
436 if( $otherrel && $otherrel->type eq $options->{type}
437 && $otherrel->scope eq $options->{scope} ) {
438 warn "Applying existing scoped relationship";
439 $relationship = $otherrel;
442 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
446 # Find all the pairs for which we need to set the relationship.
448 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
449 push( @vectors, $self->_find_applicable( $relationship ) );
452 # Now set the relationship(s).
454 my $rel = $self->get_relationship( $source, $target );
456 if( $rel && $rel ne $relationship ) {
457 if( $rel->nonlocal ) {
458 throw( "Found conflicting relationship at $source - $target" );
459 } elsif( $rel->type ne 'collated' ) {
460 # Replace a collation relationship; leave any other sort in place.
461 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
462 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
463 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
464 warn sprintf( "Not overriding local relationship %s with global %s "
465 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
466 $source, $target, $rel->reading_a, $rel->reading_b );
471 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
472 push( @pairs_set, [ $source, $target ] );
474 # Set any additional relationships that might be in @vectors.
475 foreach my $v ( @vectors ) {
476 next if $v->[0] eq $source && $v->[1] eq $target;
477 next if $v->[1] eq $source && $v->[0] eq $target;
478 my @added = $self->add_relationship( @$v, $relationship );
479 push( @pairs_set, @added );
482 # Finally, restore whatever collations we can, and return.
483 $self->_restore_collations( @$droppedcolls );
487 =head2 del_scoped_relationship( $reading_a, $reading_b )
489 Returns the general (document-level or global) relationship that has been defined
490 between the two reading strings. Returns undef if there is no general relationship.
494 sub del_scoped_relationship {
495 my( $self, $rdga, $rdgb ) = @_;
496 my( $first, $second ) = sort( $rdga, $rdgb );
497 return delete $self->scopedrels->{$first}->{$second};
500 sub _find_applicable {
501 my( $self, $rel ) = @_;
502 my $c = $self->collation;
503 # TODO Someday we might use a case sensitive language.
504 my $lang = $c->tradition->language;
506 my @identical_readings;
507 if( $rel->type eq 'orthographic' ) {
508 @identical_readings = grep { $_->text eq $rel->reading_a }
511 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
514 foreach my $ir ( @identical_readings ) {
516 if( $rel->type eq 'orthographic' ) {
517 @itarget = grep { $_->rank == $ir->rank
518 && $_->text eq $rel->reading_b } $c->readings;
520 @itarget = grep { $_->rank == $ir->rank
521 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
524 # Warn if there is more than one hit with no orth link between them.
525 my $itmain = shift @itarget;
528 map { $all_targets{$_} = 1 } @itarget;
529 map { delete $all_targets{$_} }
530 $self->related_readings( $itmain,
531 sub { $_[0]->type eq 'orthographic' } );
532 warn "More than one unrelated reading with text " . $itmain->text
533 . " at rank " . $ir->rank . "!" if keys %all_targets;
535 push( @vectors, [ $ir->id, $itmain->id ] );
541 =head2 del_relationship( $source, $target )
543 Removes the relationship between the given readings. If the relationship is
544 non-local, removes the relationship everywhere in the graph.
548 sub del_relationship {
549 my( $self, $source, $target ) = @_;
550 my $rel = $self->get_relationship( $source, $target );
551 return () unless $rel; # Nothing to delete; return an empty set.
552 my $colo = $rel->colocated;
553 my @vectors = ( [ $source, $target ] );
554 $self->_remove_relationship( $colo, $source, $target );
555 if( $rel->nonlocal ) {
556 # Remove the relationship wherever it occurs.
557 # Remove the relationship wherever it occurs.
558 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
559 $self->relationships;
560 foreach my $re ( @rel_edges ) {
561 $self->_remove_relationship( $colo, @$re );
562 push( @vectors, $re );
564 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
569 sub _remove_relationship {
570 my( $self, $equiv, @vector ) = @_;
571 $self->graph->delete_edge( @vector );
572 $self->_break_equivalence( @vector ) if $equiv;
575 =head2 relationship_valid( $source, $target, $type )
577 Checks whether a relationship of type $type may exist between the readings given
578 in $source and $target. Returns a tuple of ( status, message ) where status is
579 a yes/no boolean and, if the answer is no, message gives the reason why.
583 sub relationship_valid {
584 my( $self, $source, $target, $rel, $mustdrop ) = @_;
585 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
586 my $c = $self->collation;
587 ## Assume validity is okay if we are initializing from scratch.
588 return ( 1, "initializing" ) unless $c->tradition->_initialized;
589 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
590 # Check that the two readings do (for a repetition) or do not (for
591 # a transposition) appear in the same witness.
592 # TODO this might be called before witness paths are set...
594 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
595 foreach my $w ( $c->reading_witnesses( $target ) ) {
596 if( $seen_wits{$w} ) {
597 return ( 0, "Readings both occur in witness $w" )
598 if $rel eq 'transposition';
599 return ( 1, "ok" ) if $rel eq 'repetition';
602 return ( 0, "Readings occur only in distinct witnesses" )
603 if $rel eq 'repetition';
605 if ( $rel eq 'transposition' ) {
606 # We also need to check both that the readings occur in distinct
607 # witnesses, and that they are not in the same place. That is,
608 # proposing to link them should cause a witness loop.
609 if( $self->test_equivalence( $source, $target ) ) {
610 return ( 0, "Readings appear to be colocated, not transposed" );
615 } elsif( $rel ne 'repetition' ) {
616 # Check that linking the source and target in a relationship won't lead
617 # to a path loop for any witness.
618 # First, drop/stash any collations that might interfere
619 my $sourceobj = $c->reading( $source );
620 my $targetobj = $c->reading( $target );
621 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
622 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
623 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
624 push( @$mustdrop, $self->_drop_collations( $source ) );
625 push( @$mustdrop, $self->_drop_collations( $target ) );
626 if( $c->end->has_rank ) {
627 foreach my $rk ( $sourcerank .. $targetrank ) {
628 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
629 $c->readings_at_rank( $rk );
633 unless( $self->test_equivalence( $source, $target ) ) {
634 $self->_restore_collations( @$mustdrop );
635 return( 0, "Relationship would create witness loop" );
641 sub _drop_collations {
642 my( $self, $reading ) = @_;
644 foreach my $n ( $self->graph->neighbors( $reading ) ) {
645 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
646 push( @dropped, [ $reading, $n ] );
647 $self->del_relationship( $reading, $n );
648 #print STDERR "Dropped collation $reading -> $n\n";
654 sub _restore_collations {
655 my( $self, @vectors ) = @_;
656 foreach my $v ( @vectors ) {
658 $self->add_relationship( @$v, { 'type' => 'collated' } );
659 #print STDERR "Restored collation @$v\n";
661 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
666 =head2 filter_collations()
668 Utility function. Removes any redundant 'collated' relationships from the graph.
669 A collated relationship is redundant if the readings in question would occupy
670 the same rank regardless of the existence of the relationship.
674 sub filter_collations {
676 my $c = $self->collation;
677 foreach my $r ( 1 .. $c->end->rank - 1 ) {
680 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
681 next if $rdg->is_meta;
683 foreach my $pred ( $rdg->predecessors ) {
684 if( $pred->rank == $r - 1 ) {
686 $anchor = $rdg unless( $anchor );
690 push( @need_collations, $rdg ) unless $ip;
691 $c->relations->_drop_collations( "$rdg" );
694 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
695 unless $c->get_relationship( $anchor, $_ ) } @need_collations
696 : warn "No anchor found at $r";
700 =head2 related_readings( $reading, $filter )
702 Returns a list of readings that are connected via relationship links to $reading.
703 If $filter is set to a subroutine ref, returns only those related readings where
704 $filter( $relationship ) returns a true value.
708 sub related_readings {
709 my( $self, $reading, $filter ) = @_;
711 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
712 $reading = $reading->id;
718 if( $filter eq 'colocated' ) {
719 $filter = sub { $_[0]->colocated };
720 } elsif( !ref( $filter ) ) {
722 $filter = sub { $_[0]->type eq $type };
724 my %found = ( $reading => 1 );
725 my $check = [ $reading ];
729 foreach my $r ( @$check ) {
730 foreach my $nr ( $self->graph->neighbors( $r ) ) {
731 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
732 push( @$more, $nr ) unless exists $found{$nr};
739 delete $found{$reading};
740 @answer = keys %found;
742 @answer = $self->graph->all_reachable( $reading );
744 if( $return_object ) {
745 my $c = $self->collation;
746 return map { $c->reading( $_ ) } @answer;
752 =head2 merge_readings( $kept, $deleted );
754 Makes a best-effort merge of the relationship links between the given readings, and
755 stops tracking the to-be-deleted reading.
760 my( $self, $kept, $deleted, $combined ) = @_;
761 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
762 # Get the pair of kept / rel
763 my @vector = ( $kept );
764 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
765 next if $vector[0] eq $vector[1]; # Don't add a self loop
767 # If kept changes its text, drop the relationship.
770 # If kept / rel already has a relationship, just keep the old
771 my $rel = $self->get_relationship( @vector );
774 # Otherwise, adopt the relationship that would be deleted.
775 $rel = $self->get_relationship( @$edge );
776 $self->_set_relationship( $rel, @vector );
778 $self->_make_equivalence( $deleted, $kept );
781 ### Equivalence logic
783 sub _remove_equivalence_node {
784 my( $self, $node ) = @_;
785 my $group = $self->equivalence( $node );
786 my $nodelist = $self->eqreadings( $group );
787 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
788 $self->equivalence_graph->delete_vertex( $group );
789 $self->remove_eqreadings( $group );
790 $self->remove_equivalence( $group );
791 } elsif( @$nodelist == 1 ) {
792 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
793 " in group that should have only $node" );
795 my @newlist = grep { $_ ne $node } @$nodelist;
796 $self->set_eqreadings( $group, \@newlist );
797 $self->remove_equivalence( $node );
801 =head2 add_equivalence_edge
803 Add an edge in the equivalence graph corresponding to $source -> $target in the
804 collation. Should only be called by Collation.
808 sub add_equivalence_edge {
809 my( $self, $source, $target ) = @_;
810 my $seq = $self->equivalence( $source );
811 my $teq = $self->equivalence( $target );
812 $self->equivalence_graph->add_edge( $seq, $teq );
815 =head2 delete_equivalence_edge
817 Remove an edge in the equivalence graph corresponding to $source -> $target in the
818 collation. Should only be called by Collation.
822 sub delete_equivalence_edge {
823 my( $self, $source, $target ) = @_;
824 my $seq = $self->equivalence( $source );
825 my $teq = $self->equivalence( $target );
826 $self->equivalence_graph->delete_edge( $seq, $teq );
829 sub _is_disconnected {
831 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
832 || scalar $self->equivalence_graph->successorless_vertices > 1 );
835 # Equate two readings in the equivalence graph
836 sub _make_equivalence {
837 my( $self, $source, $target ) = @_;
838 # Get the source equivalent readings
839 my $seq = $self->equivalence( $source );
840 my $teq = $self->equivalence( $target );
841 # Nothing to do if they are already equivalent...
842 return if $seq eq $teq;
843 my $sourcepool = $self->eqreadings( $seq );
844 # and add them to the target readings.
845 push( @{$self->eqreadings( $teq )}, @$sourcepool );
846 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
847 # Then merge the nodes in the equivalence graph.
848 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
849 $self->equivalence_graph->add_edge( $pred, $teq );
851 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
852 $self->equivalence_graph->add_edge( $teq, $succ );
854 $self->equivalence_graph->delete_vertex( $seq );
855 # TODO enable this after collation parsing is done
856 throw( "Graph got disconnected making $source / $target equivalence" )
857 if $self->_is_disconnected && $self->collation->tradition->_initialized;
860 =head2 test_equivalence
862 Test whether, if two readings were equated with a 'colocated' relationship,
863 the graph would still be valid.
867 sub test_equivalence {
868 my( $self, $source, $target ) = @_;
869 # Try merging the nodes in the equivalence graph; return a true value if
870 # no cycle is introduced thereby. Restore the original graph first.
872 # Keep track of edges we add
875 # Get the reading equivalents
876 my $seq = $self->equivalence( $source );
877 my $teq = $self->equivalence( $target );
878 # Maybe this is easy?
879 return 1 if $seq eq $teq;
881 # Save the first graph
882 my $checkstr = $self->equivalence_graph->stringify();
883 # Add and save relevant edges
884 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
885 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
886 $added_pred{$pred} = 0;
888 $self->equivalence_graph->add_edge( $pred, $teq );
889 $added_pred{$pred} = 1;
892 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
893 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
894 $added_succ{$succ} = 0;
896 $self->equivalence_graph->add_edge( $teq, $succ );
897 $added_succ{$succ} = 1;
900 # Delete source equivalent and test
901 $self->equivalence_graph->delete_vertex( $seq );
902 my $ret = !$self->equivalence_graph->has_a_cycle;
904 # Restore what we changed
905 $self->equivalence_graph->add_vertex( $seq );
906 foreach my $pred ( keys %added_pred ) {
907 $self->equivalence_graph->add_edge( $pred, $seq );
908 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
910 foreach my $succ ( keys %added_succ ) {
911 $self->equivalence_graph->add_edge( $seq, $succ );
912 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
914 unless( $self->equivalence_graph->eq( $checkstr ) ) {
915 warn "GRAPH CHANGED after testing";
921 # Unmake an equivalence link between two readings. Should only be called internally.
922 sub _break_equivalence {
923 my( $self, $source, $target ) = @_;
925 # This is the hard one. Need to reconstruct the equivalence groups without
928 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
929 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
930 # If these groups intersect, they are still connected; do nothing.
931 foreach my $el ( keys %tng ) {
932 return if( exists $sng{$el} );
934 # If they don't intersect, then we split the nodes in the graph and in
935 # the hashes. First figure out which group has which name
936 my $oldgroup = $self->equivalence( $source ); # same as $target
937 my $keepsource = $sng{$oldgroup};
938 my $newgroup = $keepsource ? $target : $source;
939 my( $oldmembers, $newmembers );
941 $oldmembers = [ keys %sng ];
942 $newmembers = [ keys %tng ];
944 $oldmembers = [ keys %tng ];
945 $newmembers = [ keys %sng ];
948 # First alter the old group in the hash
949 $self->set_eqreadings( $oldgroup, $oldmembers );
950 foreach my $el ( @$oldmembers ) {
951 $self->set_equivalence( $el, $oldgroup );
954 # then add the new group back to the hash with its new key
955 $self->set_eqreadings( $newgroup, $newmembers );
956 foreach my $el ( @$newmembers ) {
957 $self->set_equivalence( $el, $newgroup );
960 # Now add the new group back to the equivalence graph
961 $self->equivalence_graph->add_vertex( $newgroup );
962 # ...add the appropriate edges to the source group vertext
963 my $c = $self->collation;
964 foreach my $rdg ( @$newmembers ) {
965 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
966 next unless $self->equivalence( $rp );
967 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
969 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
970 next unless $self->equivalence( $rs );
971 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
975 # ...and figure out which edges on the old group vertex to delete.
976 my( %old_pred, %old_succ );
977 foreach my $rdg ( @$oldmembers ) {
978 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
979 next unless $self->equivalence( $rp );
980 $old_pred{$self->equivalence( $rp )} = 1;
982 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
983 next unless $self->equivalence( $rs );
984 $old_succ{$self->equivalence( $rs )} = 1;
987 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
988 unless( $old_pred{$p} ) {
989 $self->equivalence_graph->delete_edge( $p, $oldgroup );
992 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
993 unless( $old_succ{$s} ) {
994 $self->equivalence_graph->delete_edge( $oldgroup, $s );
997 # TODO enable this after collation parsing is done
998 throw( "Graph got disconnected breaking $source / $target equivalence" )
999 if $self->_is_disconnected && $self->collation->tradition->_initialized;
1002 sub _find_equiv_without {
1003 my( $self, $first, $second ) = @_;
1004 my %found = ( $first => 1 );
1005 my $check = [ $first ];
1009 foreach my $r ( @$check ) {
1010 foreach my $nr ( $self->graph->neighbors( $r ) ) {
1011 next if $r eq $second;
1012 if( $self->get_relationship( $r, $nr )->colocated ) {
1013 push( @$more, $nr ) unless exists $found{$nr};
1023 =head2 rebuild_equivalence
1025 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1026 adds all readings and edges, then makes an equivalence for all relationships.
1030 sub rebuild_equivalence {
1032 my $newgraph = Graph->new();
1033 # Set this as the new equivalence graph
1034 $self->_reset_equivalence( $newgraph );
1035 # Clear out the data hashes
1036 $self->_clear_equivalence;
1037 $self->_clear_eqreadings;
1040 foreach my $r ( $self->collation->readings ) {
1042 $newgraph->add_vertex( $rid );
1043 $self->set_equivalence( $rid, $rid );
1044 $self->set_eqreadings( $rid, [ $rid ] );
1048 foreach my $e ( $self->collation->paths ) {
1049 $self->add_equivalence_edge( @$e );
1052 # Now equate the colocated readings. This does no testing;
1053 # it assumes that all preexisting relationships are valid.
1054 foreach my $rel ( $self->relationships ) {
1055 my $relobj = $self->get_relationship( $rel );
1056 next unless $relobj && $relobj->colocated;
1057 $self->_make_equivalence( @$rel );
1061 =head2 equivalence_ranks
1063 Rank all vertices in the equivalence graph, and return a hash reference with
1064 vertex => rank mapping.
1068 sub equivalence_ranks {
1070 my $eqstart = $self->equivalence( $self->collation->start );
1071 my $eqranks = { $eqstart => 0 };
1072 my $rankeqs = { 0 => [ $eqstart ] };
1073 my @curr_origin = ( $eqstart );
1074 # A little iterative function.
1075 while( @curr_origin ) {
1076 @curr_origin = $self->_assign_rank( $eqranks, $rankeqs, @curr_origin );
1078 return( $eqranks, $rankeqs );
1082 my( $self, $node_ranks, $rank_nodes, @current_nodes ) = @_;
1083 my $graph = $self->equivalence_graph;
1084 # Look at each of the children of @current_nodes. If all the child's
1085 # parents have a rank, assign it the highest rank + 1 and add it to
1086 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1087 # parent gets a rank.
1089 foreach my $c ( @current_nodes ) {
1090 warn "Current reading $c has no rank!"
1091 unless exists $node_ranks->{$c};
1092 foreach my $child ( $graph->successors( $c ) ) {
1093 next if exists $node_ranks->{$child};
1094 my $highest_rank = -1;
1096 foreach my $parent ( $graph->predecessors( $child ) ) {
1097 if( exists $node_ranks->{$parent} ) {
1098 $highest_rank = $node_ranks->{$parent}
1099 if $highest_rank <= $node_ranks->{$parent};
1106 my $c_rank = $highest_rank + 1;
1107 # print STDERR "Assigning rank $c_rank to node $child \n";
1108 $node_ranks->{$child} = $c_rank if $node_ranks;
1109 push( @{$rank_nodes->{$c_rank}}, $child ) if $rank_nodes;
1110 push( @next_nodes, $child );
1119 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1121 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1122 $rgraph->setAttribute( 'edgedefault', 'directed' );
1123 $rgraph->setAttribute( 'id', 'relationships', );
1124 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1125 $rgraph->setAttribute( 'parse.edges', 0 );
1126 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1127 $rgraph->setAttribute( 'parse.nodes', 0 );
1128 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1130 # Add the vertices according to their XML IDs
1131 my %rdg_lookup = ( reverse %$node_hash );
1132 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1133 my @nlist = sort keys( %rdg_lookup );
1134 foreach my $n ( @nlist ) {
1135 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1136 $n_el->setAttribute( 'id', $n );
1137 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1139 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1141 # Add the relationship edges, with their object information
1143 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1144 # Add an edge and fill in its relationship info.
1145 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1146 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1147 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1148 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1149 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1151 my $rel_obj = $self->get_relationship( @$e );
1152 foreach my $key ( keys %$edge_keys ) {
1153 my $value = $rel_obj->$key;
1154 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1158 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1166 return $tmp_a <=> $tmp_b;
1169 sub _add_graphml_data {
1170 my( $el, $key, $value ) = @_;
1171 return unless defined $value;
1172 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1173 $data_el->setAttribute( 'key', $key );
1174 $data_el->appendText( $value );
1178 Text::Tradition::Error->throw(
1179 'ident' => 'Relationship error',
1185 __PACKAGE__->meta->make_immutable;