1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 equivalence_graph()
90 Returns an equivalence graph of the collation, in which all readings
91 related via a 'colocated' relationship are transformed into a single
92 vertex. Can be used to determine the validity of a new relationship.
96 has 'equivalence_graph' => (
99 default => sub { Graph->new() },
100 writer => '_reset_equivalence',
103 has '_node_equivalences' => (
107 equivalence => 'get',
108 set_equivalence => 'set',
109 remove_equivalence => 'delete',
110 _clear_equivalence => 'clear',
114 has '_equivalence_readings' => (
119 set_eqreadings => 'set',
120 remove_eqreadings => 'delete',
121 _clear_eqreadings => 'clear',
125 around add_reading => sub {
129 $self->equivalence_graph->add_vertex( @_ );
130 $self->set_equivalence( $_[0], $_[0] );
131 $self->set_eqreadings( $_[0], [ $_[0] ] );
135 around delete_reading => sub {
139 $self->_remove_equivalence_node( @_ );
143 =head2 get_relationship
145 Return the relationship object, if any, that exists between two readings.
149 sub get_relationship {
152 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
153 # Dereference the edge arrayref that was passed.
160 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
161 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
163 return $relationship;
166 sub _set_relationship {
167 my( $self, $relationship, @vector ) = @_;
168 $self->graph->add_edge( @vector );
169 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
170 $self->_make_equivalence( @vector ) if $relationship->colocated;
175 Create a new relationship with the given options and return it.
176 Warn and return undef if the relationship cannot be created.
181 my( $self, $options ) = @_;
182 # Check to see if a relationship exists between the two given readings
183 my $source = delete $options->{'orig_a'};
184 my $target = delete $options->{'orig_b'};
185 my $rel = $self->get_relationship( $source, $target );
187 if( $rel->type eq 'collated' ) {
188 # Always replace a 'collated' relationship with a more descriptive
190 $self->del_relationship( $source, $target );
191 } elsif( $rel->type ne $options->{'type'} ) {
192 throw( "Another relationship of type " . $rel->type
193 . " already exists between $source and $target" );
199 $rel = Text::Tradition::Collation::Relationship->new( $options );
200 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
204 =head2 add_scoped_relationship( $rel )
206 Keep track of relationships defined between specific readings that are scoped
207 non-locally. Key on whichever reading occurs first alphabetically.
211 sub add_scoped_relationship {
212 my( $self, $rel ) = @_;
213 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
214 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
215 my $r = $self->scoped_relationship( $rdga, $rdgb );
217 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
218 $r->type, $rdga, $rdgb );
221 my( $first, $second ) = sort ( $rdga, $rdgb );
222 $self->scopedrels->{$first}->{$second} = $rel;
225 =head2 scoped_relationship( $reading_a, $reading_b )
227 Returns the general (document-level or global) relationship that has been defined
228 between the two reading strings. Returns undef if there is no general relationship.
232 sub scoped_relationship {
233 my( $self, $rdga, $rdgb ) = @_;
234 my( $first, $second ) = sort( $rdga, $rdgb );
235 my( $lcfirst, $lcsecond ) = sort( lc( $rdga ), lc( $rdgb ) );
236 if( exists $self->scopedrels->{$first}->{$second} ) {
237 return $self->scopedrels->{$first}->{$second};
238 } elsif( exists $self->scopedrels->{$lcfirst}->{$lcsecond} ) {
239 my $rel = $self->scopedrels->{$lcfirst}->{$lcsecond};
240 return $rel->type ne 'orthographic' ? $rel : undef;
246 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
248 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
249 for the possible options) between the readings given in $source and $target. Sets
250 up a scoped relationship between $sourcetext and $targettext if the relationship is
253 Returns a status boolean and a list of all reading pairs connected by the call to
264 $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
265 } 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
266 "Got expected relationship drop warning on parse";
268 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
269 ok( $t1, "Parsed test fragment file" );
270 my $c1 = $t1->collation;
271 my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
272 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
273 "Troublesome relationship exists" );
274 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
276 # Try to make the link we want
278 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
279 ok( 1, "Added cross-collation relationship as expected" );
280 } catch( Text::Tradition::Error $e ) {
281 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
285 $c1->calculate_ranks();
286 ok( 1, "Successfully calculated ranks" );
287 } catch ( Text::Tradition::Error $e ) {
288 ok( 0, "Collation now has a cycle: " . $e->message );
291 # Test 1.2: attempt merge of an identical reading
293 $c1->merge_readings( 'r9.3', 'r11.5' );
294 ok( 1, "Successfully merged reading 'pontifex'" );
295 } catch ( Text::Tradition::Error $e ) {
296 ok( 0, "Merge of mergeable readings failed: $e->message" );
300 # Test 1.3: attempt relationship with a meta reading (should fail)
302 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
303 ok( 0, "Allowed a meta-reading to be used in a relationship" );
304 } catch ( Text::Tradition::Error $e ) {
305 is( $e->message, 'Cannot set relationship on a meta reading',
306 "Relationship link prevented for a meta reading" );
309 # Test 1.4: try to break a relationship near a meta reading
310 $c1->add_relationship( 'r7.6', 'r7.3', { type => 'orthographic' } );
312 $c1->del_relationship( 'r7.6', 'r7.7' );
313 $c1->del_relationship( 'r7.6', 'r7.3' );
314 ok( 1, "Relationship broken with a meta reading as neighbor" );
316 ok( 0, "Relationship deletion failed with a meta reading as neighbor" );
319 # Test 2.1: try to equate nodes that are prevented with a real intermediate
323 $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
324 } 'DROPPING r14.2 -> r8.1: Cannot set relationship on a meta reading',
325 "Got expected relationship drop warning on parse";
326 my $c2 = $t2->collation;
327 $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
328 my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
329 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
330 "Created blocking relationship" );
331 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
332 # This time the link ought to fail
334 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
335 ok( 0, "Added cross-equivalent bad relationship" );
336 } catch ( Text::Tradition::Error $e ) {
337 like( $e->message, qr/witness loop/,
338 "Existing equivalence blocked crossing relationship" );
342 $c2->calculate_ranks();
343 ok( 1, "Successfully calculated ranks" );
344 } catch ( Text::Tradition::Error $e ) {
345 ok( 0, "Collation now has a cycle: " . $e->message );
348 # Test 3.1: make a straightforward pair of transpositions.
349 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
350 # Test 1: try to equate nodes that are prevented with an intermediate collation
351 my $c3 = $t3->collation;
353 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
354 ok( 1, "Added straightforward transposition" );
355 } catch ( Text::Tradition::Error $e ) {
356 ok( 0, "Failed to add normal transposition: " . $e->message );
359 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
360 ok( 1, "Added straightforward transposition complement" );
361 } catch ( Text::Tradition::Error $e ) {
362 ok( 0, "Failed to add normal transposition complement: " . $e->message );
365 # Test 3.2: try to make a transposition that could be a parallel.
367 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
368 ok( 0, "Added bad colocated transposition" );
369 } catch ( Text::Tradition::Error $e ) {
370 like( $e->message, qr/Readings appear to be colocated/,
371 "Prevented bad colocated transposition" );
374 # Test 3.3: make the parallel, and then make the transposition again.
376 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
377 ok( 1, "Equated identical readings for transposition" );
378 } catch ( Text::Tradition::Error $e ) {
379 ok( 0, "Failed to equate identical readings: " . $e->message );
382 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
383 ok( 1, "Added straightforward transposition complement" );
384 } catch ( Text::Tradition::Error $e ) {
385 ok( 0, "Failed to add normal transposition complement: " . $e->message );
392 sub add_relationship {
393 my( $self, $source, $target, $options ) = @_;
394 my $c = $self->collation;
395 my $sourceobj = $c->reading( $source );
396 my $targetobj = $c->reading( $target );
397 throw( "Adding self relationship at $source" ) if $source eq $target;
398 throw( "Cannot set relationship on a meta reading" )
399 if( $sourceobj->is_meta || $targetobj->is_meta );
402 my $droppedcolls = [];
403 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
404 $relationship = $options;
405 $thispaironly = 1; # If existing rel, set only where asked.
408 $options->{'scope'} = 'local' unless $options->{'scope'};
409 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
410 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
412 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
413 $options->{'type'}, $droppedcolls );
414 unless( $is_valid ) {
415 throw( "Invalid relationship: $reason" );
418 # Try to create the relationship object.
419 $options->{'reading_a'} = $sourceobj->text;
420 $options->{'reading_b'} = $targetobj->text;
421 $options->{'orig_a'} = $source;
422 $options->{'orig_b'} = $target;
423 if( $options->{'scope'} ne 'local' ) {
424 # Is there a relationship with this a & b already?
425 # Case-insensitive for non-orthographics.
426 my $rdga = $options->{'reading_a'};
427 my $rdgb = $options->{'reading_b'};
428 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
429 if( $otherrel && $otherrel->type eq $options->{type}
430 && $otherrel->scope eq $options->{scope} ) {
431 warn "Applying existing scoped relationship for $rdga / $rdgb";
432 $relationship = $otherrel;
433 } elsif( $otherrel ) {
434 throw( "Conflicting scoped relationship for $rdga / $rdgb at $source / $target" );
437 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
441 # Now set the relationship(s).
443 my $rel = $self->get_relationship( $source, $target );
445 if( $rel && $rel ne $relationship ) {
446 if( $rel->nonlocal ) {
447 throw( "Found conflicting relationship at $source - $target" );
448 } elsif( $rel->type ne 'collated' ) {
449 # Replace a collation relationship; leave any other sort in place.
450 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
451 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
452 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
453 warn sprintf( "Not overriding local relationship %s with global %s "
454 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
455 $source, $target, $rel->reading_a, $rel->reading_b );
460 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
461 push( @pairs_set, [ $source, $target ] );
463 # Find all the pairs for which we need to set the relationship.
464 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
465 push( @pairs_set, $self->add_global_relationship( $relationship ) );
467 # Finally, restore whatever collations we can, and return.
468 $self->_restore_collations( @$droppedcolls );
472 =head2 add_global_relationship( $options, $skipvector )
474 Adds the relationship specified wherever the relevant readings appear together
475 in the graph. Options as in add_relationship above.
479 sub add_global_relationship {
480 my( $self, $options ) = @_;
481 # First see if we are dealing with a relationship object already
483 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
484 $relationship = $options;
486 # Then see if a scoped relationship already applies for the words.
487 my $scopedrel = $self->scoped_relationship(
488 $options->{reading_a}, $options->{reading_b} );
489 $relationship = $scopedrel ? $scopedrel
490 : $self->create( $options );
493 throw( "Relationship passed to add_global is not global" )
494 unless $relationship->nonlocal;
495 throw( "Relationship passed to add_global is not a valid global type" )
496 unless $relationship->colocated && $relationship->type ne 'collated';
498 # Apply the relationship wherever it is valid
500 foreach my $v ( $self->_find_applicable( $relationship ) ) {
501 my $exists = $self->get_relationship( @$v );
502 if( $exists && $exists->type ne 'collated' ) {
503 throw( "Found conflicting relationship at @$v" )
504 unless $exists->type eq $relationship->type
505 && $exists->scope eq $relationship->scope;
507 my @added = $self->add_relationship( @$v, $relationship );
508 push( @pairs_set, @added );
515 =head2 del_scoped_relationship( $reading_a, $reading_b )
517 Returns the general (document-level or global) relationship that has been defined
518 between the two reading strings. Returns undef if there is no general relationship.
522 sub del_scoped_relationship {
523 my( $self, $rdga, $rdgb ) = @_;
524 my( $first, $second ) = sort( $rdga, $rdgb );
525 return delete $self->scopedrels->{$first}->{$second};
528 sub _find_applicable {
529 my( $self, $rel ) = @_;
530 my $c = $self->collation;
531 # TODO Someday we might use a case sensitive language.
532 my $lang = $c->tradition->language;
534 my @identical_readings;
535 if( $rel->type eq 'orthographic' ) {
536 @identical_readings = grep { $_->text eq $rel->reading_a }
539 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
542 foreach my $ir ( @identical_readings ) {
544 if( $rel->type eq 'orthographic' ) {
545 @itarget = grep { $_->rank == $ir->rank
546 && $_->text eq $rel->reading_b } $c->readings;
548 @itarget = grep { $_->rank == $ir->rank
549 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
552 # Warn if there is more than one hit with no orth link between them.
553 my $itmain = shift @itarget;
556 map { $all_targets{$_} = 1 } @itarget;
557 map { delete $all_targets{$_} }
558 $self->related_readings( $itmain,
559 sub { $_[0]->type eq 'orthographic' } );
560 warn "More than one unrelated reading with text " . $itmain->text
561 . " at rank " . $ir->rank . "!" if keys %all_targets;
563 push( @vectors, [ $ir->id, $itmain->id ] );
569 =head2 del_relationship( $source, $target )
571 Removes the relationship between the given readings. If the relationship is
572 non-local, removes the relationship everywhere in the graph.
576 sub del_relationship {
577 my( $self, $source, $target ) = @_;
578 my $rel = $self->get_relationship( $source, $target );
579 return () unless $rel; # Nothing to delete; return an empty set.
580 my $colo = $rel->colocated;
581 my @vectors = ( [ $source, $target ] );
582 $self->_remove_relationship( $colo, $source, $target );
583 if( $rel->nonlocal ) {
584 # Remove the relationship wherever it occurs.
585 # Remove the relationship wherever it occurs.
586 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
587 $self->relationships;
588 foreach my $re ( @rel_edges ) {
589 $self->_remove_relationship( $colo, @$re );
590 push( @vectors, $re );
592 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
597 sub _remove_relationship {
598 my( $self, $equiv, @vector ) = @_;
599 $self->graph->delete_edge( @vector );
600 $self->_break_equivalence( @vector ) if $equiv;
603 =head2 relationship_valid( $source, $target, $type )
605 Checks whether a relationship of type $type may exist between the readings given
606 in $source and $target. Returns a tuple of ( status, message ) where status is
607 a yes/no boolean and, if the answer is no, message gives the reason why.
611 sub relationship_valid {
612 my( $self, $source, $target, $rel, $mustdrop ) = @_;
613 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
614 my $c = $self->collation;
615 ## Assume validity is okay if we are initializing from scratch.
616 return ( 1, "initializing" ) unless $c->tradition->_initialized;
617 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
618 # Check that the two readings do (for a repetition) or do not (for
619 # a transposition) appear in the same witness.
620 # TODO this might be called before witness paths are set...
622 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
623 foreach my $w ( $c->reading_witnesses( $target ) ) {
624 if( $seen_wits{$w} ) {
625 return ( 0, "Readings both occur in witness $w" )
626 if $rel eq 'transposition';
627 return ( 1, "ok" ) if $rel eq 'repetition';
630 return ( 0, "Readings occur only in distinct witnesses" )
631 if $rel eq 'repetition';
633 if ( $rel eq 'transposition' ) {
634 # We also need to check both that the readings occur in distinct
635 # witnesses, and that they are not in the same place. That is,
636 # proposing to link them should cause a witness loop.
637 if( $self->test_equivalence( $source, $target ) ) {
638 return ( 0, "Readings appear to be colocated, not transposed" );
643 } elsif( $rel ne 'repetition' ) {
644 # Check that linking the source and target in a relationship won't lead
645 # to a path loop for any witness.
646 # First, drop/stash any collations that might interfere
647 my $sourceobj = $c->reading( $source );
648 my $targetobj = $c->reading( $target );
649 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
650 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
651 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
652 push( @$mustdrop, $self->_drop_collations( $source ) );
653 push( @$mustdrop, $self->_drop_collations( $target ) );
654 if( $c->end->has_rank ) {
655 foreach my $rk ( $sourcerank .. $targetrank ) {
656 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
657 $c->readings_at_rank( $rk );
661 unless( $self->test_equivalence( $source, $target ) ) {
662 $self->_restore_collations( @$mustdrop );
663 return( 0, "Relationship would create witness loop" );
669 sub _drop_collations {
670 my( $self, $reading ) = @_;
672 foreach my $n ( $self->graph->neighbors( $reading ) ) {
673 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
674 push( @dropped, [ $reading, $n ] );
675 $self->del_relationship( $reading, $n );
676 #print STDERR "Dropped collation $reading -> $n\n";
682 sub _restore_collations {
683 my( $self, @vectors ) = @_;
684 foreach my $v ( @vectors ) {
686 $self->add_relationship( @$v, { 'type' => 'collated' } );
687 #print STDERR "Restored collation @$v\n";
689 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
694 =head2 filter_collations()
696 Utility function. Removes any redundant 'collated' relationships from the graph.
697 A collated relationship is redundant if the readings in question would occupy
698 the same rank regardless of the existence of the relationship.
702 sub filter_collations {
704 my $c = $self->collation;
705 foreach my $r ( 1 .. $c->end->rank - 1 ) {
708 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
709 next if $rdg->is_meta;
711 foreach my $pred ( $rdg->predecessors ) {
712 if( $pred->rank == $r - 1 ) {
714 $anchor = $rdg unless( $anchor );
718 push( @need_collations, $rdg ) unless $ip;
719 $c->relations->_drop_collations( "$rdg" );
722 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
723 unless $c->get_relationship( $anchor, $_ ) } @need_collations
724 : warn "No anchor found at $r";
728 =head2 related_readings( $reading, $filter )
730 Returns a list of readings that are connected via relationship links to $reading.
731 If $filter is set to a subroutine ref, returns only those related readings where
732 $filter( $relationship ) returns a true value.
736 sub related_readings {
737 my( $self, $reading, $filter ) = @_;
739 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
740 $reading = $reading->id;
746 if( $filter eq 'colocated' ) {
747 $filter = sub { $_[0]->colocated };
748 } elsif( !ref( $filter ) ) {
750 $filter = sub { $_[0]->type eq $type };
752 my %found = ( $reading => 1 );
753 my $check = [ $reading ];
757 foreach my $r ( @$check ) {
758 foreach my $nr ( $self->graph->neighbors( $r ) ) {
759 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
760 push( @$more, $nr ) unless exists $found{$nr};
767 delete $found{$reading};
768 @answer = keys %found;
770 @answer = $self->graph->all_reachable( $reading );
772 if( $return_object ) {
773 my $c = $self->collation;
774 return map { $c->reading( $_ ) } @answer;
780 =head2 merge_readings( $kept, $deleted );
782 Makes a best-effort merge of the relationship links between the given readings, and
783 stops tracking the to-be-deleted reading.
788 my( $self, $kept, $deleted, $combined ) = @_;
789 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
790 # Get the pair of kept / rel
791 my @vector = ( $kept );
792 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
793 next if $vector[0] eq $vector[1]; # Don't add a self loop
795 # If kept changes its text, drop the relationship.
798 # If kept / rel already has a relationship, just keep the old
799 my $rel = $self->get_relationship( @vector );
802 # Otherwise, adopt the relationship that would be deleted.
803 $rel = $self->get_relationship( @$edge );
804 $self->_set_relationship( $rel, @vector );
806 $self->_make_equivalence( $deleted, $kept );
809 ### Equivalence logic
811 sub _remove_equivalence_node {
812 my( $self, $node ) = @_;
813 my $group = $self->equivalence( $node );
814 my $nodelist = $self->eqreadings( $group );
815 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
816 $self->equivalence_graph->delete_vertex( $group );
817 $self->remove_eqreadings( $group );
818 $self->remove_equivalence( $group );
819 } elsif( @$nodelist == 1 ) {
820 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
821 " in group that should have only $node" );
823 my @newlist = grep { $_ ne $node } @$nodelist;
824 $self->set_eqreadings( $group, \@newlist );
825 $self->remove_equivalence( $node );
829 =head2 add_equivalence_edge
831 Add an edge in the equivalence graph corresponding to $source -> $target in the
832 collation. Should only be called by Collation.
836 sub add_equivalence_edge {
837 my( $self, $source, $target ) = @_;
838 my $seq = $self->equivalence( $source );
839 my $teq = $self->equivalence( $target );
840 $self->equivalence_graph->add_edge( $seq, $teq );
843 =head2 delete_equivalence_edge
845 Remove an edge in the equivalence graph corresponding to $source -> $target in the
846 collation. Should only be called by Collation.
850 sub delete_equivalence_edge {
851 my( $self, $source, $target ) = @_;
852 my $seq = $self->equivalence( $source );
853 my $teq = $self->equivalence( $target );
854 $self->equivalence_graph->delete_edge( $seq, $teq );
857 sub _is_disconnected {
859 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
860 || scalar $self->equivalence_graph->successorless_vertices > 1 );
863 # Equate two readings in the equivalence graph
864 sub _make_equivalence {
865 my( $self, $source, $target ) = @_;
866 # Get the source equivalent readings
867 my $seq = $self->equivalence( $source );
868 my $teq = $self->equivalence( $target );
869 # Nothing to do if they are already equivalent...
870 return if $seq eq $teq;
871 my $sourcepool = $self->eqreadings( $seq );
872 # and add them to the target readings.
873 push( @{$self->eqreadings( $teq )}, @$sourcepool );
874 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
875 # Then merge the nodes in the equivalence graph.
876 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
877 $self->equivalence_graph->add_edge( $pred, $teq );
879 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
880 $self->equivalence_graph->add_edge( $teq, $succ );
882 $self->equivalence_graph->delete_vertex( $seq );
883 # TODO enable this after collation parsing is done
884 throw( "Graph got disconnected making $source / $target equivalence" )
885 if $self->_is_disconnected && $self->collation->tradition->_initialized;
888 =head2 test_equivalence
890 Test whether, if two readings were equated with a 'colocated' relationship,
891 the graph would still be valid.
895 sub test_equivalence {
896 my( $self, $source, $target ) = @_;
897 # Try merging the nodes in the equivalence graph; return a true value if
898 # no cycle is introduced thereby. Restore the original graph first.
900 # Keep track of edges we add
903 # Get the reading equivalents
904 my $seq = $self->equivalence( $source );
905 my $teq = $self->equivalence( $target );
906 # Maybe this is easy?
907 return 1 if $seq eq $teq;
909 # Save the first graph
910 my $checkstr = $self->equivalence_graph->stringify();
911 # Add and save relevant edges
912 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
913 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
914 $added_pred{$pred} = 0;
916 $self->equivalence_graph->add_edge( $pred, $teq );
917 $added_pred{$pred} = 1;
920 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
921 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
922 $added_succ{$succ} = 0;
924 $self->equivalence_graph->add_edge( $teq, $succ );
925 $added_succ{$succ} = 1;
928 # Delete source equivalent and test
929 $self->equivalence_graph->delete_vertex( $seq );
930 my $ret = !$self->equivalence_graph->has_a_cycle;
932 # Restore what we changed
933 $self->equivalence_graph->add_vertex( $seq );
934 foreach my $pred ( keys %added_pred ) {
935 $self->equivalence_graph->add_edge( $pred, $seq );
936 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
938 foreach my $succ ( keys %added_succ ) {
939 $self->equivalence_graph->add_edge( $seq, $succ );
940 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
942 unless( $self->equivalence_graph->eq( $checkstr ) ) {
943 warn "GRAPH CHANGED after testing";
949 # Unmake an equivalence link between two readings. Should only be called internally.
950 sub _break_equivalence {
951 my( $self, $source, $target ) = @_;
953 # This is the hard one. Need to reconstruct the equivalence groups without
956 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
957 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
958 # If these groups intersect, they are still connected; do nothing.
959 foreach my $el ( keys %tng ) {
960 return if( exists $sng{$el} );
962 # If they don't intersect, then we split the nodes in the graph and in
963 # the hashes. First figure out which group has which name
964 my $oldgroup = $self->equivalence( $source ); # same as $target
965 my $keepsource = $sng{$oldgroup};
966 my $newgroup = $keepsource ? $target : $source;
967 my( $oldmembers, $newmembers );
969 $oldmembers = [ keys %sng ];
970 $newmembers = [ keys %tng ];
972 $oldmembers = [ keys %tng ];
973 $newmembers = [ keys %sng ];
976 # First alter the old group in the hash
977 $self->set_eqreadings( $oldgroup, $oldmembers );
978 foreach my $el ( @$oldmembers ) {
979 $self->set_equivalence( $el, $oldgroup );
982 # then add the new group back to the hash with its new key
983 $self->set_eqreadings( $newgroup, $newmembers );
984 foreach my $el ( @$newmembers ) {
985 $self->set_equivalence( $el, $newgroup );
988 # Now add the new group back to the equivalence graph
989 $self->equivalence_graph->add_vertex( $newgroup );
990 # ...add the appropriate edges to the source group vertext
991 my $c = $self->collation;
992 foreach my $rdg ( @$newmembers ) {
993 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
994 next unless $self->equivalence( $rp );
995 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
997 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
998 next unless $self->equivalence( $rs );
999 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
1003 # ...and figure out which edges on the old group vertex to delete.
1004 my( %old_pred, %old_succ );
1005 foreach my $rdg ( @$oldmembers ) {
1006 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
1007 next unless $self->equivalence( $rp );
1008 $old_pred{$self->equivalence( $rp )} = 1;
1010 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
1011 next unless $self->equivalence( $rs );
1012 $old_succ{$self->equivalence( $rs )} = 1;
1015 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
1016 unless( $old_pred{$p} ) {
1017 $self->equivalence_graph->delete_edge( $p, $oldgroup );
1020 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
1021 unless( $old_succ{$s} ) {
1022 $self->equivalence_graph->delete_edge( $oldgroup, $s );
1025 # TODO enable this after collation parsing is done
1026 throw( "Graph got disconnected breaking $source / $target equivalence" )
1027 if $self->_is_disconnected && $self->collation->tradition->_initialized;
1030 sub _find_equiv_without {
1031 my( $self, $first, $second ) = @_;
1032 my %found = ( $first => 1 );
1033 my $check = [ $first ];
1037 foreach my $r ( @$check ) {
1038 foreach my $nr ( $self->graph->neighbors( $r ) ) {
1039 next if $r eq $second;
1040 if( $self->get_relationship( $r, $nr )->colocated ) {
1041 push( @$more, $nr ) unless exists $found{$nr};
1051 =head2 rebuild_equivalence
1053 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1054 adds all readings and edges, then makes an equivalence for all relationships.
1058 sub rebuild_equivalence {
1060 my $newgraph = Graph->new();
1061 # Set this as the new equivalence graph
1062 $self->_reset_equivalence( $newgraph );
1063 # Clear out the data hashes
1064 $self->_clear_equivalence;
1065 $self->_clear_eqreadings;
1067 $self->collation->tradition->_init_done(0);
1069 foreach my $r ( $self->collation->readings ) {
1071 $newgraph->add_vertex( $rid );
1072 $self->set_equivalence( $rid, $rid );
1073 $self->set_eqreadings( $rid, [ $rid ] );
1077 foreach my $e ( $self->collation->paths ) {
1078 $self->add_equivalence_edge( @$e );
1081 # Now equate the colocated readings. This does no testing;
1082 # it assumes that all preexisting relationships are valid.
1083 foreach my $rel ( $self->relationships ) {
1084 my $relobj = $self->get_relationship( $rel );
1085 next unless $relobj && $relobj->colocated;
1086 $self->_make_equivalence( @$rel );
1088 $self->collation->tradition->_init_done(1);
1091 =head2 equivalence_ranks
1093 Rank all vertices in the equivalence graph, and return a hash reference with
1094 vertex => rank mapping.
1098 sub equivalence_ranks {
1100 my $eqstart = $self->equivalence( $self->collation->start );
1101 my $eqranks = { $eqstart => 0 };
1102 my $rankeqs = { 0 => [ $eqstart ] };
1103 my @curr_origin = ( $eqstart );
1104 # A little iterative function.
1105 while( @curr_origin ) {
1106 @curr_origin = $self->_assign_rank( $eqranks, $rankeqs, @curr_origin );
1108 return( $eqranks, $rankeqs );
1112 my( $self, $node_ranks, $rank_nodes, @current_nodes ) = @_;
1113 my $graph = $self->equivalence_graph;
1114 # Look at each of the children of @current_nodes. If all the child's
1115 # parents have a rank, assign it the highest rank + 1 and add it to
1116 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1117 # parent gets a rank.
1119 foreach my $c ( @current_nodes ) {
1120 warn "Current reading $c has no rank!"
1121 unless exists $node_ranks->{$c};
1122 foreach my $child ( $graph->successors( $c ) ) {
1123 next if exists $node_ranks->{$child};
1124 my $highest_rank = -1;
1126 foreach my $parent ( $graph->predecessors( $child ) ) {
1127 if( exists $node_ranks->{$parent} ) {
1128 $highest_rank = $node_ranks->{$parent}
1129 if $highest_rank <= $node_ranks->{$parent};
1136 my $c_rank = $highest_rank + 1;
1137 # print STDERR "Assigning rank $c_rank to node $child \n";
1138 $node_ranks->{$child} = $c_rank if $node_ranks;
1139 push( @{$rank_nodes->{$c_rank}}, $child ) if $rank_nodes;
1140 push( @next_nodes, $child );
1149 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1151 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1152 $rgraph->setAttribute( 'edgedefault', 'directed' );
1153 $rgraph->setAttribute( 'id', 'relationships', );
1154 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1155 $rgraph->setAttribute( 'parse.edges', 0 );
1156 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1157 $rgraph->setAttribute( 'parse.nodes', 0 );
1158 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1160 # Add the vertices according to their XML IDs
1161 my %rdg_lookup = ( reverse %$node_hash );
1162 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1163 my @nlist = sort keys( %rdg_lookup );
1164 foreach my $n ( @nlist ) {
1165 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1166 $n_el->setAttribute( 'id', $n );
1167 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1169 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1171 # Add the relationship edges, with their object information
1173 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1174 # Add an edge and fill in its relationship info.
1175 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1176 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1177 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1178 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1179 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1181 my $rel_obj = $self->get_relationship( @$e );
1182 foreach my $key ( keys %$edge_keys ) {
1183 my $value = $rel_obj->$key;
1184 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1188 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1196 return $tmp_a <=> $tmp_b;
1199 sub _add_graphml_data {
1200 my( $el, $key, $value ) = @_;
1201 return unless defined $value;
1202 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1203 $data_el->setAttribute( 'key', $key );
1204 $data_el->appendText( $value );
1208 Text::Tradition::Error->throw(
1209 'ident' => 'Relationship error',
1215 __PACKAGE__->meta->make_immutable;