1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 get_relationship
90 Return the relationship object, if any, that exists between two readings.
94 sub get_relationship {
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
108 return $relationship;
111 sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
129 my $rel = $self->get_relationship( $source, $target );
131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157 =head2 add_scoped_relationship( $rel )
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally. Key on whichever reading occurs first alphabetically.
164 sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171 $r->type, $rdga, $rdgb );
174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
178 =head2 scoped_relationship( $reading_a, $reading_b )
180 Returns the general (document-level or global) relationship that has been defined
181 between the two reading strings. Returns undef if there is no general relationship.
185 sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198 for the possible options) between the readings given in $source and $target. Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
202 Returns a status boolean and a list of all reading pairs connected by the call to
210 my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
211 # Test 1: try to equate nodes that are prevented with an intermediate collation
212 ok( $t1, "Parsed test fragment file" );
213 my $c1 = $t1->collation;
214 my $trel = $c1->get_relationship( '9,2', '9,3' );
215 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
216 "Troublesome relationship exists" );
217 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
219 # Try to make the link we want
221 $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
222 ok( 1, "Added cross-collation relationship as expected" );
224 ok( 0, "Existing collation blocked equivalence relationship" );
228 $c1->calculate_ranks();
229 ok( 1, "Successfully calculated ranks" );
231 ok( 0, "Collation now has a cycle" );
234 # Test 2: try to equate nodes that are prevented with a real intermediate
236 my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
237 my $c2 = $t2->collation;
238 $c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
239 my $trel2 = $c2->get_relationship( '9,2', '9,3' );
240 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
241 "Created blocking relationship" );
242 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
243 # This time the link ought to fail
245 $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
246 ok( 0, "Added cross-equivalent bad relationship" );
248 ok( 1, "Existing equivalence blocked crossing relationship" );
252 $c2->calculate_ranks();
253 ok( 1, "Successfully calculated ranks" );
255 ok( 0, "Collation now has a cycle" );
258 # Test 3: make a straightforward pair of transpositions.
259 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
260 # Test 1: try to equate nodes that are prevented with an intermediate collation
261 my $c3 = $t3->collation;
263 $c3->add_relationship( '36,4', '38,3', { 'type' => 'transposition' } );
264 ok( 1, "Added straightforward transposition" );
266 ok( 0, "Failed to add normal transposition" );
269 $c3->add_relationship( '36,3', '38,2', { 'type' => 'transposition' } );
270 ok( 1, "Added straightforward transposition complement" );
272 ok( 0, "Failed to add normal transposition complement" );
275 # Test 4: try to make a transposition that could be a parallel.
277 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
278 ok( 0, "Added bad colocated transposition" );
280 ok( 1, "Prevented bad colocated transposition" );
283 # Test 5: make the parallel, and then make the transposition again.
285 $c3->add_relationship( '28,3', '29,3', { 'type' => 'orthographic' } );
286 ok( 1, "Equated identical readings for transposition" );
288 ok( 0, "Failed to equate identical readings" );
291 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
292 ok( 1, "Added straightforward transposition complement" );
294 ok( 0, "Failed to add normal transposition complement" );
301 sub add_relationship {
302 my( $self, $source, $target, $options ) = @_;
303 my $c = $self->collation;
307 my $droppedcolls = [];
308 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
309 $relationship = $options;
310 $thispaironly = 1; # If existing rel, set only where asked.
313 $options->{'scope'} = 'local' unless $options->{'scope'};
314 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
315 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
317 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
318 $options->{'type'}, $droppedcolls );
319 unless( $is_valid ) {
320 throw( "Invalid relationship: $reason" );
323 # Try to create the relationship object.
324 $options->{'reading_a'} = $c->reading( $source )->text;
325 $options->{'reading_b'} = $c->reading( $target )->text;
326 $options->{'orig_a'} = $source;
327 $options->{'orig_b'} = $target;
328 if( $options->{'scope'} ne 'local' ) {
329 # Is there a relationship with this a & b already?
330 # Case-insensitive for non-orthographics.
331 my $rdga = $options->{'type'} eq 'orthographic'
332 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
333 my $rdgb = $options->{'type'} eq 'orthographic'
334 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
335 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
336 if( $otherrel && $otherrel->type eq $options->{type}
337 && $otherrel->scope eq $options->{scope} ) {
338 warn "Applying existing scoped relationship";
339 $relationship = $otherrel;
342 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
346 # Find all the pairs for which we need to set the relationship.
348 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
349 push( @vectors, $self->_find_applicable( $relationship ) );
352 # Now set the relationship(s).
354 my $rel = $self->get_relationship( $source, $target );
356 if( $rel && $rel ne $relationship ) {
357 if( $rel->nonlocal ) {
358 throw( "Found conflicting relationship at $source - $target" );
359 } elsif( $rel->type ne 'collated' ) {
360 # Replace a collation relationship; leave any other sort in place.
361 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
362 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
363 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
364 warn sprintf( "Not overriding local relationship %s with global %s "
365 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
366 $source, $target, $rel->reading_a, $rel->reading_b );
371 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
372 push( @pairs_set, [ $source, $target ] );
374 # Set any additional relationships that might be in @vectors.
375 foreach my $v ( @vectors ) {
376 next if $v->[0] eq $source && $v->[1] eq $target;
377 next if $v->[1] eq $source && $v->[0] eq $target;
378 my @added = $self->add_relationship( @$v, $relationship );
379 push( @pairs_set, @added );
382 # Finally, restore whatever collations we can, and return.
383 $self->_restore_collations( @$droppedcolls );
387 =head2 del_scoped_relationship( $reading_a, $reading_b )
389 Returns the general (document-level or global) relationship that has been defined
390 between the two reading strings. Returns undef if there is no general relationship.
394 sub del_scoped_relationship {
395 my( $self, $rdga, $rdgb ) = @_;
396 my( $first, $second ) = sort( $rdga, $rdgb );
397 return delete $self->scopedrels->{$first}->{$second};
400 sub _find_applicable {
401 my( $self, $rel ) = @_;
402 my $c = $self->collation;
403 # TODO Someday we might use a case sensitive language.
404 my $lang = $c->tradition->language;
406 my @identical_readings;
407 if( $rel->type eq 'orthographic' ) {
408 @identical_readings = grep { $_->text eq $rel->reading_a }
411 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
414 foreach my $ir ( @identical_readings ) {
416 if( $rel->type eq 'orthographic' ) {
417 @itarget = grep { $_->rank == $ir->rank
418 && $_->text eq $rel->reading_b } $c->readings;
420 @itarget = grep { $_->rank == $ir->rank
421 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
424 # Warn if there is more than one hit with no orth link between them.
425 my $itmain = shift @itarget;
428 map { $all_targets{$_} = 1 } @itarget;
429 map { delete $all_targets{$_} }
430 $self->related_readings( $itmain,
431 sub { $_[0]->type eq 'orthographic' } );
432 warn "More than one unrelated reading with text " . $itmain->text
433 . " at rank " . $ir->rank . "!" if keys %all_targets;
435 push( @vectors, [ $ir->id, $itmain->id ] );
441 =head2 del_relationship( $source, $target )
443 Removes the relationship between the given readings. If the relationship is
444 non-local, removes the relationship everywhere in the graph.
448 sub del_relationship {
449 my( $self, $source, $target ) = @_;
450 my $rel = $self->get_relationship( $source, $target );
451 return () unless $rel; # Nothing to delete; return an empty set.
452 my @vectors = ( [ $source, $target ] );
453 $self->_remove_relationship( $source, $target );
454 if( $rel->nonlocal ) {
455 # Remove the relationship wherever it occurs.
456 # Remove the relationship wherever it occurs.
457 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
458 $self->relationships;
459 foreach my $re ( @rel_edges ) {
460 $self->_remove_relationship( @$re );
461 push( @vectors, $re );
463 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
468 sub _remove_relationship {
469 my( $self, @vector ) = @_;
470 $self->graph->delete_edge( @vector );
473 =head2 relationship_valid( $source, $target, $type )
475 Checks whether a relationship of type $type may exist between the readings given
476 in $source and $target. Returns a tuple of ( status, message ) where status is
477 a yes/no boolean and, if the answer is no, message gives the reason why.
481 sub relationship_valid {
482 my( $self, $source, $target, $rel, $mustdrop ) = @_;
483 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
484 my $c = $self->collation;
485 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
486 # Check that the two readings do (for a repetition) or do not (for
487 # a transposition) appear in the same witness.
488 # TODO this might be called before witness paths are set...
490 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
491 foreach my $w ( $c->reading_witnesses( $target ) ) {
492 if( $seen_wits{$w} ) {
493 return ( 0, "Readings both occur in witness $w" )
494 if $rel eq 'transposition';
495 return ( 1, "ok" ) if $rel eq 'repetition';
498 return ( 0, "Readings occur only in distinct witnesses" )
499 if $rel eq 'repetition';
501 if ( $rel eq 'transposition' ) {
502 # We also need to check both that the readings occur in distinct
503 # witnesses, and that they are not in the same place. That is,
504 # proposing to link them should cause a witness loop.
506 my( $startrank, $endrank );
507 if( $c->end->has_rank ) {
508 my $cpred = $c->common_predecessor( $source, $target );
509 my $csucc = $c->common_successor( $source, $target );
510 $startrank = $cpred->rank;
511 $endrank = $csucc->rank;
513 my $eqgraph = $c->equivalence_graph( $map, $startrank, $endrank,
515 if( $eqgraph->has_a_cycle ) {
518 return ( 0, "Readings appear to be colocated, not transposed" );
521 } elsif( $rel ne 'repetition' ) {
522 # Check that linking the source and target in a relationship won't lead
523 # to a path loop for any witness.
524 # First, drop/stash any collations that might interfere
525 my $sourceobj = $c->reading( $source );
526 my $targetobj = $c->reading( $target );
527 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
528 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
529 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
530 push( @$mustdrop, $self->_drop_collations( $source ) );
531 push( @$mustdrop, $self->_drop_collations( $target ) );
534 my( $startrank, $endrank );
535 if( $c->end->has_rank ) {
536 my $cpred = $c->common_predecessor( $source, $target );
537 my $csucc = $c->common_successor( $source, $target );
538 $startrank = $cpred->rank;
539 $endrank = $csucc->rank;
540 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
541 foreach my $rk ( $startrank+1 .. $endrank-1 ) {
542 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
543 $c->readings_at_rank( $rk );
547 my $eqgraph = $c->equivalence_graph( $map, $startrank, $endrank,
549 if( $eqgraph->has_a_cycle ) {
550 $self->_restore_collations( @$mustdrop );
551 return( 0, "Relationship would create witness loop" );
557 sub _drop_collations {
558 my( $self, $reading ) = @_;
560 foreach my $n ( $self->graph->neighbors( $reading ) ) {
561 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
562 push( @dropped, [ $reading, $n ] );
563 $self->del_relationship( $reading, $n );
569 sub _restore_collations {
570 my( $self, @vectors ) = @_;
571 foreach my $v ( @vectors ) {
573 $self->add_relationship( @$v, { 'type' => 'collated' } );
575 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
580 =head2 filter_collations()
582 Utility function. Removes any redundant 'collated' relationships from the graph.
583 A collated relationship is redundant if the readings in question would occupy
584 the same rank regardless of the existence of the relationship.
588 sub filter_collations {
590 my $c = $self->collation;
591 foreach my $r ( 1 .. $c->end->rank - 1 ) {
594 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
595 next if $rdg->is_meta;
597 foreach my $pred ( $rdg->predecessors ) {
598 if( $pred->rank == $r - 1 ) {
600 $anchor = $rdg unless( $anchor );
604 push( @need_collations, $rdg ) unless $ip;
605 $c->relations->_drop_collations( "$rdg" );
608 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
609 unless $c->get_relationship( $anchor, $_ ) } @need_collations
610 : warn "No anchor found at $r";
614 =head2 related_readings( $reading, $filter )
616 Returns a list of readings that are connected via relationship links to $reading.
617 If $filter is set to a subroutine ref, returns only those related readings where
618 $filter( $relationship ) returns a true value.
622 sub related_readings {
623 my( $self, $reading, $filter ) = @_;
625 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
626 $reading = $reading->id;
632 if( $filter eq 'colocated' ) {
633 $filter = sub { $_[0]->colocated };
635 my %found = ( $reading => 1 );
636 my $check = [ $reading ];
640 foreach my $r ( @$check ) {
641 foreach my $nr ( $self->graph->neighbors( $r ) ) {
642 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
643 push( @$more, $nr ) unless exists $found{$nr};
650 delete $found{$reading};
651 @answer = keys %found;
653 @answer = $self->graph->all_reachable( $reading );
655 if( $return_object ) {
656 my $c = $self->collation;
657 return map { $c->reading( $_ ) } @answer;
663 =head2 merge_readings( $kept, $deleted );
665 Makes a best-effort merge of the relationship links between the given readings, and
666 stops tracking the to-be-deleted reading.
671 my( $self, $kept, $deleted, $combined ) = @_;
672 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
673 # Get the pair of kept / rel
674 my @vector = ( $kept );
675 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
676 next if $vector[0] eq $vector[1]; # Don't add a self loop
678 # If kept changes its text, drop the relationship.
681 # If kept / rel already has a relationship, just keep the old
682 my $rel = $self->get_relationship( @vector );
685 # Otherwise, adopt the relationship that would be deleted.
686 $rel = $self->get_relationship( @$edge );
687 $self->_set_relationship( $rel, @vector );
689 $self->delete_reading( $deleted );
693 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
695 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
696 $rgraph->setAttribute( 'edgedefault', 'directed' );
697 $rgraph->setAttribute( 'id', 'relationships', );
698 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
699 $rgraph->setAttribute( 'parse.edges', 0 );
700 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
701 $rgraph->setAttribute( 'parse.nodes', 0 );
702 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
704 # Add the vertices according to their XML IDs
705 my %rdg_lookup = ( reverse %$node_hash );
706 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
707 my @nlist = sort keys( %rdg_lookup );
708 foreach my $n ( @nlist ) {
709 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
710 $n_el->setAttribute( 'id', $n );
711 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
713 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
715 # Add the relationship edges, with their object information
717 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
718 # Add an edge and fill in its relationship info.
719 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
720 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
721 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
722 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
723 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
725 my $rel_obj = $self->get_relationship( @$e );
726 foreach my $key ( keys %$edge_keys ) {
727 my $value = $rel_obj->$key;
728 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
732 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
740 return $tmp_a <=> $tmp_b;
743 sub _add_graphml_data {
744 my( $el, $key, $value ) = @_;
745 return unless defined $value;
746 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
747 $data_el->setAttribute( 'key', $key );
748 $data_el->appendText( $value );
752 Text::Tradition::Error->throw(
753 'ident' => 'Relationship error',
759 __PACKAGE__->meta->make_immutable;