1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 get_relationship
90 Return the relationship object, if any, that exists between two readings.
94 sub get_relationship {
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
108 return $relationship;
111 sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
129 my $rel = $self->get_relationship( $source, $target );
131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157 =head2 add_scoped_relationship( $rel )
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally. Key on whichever reading occurs first alphabetically.
164 sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171 $r->type, $rdga, $rdgb );
174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
178 =head2 scoped_relationship( $reading_a, $reading_b )
180 Returns the general (document-level or global) relationship that has been defined
181 between the two reading strings. Returns undef if there is no general relationship.
185 sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198 for the possible options) between the readings given in $source and $target. Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
202 Returns a status boolean and a list of all reading pairs connected by the call to
210 my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
211 # Test 1: try to equate nodes that are prevented with an intermediate collation
212 ok( $t1, "Parsed test fragment file" );
213 my $c1 = $t1->collation;
215 $c1->calculate_ranks();
216 my $trel = $c1->get_relationship( '9,2', '9,3' );
217 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
218 "Troublesome relationship exists" );
219 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
221 # Try to make the link we want
223 $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
224 ok( 1, "Added cross-collation relationship as expected" );
226 ok( 0, "Existing collation blocked equivalence relationship" );
230 $c1->calculate_ranks();
231 ok( 1, "Successfully calculated ranks" );
233 ok( 0, "Collation now has a cycle" );
236 # Test 2: try to equate nodes that are prevented with a real intermediate
239 my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
240 # Test 1: try to equate nodes that are prevented with an intermediate collation
241 my $c2 = $t2->collation;
243 $c2->calculate_ranks();
244 $c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
245 my $trel2 = $c2->get_relationship( '9,2', '9,3' );
246 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
247 "Created blocking relationship" );
248 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
249 # This time the link ought to fail
251 $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
252 ok( 0, "Added cross-equivalent bad relationship" );
254 ok( 1, "Existing equivalence blocked crossing relationship" );
258 $c2->calculate_ranks();
259 ok( 1, "Successfully calculated ranks" );
261 ok( 0, "Collation now has a cycle" );
268 sub add_relationship {
269 my( $self, $source, $target, $options ) = @_;
270 my $c = $self->collation;
274 my $droppedcolls = [];
275 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
276 $relationship = $options;
277 $thispaironly = 1; # If existing rel, set only where asked.
280 $options->{'scope'} = 'local' unless $options->{'scope'};
281 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
282 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
284 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
285 $options->{'type'}, $droppedcolls );
286 unless( $is_valid ) {
287 throw( "Invalid relationship: $reason" );
290 # Try to create the relationship object.
291 $options->{'reading_a'} = $c->reading( $source )->text;
292 $options->{'reading_b'} = $c->reading( $target )->text;
293 $options->{'orig_a'} = $source;
294 $options->{'orig_b'} = $target;
295 if( $options->{'scope'} ne 'local' ) {
296 # Is there a relationship with this a & b already?
297 # Case-insensitive for non-orthographics.
298 my $rdga = $options->{'type'} eq 'orthographic'
299 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
300 my $rdgb = $options->{'type'} eq 'orthographic'
301 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
302 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
303 if( $otherrel && $otherrel->type eq $options->{type}
304 && $otherrel->scope eq $options->{scope} ) {
305 warn "Applying existing scoped relationship";
306 $relationship = $otherrel;
309 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
313 # Find all the pairs for which we need to set the relationship.
315 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
316 push( @vectors, $self->_find_applicable( $relationship ) );
319 # Now set the relationship(s).
321 my $rel = $self->get_relationship( $source, $target );
322 if( $rel && $rel ne $relationship ) {
323 if( $rel->nonlocal ) {
324 throw( "Found conflicting relationship at $source - $target" );
325 } elsif( $rel->type ne 'collated' ) {
326 # Replace a collation relationship; leave any other sort in place.
327 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
328 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
329 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
330 warn sprintf( "Not overriding local relationship %s with global %s "
331 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
332 $source, $target, $rel->reading_a, $rel->reading_b );
337 $self->_set_relationship( $relationship, $source, $target );
338 push( @pairs_set, [ $source, $target ] );
340 # Set any additional relationships that might be in @vectors.
341 foreach my $v ( @vectors ) {
342 next if $v->[0] eq $source && $v->[1] eq $target;
343 next if $v->[1] eq $source && $v->[0] eq $target;
344 my @added = $self->add_relationship( @$v, $relationship );
345 push( @pairs_set, @added );
348 # Finally, restore whatever collations we can, and return.
349 $self->_restore_collations( @$droppedcolls );
353 =head2 del_scoped_relationship( $reading_a, $reading_b )
355 Returns the general (document-level or global) relationship that has been defined
356 between the two reading strings. Returns undef if there is no general relationship.
360 sub del_scoped_relationship {
361 my( $self, $rdga, $rdgb ) = @_;
362 my( $first, $second ) = sort( $rdga, $rdgb );
363 return delete $self->scopedrels->{$first}->{$second};
366 sub _find_applicable {
367 my( $self, $rel ) = @_;
368 my $c = $self->collation;
369 # TODO Someday we might use a case sensitive language.
370 my $lang = $c->tradition->language;
372 my @identical_readings;
373 if( $rel->type eq 'orthographic' ) {
374 @identical_readings = grep { $_->text eq $rel->reading_a }
377 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
380 foreach my $ir ( @identical_readings ) {
382 if( $rel->type eq 'orthographic' ) {
383 @itarget = grep { $_->rank == $ir->rank
384 && $_->text eq $rel->reading_b } $c->readings;
386 @itarget = grep { $_->rank == $ir->rank
387 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
390 # Warn if there is more than one hit with no orth link between them.
391 my $itmain = shift @itarget;
394 map { $all_targets{$_} = 1 } @itarget;
395 map { delete $all_targets{$_} }
396 $self->related_readings( $itmain,
397 sub { $_[0]->type eq 'orthographic' } );
398 warn "More than one unrelated reading with text " . $itmain->text
399 . " at rank " . $ir->rank . "!" if keys %all_targets;
401 push( @vectors, [ $ir->id, $itmain->id ] );
407 =head2 del_relationship( $source, $target )
409 Removes the relationship between the given readings. If the relationship is
410 non-local, removes the relationship everywhere in the graph.
414 sub del_relationship {
415 my( $self, $source, $target ) = @_;
416 my $rel = $self->get_relationship( $source, $target );
417 return () unless $rel; # Nothing to delete; return an empty set.
418 my @vectors = ( [ $source, $target ] );
419 $self->_remove_relationship( $source, $target );
420 if( $rel->nonlocal ) {
421 # Remove the relationship wherever it occurs.
422 # Remove the relationship wherever it occurs.
423 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
424 $self->relationships;
425 foreach my $re ( @rel_edges ) {
426 $self->_remove_relationship( @$re );
427 push( @vectors, $re );
429 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
434 sub _remove_relationship {
435 my( $self, @vector ) = @_;
436 $self->graph->delete_edge( @vector );
439 =head2 relationship_valid( $source, $target, $type )
441 Checks whether a relationship of type $type may exist between the readings given
442 in $source and $target. Returns a tuple of ( status, message ) where status is
443 a yes/no boolean and, if the answer is no, message gives the reason why.
447 sub relationship_valid {
448 my( $self, $source, $target, $rel, $mustdrop ) = @_;
449 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
450 my $c = $self->collation;
451 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
452 # Check that the two readings do (for a repetition) or do not (for
453 # a transposition) appear in the same witness.
454 # TODO this might be called before witness paths are set...
456 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
457 foreach my $w ( $c->reading_witnesses( $target ) ) {
458 if( $seen_wits{$w} ) {
459 return ( 0, "Readings both occur in witness $w" )
460 if $rel eq 'transposition';
461 return ( 1, "ok" ) if $rel eq 'repetition';
464 return ( 0, "Readings occur only in distinct witnesses" )
465 if $rel eq 'repetition';
467 if ( $rel eq 'transposition' ) {
468 # We also need to check both that the readings occur in distinct
469 # witnesses, and that they are not in the same place. That is,
470 # proposing to link them should cause a witness loop.
472 my( $startrank, $endrank );
473 if( $c->end->has_rank ) {
474 my $cpred = $c->common_predecessor( $source, $target );
475 my $csucc = $c->common_successor( $source, $target );
476 $startrank = $cpred->rank;
477 $endrank = $csucc->rank;
479 my $eqgraph = $c->equivalence_graph( $map, $startrank, $endrank,
481 if( $eqgraph->has_a_cycle ) {
484 return ( 0, "Readings appear to be colocated, not transposed" );
487 } elsif( $rel ne 'repetition' ) {
488 # Check that linking the source and target in a relationship won't lead
489 # to a path loop for any witness.
490 # First, drop/stash any collations that might interfere
491 my $sourceobj = $c->reading( $source );
492 my $targetobj = $c->reading( $target );
493 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
494 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
495 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
496 push( @$mustdrop, $self->_drop_collations( $source ) );
497 push( @$mustdrop, $self->_drop_collations( $target ) );
500 my( $startrank, $endrank );
501 if( $c->end->has_rank ) {
502 my $cpred = $c->common_predecessor( $source, $target );
503 my $csucc = $c->common_successor( $source, $target );
504 $startrank = $cpred->rank;
505 $endrank = $csucc->rank;
506 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
507 foreach my $rk ( $startrank+1 .. $endrank-1 ) {
508 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
509 $c->readings_at_rank( $rk );
513 my $eqgraph = $c->equivalence_graph( $map, $startrank, $endrank,
515 if( $eqgraph->has_a_cycle ) {
516 $self->_restore_collations( @$mustdrop );
517 return( 0, "Relationship would create witness loop" );
523 sub _drop_collations {
524 my( $self, $reading ) = @_;
526 foreach my $n ( $self->graph->neighbors( $reading ) ) {
527 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
528 push( @dropped, [ $reading, $n ] );
529 $self->del_relationship( $reading, $n );
535 sub _restore_collations {
536 my( $self, @vectors ) = @_;
537 foreach my $v ( @vectors ) {
539 $self->add_relationship( @$v, { 'type' => 'collated' } );
541 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
546 =head2 related_readings( $reading, $filter )
548 Returns a list of readings that are connected via relationship links to $reading.
549 If $filter is set to a subroutine ref, returns only those related readings where
550 $filter( $relationship ) returns a true value.
554 sub related_readings {
555 my( $self, $reading, $filter ) = @_;
557 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
558 $reading = $reading->id;
564 if( $filter eq 'colocated' ) {
565 $filter = sub { $_[0]->colocated };
567 my %found = ( $reading => 1 );
568 my $check = [ $reading ];
572 foreach my $r ( @$check ) {
573 foreach my $nr ( $self->graph->neighbors( $r ) ) {
574 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
575 push( @$more, $nr ) unless exists $found{$nr};
582 delete $found{$reading};
583 @answer = keys %found;
585 @answer = $self->graph->all_reachable( $reading );
587 if( $return_object ) {
588 my $c = $self->collation;
589 return map { $c->reading( $_ ) } @answer;
595 =head2 merge_readings( $kept, $deleted );
597 Makes a best-effort merge of the relationship links between the given readings, and
598 stops tracking the to-be-deleted reading.
603 my( $self, $kept, $deleted, $combined ) = @_;
604 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
605 # Get the pair of kept / rel
606 my @vector = ( $kept );
607 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
608 next if $vector[0] eq $vector[1]; # Don't add a self loop
610 # If kept changes its text, drop the relationship.
613 # If kept / rel already has a relationship, just keep the old
614 my $rel = $self->get_relationship( @vector );
617 # Otherwise, adopt the relationship that would be deleted.
618 $rel = $self->get_relationship( @$edge );
619 $self->_set_relationship( $rel, @vector );
621 $self->delete_reading( $deleted );
625 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
627 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
628 $rgraph->setAttribute( 'edgedefault', 'directed' );
629 $rgraph->setAttribute( 'id', 'relationships', );
630 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
631 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
632 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
633 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
634 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
636 # Add the vertices according to their XML IDs
637 my %rdg_lookup = ( reverse %$node_hash );
638 my @nlist = sort keys( %rdg_lookup );
639 foreach my $n ( @nlist ) {
640 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
641 $n_el->setAttribute( 'id', $n );
642 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
645 # Add the relationship edges, with their object information
647 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
648 # Add an edge and fill in its relationship info.
649 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
650 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
651 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
652 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
653 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
655 my $rel_obj = $self->get_relationship( @$e );
656 foreach my $key ( keys %$edge_keys ) {
657 my $value = $rel_obj->$key;
658 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
669 return $tmp_a <=> $tmp_b;
672 sub _add_graphml_data {
673 my( $el, $key, $value ) = @_;
674 return unless defined $value;
675 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
676 $data_el->setAttribute( 'key', $key );
677 $data_el->appendText( $value );
681 Text::Tradition::Error->throw(
682 'ident' => 'Relationship error',
688 __PACKAGE__->meta->make_immutable;