1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 get_relationship
90 Return the relationship object, if any, that exists between two readings.
94 sub get_relationship {
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
108 return $relationship;
111 sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
129 my $rel = $self->get_relationship( $source, $target );
131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157 =head2 add_scoped_relationship( $rel )
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally. Key on whichever reading occurs first alphabetically.
164 sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171 $r->type, $rdga, $rdgb );
174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
178 =head2 scoped_relationship( $reading_a, $reading_b )
180 Returns the general (document-level or global) relationship that has been defined
181 between the two reading strings. Returns undef if there is no general relationship.
185 sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198 for the possible options) between the readings given in $source and $target. Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
202 Returns a status boolean and a list of all reading pairs connected by the call to
210 my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
211 # Test 1: try to equate nodes that are prevented with an intermediate collation
212 ok( $t1, "Parsed test fragment file" );
213 my $c1 = $t1->collation;
215 $c1->calculate_ranks();
216 my $trel = $c1->get_relationship( '9,2', '9,3' );
217 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
218 "Troublesome relationship exists" );
219 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
221 # Try to make the link we want
223 $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
224 ok( 1, "Added cross-collation relationship as expected" );
226 ok( 0, "Existing collation blocked equivalence relationship" );
230 $c1->calculate_ranks();
231 ok( 1, "Successfully calculated ranks" );
233 ok( 0, "Collation now has a cycle" );
236 # Test 2: try to equate nodes that are prevented with a real intermediate
239 my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
240 # Test 1: try to equate nodes that are prevented with an intermediate collation
241 my $c2 = $t2->collation;
243 $c2->calculate_ranks();
244 $c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
245 my $trel2 = $c2->get_relationship( '9,2', '9,3' );
246 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
247 "Created blocking relationship" );
248 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
249 # This time the link ought to fail
251 $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
252 ok( 0, "Added cross-equivalent bad relationship" );
254 ok( 1, "Existing equivalence blocked crossing relationship" );
258 $c2->calculate_ranks();
259 ok( 1, "Successfully calculated ranks" );
261 ok( 0, "Collation now has a cycle" );
268 sub add_relationship {
269 my( $self, $source, $target, $options ) = @_;
270 my $c = $self->collation;
274 my $droppedcolls = [];
275 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
276 $relationship = $options;
277 $thispaironly = 1; # If existing rel, set only where asked.
280 $options->{'scope'} = 'local' unless $options->{'scope'};
281 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
282 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
284 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
285 $options->{'type'}, $droppedcolls );
286 unless( $is_valid ) {
287 throw( "Invalid relationship: $reason" );
290 # Try to create the relationship object.
291 $options->{'reading_a'} = $c->reading( $source )->text;
292 $options->{'reading_b'} = $c->reading( $target )->text;
293 $options->{'orig_a'} = $source;
294 $options->{'orig_b'} = $target;
295 if( $options->{'scope'} ne 'local' ) {
296 # Is there a relationship with this a & b already?
297 # Case-insensitive for non-orthographics.
298 my $rdga = $options->{'type'} eq 'orthographic'
299 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
300 my $rdgb = $options->{'type'} eq 'orthographic'
301 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
302 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
303 if( $otherrel && $otherrel->type eq $options->{type}
304 && $otherrel->scope eq $options->{scope} ) {
305 warn "Applying existing scoped relationship";
306 $relationship = $otherrel;
309 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
313 # Find all the pairs for which we need to set the relationship.
315 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
316 push( @vectors, $self->_find_applicable( $relationship ) );
319 # Now set the relationship(s).
321 my $rel = $self->get_relationship( $source, $target );
322 if( $rel && $rel ne $relationship ) {
323 if( $rel->nonlocal ) {
324 throw( "Found conflicting relationship at $source - $target" );
325 } elsif( $rel->type ne 'collated' ) {
326 # Replace a collation relationship; leave any other sort in place.
327 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
328 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
329 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
330 warn sprintf( "Not overriding local relationship %s with global %s "
331 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
332 $source, $target, $rel->reading_a, $rel->reading_b );
337 $self->_set_relationship( $relationship, $source, $target );
338 push( @pairs_set, [ $source, $target ] );
340 # Set any additional relationships that might be in @vectors.
341 foreach my $v ( @vectors ) {
342 next if $v->[0] eq $source && $v->[1] eq $target;
343 next if $v->[1] eq $source && $v->[0] eq $target;
344 my @added = $self->add_relationship( @$v, $relationship );
345 push( @pairs_set, @added );
348 # Finally, restore whatever collations we can, and return.
349 $self->_restore_collations( @$droppedcolls );
353 =head2 del_scoped_relationship( $reading_a, $reading_b )
355 Returns the general (document-level or global) relationship that has been defined
356 between the two reading strings. Returns undef if there is no general relationship.
360 sub del_scoped_relationship {
361 my( $self, $rdga, $rdgb ) = @_;
362 my( $first, $second ) = sort( $rdga, $rdgb );
363 return delete $self->scopedrels->{$first}->{$second};
366 sub _find_applicable {
367 my( $self, $rel ) = @_;
368 my $c = $self->collation;
369 # TODO Someday we might use a case sensitive language.
370 my $lang = $c->tradition->language;
372 my @identical_readings;
373 if( $rel->type eq 'orthographic' ) {
374 @identical_readings = grep { $_->text eq $rel->reading_a }
377 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
380 foreach my $ir ( @identical_readings ) {
382 if( $rel->type eq 'orthographic' ) {
383 @itarget = grep { $_->rank == $ir->rank
384 && $_->text eq $rel->reading_b } $c->readings;
386 @itarget = grep { $_->rank == $ir->rank
387 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
390 # Warn if there is more than one hit with no orth link between them.
391 my $itmain = shift @itarget;
394 map { $all_targets{$_} = 1 } @itarget;
395 map { delete $all_targets{$_} }
396 $self->related_readings( $itmain,
397 sub { $_[0]->type eq 'orthographic' } );
398 warn "More than one unrelated reading with text " . $itmain->text
399 . " at rank " . $ir->rank . "!" if keys %all_targets;
401 push( @vectors, [ $ir->id, $itmain->id ] );
407 =head2 del_relationship( $source, $target )
409 Removes the relationship between the given readings. If the relationship is
410 non-local, removes the relationship everywhere in the graph.
414 sub del_relationship {
415 my( $self, $source, $target ) = @_;
416 my $rel = $self->get_relationship( $source, $target );
417 return () unless $rel; # Nothing to delete; return an empty set.
418 my @vectors = ( [ $source, $target ] );
419 $self->_remove_relationship( $source, $target );
420 if( $rel->nonlocal ) {
421 # Remove the relationship wherever it occurs.
422 # Remove the relationship wherever it occurs.
423 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
424 $self->relationships;
425 foreach my $re ( @rel_edges ) {
426 $self->_remove_relationship( @$re );
427 push( @vectors, $re );
429 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
434 sub _remove_relationship {
435 my( $self, @vector ) = @_;
436 $self->graph->delete_edge( @vector );
439 =head2 relationship_valid( $source, $target, $type )
441 Checks whether a relationship of type $type may exist between the readings given
442 in $source and $target. Returns a tuple of ( status, message ) where status is
443 a yes/no boolean and, if the answer is no, message gives the reason why.
447 sub relationship_valid {
448 my( $self, $source, $target, $rel, $mustdrop ) = @_;
449 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
450 my $c = $self->collation;
451 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
452 # Check that the two readings do (for a repetition) or do not (for
453 # a transposition) appear in the same witness.
454 # TODO this might be called before witness paths are set...
456 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
457 foreach my $w ( $c->reading_witnesses( $target ) ) {
458 if( $seen_wits{$w} ) {
459 return ( 0, "Readings both occur in witness $w" )
460 if $rel eq 'transposition';
461 return ( 1, "ok" ) if $rel eq 'repetition';
464 return $rel eq 'transposition' ? ( 1, "ok" )
465 : ( 0, "Readings occur only in distinct witnesses" );
467 # Check that linking the source and target in a relationship won't lead
468 # to a path loop for any witness.
469 # First, drop/stash any collations that might interfere
470 my $sourceobj = $c->reading( $source );
471 my $targetobj = $c->reading( $target );
472 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
473 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
474 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
475 push( @$mustdrop, $self->_drop_collations( $source ) );
476 push( @$mustdrop, $self->_drop_collations( $target ) );
479 my( $startrank, $endrank );
480 if( $c->end->has_rank ) {
481 my $cpred = $c->common_predecessor( $source, $target );
482 my $csucc = $c->common_successor( $source, $target );
483 $startrank = $cpred->rank;
484 $endrank = $csucc->rank;
485 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
486 foreach my $rk ( $startrank+1 .. $endrank-1 ) {
487 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
488 $c->readings_at_rank( $rk );
492 my $eqgraph = $c->equivalence_graph( $map, $startrank, $endrank,
494 if( $eqgraph->has_a_cycle ) {
495 $self->_restore_collations( @$mustdrop );
496 return( 0, "Relationship would create witness loop" );
502 sub _drop_collations {
503 my( $self, $reading ) = @_;
505 foreach my $n ( $self->graph->neighbors( $reading ) ) {
506 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
507 push( @dropped, [ $reading, $n ] );
508 $self->del_relationship( $reading, $n );
514 sub _restore_collations {
515 my( $self, @vectors ) = @_;
516 foreach my $v ( @vectors ) {
518 $self->add_relationship( @$v, { 'type' => 'collated' } );
520 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
525 =head2 related_readings( $reading, $filter )
527 Returns a list of readings that are connected via relationship links to $reading.
528 If $filter is set to a subroutine ref, returns only those related readings where
529 $filter( $relationship ) returns a true value.
533 sub related_readings {
534 my( $self, $reading, $filter ) = @_;
536 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
537 $reading = $reading->id;
543 if( $filter eq 'colocated' ) {
544 $filter = sub { $_[0]->colocated };
546 my %found = ( $reading => 1 );
547 my $check = [ $reading ];
551 foreach my $r ( @$check ) {
552 foreach my $nr ( $self->graph->neighbors( $r ) ) {
553 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
554 push( @$more, $nr ) unless exists $found{$nr};
561 delete $found{$reading};
562 @answer = keys %found;
564 @answer = $self->graph->all_reachable( $reading );
566 if( $return_object ) {
567 my $c = $self->collation;
568 return map { $c->reading( $_ ) } @answer;
574 =head2 merge_readings( $kept, $deleted );
576 Makes a best-effort merge of the relationship links between the given readings, and
577 stops tracking the to-be-deleted reading.
582 my( $self, $kept, $deleted, $combined ) = @_;
583 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
584 # Get the pair of kept / rel
585 my @vector = ( $kept );
586 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
587 next if $vector[0] eq $vector[1]; # Don't add a self loop
589 # If kept changes its text, drop the relationship.
592 # If kept / rel already has a relationship, just keep the old
593 my $rel = $self->get_relationship( @vector );
596 # Otherwise, adopt the relationship that would be deleted.
597 $rel = $self->get_relationship( @$edge );
598 $self->_set_relationship( $rel, @vector );
600 $self->delete_reading( $deleted );
604 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
606 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
607 $rgraph->setAttribute( 'edgedefault', 'directed' );
608 $rgraph->setAttribute( 'id', 'relationships', );
609 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
610 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
611 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
612 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
613 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
615 # Add the vertices according to their XML IDs
616 my %rdg_lookup = ( reverse %$node_hash );
617 my @nlist = sort keys( %rdg_lookup );
618 foreach my $n ( @nlist ) {
619 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
620 $n_el->setAttribute( 'id', $n );
621 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
624 # Add the relationship edges, with their object information
626 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
627 # Add an edge and fill in its relationship info.
628 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
629 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
630 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
631 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
632 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
634 my $rel_obj = $self->get_relationship( @$e );
635 foreach my $key ( keys %$edge_keys ) {
636 my $value = $rel_obj->$key;
637 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
648 return $tmp_a <=> $tmp_b;
651 sub _add_graphml_data {
652 my( $el, $key, $value ) = @_;
653 return unless defined $value;
654 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
655 $data_el->setAttribute( 'key', $key );
656 $data_el->appendText( $value );
660 Text::Tradition::Error->throw(
661 'ident' => 'Relationship error',
667 __PACKAGE__->meta->make_immutable;