1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 get_relationship
90 Return the relationship object, if any, that exists between two readings.
94 sub get_relationship {
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
108 return $relationship;
111 sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
129 my $rel = $self->get_relationship( $source, $target );
131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157 =head2 add_scoped_relationship( $rel )
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally. Key on whichever reading occurs first alphabetically.
164 sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171 $r->type, $rdga, $rdgb );
174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
178 =head2 scoped_relationship( $reading_a, $reading_b )
180 Returns the general (document-level or global) relationship that has been defined
181 between the two reading strings. Returns undef if there is no general relationship.
185 sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198 for the possible options) between the readings given in $source and $target. Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
202 Returns a status boolean and a list of all reading pairs connected by the call to
210 my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
211 # Test 1: try to equate nodes that are prevented with an intermediate collation
212 ok( $t1, "Parsed test fragment file" );
213 my $c1 = $t1->collation;
214 my $trel = $c1->get_relationship( '9,2', '9,3' );
215 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
216 "Troublesome relationship exists" );
217 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
219 # Try to make the link we want
221 $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
222 ok( 1, "Added cross-collation relationship as expected" );
224 ok( 0, "Existing collation blocked equivalence relationship" );
228 $c1->calculate_ranks();
229 ok( 1, "Successfully calculated ranks" );
231 ok( 0, "Collation now has a cycle" );
234 # Test 2: try to equate nodes that are prevented with a real intermediate
237 my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
238 # Test 1: try to equate nodes that are prevented with an intermediate collation
239 my $c2 = $t2->collation;
240 $c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
241 my $trel2 = $c2->get_relationship( '9,2', '9,3' );
242 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
243 "Created blocking relationship" );
244 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
245 # This time the link ought to fail
247 $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
248 ok( 0, "Existing equivalence blocked crossing relationship" );
250 ok( 1, "Added cross-equivalent bad relationship" );
254 $c2->calculate_ranks();
255 ok( 1, "Successfully calculated ranks" );
257 ok( 0, "Collation now has a cycle" );
264 sub add_relationship {
265 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
269 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
270 $relationship = $options;
271 $thispaironly = 1; # If existing rel, set only where asked.
274 $options->{'scope'} = 'local' unless $options->{'scope'};
275 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
276 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
278 my( $is_valid, $reason ) =
279 $self->relationship_valid( $source, $target, $options->{'type'} );
280 unless( $is_valid ) {
281 throw( "Invalid relationship: $reason" );
284 # Try to create the relationship object.
285 $options->{'reading_a'} = $source_rdg->text;
286 $options->{'reading_b'} = $target_rdg->text;
287 $options->{'orig_a'} = $source;
288 $options->{'orig_b'} = $target;
289 if( $options->{'scope'} ne 'local' ) {
290 # Is there a relationship with this a & b already?
291 # Case-insensitive for non-orthographics.
292 my $rdga = $options->{'type'} eq 'orthographic'
293 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
294 my $rdgb = $options->{'type'} eq 'orthographic'
295 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
296 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
297 if( $otherrel && $otherrel->type eq $options->{type}
298 && $otherrel->scope eq $options->{scope} ) {
299 warn "Applying existing scoped relationship";
300 $relationship = $otherrel;
303 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
307 # Find all the pairs for which we need to set the relationship.
308 my @vectors = [ $source, $target ];
309 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
310 push( @vectors, $self->_find_applicable( $relationship ) );
313 # Now set the relationship(s).
315 foreach my $v ( @vectors ) {
316 my $rel = $self->get_relationship( @$v );
317 if( $rel && $rel ne $relationship ) {
318 if( $rel->nonlocal ) {
319 throw( "Found conflicting relationship at @$v" );
320 } elsif( $rel->type ne 'collated' ) {
321 # Replace a collation relationship; leave any other sort in place.
322 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
323 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
324 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
325 warn sprintf( "Not overriding local relationship %s with global %s "
326 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
327 @$v, $rel->reading_a, $rel->reading_b );
332 map { $self->_drop_collations( $_ ) } @$v;
333 $self->_set_relationship( $relationship, @$v );
334 push( @pairs_set, $v );
340 =head2 del_scoped_relationship( $reading_a, $reading_b )
342 Returns the general (document-level or global) relationship that has been defined
343 between the two reading strings. Returns undef if there is no general relationship.
347 sub del_scoped_relationship {
348 my( $self, $rdga, $rdgb ) = @_;
349 my( $first, $second ) = sort( $rdga, $rdgb );
350 return delete $self->scopedrels->{$first}->{$second};
353 sub _find_applicable {
354 my( $self, $rel ) = @_;
355 my $c = $self->collation;
356 # TODO Someday we might use a case sensitive language.
357 my $lang = $c->tradition->language;
359 my @identical_readings;
360 if( $rel->type eq 'orthographic' ) {
361 @identical_readings = grep { $_->text eq $rel->reading_a }
364 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
367 foreach my $ir ( @identical_readings ) {
369 if( $rel->type eq 'orthographic' ) {
370 @itarget = grep { $_->rank == $ir->rank
371 && $_->text eq $rel->reading_b } $c->readings;
373 @itarget = grep { $_->rank == $ir->rank
374 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
377 # Warn if there is more than one hit with no orth link between them.
378 my $itmain = shift @itarget;
381 map { $all_targets{$_} = 1 } @itarget;
382 map { delete $all_targets{$_} }
383 $self->related_readings( $itmain,
384 sub { $_[0]->type eq 'orthographic' } );
385 warn "More than one unrelated reading with text " . $itmain->text
386 . " at rank " . $ir->rank . "!" if keys %all_targets;
388 push( @vectors, [ $ir->id, $itmain->id ] );
394 =head2 del_relationship( $source, $target )
396 Removes the relationship between the given readings. If the relationship is
397 non-local, removes the relationship everywhere in the graph.
401 sub del_relationship {
402 my( $self, $source, $target ) = @_;
403 my $rel = $self->get_relationship( $source, $target );
404 return () unless $rel; # Nothing to delete; return an empty set.
405 my @vectors = ( [ $source, $target ] );
406 $self->_remove_relationship( $source, $target );
407 if( $rel->nonlocal ) {
408 # Remove the relationship wherever it occurs.
409 # Remove the relationship wherever it occurs.
410 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
411 $self->relationships;
412 foreach my $re ( @rel_edges ) {
413 $self->_remove_relationship( @$re );
414 push( @vectors, $re );
416 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
421 sub _remove_relationship {
422 my( $self, @vector ) = @_;
423 $self->graph->delete_edge( @vector );
426 =head2 relationship_valid( $source, $target, $type )
428 Checks whether a relationship of type $type may exist between the readings given
429 in $source and $target. Returns a tuple of ( status, message ) where status is
430 a yes/no boolean and, if the answer is no, message gives the reason why.
434 sub relationship_valid {
435 my( $self, $source, $target, $rel ) = @_;
436 my $c = $self->collation;
437 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
438 # Check that the two readings do (for a repetition) or do not (for
439 # a transposition) appear in the same witness.
440 # TODO this might be called before witness paths are set...
442 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
443 foreach my $w ( $c->reading_witnesses( $target ) ) {
444 if( $seen_wits{$w} ) {
445 return ( 0, "Readings both occur in witness $w" )
446 if $rel eq 'transposition';
447 return ( 1, "ok" ) if $rel eq 'repetition';
450 return $rel eq 'transposition' ? ( 1, "ok" )
451 : ( 0, "Readings occur only in distinct witnesses" );
453 # Check that linking the source and target in a relationship won't lead
454 # to a path loop for any witness. If they have the same rank then fine.
456 if $c->reading( $source )->has_rank
457 && $c->reading( $target )->has_rank
458 && $c->reading( $source )->rank == $c->reading( $target )->rank;
460 # Otherwise, first make a lookup table of all the
461 # readings related to either the source or the target.
462 my @proposed_related = ( $source, $target );
463 # Drop the collation links of source and target, unless we want to
464 # add a collation relationship.
465 foreach my $r ( ( $source, $target ) ) {
466 $self->_drop_collations( $r ) unless $rel eq 'collated';
467 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
470 map { $pr_ids{ $_ } = 1 } @proposed_related;
472 # The cumulative predecessors and successors of the proposed-related readings
473 # should not overlap.
476 foreach my $pr ( keys %pr_ids ) {
477 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
478 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
480 foreach my $k ( keys %all_pred ) {
481 return( 0, "Relationship would create witness loop" )
482 if exists $all_succ{$k};
484 foreach my $k ( keys %pr_ids ) {
485 return( 0, "Relationship would create witness loop" )
486 if exists $all_pred{$k} || exists $all_succ{$k};
492 sub _drop_collations {
493 my( $self, $reading ) = @_;
494 foreach my $n ( $self->graph->neighbors( $reading ) ) {
495 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
496 $self->del_relationship( $reading, $n );
501 =head2 related_readings( $reading, $filter )
503 Returns a list of readings that are connected via relationship links to $reading.
504 If $filter is set to a subroutine ref, returns only those related readings where
505 $filter( $relationship ) returns a true value.
509 sub related_readings {
510 my( $self, $reading, $filter ) = @_;
512 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
513 $reading = $reading->id;
519 if( $filter eq 'colocated' ) {
520 $filter = sub { $_[0]->colocated };
522 my %found = ( $reading => 1 );
523 my $check = [ $reading ];
527 foreach my $r ( @$check ) {
528 foreach my $nr ( $self->graph->neighbors( $r ) ) {
529 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
530 push( @$more, $nr ) unless exists $found{$nr};
537 delete $found{$reading};
538 @answer = keys %found;
540 @answer = $self->graph->all_reachable( $reading );
542 if( $return_object ) {
543 my $c = $self->collation;
544 return map { $c->reading( $_ ) } @answer;
550 =head2 merge_readings( $kept, $deleted );
552 Makes a best-effort merge of the relationship links between the given readings, and
553 stops tracking the to-be-deleted reading.
558 my( $self, $kept, $deleted, $combined ) = @_;
559 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
560 # Get the pair of kept / rel
561 my @vector = ( $kept );
562 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
563 next if $vector[0] eq $vector[1]; # Don't add a self loop
565 # If kept changes its text, drop the relationship.
568 # If kept / rel already has a relationship, just keep the old
569 my $rel = $self->get_relationship( @vector );
572 # Otherwise, adopt the relationship that would be deleted.
573 $rel = $self->get_relationship( @$edge );
574 $self->_set_relationship( $rel, @vector );
576 $self->delete_reading( $deleted );
580 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
582 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
583 $rgraph->setAttribute( 'edgedefault', 'directed' );
584 $rgraph->setAttribute( 'id', 'relationships', );
585 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
586 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
587 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
588 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
589 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
591 # Add the vertices according to their XML IDs
592 my %rdg_lookup = ( reverse %$node_hash );
593 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
594 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
595 $n_el->setAttribute( 'id', $n );
596 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
599 # Add the relationship edges, with their object information
601 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
602 # Add an edge and fill in its relationship info.
603 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
604 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
605 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
606 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
607 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
609 my $rel_obj = $self->get_relationship( @$e );
610 foreach my $key ( keys %$edge_keys ) {
611 my $value = $rel_obj->$key;
612 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
623 return $tmp_a <=> $tmp_b;
626 sub _add_graphml_data {
627 my( $el, $key, $value ) = @_;
628 return unless defined $value;
629 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
630 $data_el->setAttribute( 'key', $key );
631 $data_el->appendText( $value );
635 Text::Tradition::Error->throw(
636 'ident' => 'Relationship error',
642 __PACKAGE__->meta->make_immutable;