1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 get_relationship
90 Return the relationship object, if any, that exists between two readings.
94 sub get_relationship {
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
108 return $relationship;
111 sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
129 my $rel = $self->get_relationship( $source, $target );
131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157 =head2 add_scoped_relationship( $rel )
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally. Key on whichever reading occurs first alphabetically.
164 sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171 $r->type, $rdga, $rdgb );
174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
178 =head2 scoped_relationship( $reading_a, $reading_b )
180 Returns the general (document-level or global) relationship that has been defined
181 between the two reading strings. Returns undef if there is no general relationship.
185 sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198 for the possible options) between the readings given in $source and $target. Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
202 Returns a status boolean and a list of all reading pairs connected by the call to
207 sub add_relationship {
208 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
212 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
213 $relationship = $options;
214 $thispaironly = 1; # If existing rel, set only where asked.
217 $options->{'scope'} = 'local' unless $options->{'scope'};
218 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
220 my( $is_valid, $reason ) =
221 $self->relationship_valid( $source, $target, $options->{'type'} );
222 unless( $is_valid ) {
223 throw( "Invalid relationship: $reason" );
226 # Try to create the relationship object.
227 $options->{'reading_a'} = $source_rdg->text;
228 $options->{'reading_b'} = $target_rdg->text;
229 $options->{'orig_a'} = $source;
230 $options->{'orig_b'} = $target;
231 if( $options->{'scope'} ne 'local' ) {
232 # Is there a relationship with this a & b already?
233 # Case-insensitive for non-orthographics.
234 my $rdga = $options->{'type'} eq 'orthographic'
235 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
236 my $rdgb = $options->{'type'} eq 'orthographic'
237 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
238 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
239 if( $otherrel && $otherrel->type eq $options->{type}
240 && $otherrel->scope eq $options->{scope} ) {
241 warn "Applying existing scoped relationship";
242 $relationship = $otherrel;
245 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
249 # Find all the pairs for which we need to set the relationship.
250 my @vectors = [ $source, $target ];
251 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
252 push( @vectors, $self->_find_applicable( $relationship ) );
255 # Now set the relationship(s).
257 foreach my $v ( @vectors ) {
258 my $rel = $self->get_relationship( @$v );
259 if( $rel && $rel ne $relationship ) {
260 if( $rel->nonlocal ) {
261 throw( "Found conflicting relationship at @$v" );
262 } elsif( $rel->type ne 'collated' ) {
263 # Replace a collation relationship; leave any other sort in place.
264 warn "Not overriding local relationship set at @$v";
268 $self->_set_relationship( $relationship, @$v );
269 push( @pairs_set, $v );
275 =head2 del_scoped_relationship( $reading_a, $reading_b )
277 Returns the general (document-level or global) relationship that has been defined
278 between the two reading strings. Returns undef if there is no general relationship.
282 sub del_scoped_relationship {
283 my( $self, $rdga, $rdgb ) = @_;
284 my( $first, $second ) = sort( $rdga, $rdgb );
285 return delete $self->scopedrels->{$first}->{$second};
288 sub _find_applicable {
289 my( $self, $rel ) = @_;
290 my $c = $self->collation;
291 # TODO Someday we might use a case sensitive language.
292 my $lang = $c->tradition->language;
294 my @identical_readings;
295 if( $rel->type eq 'orthographic' ) {
296 @identical_readings = grep { $_->text eq $rel->reading_a }
299 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
302 foreach my $ir ( @identical_readings ) {
304 if( $rel->type eq 'orthographic' ) {
305 @itarget = grep { $_->rank == $ir->rank
306 && $_->text eq $rel->reading_b } $c->readings;
308 @itarget = grep { $_->rank == $ir->rank
309 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
312 # Warn if there is more than one hit with no orth link between them.
313 my $itmain = shift @itarget;
316 map { $all_targets{$_} = 1 } @itarget;
317 map { delete $all_targets{$_} }
318 $self->related_readings( $itmain,
319 sub { $_[0]->type eq 'orthographic' } );
320 warn "More than one unrelated reading with text " . $itmain->text
321 . " at rank " . $ir->rank . "!" if keys %all_targets;
323 push( @vectors, [ $ir->id, $itmain->id ] );
329 =head2 del_relationship( $source, $target )
331 Removes the relationship between the given readings. If the relationship is
332 non-local, removes the relationship everywhere in the graph.
336 sub del_relationship {
337 my( $self, $source, $target ) = @_;
338 my $rel = $self->get_relationship( $source, $target );
339 return () unless $rel; # Nothing to delete; return an empty set.
340 my @vectors = ( [ $source, $target ] );
341 $self->_remove_relationship( $source, $target );
342 if( $rel->nonlocal ) {
343 # Remove the relationship wherever it occurs.
344 # Remove the relationship wherever it occurs.
345 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
346 $self->relationships;
347 foreach my $re ( @rel_edges ) {
348 $self->_remove_relationship( @$re );
349 push( @vectors, $re );
351 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
356 sub _remove_relationship {
357 my( $self, @vector ) = @_;
358 $self->graph->delete_edge( @vector );
361 =head2 relationship_valid( $source, $target, $type )
363 Checks whether a relationship of type $type may exist between the readings given
364 in $source and $target. Returns a tuple of ( status, message ) where status is
365 a yes/no boolean and, if the answer is no, message gives the reason why.
369 sub relationship_valid {
370 my( $self, $source, $target, $rel ) = @_;
371 my $c = $self->collation;
372 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
373 # Check that the two readings do (for a repetition) or do not (for
374 # a transposition) appear in the same witness.
375 # TODO this might be called before witness paths are set...
377 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
378 foreach my $w ( $c->reading_witnesses( $target ) ) {
379 if( $seen_wits{$w} ) {
380 return ( 0, "Readings both occur in witness $w" )
381 if $rel eq 'transposition';
382 return ( 1, "ok" ) if $rel eq 'repetition';
384 return $rel eq 'transposition' ? ( 1, "ok" )
385 : ( 0, "Readings occur only in distinct witnesses" );
388 # Check that linking the source and target in a relationship won't lead
389 # to a path loop for any witness. If they have the same rank then fine.
391 if $c->reading( $source )->has_rank
392 && $c->reading( $target )->has_rank
393 && $c->reading( $source )->rank == $c->reading( $target )->rank;
395 # Otherwise, first make a lookup table of all the
396 # readings related to either the source or the target.
397 my @proposed_related = ( $source, $target );
398 # Drop the collation links of source and target, unless we want to
399 # add a collation relationship.
400 foreach my $r ( ( $source, $target ) ) {
401 $self->_drop_collations( $r ) unless $rel eq 'collated';
402 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
405 map { $pr_ids{ $_ } = 1 } @proposed_related;
407 # The cumulative predecessors and successors of the proposed-related readings
408 # should not overlap.
411 foreach my $pr ( keys %pr_ids ) {
412 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
413 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
415 foreach my $k ( keys %all_pred ) {
416 return( 0, "Relationship would create witness loop" )
417 if exists $all_succ{$k};
419 foreach my $k ( keys %pr_ids ) {
420 return( 0, "Relationship would create witness loop" )
421 if exists $all_pred{$k} || exists $all_succ{$k};
427 sub _drop_collations {
428 my( $self, $reading ) = @_;
429 foreach my $n ( $self->graph->neighbors( $reading ) ) {
430 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
431 $self->del_relationship( $reading, $n );
436 =head2 related_readings( $reading, $filter )
438 Returns a list of readings that are connected via relationship links to $reading.
439 If $filter is set to a subroutine ref, returns only those related readings where
440 $filter( $relationship ) returns a true value.
444 sub related_readings {
445 my( $self, $reading, $filter ) = @_;
447 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
448 $reading = $reading->id;
454 if( $filter eq 'colocated' ) {
455 $filter = sub { $_[0]->colocated };
457 my %found = ( $reading => 1 );
458 my $check = [ $reading ];
462 foreach my $r ( @$check ) {
463 foreach my $nr ( $self->graph->neighbors( $r ) ) {
464 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
465 push( @$more, $nr ) unless exists $found{$nr};
472 delete $found{$reading};
473 @answer = keys %found;
475 @answer = $self->graph->all_reachable( $reading );
477 if( $return_object ) {
478 my $c = $self->collation;
479 return map { $c->reading( $_ ) } @answer;
485 =head2 merge_readings( $kept, $deleted );
487 Makes a best-effort merge of the relationship links between the given readings, and
488 stops tracking the to-be-deleted reading.
493 my( $self, $kept, $deleted, $combined ) = @_;
494 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
495 # Get the pair of kept / rel
496 my @vector = ( $kept );
497 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
498 next if $vector[0] eq $vector[1]; # Don't add a self loop
500 # If kept changes its text, drop the relationship.
503 # If kept / rel already has a relationship, just keep the old
504 my $rel = $self->get_relationship( @vector );
507 # Otherwise, adopt the relationship that would be deleted.
508 $rel = $self->get_relationship( @$edge );
509 $self->_set_relationship( $rel, @vector );
511 $self->delete_reading( $deleted );
515 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
517 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
518 $rgraph->setAttribute( 'edgedefault', 'directed' );
519 $rgraph->setAttribute( 'id', 'relationships', );
520 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
521 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
522 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
523 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
524 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
526 # Add the vertices according to their XML IDs
527 my %rdg_lookup = ( reverse %$node_hash );
528 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
529 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
530 $n_el->setAttribute( 'id', $n );
531 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
534 # Add the relationship edges, with their object information
536 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
537 # Add an edge and fill in its relationship info.
538 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
539 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
540 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
541 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
543 my $rel_obj = $self->get_relationship( @$e );
544 foreach my $key ( keys %$edge_keys ) {
545 my $value = $rel_obj->$key;
546 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
557 return $tmp_a <=> $tmp_b;
560 sub _add_graphml_data {
561 my( $el, $key, $value ) = @_;
562 return unless defined $value;
563 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
564 $data_el->setAttribute( 'key', $key );
565 $data_el->appendText( $value );
569 Text::Tradition::Error->throw(
570 'ident' => 'Relationship error',
576 __PACKAGE__->meta->make_immutable;