1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
52 my @v3 = $c->del_relationship( 'n1', 'n2' );
53 ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55 like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
62 =head2 new( collation => $collation );
64 Creates a new relationship store for the given collation.
70 isa => 'Text::Tradition::Collation',
77 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78 default => sub { {} },
84 default => sub { Graph->new( undirected => 1 ) },
86 relationships => 'edges',
87 add_reading => 'add_vertex',
88 delete_reading => 'delete_vertex',
89 delete_relationship => 'delete_edge',
93 around 'delete_relationship' => sub {
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
104 return $self->$orig( @vector );
107 =head2 get_relationship
109 Return the relationship object, if any, that exists between two readings.
113 sub get_relationship {
116 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
117 # Dereference the edge arrayref that was passed.
124 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
125 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
127 return $relationship;
130 sub _set_relationship {
131 my( $self, $relationship, @vector ) = @_;
132 $self->graph->add_edge( @vector );
133 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
136 sub _remove_relationship {
137 my( $self, @vector ) = @_;
138 $self->graph->delete_edge( @vector );
143 Create a new relationship with the given options and return it.
144 Warn and return undef if the relationship cannot be created.
149 my( $self, $options ) = @_;
150 # Check to see if a relationship exists between the two given readings
151 my $source = delete $options->{'orig_a'};
152 my $target = delete $options->{'orig_b'};
153 my $rel = $self->get_relationship( $source, $target );
155 if( $rel->type ne $options->{'type'} ) {
156 throw( "Another relationship of type " . $rel->type
157 . " already exists between $source and $target" );
163 # Check to see if a nonlocal relationship is defined for the two readings
164 $rel = $self->scoped_relationship( $options->{'reading_a'},
165 $options->{'reading_b'} );
166 if( $rel && $rel->type eq $options->{'type'} ) {
169 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
171 $rel = Text::Tradition::Collation::Relationship->new( $options );
172 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
177 =head2 add_scoped_relationship( $rel )
179 Keep track of relationships defined between specific readings that are scoped
180 non-locally. Key on whichever reading occurs first alphabetically.
184 sub add_scoped_relationship {
185 my( $self, $rel ) = @_;
186 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
188 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
189 $r->type, $rel->reading_a, $rel->reading_b );
192 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
195 =head2 scoped_relationship( $reading_a, $reading_b )
197 Returns the general (document-level or global) relationship that has been defined
198 between the two reading strings. Returns undef if there is no general relationship.
202 sub scoped_relationship {
203 my( $self, $rdga, $rdgb ) = @_;
204 my( $first, $second ) = sort( $rdga, $rdgb );
205 if( exists $self->scopedrels->{$first}->{$second} ) {
206 return $self->scopedrels->{$first}->{$second};
212 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
214 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
215 for the possible options) between the readings given in $source and $target. Sets
216 up a scoped relationship between $sourcetext and $targettext if the relationship is
219 Returns a status boolean and a list of all reading pairs connected by the call to
224 sub add_relationship {
225 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
228 $options->{'scope'} = 'local' unless $options->{'scope'};
230 my( $is_valid, $reason ) =
231 $self->relationship_valid( $source, $target, $options->{'type'} );
232 unless( $is_valid ) {
233 throw( "Invalid relationship: $reason" );
236 # Try to create the relationship object.
237 $options->{'reading_a'} = $source_rdg->text;
238 $options->{'reading_b'} = $target_rdg->text;
239 $options->{'orig_a'} = $source;
240 $options->{'orig_b'} = $target;
241 my $relationship = $self->create( $options ); # Will throw on error
243 # Find all the pairs for which we need to set the relationship.
244 my @vectors = ( [ $source, $target ] );
245 if( $relationship->colocated && $relationship->nonlocal ) {
246 my $c = $self->collation;
247 # Set the same relationship everywhere we can, throughout the graph.
248 my @identical_readings = grep { $_->text eq $relationship->reading_a }
250 foreach my $ir ( @identical_readings ) {
251 next if $ir->id eq $source;
252 # Check to see if there is a target reading with the same text at
255 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
259 warn "More than one reading with text " . $target_rdg->text
260 . " at rank " . $ir->rank . "!" if @itarget > 1;
261 push( @vectors, [ $ir->id, $itarget[0]->id ] );
266 # Now set the relationship(s).
268 foreach my $v ( @vectors ) {
269 my $rel = $self->get_relationship( @$v );
271 if( $rel->nonlocal ) {
272 throw( "Found conflicting relationship at @$v" );
274 warn "Not overriding local relationship set at @$v";
278 $self->_set_relationship( $relationship, @$v );
279 push( @pairs_set, $v );
285 =head2 del_relationship( $source, $target )
287 Removes the relationship between the given readings. If the relationship is
288 non-local, removes the relationship everywhere in the graph.
292 sub del_relationship {
293 my( $self, $source, $target ) = @_;
294 my $rel = $self->get_relationship( $source, $target );
295 throw( "No relationship defined between $source and $target" ) unless $rel;
296 my @vectors = ( [ $source, $target ] );
297 $self->_remove_relationship( $source, $target );
298 if( $rel->nonlocal ) {
299 # Remove the relationship wherever it occurs.
300 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
301 $self->relationships;
302 foreach my $re ( @rel_edges ) {
303 $self->_remove_relationship( @$re );
304 push( @vectors, $re );
310 =head2 relationship_valid( $source, $target, $type )
312 Checks whether a relationship of type $type may exist between the readings given
313 in $source and $target. Returns a tuple of ( status, message ) where status is
314 a yes/no boolean and, if the answer is no, message gives the reason why.
318 sub relationship_valid {
319 my( $self, $source, $target, $rel ) = @_;
320 my $c = $self->collation;
321 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
322 # Check that the two readings do (for a repetition) or do not (for
323 # a transposition) appear in the same witness.
324 # TODO this might be called before witness paths are set...
326 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
327 foreach my $w ( $c->reading_witnesses( $target ) ) {
328 if( $seen_wits{$w} ) {
329 return ( 0, "Readings both occur in witness $w" )
330 if $rel eq 'transposition';
331 return ( 1, "ok" ) if $rel eq 'repetition';
333 return $rel eq 'transposition' ? ( 1, "ok" )
334 : ( 0, "Readings occur only in distinct witnesses" );
337 # Check that linking the source and target in a relationship won't lead
338 # to a path loop for any witness. If they have the same rank then fine.
340 if $c->reading( $source )->has_rank
341 && $c->reading( $target )->has_rank
342 && $c->reading( $source )->rank == $c->reading( $target )->rank;
344 # Otherwise, first make a lookup table of all the
345 # readings related to either the source or the target.
346 my @proposed_related = ( $source, $target );
347 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
348 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
350 map { $pr_ids{ $_ } = 1 } @proposed_related;
352 # The cumulative predecessors and successors of the proposed-related readings
353 # should not overlap.
356 foreach my $pr ( keys %pr_ids ) {
357 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
358 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
360 foreach my $k ( keys %all_pred ) {
361 return( 0, "Relationship would create witness loop" )
362 if exists $all_succ{$k};
364 foreach my $k ( keys %pr_ids ) {
365 return( 0, "Relationship would create witness loop" )
366 if exists $all_pred{$k} || exists $all_succ{$k};
372 =head2 related_readings( $reading, $filter )
374 Returns a list of readings that are connected via relationship links to $reading.
375 If $filter is set to a subroutine ref, returns only those related readings where
376 $filter( $relationship ) returns a true value.
380 sub related_readings {
381 my( $self, $reading, $filter ) = @_;
383 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
384 $reading = $reading->id;
390 if( $filter eq 'colocated' ) {
391 $filter = sub { $_[0]->colocated };
393 my %found = ( $reading => 1 );
394 my $check = [ $reading ];
398 foreach my $r ( @$check ) {
399 foreach my $nr ( $self->graph->neighbors( $r ) ) {
400 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
401 push( @$more, $nr ) unless exists $found{$nr};
408 delete $found{$reading};
409 @answer = keys %found;
411 @answer = $self->graph->all_reachable( $reading );
413 if( $return_object ) {
414 my $c = $self->collation;
415 return map { $c->reading( $_ ) } @answer;
421 =head2 merge_readings( $kept, $deleted );
423 Makes a best-effort merge of the relationship links between the given readings, and
424 stops tracking the to-be-deleted reading.
429 my( $self, $kept, $deleted, $combined ) = @_;
430 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
431 # Get the pair of kept / rel
432 my @vector = ( $kept );
433 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
434 next if $vector[0] eq $vector[1]; # Don't add a self loop
436 # If kept changes its text, drop the relationship.
439 # If kept / rel already has a relationship, warn and keep the old
440 my $rel = $self->get_relationship( @vector );
442 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
446 # Otherwise, adopt the relationship that would be deleted.
447 $rel = $self->get_relationship( @$edge );
448 $self->_set_relationship( $rel, @vector );
450 $self->delete_reading( $deleted );
454 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
456 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
457 $rgraph->setAttribute( 'edgedefault', 'directed' );
458 $rgraph->setAttribute( 'id', 'relationships', );
459 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
460 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
461 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
462 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
463 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
465 # Add the vertices according to their XML IDs
466 my %rdg_lookup = ( reverse %$node_hash );
467 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
468 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
469 $n_el->setAttribute( 'id', $n );
470 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
473 # Add the relationship edges, with their object information
475 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
476 # Add an edge and fill in its relationship info.
477 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
478 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
479 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
480 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
482 my $rel_obj = $self->get_relationship( @$e );
483 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
484 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
485 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
486 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
487 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
488 $rel_obj->non_independent ) if $rel_obj->nonind_set;
497 return $tmp_a <=> $tmp_b;
500 sub _add_graphml_data {
501 my( $el, $key, $value ) = @_;
502 return unless defined $value;
503 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
504 $data_el->setAttribute( 'key', $key );
505 $data_el->appendText( $value );
509 Text::Tradition::Error->throw(
510 'ident' => 'Relationship error',
516 __PACKAGE__->meta->make_immutable;