1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 get_relationship
90 Return the relationship object, if any, that exists between two readings.
94 sub get_relationship {
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
108 return $relationship;
111 sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
129 my $rel = $self->get_relationship( $source, $target );
131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157 =head2 add_scoped_relationship( $rel )
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally. Key on whichever reading occurs first alphabetically.
164 sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171 $r->type, $rdga, $rdgb );
174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
178 =head2 scoped_relationship( $reading_a, $reading_b )
180 Returns the general (document-level or global) relationship that has been defined
181 between the two reading strings. Returns undef if there is no general relationship.
185 sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198 for the possible options) between the readings given in $source and $target. Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
202 Returns a status boolean and a list of all reading pairs connected by the call to
207 sub add_relationship {
208 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
212 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
213 $relationship = $options;
214 $thispaironly = 1; # If existing rel, set only where asked.
217 $options->{'scope'} = 'local' unless $options->{'scope'};
218 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
220 my( $is_valid, $reason ) =
221 $self->relationship_valid( $source, $target, $options->{'type'} );
222 unless( $is_valid ) {
223 throw( "Invalid relationship: $reason" );
226 # Try to create the relationship object.
227 $options->{'reading_a'} = $source_rdg->text;
228 $options->{'reading_b'} = $target_rdg->text;
229 $options->{'orig_a'} = $source;
230 $options->{'orig_b'} = $target;
231 if( $options->{'scope'} ne 'local' ) {
232 # Is there a relationship with this a & b already?
233 # Case-insensitive for non-orthographics.
234 my $rdga = $options->{'type'} eq 'orthographic'
235 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
236 my $rdgb = $options->{'type'} eq 'orthographic'
237 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
238 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
239 if( $otherrel && $otherrel->type eq $options->{type}
240 && $otherrel->scope eq $options->{scope} ) {
241 warn "Applying existing scoped relationship";
242 $relationship = $otherrel;
245 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
249 # Find all the pairs for which we need to set the relationship.
250 my @vectors = [ $source, $target ];
251 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
252 push( @vectors, $self->_find_applicable( $relationship ) );
255 # Now set the relationship(s).
257 foreach my $v ( @vectors ) {
258 my $rel = $self->get_relationship( @$v );
259 if( $rel && $rel ne $relationship ) {
260 if( $rel->nonlocal ) {
261 throw( "Found conflicting relationship at @$v" );
262 } elsif( $rel->type ne 'collated' ) {
263 # Replace a collation relationship; leave any other sort in place.
264 warn "Not overriding local relationship set at @$v";
268 $self->_set_relationship( $relationship, @$v );
269 push( @pairs_set, $v );
275 sub _find_applicable {
276 my( $self, $rel ) = @_;
277 my $c = $self->collation;
278 # TODO Someday we might use a case sensitive language.
279 my $lang = $c->tradition->language;
281 my @identical_readings;
282 if( $rel->type eq 'orthographic' ) {
283 @identical_readings = grep { $_->text eq $rel->reading_a }
286 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
289 foreach my $ir ( @identical_readings ) {
291 if( $rel->type eq 'orthographic' ) {
292 @itarget = grep { $_->rank == $ir->rank
293 && $_->text eq $rel->reading_b } $c->readings;
295 @itarget = grep { $_->rank == $ir->rank
296 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
299 # Warn if there is more than one hit with no orth link between them.
300 my $itmain = shift @itarget;
303 map { $all_targets{$_} = 1 } @itarget;
304 map { delete $all_targets{$_} }
305 $self->related_readings( $itmain,
306 sub { $_[0]->type eq 'orthographic' } );
307 warn "More than one unrelated reading with text " . $itmain->text
308 . " at rank " . $ir->rank . "!" if keys %all_targets;
310 push( @vectors, [ $ir->id, $itmain->id ] );
316 =head2 del_relationship( $source, $target )
318 Removes the relationship between the given readings. If the relationship is
319 non-local, removes the relationship everywhere in the graph.
323 sub del_relationship {
324 my( $self, $source, $target ) = @_;
325 my $rel = $self->get_relationship( $source, $target );
326 return () unless $rel; # Nothing to delete; return an empty set.
327 my @vectors = ( [ $source, $target ] );
328 $self->_remove_relationship( $source, $target );
329 if( $rel->nonlocal ) {
330 # Remove the relationship wherever it occurs.
331 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
332 $self->relationships;
333 foreach my $re ( @rel_edges ) {
334 $self->_remove_relationship( @$re );
335 push( @vectors, $re );
341 sub _remove_relationship {
342 my( $self, @vector ) = @_;
343 $self->graph->delete_edge( @vector );
346 =head2 relationship_valid( $source, $target, $type )
348 Checks whether a relationship of type $type may exist between the readings given
349 in $source and $target. Returns a tuple of ( status, message ) where status is
350 a yes/no boolean and, if the answer is no, message gives the reason why.
354 sub relationship_valid {
355 my( $self, $source, $target, $rel ) = @_;
356 my $c = $self->collation;
357 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
358 # Check that the two readings do (for a repetition) or do not (for
359 # a transposition) appear in the same witness.
360 # TODO this might be called before witness paths are set...
362 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
363 foreach my $w ( $c->reading_witnesses( $target ) ) {
364 if( $seen_wits{$w} ) {
365 return ( 0, "Readings both occur in witness $w" )
366 if $rel eq 'transposition';
367 return ( 1, "ok" ) if $rel eq 'repetition';
369 return $rel eq 'transposition' ? ( 1, "ok" )
370 : ( 0, "Readings occur only in distinct witnesses" );
373 # Check that linking the source and target in a relationship won't lead
374 # to a path loop for any witness. If they have the same rank then fine.
376 if $c->reading( $source )->has_rank
377 && $c->reading( $target )->has_rank
378 && $c->reading( $source )->rank == $c->reading( $target )->rank;
380 # Otherwise, first make a lookup table of all the
381 # readings related to either the source or the target.
382 my @proposed_related = ( $source, $target );
383 # Drop the collation links of source and target, unless we want to
384 # add a collation relationship.
385 foreach my $r ( ( $source, $target ) ) {
386 $self->_drop_collations( $r ) unless $rel eq 'collated';
387 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
390 map { $pr_ids{ $_ } = 1 } @proposed_related;
392 # The cumulative predecessors and successors of the proposed-related readings
393 # should not overlap.
396 foreach my $pr ( keys %pr_ids ) {
397 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
398 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
400 foreach my $k ( keys %all_pred ) {
401 return( 0, "Relationship would create witness loop" )
402 if exists $all_succ{$k};
404 foreach my $k ( keys %pr_ids ) {
405 return( 0, "Relationship would create witness loop" )
406 if exists $all_pred{$k} || exists $all_succ{$k};
412 sub _drop_collations {
413 my( $self, $reading ) = @_;
414 foreach my $n ( $self->graph->neighbors( $reading ) ) {
415 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
416 $self->del_relationship( $reading, $n );
421 =head2 related_readings( $reading, $filter )
423 Returns a list of readings that are connected via relationship links to $reading.
424 If $filter is set to a subroutine ref, returns only those related readings where
425 $filter( $relationship ) returns a true value.
429 sub related_readings {
430 my( $self, $reading, $filter ) = @_;
432 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
433 $reading = $reading->id;
439 if( $filter eq 'colocated' ) {
440 $filter = sub { $_[0]->colocated };
442 my %found = ( $reading => 1 );
443 my $check = [ $reading ];
447 foreach my $r ( @$check ) {
448 foreach my $nr ( $self->graph->neighbors( $r ) ) {
449 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
450 push( @$more, $nr ) unless exists $found{$nr};
457 delete $found{$reading};
458 @answer = keys %found;
460 @answer = $self->graph->all_reachable( $reading );
462 if( $return_object ) {
463 my $c = $self->collation;
464 return map { $c->reading( $_ ) } @answer;
470 =head2 merge_readings( $kept, $deleted );
472 Makes a best-effort merge of the relationship links between the given readings, and
473 stops tracking the to-be-deleted reading.
478 my( $self, $kept, $deleted, $combined ) = @_;
479 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
480 # Get the pair of kept / rel
481 my @vector = ( $kept );
482 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
483 next if $vector[0] eq $vector[1]; # Don't add a self loop
485 # If kept changes its text, drop the relationship.
488 # If kept / rel already has a relationship, just keep the old
489 my $rel = $self->get_relationship( @vector );
492 # Otherwise, adopt the relationship that would be deleted.
493 $rel = $self->get_relationship( @$edge );
494 $self->_set_relationship( $rel, @vector );
496 $self->delete_reading( $deleted );
500 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
502 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
503 $rgraph->setAttribute( 'edgedefault', 'directed' );
504 $rgraph->setAttribute( 'id', 'relationships', );
505 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
506 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
507 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
508 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
509 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
511 # Add the vertices according to their XML IDs
512 my %rdg_lookup = ( reverse %$node_hash );
513 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
514 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
515 $n_el->setAttribute( 'id', $n );
516 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
519 # Add the relationship edges, with their object information
521 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
522 # Add an edge and fill in its relationship info.
523 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
524 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
525 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
526 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
528 my $rel_obj = $self->get_relationship( @$e );
529 foreach my $key ( keys %$edge_keys ) {
530 my $value = $rel_obj->$key;
531 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
542 return $tmp_a <=> $tmp_b;
545 sub _add_graphml_data {
546 my( $el, $key, $value ) = @_;
547 return unless defined $value;
548 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
549 $data_el->setAttribute( 'key', $key );
550 $data_el->appendText( $value );
554 Text::Tradition::Error->throw(
555 'ident' => 'Relationship error',
561 __PACKAGE__->meta->make_immutable;