1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
52 my @v3 = $c->del_relationship( 'n1', 'n2' );
53 ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55 like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
62 =head2 new( collation => $collation );
64 Creates a new relationship store for the given collation.
70 isa => 'Text::Tradition::Collation',
77 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78 default => sub { {} },
84 default => sub { Graph->new( undirected => 1 ) },
86 relationships => 'edges',
87 add_reading => 'add_vertex',
88 delete_reading => 'delete_vertex',
92 =head2 get_relationship
94 Return the relationship object, if any, that exists between two readings.
98 sub get_relationship {
101 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102 # Dereference the edge arrayref that was passed.
109 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
112 return $relationship;
115 sub _set_relationship {
116 my( $self, $relationship, @vector ) = @_;
117 $self->graph->add_edge( @vector );
118 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
123 Create a new relationship with the given options and return it.
124 Warn and return undef if the relationship cannot be created.
129 my( $self, $options ) = @_;
130 # Check to see if a relationship exists between the two given readings
131 my $source = delete $options->{'orig_a'};
132 my $target = delete $options->{'orig_b'};
133 my $rel = $self->get_relationship( $source, $target );
135 if( $rel->type eq 'collated' ) {
136 # Always replace a 'collated' relationship with a more descriptive
138 $self->del_relationship( $source, $target );
139 } elsif( $rel->type ne $options->{'type'} ) {
140 throw( "Another relationship of type " . $rel->type
141 . " already exists between $source and $target" );
147 # Check to see if a nonlocal relationship is defined for the two readings
148 $rel = $self->scoped_relationship( $options->{'reading_a'},
149 $options->{'reading_b'} );
150 if( $rel && $rel->type eq $options->{'type'} ) {
153 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
155 $rel = Text::Tradition::Collation::Relationship->new( $options );
156 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
161 =head2 add_scoped_relationship( $rel )
163 Keep track of relationships defined between specific readings that are scoped
164 non-locally. Key on whichever reading occurs first alphabetically.
168 sub add_scoped_relationship {
169 my( $self, $rel ) = @_;
170 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
171 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
172 my $r = $self->scoped_relationship( $rdga, $rdgb );
174 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
175 $r->type, $rdga, $rdgb );
178 my( $first, $second ) = sort ( $rdga, $rdgb );
179 $self->scopedrels->{$first}->{$second} = $rel;
182 =head2 scoped_relationship( $reading_a, $reading_b )
184 Returns the general (document-level or global) relationship that has been defined
185 between the two reading strings. Returns undef if there is no general relationship.
189 sub scoped_relationship {
190 my( $self, $rdga, $rdgb ) = @_;
191 my( $first, $second ) = sort( $rdga, $rdgb );
192 if( exists $self->scopedrels->{$first}->{$second} ) {
193 return $self->scopedrels->{$first}->{$second};
199 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
201 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
202 for the possible options) between the readings given in $source and $target. Sets
203 up a scoped relationship between $sourcetext and $targettext if the relationship is
206 Returns a status boolean and a list of all reading pairs connected by the call to
211 sub add_relationship {
212 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
216 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
217 $relationship = $options;
218 $thispaironly = 1; # If existing rel, set only where asked.
221 $options->{'scope'} = 'local' unless $options->{'scope'};
222 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
224 my( $is_valid, $reason ) =
225 $self->relationship_valid( $source, $target, $options->{'type'} );
226 unless( $is_valid ) {
227 throw( "Invalid relationship: $reason" );
230 # Try to create the relationship object.
231 $options->{'reading_a'} = $source_rdg->text;
232 $options->{'reading_b'} = $target_rdg->text;
233 $options->{'orig_a'} = $source;
234 $options->{'orig_b'} = $target;
235 if( $options->{'scope'} ne 'local' ) {
236 # Is there a relationship with this a & b already?
237 # Case-insensitive for non-orthographics.
238 my $rdga = $options->{'type'} eq 'orthographic'
239 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
240 my $rdgb = $options->{'type'} eq 'orthographic'
241 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
242 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
243 if( $otherrel && $otherrel->type eq $options->{type}
244 && $otherrel->scope eq $options->{scope} ) {
245 warn "Applying existing scoped relationship";
246 $relationship = $otherrel;
249 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
253 # Find all the pairs for which we need to set the relationship.
254 my @vectors = [ $source, $target ];
255 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
256 push( @vectors, $self->_find_applicable( $relationship ) );
258 $DB::single = 1 if grep { $_->[0] eq 'w494' || $_->[1] eq 'w494' } @vectors;
260 # Now set the relationship(s).
262 foreach my $v ( @vectors ) {
263 my $rel = $self->get_relationship( @$v );
264 if( $rel && $rel ne $relationship ) {
265 if( $rel->nonlocal ) {
266 throw( "Found conflicting relationship at @$v" );
268 warn "Not overriding local relationship set at @$v";
272 $self->_set_relationship( $relationship, @$v );
273 push( @pairs_set, $v );
279 sub _find_applicable {
280 my( $self, $rel ) = @_;
281 my $c = $self->collation;
282 # TODO Someday we might use a case sensitive language.
283 my $lang = $c->tradition->language;
285 my @identical_readings;
286 if( $rel->type eq 'orthographic' ) {
287 @identical_readings = grep { $_->text eq $rel->reading_a }
290 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
293 foreach my $ir ( @identical_readings ) {
295 if( $rel->type eq 'orthographic' ) {
296 @itarget = grep { $_->rank == $ir->rank
297 && $_->text eq $rel->reading_b } $c->readings;
299 @itarget = grep { $_->rank == $ir->rank
300 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
303 # Warn if there is more than one hit with no orth link between them.
304 my $itmain = shift @itarget;
307 map { $all_targets{$_} = 1 } @itarget;
308 map { delete $all_targets{$_} }
309 $self->related_readings( $itmain,
310 sub { $_[0]->type eq 'orthographic' } );
311 warn "More than one unrelated reading with text " . $itmain->text
312 . " at rank " . $ir->rank . "!" if keys %all_targets;
314 push( @vectors, [ $ir->id, $itmain->id ] );
320 =head2 del_relationship( $source, $target )
322 Removes the relationship between the given readings. If the relationship is
323 non-local, removes the relationship everywhere in the graph.
327 sub del_relationship {
328 my( $self, $source, $target ) = @_;
329 my $rel = $self->get_relationship( $source, $target );
330 throw( "No relationship defined between $source and $target" ) unless $rel;
331 my @vectors = ( [ $source, $target ] );
332 $self->_remove_relationship( $source, $target );
333 if( $rel->nonlocal ) {
334 # Remove the relationship wherever it occurs.
335 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
336 $self->relationships;
337 foreach my $re ( @rel_edges ) {
338 $self->_remove_relationship( @$re );
339 push( @vectors, $re );
345 sub _remove_relationship {
346 my( $self, @vector ) = @_;
347 $self->graph->delete_edge( @vector );
350 =head2 relationship_valid( $source, $target, $type )
352 Checks whether a relationship of type $type may exist between the readings given
353 in $source and $target. Returns a tuple of ( status, message ) where status is
354 a yes/no boolean and, if the answer is no, message gives the reason why.
358 sub relationship_valid {
359 my( $self, $source, $target, $rel ) = @_;
360 my $c = $self->collation;
361 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
362 # Check that the two readings do (for a repetition) or do not (for
363 # a transposition) appear in the same witness.
364 # TODO this might be called before witness paths are set...
366 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
367 foreach my $w ( $c->reading_witnesses( $target ) ) {
368 if( $seen_wits{$w} ) {
369 return ( 0, "Readings both occur in witness $w" )
370 if $rel eq 'transposition';
371 return ( 1, "ok" ) if $rel eq 'repetition';
373 return $rel eq 'transposition' ? ( 1, "ok" )
374 : ( 0, "Readings occur only in distinct witnesses" );
377 # Check that linking the source and target in a relationship won't lead
378 # to a path loop for any witness. If they have the same rank then fine.
380 if $c->reading( $source )->has_rank
381 && $c->reading( $target )->has_rank
382 && $c->reading( $source )->rank == $c->reading( $target )->rank;
384 # Otherwise, first make a lookup table of all the
385 # readings related to either the source or the target.
386 my @proposed_related = ( $source, $target );
387 # Drop the collation links of source and target, unless we want to
388 # add a collation relationship.
389 foreach my $r ( ( $source, $target ) ) {
390 $self->_drop_collations( $r ) unless $rel eq 'collated';
391 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
394 map { $pr_ids{ $_ } = 1 } @proposed_related;
396 # The cumulative predecessors and successors of the proposed-related readings
397 # should not overlap.
400 foreach my $pr ( keys %pr_ids ) {
401 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
402 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
404 foreach my $k ( keys %all_pred ) {
405 return( 0, "Relationship would create witness loop" )
406 if exists $all_succ{$k};
408 foreach my $k ( keys %pr_ids ) {
409 return( 0, "Relationship would create witness loop" )
410 if exists $all_pred{$k} || exists $all_succ{$k};
416 sub _drop_collations {
417 my( $self, $reading ) = @_;
418 foreach my $n ( $self->graph->neighbors( $reading ) ) {
419 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
420 $self->del_relationship( $reading, $n );
425 =head2 related_readings( $reading, $filter )
427 Returns a list of readings that are connected via relationship links to $reading.
428 If $filter is set to a subroutine ref, returns only those related readings where
429 $filter( $relationship ) returns a true value.
433 sub related_readings {
434 my( $self, $reading, $filter ) = @_;
436 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
437 $reading = $reading->id;
443 if( $filter eq 'colocated' ) {
444 $filter = sub { $_[0]->colocated };
446 my %found = ( $reading => 1 );
447 my $check = [ $reading ];
451 foreach my $r ( @$check ) {
452 foreach my $nr ( $self->graph->neighbors( $r ) ) {
453 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
454 push( @$more, $nr ) unless exists $found{$nr};
461 delete $found{$reading};
462 @answer = keys %found;
464 @answer = $self->graph->all_reachable( $reading );
466 if( $return_object ) {
467 my $c = $self->collation;
468 return map { $c->reading( $_ ) } @answer;
474 =head2 merge_readings( $kept, $deleted );
476 Makes a best-effort merge of the relationship links between the given readings, and
477 stops tracking the to-be-deleted reading.
482 my( $self, $kept, $deleted, $combined ) = @_;
483 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
484 # Get the pair of kept / rel
485 my @vector = ( $kept );
486 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
487 next if $vector[0] eq $vector[1]; # Don't add a self loop
489 # If kept changes its text, drop the relationship.
492 # If kept / rel already has a relationship, just keep the old
493 my $rel = $self->get_relationship( @vector );
496 # Otherwise, adopt the relationship that would be deleted.
497 $rel = $self->get_relationship( @$edge );
498 $self->_set_relationship( $rel, @vector );
500 $self->delete_reading( $deleted );
504 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
506 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
507 $rgraph->setAttribute( 'edgedefault', 'directed' );
508 $rgraph->setAttribute( 'id', 'relationships', );
509 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
510 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
511 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
512 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
513 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
515 # Add the vertices according to their XML IDs
516 my %rdg_lookup = ( reverse %$node_hash );
517 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
518 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
519 $n_el->setAttribute( 'id', $n );
520 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
523 # Add the relationship edges, with their object information
525 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
526 # Add an edge and fill in its relationship info.
527 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
528 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
529 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
530 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
532 my $rel_obj = $self->get_relationship( @$e );
533 foreach my $key ( keys %$edge_keys ) {
534 my $value = $rel_obj->$key;
535 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
546 return $tmp_a <=> $tmp_b;
549 sub _add_graphml_data {
550 my( $el, $key, $value ) = @_;
551 return unless defined $value;
552 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
553 $data_el->setAttribute( 'key', $key );
554 $data_el->appendText( $value );
558 Text::Tradition::Error->throw(
559 'ident' => 'Relationship error',
565 __PACKAGE__->meta->make_immutable;