1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
52 my @v3 = $c->del_relationship( 'n1', 'n2' );
53 ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55 like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
62 =head2 new( collation => $collation );
64 Creates a new relationship store for the given collation.
70 isa => 'Text::Tradition::Collation',
77 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78 default => sub { {} },
84 default => sub { Graph->new( undirected => 1 ) },
86 relationships => 'edges',
87 add_reading => 'add_vertex',
88 delete_reading => 'delete_vertex',
92 =head2 get_relationship
94 Return the relationship object, if any, that exists between two readings.
98 sub get_relationship {
101 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102 # Dereference the edge arrayref that was passed.
109 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
112 return $relationship;
115 sub _set_relationship {
116 my( $self, $relationship, @vector ) = @_;
117 $self->graph->add_edge( @vector );
118 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
123 Create a new relationship with the given options and return it.
124 Warn and return undef if the relationship cannot be created.
129 my( $self, $options ) = @_;
130 # Check to see if a relationship exists between the two given readings
131 my $source = delete $options->{'orig_a'};
132 my $target = delete $options->{'orig_b'};
133 my $rel = $self->get_relationship( $source, $target );
135 if( $rel->type eq 'collated' ) {
136 # Always replace a 'collated' relationship with a more descriptive
138 $self->del_relationship( $source, $target );
139 } elsif( $rel->type ne $options->{'type'} ) {
140 throw( "Another relationship of type " . $rel->type
141 . " already exists between $source and $target" );
147 # Check to see if a nonlocal relationship is defined for the two readings
148 $rel = $self->scoped_relationship( $options->{'reading_a'},
149 $options->{'reading_b'} );
150 if( $rel && $rel->type eq $options->{'type'} ) {
153 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
155 $rel = Text::Tradition::Collation::Relationship->new( $options );
156 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
161 =head2 add_scoped_relationship( $rel )
163 Keep track of relationships defined between specific readings that are scoped
164 non-locally. Key on whichever reading occurs first alphabetically.
168 sub add_scoped_relationship {
169 my( $self, $rel ) = @_;
170 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
172 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
173 $r->type, $rel->reading_a, $rel->reading_b );
176 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
179 =head2 scoped_relationship( $reading_a, $reading_b )
181 Returns the general (document-level or global) relationship that has been defined
182 between the two reading strings. Returns undef if there is no general relationship.
186 sub scoped_relationship {
187 my( $self, $rdga, $rdgb ) = @_;
188 my( $first, $second ) = sort( $rdga, $rdgb );
189 if( exists $self->scopedrels->{$first}->{$second} ) {
190 return $self->scopedrels->{$first}->{$second};
196 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
198 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
199 for the possible options) between the readings given in $source and $target. Sets
200 up a scoped relationship between $sourcetext and $targettext if the relationship is
203 Returns a status boolean and a list of all reading pairs connected by the call to
208 sub add_relationship {
209 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
213 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
214 $relationship = $options;
215 $thispaironly = 1; # If existing rel, set only where asked.
218 $options->{'scope'} = 'local' unless $options->{'scope'};
219 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
221 my( $is_valid, $reason ) =
222 $self->relationship_valid( $source, $target, $options->{'type'} );
223 unless( $is_valid ) {
224 throw( "Invalid relationship: $reason" );
227 # Try to create the relationship object.
228 $options->{'reading_a'} = $source_rdg->text;
229 $options->{'reading_b'} = $target_rdg->text;
230 $options->{'orig_a'} = $source;
231 $options->{'orig_b'} = $target;
232 if( $options->{'scope'} ne 'local' ) {
233 # Is there a relationship with this a & b already?
234 my $otherrel = $self->scoped_relationship( $options->{reading_a},
235 $options->{reading_b} );
236 if( $otherrel && $otherrel->type eq $options->{type}
237 && $otherrel->scope eq $options->{scope} ) {
238 warn "Applying existing scoped relationship";
239 $relationship = $otherrel;
242 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
246 # Find all the pairs for which we need to set the relationship.
247 my @vectors = [ $source, $target ];
248 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
249 push( @vectors, $self->_find_applicable( $relationship ) );
252 # Now set the relationship(s).
254 foreach my $v ( @vectors ) {
255 my $rel = $self->get_relationship( @$v );
256 if( $rel && $rel ne $relationship ) {
257 if( $rel->nonlocal ) {
258 throw( "Found conflicting relationship at @$v" );
260 warn "Not overriding local relationship set at @$v";
264 $self->_set_relationship( $relationship, @$v );
265 push( @pairs_set, $v );
271 sub _find_applicable {
272 my( $self, $rel ) = @_;
273 my $c = $self->collation;
274 # TODO Someday we might use a case sensitive language.
275 my $lang = $c->tradition->language;
277 my @identical_readings;
278 if( $rel->type eq 'orthographic' ) {
279 @identical_readings = grep { $_->text eq $rel->reading_a }
282 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
285 foreach my $ir ( @identical_readings ) {
287 if( $rel->type eq 'orthographic' ) {
288 @itarget = grep { $_->rank == $ir->rank
289 && $_->text eq $rel->reading_b } $c->readings;
291 @itarget = grep { $_->rank == $ir->rank
292 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
295 # Warn if there is more than one hit with no orth link between them.
296 my $itmain = shift @itarget;
299 map { $all_targets{$_} = 1 } @itarget;
300 map { delete $all_targets{$_} }
301 $self->related_readings( $itmain,
302 sub { $_[0]->type eq 'orthographic' } );
303 warn "More than one unrelated reading with text " . $itmain->text
304 . " at rank " . $ir->rank . "!" if keys %all_targets;
306 push( @vectors, [ $ir->id, $itmain->id ] );
312 =head2 del_relationship( $source, $target )
314 Removes the relationship between the given readings. If the relationship is
315 non-local, removes the relationship everywhere in the graph.
319 sub del_relationship {
320 my( $self, $source, $target ) = @_;
321 my $rel = $self->get_relationship( $source, $target );
322 throw( "No relationship defined between $source and $target" ) unless $rel;
323 my @vectors = ( [ $source, $target ] );
324 $self->_remove_relationship( $source, $target );
325 if( $rel->nonlocal ) {
326 # Remove the relationship wherever it occurs.
327 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
328 $self->relationships;
329 foreach my $re ( @rel_edges ) {
330 $self->_remove_relationship( @$re );
331 push( @vectors, $re );
337 sub _remove_relationship {
338 my( $self, @vector ) = @_;
339 $self->graph->delete_edge( @vector );
342 =head2 relationship_valid( $source, $target, $type )
344 Checks whether a relationship of type $type may exist between the readings given
345 in $source and $target. Returns a tuple of ( status, message ) where status is
346 a yes/no boolean and, if the answer is no, message gives the reason why.
350 sub relationship_valid {
351 my( $self, $source, $target, $rel ) = @_;
352 my $c = $self->collation;
353 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
354 # Check that the two readings do (for a repetition) or do not (for
355 # a transposition) appear in the same witness.
356 # TODO this might be called before witness paths are set...
358 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
359 foreach my $w ( $c->reading_witnesses( $target ) ) {
360 if( $seen_wits{$w} ) {
361 return ( 0, "Readings both occur in witness $w" )
362 if $rel eq 'transposition';
363 return ( 1, "ok" ) if $rel eq 'repetition';
365 return $rel eq 'transposition' ? ( 1, "ok" )
366 : ( 0, "Readings occur only in distinct witnesses" );
369 # Check that linking the source and target in a relationship won't lead
370 # to a path loop for any witness. If they have the same rank then fine.
372 if $c->reading( $source )->has_rank
373 && $c->reading( $target )->has_rank
374 && $c->reading( $source )->rank == $c->reading( $target )->rank;
376 # Otherwise, first make a lookup table of all the
377 # readings related to either the source or the target.
378 my @proposed_related = ( $source, $target );
379 # Drop the collation links of source and target, unless we want to
380 # add a collation relationship.
381 foreach my $r ( ( $source, $target ) ) {
382 $self->_drop_collations( $r ) unless $rel eq 'collated';
383 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
386 map { $pr_ids{ $_ } = 1 } @proposed_related;
388 # The cumulative predecessors and successors of the proposed-related readings
389 # should not overlap.
392 foreach my $pr ( keys %pr_ids ) {
393 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
394 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
396 foreach my $k ( keys %all_pred ) {
397 return( 0, "Relationship would create witness loop" )
398 if exists $all_succ{$k};
400 foreach my $k ( keys %pr_ids ) {
401 return( 0, "Relationship would create witness loop" )
402 if exists $all_pred{$k} || exists $all_succ{$k};
408 sub _drop_collations {
409 my( $self, $reading ) = @_;
410 foreach my $n ( $self->graph->neighbors( $reading ) ) {
411 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
412 $self->del_relationship( $reading, $n );
417 =head2 related_readings( $reading, $filter )
419 Returns a list of readings that are connected via relationship links to $reading.
420 If $filter is set to a subroutine ref, returns only those related readings where
421 $filter( $relationship ) returns a true value.
425 sub related_readings {
426 my( $self, $reading, $filter ) = @_;
428 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
429 $reading = $reading->id;
435 if( $filter eq 'colocated' ) {
436 $filter = sub { $_[0]->colocated };
438 my %found = ( $reading => 1 );
439 my $check = [ $reading ];
443 foreach my $r ( @$check ) {
444 foreach my $nr ( $self->graph->neighbors( $r ) ) {
445 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
446 push( @$more, $nr ) unless exists $found{$nr};
453 delete $found{$reading};
454 @answer = keys %found;
456 @answer = $self->graph->all_reachable( $reading );
458 if( $return_object ) {
459 my $c = $self->collation;
460 return map { $c->reading( $_ ) } @answer;
466 =head2 merge_readings( $kept, $deleted );
468 Makes a best-effort merge of the relationship links between the given readings, and
469 stops tracking the to-be-deleted reading.
474 my( $self, $kept, $deleted, $combined ) = @_;
475 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
476 # Get the pair of kept / rel
477 my @vector = ( $kept );
478 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
479 next if $vector[0] eq $vector[1]; # Don't add a self loop
481 # If kept changes its text, drop the relationship.
484 # If kept / rel already has a relationship, warn and keep the old
485 my $rel = $self->get_relationship( @vector );
487 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
491 # Otherwise, adopt the relationship that would be deleted.
492 $rel = $self->get_relationship( @$edge );
493 $self->_set_relationship( $rel, @vector );
495 $self->delete_reading( $deleted );
499 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
501 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
502 $rgraph->setAttribute( 'edgedefault', 'directed' );
503 $rgraph->setAttribute( 'id', 'relationships', );
504 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
505 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
506 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
507 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
508 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
510 # Add the vertices according to their XML IDs
511 my %rdg_lookup = ( reverse %$node_hash );
512 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
513 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
514 $n_el->setAttribute( 'id', $n );
515 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
518 # Add the relationship edges, with their object information
520 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
521 # Add an edge and fill in its relationship info.
522 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
523 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
524 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
525 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
527 my $rel_obj = $self->get_relationship( @$e );
528 foreach my $key ( keys %$edge_keys ) {
529 my $value = $rel_obj->$key;
530 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
541 return $tmp_a <=> $tmp_b;
544 sub _add_graphml_data {
545 my( $el, $key, $value ) = @_;
546 return unless defined $value;
547 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
548 $data_el->setAttribute( 'key', $key );
549 $data_el->appendText( $value );
553 Text::Tradition::Error->throw(
554 'ident' => 'Relationship error',
560 __PACKAGE__->meta->make_immutable;