try not to let collations interfere with relationship mapping
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
44my @v2 = $c->add_relationship( 'n9', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
49@v2 = $c->del_relationship( 'n8', 'n13' );
50is( scalar @v2, 2, "Deleted second global relationship" );
51try {
52 my @v3 = $c->del_relationship( 'n1', 'n2' );
53 ok( 0, "Should have errored on non-existent relationship" );
54} catch( Text::Tradition::Error $e ) {
55 like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56}
57
3ae5e2ad 58=end testing
59
22222af9 60=head1 METHODS
61
62=head2 new( collation => $collation );
63
64Creates a new relationship store for the given collation.
65
66=cut
67
68has 'collation' => (
69 is => 'ro',
70 isa => 'Text::Tradition::Collation',
71 required => 1,
72 weak_ref => 1,
73 );
74
75has 'scopedrels' => (
76 is => 'ro',
77 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78 default => sub { {} },
79 );
80
81has 'graph' => (
82 is => 'ro',
83 isa => 'Graph',
84 default => sub { Graph->new( undirected => 1 ) },
85 handles => {
86 relationships => 'edges',
87 add_reading => 'add_vertex',
88 delete_reading => 'delete_vertex',
89 },
90 );
91
3ae5e2ad 92=head2 get_relationship
93
94Return the relationship object, if any, that exists between two readings.
95
96=cut
97
98sub get_relationship {
4633f9e4 99 my $self = shift;
100 my @vector;
101 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102 # Dereference the edge arrayref that was passed.
103 my $edge = shift;
104 @vector = @$edge;
105 } else {
106 @vector = @_;
107 }
3ae5e2ad 108 my $relationship;
109 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
ca6e6095 111 }
3ae5e2ad 112 return $relationship;
113}
114
115sub _set_relationship {
116 my( $self, $relationship, @vector ) = @_;
117 $self->graph->add_edge( @vector );
118 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119}
a1615ee4 120
22222af9 121=head2 create
122
123Create a new relationship with the given options and return it.
124Warn and return undef if the relationship cannot be created.
125
126=cut
127
128sub create {
129 my( $self, $options ) = @_;
130 # Check to see if a relationship exists between the two given readings
131 my $source = delete $options->{'orig_a'};
132 my $target = delete $options->{'orig_b'};
3ae5e2ad 133 my $rel = $self->get_relationship( $source, $target );
134 if( $rel ) {
3d14b48e 135 if( $rel->type eq 'collated' ) {
136 # Always replace a 'collated' relationship with a more descriptive
137 # one, if asked.
138 $self->del_relationship( $source, $target );
139 } elsif( $rel->type ne $options->{'type'} ) {
63778331 140 throw( "Another relationship of type " . $rel->type
141 . " already exists between $source and $target" );
22222af9 142 } else {
143 return $rel;
144 }
145 }
146
147 # Check to see if a nonlocal relationship is defined for the two readings
148 $rel = $self->scoped_relationship( $options->{'reading_a'},
149 $options->{'reading_b'} );
150 if( $rel && $rel->type eq $options->{'type'} ) {
151 return $rel;
152 } elsif( $rel ) {
63778331 153 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 154 } else {
155 $rel = Text::Tradition::Collation::Relationship->new( $options );
156 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157 return $rel;
158 }
159}
160
161=head2 add_scoped_relationship( $rel )
162
163Keep track of relationships defined between specific readings that are scoped
164non-locally. Key on whichever reading occurs first alphabetically.
165
166=cut
167
168sub add_scoped_relationship {
169 my( $self, $rel ) = @_;
170 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
171 if( $r ) {
172 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
173 $r->type, $rel->reading_a, $rel->reading_b );
174 return;
175 }
176 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
177}
178
179=head2 scoped_relationship( $reading_a, $reading_b )
180
181Returns the general (document-level or global) relationship that has been defined
182between the two reading strings. Returns undef if there is no general relationship.
183
184=cut
185
186sub scoped_relationship {
187 my( $self, $rdga, $rdgb ) = @_;
188 my( $first, $second ) = sort( $rdga, $rdgb );
189 if( exists $self->scopedrels->{$first}->{$second} ) {
190 return $self->scopedrels->{$first}->{$second};
191 } else {
192 return undef;
193 }
194}
195
196=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197
198Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
199for the possible options) between the readings given in $source and $target. Sets
200up a scoped relationship between $sourcetext and $targettext if the relationship is
201scoped non-locally.
202
203Returns a status boolean and a list of all reading pairs connected by the call to
204add_relationship.
205
206=cut
207
208sub add_relationship {
209 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
210
ca6e6095 211 my $relationship;
212 my $thispaironly;
213 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
214 $relationship = $options;
215 $thispaironly = 1; # If existing rel, set only where asked.
216 } else {
217 # Check the options
218 $options->{'scope'} = 'local' unless $options->{'scope'};
219
220 my( $is_valid, $reason ) =
221 $self->relationship_valid( $source, $target, $options->{'type'} );
222 unless( $is_valid ) {
223 throw( "Invalid relationship: $reason" );
224 }
225
226 # Try to create the relationship object.
227 $options->{'reading_a'} = $source_rdg->text;
228 $options->{'reading_b'} = $target_rdg->text;
229 $options->{'orig_a'} = $source;
230 $options->{'orig_b'} = $target;
0ac5e750 231 if( $options->{'scope'} ne 'local' ) {
232 # Is there a relationship with this a & b already?
233 my $otherrel = $self->scoped_relationship( $options->{reading_a},
234 $options->{reading_b} );
235 if( $otherrel && $otherrel->type eq $options->{type}
236 && $otherrel->scope eq $options->{scope} ) {
237 warn "Applying existing scoped relationship";
238 $relationship = $otherrel;
239 }
240 }
241 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
22222af9 242 }
ca6e6095 243
22222af9 244
245 # Find all the pairs for which we need to set the relationship.
246 my @vectors = ( [ $source, $target ] );
ca6e6095 247 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
22222af9 248 my $c = $self->collation;
249 # Set the same relationship everywhere we can, throughout the graph.
250 my @identical_readings = grep { $_->text eq $relationship->reading_a }
251 $c->readings;
252 foreach my $ir ( @identical_readings ) {
cf6c01be 253 next if $ir->id eq $source;
22222af9 254 # Check to see if there is a target reading with the same text at
255 # the same rank.
256 my @itarget = grep
257 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
258 $c->readings;
259 if( @itarget ) {
260 # We found a hit.
261 warn "More than one reading with text " . $target_rdg->text
262 . " at rank " . $ir->rank . "!" if @itarget > 1;
cf6c01be 263 push( @vectors, [ $ir->id, $itarget[0]->id ] );
22222af9 264 }
265 }
266 }
267
268 # Now set the relationship(s).
269 my @pairs_set;
270 foreach my $v ( @vectors ) {
3ae5e2ad 271 my $rel = $self->get_relationship( @$v );
ca6e6095 272 if( $rel && $rel ne $relationship ) {
63778331 273 if( $rel->nonlocal ) {
274 throw( "Found conflicting relationship at @$v" );
275 } else {
276 warn "Not overriding local relationship set at @$v";
277 }
3ae5e2ad 278 next;
22222af9 279 }
3ae5e2ad 280 $self->_set_relationship( $relationship, @$v );
22222af9 281 push( @pairs_set, $v );
282 }
283
63778331 284 return @pairs_set;
22222af9 285}
286
ee801e17 287=head2 del_relationship( $source, $target )
288
289Removes the relationship between the given readings. If the relationship is
290non-local, removes the relationship everywhere in the graph.
291
292=cut
293
294sub del_relationship {
295 my( $self, $source, $target ) = @_;
296 my $rel = $self->get_relationship( $source, $target );
297 throw( "No relationship defined between $source and $target" ) unless $rel;
298 my @vectors = ( [ $source, $target ] );
299 $self->_remove_relationship( $source, $target );
300 if( $rel->nonlocal ) {
301 # Remove the relationship wherever it occurs.
302 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
303 $self->relationships;
304 foreach my $re ( @rel_edges ) {
305 $self->_remove_relationship( @$re );
306 push( @vectors, $re );
307 }
308 }
309 return @vectors;
310}
311
ca6e6095 312sub _remove_relationship {
313 my( $self, @vector ) = @_;
314 $self->graph->delete_edge( @vector );
315}
316
22222af9 317=head2 relationship_valid( $source, $target, $type )
318
319Checks whether a relationship of type $type may exist between the readings given
320in $source and $target. Returns a tuple of ( status, message ) where status is
321a yes/no boolean and, if the answer is no, message gives the reason why.
322
323=cut
324
325sub relationship_valid {
326 my( $self, $source, $target, $rel ) = @_;
327 my $c = $self->collation;
328 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
329 # Check that the two readings do (for a repetition) or do not (for
330 # a transposition) appear in the same witness.
32e95735 331 # TODO this might be called before witness paths are set...
22222af9 332 my %seen_wits;
333 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
334 foreach my $w ( $c->reading_witnesses( $target ) ) {
335 if( $seen_wits{$w} ) {
336 return ( 0, "Readings both occur in witness $w" )
337 if $rel eq 'transposition';
338 return ( 1, "ok" ) if $rel eq 'repetition';
339 }
340 return $rel eq 'transposition' ? ( 1, "ok" )
341 : ( 0, "Readings occur only in distinct witnesses" );
342 }
343 } else {
344 # Check that linking the source and target in a relationship won't lead
a1615ee4 345 # to a path loop for any witness. If they have the same rank then fine.
346 return( 1, "ok" )
84d4ca78 347 if $c->reading( $source )->has_rank
348 && $c->reading( $target )->has_rank
349 && $c->reading( $source )->rank == $c->reading( $target )->rank;
a1615ee4 350
351 # Otherwise, first make a lookup table of all the
22222af9 352 # readings related to either the source or the target.
353 my @proposed_related = ( $source, $target );
778251a6 354 # Drop the collation links of source and target, unless we want to
355 # add a collation relationship.
356 foreach my $r ( ( $source, $target ) ) {
357 $self->_drop_collations( $r ) unless $rel eq 'collated';
358 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
359 }
22222af9 360 my %pr_ids;
361 map { $pr_ids{ $_ } = 1 } @proposed_related;
362
a1615ee4 363 # The cumulative predecessors and successors of the proposed-related readings
364 # should not overlap.
365 my %all_pred;
366 my %all_succ;
22222af9 367 foreach my $pr ( keys %pr_ids ) {
a1615ee4 368 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
369 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
370 }
371 foreach my $k ( keys %all_pred ) {
372 return( 0, "Relationship would create witness loop" )
373 if exists $all_succ{$k};
374 }
375 foreach my $k ( keys %pr_ids ) {
376 return( 0, "Relationship would create witness loop" )
377 if exists $all_pred{$k} || exists $all_succ{$k};
378 }
22222af9 379 return ( 1, "ok" );
380 }
381}
382
778251a6 383sub _drop_collations {
384 my( $self, $reading ) = @_;
385 foreach my $n ( $self->graph->neighbors( $reading ) ) {
386 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
387 $self->del_relationship( $reading, $n );
388 }
389 }
390}
391
7f52eac8 392=head2 related_readings( $reading, $filter )
22222af9 393
394Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 395If $filter is set to a subroutine ref, returns only those related readings where
396$filter( $relationship ) returns a true value.
22222af9 397
398=cut
399
400sub related_readings {
7f52eac8 401 my( $self, $reading, $filter ) = @_;
22222af9 402 my $return_object;
403 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
404 $reading = $reading->id;
405 $return_object = 1;
406 }
c84275ff 407 my @answer;
7f52eac8 408 if( $filter ) {
409 # Backwards compat
410 if( $filter eq 'colocated' ) {
411 $filter = sub { $_[0]->colocated };
412 }
c84275ff 413 my %found = ( $reading => 1 );
414 my $check = [ $reading ];
415 my $iter = 0;
416 while( @$check ) {
c84275ff 417 my $more = [];
418 foreach my $r ( @$check ) {
419 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 420 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 421 push( @$more, $nr ) unless exists $found{$nr};
422 $found{$nr} = 1;
423 }
424 }
425 }
426 $check = $more;
22222af9 427 }
7f52eac8 428 delete $found{$reading};
c84275ff 429 @answer = keys %found;
430 } else {
431 @answer = $self->graph->all_reachable( $reading );
22222af9 432 }
433 if( $return_object ) {
434 my $c = $self->collation;
c84275ff 435 return map { $c->reading( $_ ) } @answer;
22222af9 436 } else {
c84275ff 437 return @answer;
22222af9 438 }
439}
440
441=head2 merge_readings( $kept, $deleted );
442
443Makes a best-effort merge of the relationship links between the given readings, and
444stops tracking the to-be-deleted reading.
445
446=cut
447
448sub merge_readings {
449 my( $self, $kept, $deleted, $combined ) = @_;
450 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
451 # Get the pair of kept / rel
452 my @vector = ( $kept );
453 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
454 next if $vector[0] eq $vector[1]; # Don't add a self loop
455
456 # If kept changes its text, drop the relationship.
457 next if $combined;
458
459 # If kept / rel already has a relationship, warn and keep the old
3ae5e2ad 460 my $rel = $self->get_relationship( @vector );
461 if( $rel ) {
22222af9 462 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
463 next;
464 }
465
466 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 467 $rel = $self->get_relationship( @$edge );
468 $self->_set_relationship( $rel, @vector );
22222af9 469 }
470 $self->delete_reading( $deleted );
471}
472
027d819c 473sub _as_graphml {
2626f709 474 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 475
476 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
477 $rgraph->setAttribute( 'edgedefault', 'directed' );
478 $rgraph->setAttribute( 'id', 'relationships', );
479 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
480 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
481 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
482 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
483 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
484
485 # Add the vertices according to their XML IDs
2626f709 486 my %rdg_lookup = ( reverse %$node_hash );
487 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 488 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
489 $n_el->setAttribute( 'id', $n );
2626f709 490 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 491 }
492
493 # Add the relationship edges, with their object information
494 my $edge_ctr = 0;
495 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
496 # Add an edge and fill in its relationship info.
497 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
498 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
499 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
500 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
501
3ae5e2ad 502 my $rel_obj = $self->get_relationship( @$e );
c84275ff 503 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
504 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
3d14b48e 505 _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation );
c84275ff 506 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
507 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
508 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
509 $rel_obj->non_independent ) if $rel_obj->nonind_set;
510 }
511}
512
513sub _by_xmlid {
2626f709 514 my $tmp_a = $a;
515 my $tmp_b = $b;
516 $tmp_a =~ s/\D//g;
517 $tmp_b =~ s/\D//g;
518 return $tmp_a <=> $tmp_b;
c84275ff 519}
520
521sub _add_graphml_data {
522 my( $el, $key, $value ) = @_;
523 return unless defined $value;
524 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
525 $data_el->setAttribute( 'key', $key );
526 $data_el->appendText( $value );
83d5ac3a 527}
528
63778331 529sub throw {
530 Text::Tradition::Error->throw(
531 'ident' => 'Relationship error',
532 'message' => $_[0],
533 );
534}
535
22222af9 536no Moose;
537__PACKAGE__->meta->make_immutable;
538
5391;