abandon 'collated' relationship removal
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
44my @v2 = $c->add_relationship( 'n9', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
49@v2 = $c->del_relationship( 'n8', 'n13' );
50is( scalar @v2, 2, "Deleted second global relationship" );
51try {
52 my @v3 = $c->del_relationship( 'n1', 'n2' );
53 ok( 0, "Should have errored on non-existent relationship" );
54} catch( Text::Tradition::Error $e ) {
55 like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56}
57
3ae5e2ad 58=end testing
59
22222af9 60=head1 METHODS
61
62=head2 new( collation => $collation );
63
64Creates a new relationship store for the given collation.
65
66=cut
67
68has 'collation' => (
69 is => 'ro',
70 isa => 'Text::Tradition::Collation',
71 required => 1,
72 weak_ref => 1,
73 );
74
75has 'scopedrels' => (
76 is => 'ro',
77 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78 default => sub { {} },
79 );
80
81has 'graph' => (
82 is => 'ro',
83 isa => 'Graph',
84 default => sub { Graph->new( undirected => 1 ) },
85 handles => {
86 relationships => 'edges',
87 add_reading => 'add_vertex',
88 delete_reading => 'delete_vertex',
4633f9e4 89 delete_relationship => 'delete_edge',
22222af9 90 },
91 );
92
4633f9e4 93around 'delete_relationship' => sub {
94 my $orig = shift;
95 my $self = shift;
96 my @vector;
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
99 my $edge = shift;
100 @vector = @$edge;
101 } else {
102 @vector = @_;
103 }
104 return $self->$orig( @vector );
105};
106
3ae5e2ad 107=head2 get_relationship
108
109Return the relationship object, if any, that exists between two readings.
110
111=cut
112
113sub get_relationship {
4633f9e4 114 my $self = shift;
115 my @vector;
116 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
117 # Dereference the edge arrayref that was passed.
118 my $edge = shift;
119 @vector = @$edge;
120 } else {
121 @vector = @_;
122 }
3ae5e2ad 123 my $relationship;
124 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
125 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
126 }
127 return $relationship;
128}
129
130sub _set_relationship {
131 my( $self, $relationship, @vector ) = @_;
132 $self->graph->add_edge( @vector );
133 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
134}
a1615ee4 135
136sub _remove_relationship {
137 my( $self, @vector ) = @_;
138 $self->graph->delete_edge( @vector );
139}
3ae5e2ad 140
22222af9 141=head2 create
142
143Create a new relationship with the given options and return it.
144Warn and return undef if the relationship cannot be created.
145
146=cut
147
148sub create {
149 my( $self, $options ) = @_;
150 # Check to see if a relationship exists between the two given readings
151 my $source = delete $options->{'orig_a'};
152 my $target = delete $options->{'orig_b'};
3ae5e2ad 153 my $rel = $self->get_relationship( $source, $target );
154 if( $rel ) {
3d14b48e 155 if( $rel->type eq 'collated' ) {
156 # Always replace a 'collated' relationship with a more descriptive
157 # one, if asked.
158 $self->del_relationship( $source, $target );
159 } elsif( $rel->type ne $options->{'type'} ) {
63778331 160 throw( "Another relationship of type " . $rel->type
161 . " already exists between $source and $target" );
22222af9 162 } else {
163 return $rel;
164 }
165 }
166
167 # Check to see if a nonlocal relationship is defined for the two readings
168 $rel = $self->scoped_relationship( $options->{'reading_a'},
169 $options->{'reading_b'} );
170 if( $rel && $rel->type eq $options->{'type'} ) {
171 return $rel;
172 } elsif( $rel ) {
63778331 173 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 174 } else {
175 $rel = Text::Tradition::Collation::Relationship->new( $options );
176 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
177 return $rel;
178 }
179}
180
181=head2 add_scoped_relationship( $rel )
182
183Keep track of relationships defined between specific readings that are scoped
184non-locally. Key on whichever reading occurs first alphabetically.
185
186=cut
187
188sub add_scoped_relationship {
189 my( $self, $rel ) = @_;
190 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
191 if( $r ) {
192 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
193 $r->type, $rel->reading_a, $rel->reading_b );
194 return;
195 }
196 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
197}
198
199=head2 scoped_relationship( $reading_a, $reading_b )
200
201Returns the general (document-level or global) relationship that has been defined
202between the two reading strings. Returns undef if there is no general relationship.
203
204=cut
205
206sub scoped_relationship {
207 my( $self, $rdga, $rdgb ) = @_;
208 my( $first, $second ) = sort( $rdga, $rdgb );
209 if( exists $self->scopedrels->{$first}->{$second} ) {
210 return $self->scopedrels->{$first}->{$second};
211 } else {
212 return undef;
213 }
214}
215
216=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
217
218Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
219for the possible options) between the readings given in $source and $target. Sets
220up a scoped relationship between $sourcetext and $targettext if the relationship is
221scoped non-locally.
222
223Returns a status boolean and a list of all reading pairs connected by the call to
224add_relationship.
225
226=cut
227
228sub add_relationship {
229 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
230
231 # Check the options
232 $options->{'scope'} = 'local' unless $options->{'scope'};
233
234 my( $is_valid, $reason ) =
235 $self->relationship_valid( $source, $target, $options->{'type'} );
236 unless( $is_valid ) {
63778331 237 throw( "Invalid relationship: $reason" );
22222af9 238 }
239
240 # Try to create the relationship object.
241 $options->{'reading_a'} = $source_rdg->text;
242 $options->{'reading_b'} = $target_rdg->text;
243 $options->{'orig_a'} = $source;
244 $options->{'orig_b'} = $target;
63778331 245 my $relationship = $self->create( $options ); # Will throw on error
22222af9 246
247 # Find all the pairs for which we need to set the relationship.
248 my @vectors = ( [ $source, $target ] );
249 if( $relationship->colocated && $relationship->nonlocal ) {
250 my $c = $self->collation;
251 # Set the same relationship everywhere we can, throughout the graph.
252 my @identical_readings = grep { $_->text eq $relationship->reading_a }
253 $c->readings;
254 foreach my $ir ( @identical_readings ) {
cf6c01be 255 next if $ir->id eq $source;
22222af9 256 # Check to see if there is a target reading with the same text at
257 # the same rank.
258 my @itarget = grep
259 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
260 $c->readings;
261 if( @itarget ) {
262 # We found a hit.
263 warn "More than one reading with text " . $target_rdg->text
264 . " at rank " . $ir->rank . "!" if @itarget > 1;
cf6c01be 265 push( @vectors, [ $ir->id, $itarget[0]->id ] );
22222af9 266 }
267 }
268 }
269
270 # Now set the relationship(s).
271 my @pairs_set;
272 foreach my $v ( @vectors ) {
3ae5e2ad 273 my $rel = $self->get_relationship( @$v );
274 if( $rel ) {
63778331 275 if( $rel->nonlocal ) {
276 throw( "Found conflicting relationship at @$v" );
277 } else {
278 warn "Not overriding local relationship set at @$v";
279 }
3ae5e2ad 280 next;
22222af9 281 }
3ae5e2ad 282 $self->_set_relationship( $relationship, @$v );
22222af9 283 push( @pairs_set, $v );
284 }
285
63778331 286 return @pairs_set;
22222af9 287}
288
ee801e17 289=head2 del_relationship( $source, $target )
290
291Removes the relationship between the given readings. If the relationship is
292non-local, removes the relationship everywhere in the graph.
293
294=cut
295
296sub del_relationship {
297 my( $self, $source, $target ) = @_;
298 my $rel = $self->get_relationship( $source, $target );
299 throw( "No relationship defined between $source and $target" ) unless $rel;
300 my @vectors = ( [ $source, $target ] );
301 $self->_remove_relationship( $source, $target );
302 if( $rel->nonlocal ) {
303 # Remove the relationship wherever it occurs.
304 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
305 $self->relationships;
306 foreach my $re ( @rel_edges ) {
307 $self->_remove_relationship( @$re );
308 push( @vectors, $re );
309 }
310 }
311 return @vectors;
312}
313
22222af9 314=head2 relationship_valid( $source, $target, $type )
315
316Checks whether a relationship of type $type may exist between the readings given
317in $source and $target. Returns a tuple of ( status, message ) where status is
318a yes/no boolean and, if the answer is no, message gives the reason why.
319
320=cut
321
322sub relationship_valid {
323 my( $self, $source, $target, $rel ) = @_;
324 my $c = $self->collation;
325 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
326 # Check that the two readings do (for a repetition) or do not (for
327 # a transposition) appear in the same witness.
32e95735 328 # TODO this might be called before witness paths are set...
22222af9 329 my %seen_wits;
330 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
331 foreach my $w ( $c->reading_witnesses( $target ) ) {
332 if( $seen_wits{$w} ) {
333 return ( 0, "Readings both occur in witness $w" )
334 if $rel eq 'transposition';
335 return ( 1, "ok" ) if $rel eq 'repetition';
336 }
337 return $rel eq 'transposition' ? ( 1, "ok" )
338 : ( 0, "Readings occur only in distinct witnesses" );
339 }
340 } else {
341 # Check that linking the source and target in a relationship won't lead
a1615ee4 342 # to a path loop for any witness. If they have the same rank then fine.
343 return( 1, "ok" )
84d4ca78 344 if $c->reading( $source )->has_rank
345 && $c->reading( $target )->has_rank
346 && $c->reading( $source )->rank == $c->reading( $target )->rank;
a1615ee4 347
348 # Otherwise, first make a lookup table of all the
22222af9 349 # readings related to either the source or the target.
350 my @proposed_related = ( $source, $target );
351 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
352 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
353 my %pr_ids;
354 map { $pr_ids{ $_ } = 1 } @proposed_related;
355
a1615ee4 356 # The cumulative predecessors and successors of the proposed-related readings
357 # should not overlap.
358 my %all_pred;
359 my %all_succ;
22222af9 360 foreach my $pr ( keys %pr_ids ) {
a1615ee4 361 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
362 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
363 }
364 foreach my $k ( keys %all_pred ) {
365 return( 0, "Relationship would create witness loop" )
366 if exists $all_succ{$k};
367 }
368 foreach my $k ( keys %pr_ids ) {
369 return( 0, "Relationship would create witness loop" )
370 if exists $all_pred{$k} || exists $all_succ{$k};
371 }
22222af9 372 return ( 1, "ok" );
373 }
374}
375
7f52eac8 376=head2 related_readings( $reading, $filter )
22222af9 377
378Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 379If $filter is set to a subroutine ref, returns only those related readings where
380$filter( $relationship ) returns a true value.
22222af9 381
382=cut
383
384sub related_readings {
7f52eac8 385 my( $self, $reading, $filter ) = @_;
22222af9 386 my $return_object;
387 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
388 $reading = $reading->id;
389 $return_object = 1;
390 }
c84275ff 391 my @answer;
7f52eac8 392 if( $filter ) {
393 # Backwards compat
394 if( $filter eq 'colocated' ) {
395 $filter = sub { $_[0]->colocated };
396 }
c84275ff 397 my %found = ( $reading => 1 );
398 my $check = [ $reading ];
399 my $iter = 0;
400 while( @$check ) {
c84275ff 401 my $more = [];
402 foreach my $r ( @$check ) {
403 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 404 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 405 push( @$more, $nr ) unless exists $found{$nr};
406 $found{$nr} = 1;
407 }
408 }
409 }
410 $check = $more;
22222af9 411 }
7f52eac8 412 delete $found{$reading};
c84275ff 413 @answer = keys %found;
414 } else {
415 @answer = $self->graph->all_reachable( $reading );
22222af9 416 }
417 if( $return_object ) {
418 my $c = $self->collation;
c84275ff 419 return map { $c->reading( $_ ) } @answer;
22222af9 420 } else {
c84275ff 421 return @answer;
22222af9 422 }
423}
424
425=head2 merge_readings( $kept, $deleted );
426
427Makes a best-effort merge of the relationship links between the given readings, and
428stops tracking the to-be-deleted reading.
429
430=cut
431
432sub merge_readings {
433 my( $self, $kept, $deleted, $combined ) = @_;
434 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
435 # Get the pair of kept / rel
436 my @vector = ( $kept );
437 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
438 next if $vector[0] eq $vector[1]; # Don't add a self loop
439
440 # If kept changes its text, drop the relationship.
441 next if $combined;
442
443 # If kept / rel already has a relationship, warn and keep the old
3ae5e2ad 444 my $rel = $self->get_relationship( @vector );
445 if( $rel ) {
22222af9 446 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
447 next;
448 }
449
450 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 451 $rel = $self->get_relationship( @$edge );
452 $self->_set_relationship( $rel, @vector );
22222af9 453 }
454 $self->delete_reading( $deleted );
455}
456
027d819c 457sub _as_graphml {
2626f709 458 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 459
460 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
461 $rgraph->setAttribute( 'edgedefault', 'directed' );
462 $rgraph->setAttribute( 'id', 'relationships', );
463 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
464 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
465 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
466 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
467 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
468
469 # Add the vertices according to their XML IDs
2626f709 470 my %rdg_lookup = ( reverse %$node_hash );
471 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 472 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
473 $n_el->setAttribute( 'id', $n );
2626f709 474 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 475 }
476
477 # Add the relationship edges, with their object information
478 my $edge_ctr = 0;
479 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
480 # Add an edge and fill in its relationship info.
481 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
482 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
483 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
484 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
485
3ae5e2ad 486 my $rel_obj = $self->get_relationship( @$e );
c84275ff 487 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
488 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
3d14b48e 489 _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation );
c84275ff 490 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
491 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
492 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
493 $rel_obj->non_independent ) if $rel_obj->nonind_set;
494 }
495}
496
497sub _by_xmlid {
2626f709 498 my $tmp_a = $a;
499 my $tmp_b = $b;
500 $tmp_a =~ s/\D//g;
501 $tmp_b =~ s/\D//g;
502 return $tmp_a <=> $tmp_b;
c84275ff 503}
504
505sub _add_graphml_data {
506 my( $el, $key, $value ) = @_;
507 return unless defined $value;
508 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
509 $data_el->setAttribute( 'key', $key );
510 $data_el->appendText( $value );
83d5ac3a 511}
512
63778331 513sub throw {
514 Text::Tradition::Error->throw(
515 'ident' => 'Relationship error',
516 'message' => $_[0],
517 );
518}
519
22222af9 520no Moose;
521__PACKAGE__->meta->make_immutable;
522
5231;