add relationship deletion functionality
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
44my @v2 = $c->add_relationship( 'n9', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
49@v2 = $c->del_relationship( 'n8', 'n13' );
50is( scalar @v2, 2, "Deleted second global relationship" );
51try {
52 my @v3 = $c->del_relationship( 'n1', 'n2' );
53 ok( 0, "Should have errored on non-existent relationship" );
54} catch( Text::Tradition::Error $e ) {
55 like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56}
57
3ae5e2ad 58=end testing
59
22222af9 60=head1 METHODS
61
62=head2 new( collation => $collation );
63
64Creates a new relationship store for the given collation.
65
66=cut
67
68has 'collation' => (
69 is => 'ro',
70 isa => 'Text::Tradition::Collation',
71 required => 1,
72 weak_ref => 1,
73 );
74
75has 'scopedrels' => (
76 is => 'ro',
77 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78 default => sub { {} },
79 );
80
81has 'graph' => (
82 is => 'ro',
83 isa => 'Graph',
84 default => sub { Graph->new( undirected => 1 ) },
85 handles => {
86 relationships => 'edges',
87 add_reading => 'add_vertex',
88 delete_reading => 'delete_vertex',
89 },
90 );
91
3ae5e2ad 92=head2 get_relationship
93
94Return the relationship object, if any, that exists between two readings.
95
96=cut
97
98sub get_relationship {
99 my( $self, @vector ) = @_;
100 my $relationship;
101 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
102 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
103 }
104 return $relationship;
105}
106
107sub _set_relationship {
108 my( $self, $relationship, @vector ) = @_;
109 $self->graph->add_edge( @vector );
110 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
111}
a1615ee4 112
113sub _remove_relationship {
114 my( $self, @vector ) = @_;
115 $self->graph->delete_edge( @vector );
116}
3ae5e2ad 117
22222af9 118=head2 create
119
120Create a new relationship with the given options and return it.
121Warn and return undef if the relationship cannot be created.
122
123=cut
124
125sub create {
126 my( $self, $options ) = @_;
127 # Check to see if a relationship exists between the two given readings
128 my $source = delete $options->{'orig_a'};
129 my $target = delete $options->{'orig_b'};
3ae5e2ad 130 my $rel = $self->get_relationship( $source, $target );
131 if( $rel ) {
a7037072 132 if( $rel->type ne $options->{'type'} ) {
63778331 133 throw( "Another relationship of type " . $rel->type
134 . " already exists between $source and $target" );
22222af9 135 } else {
136 return $rel;
137 }
138 }
139
140 # Check to see if a nonlocal relationship is defined for the two readings
141 $rel = $self->scoped_relationship( $options->{'reading_a'},
142 $options->{'reading_b'} );
143 if( $rel && $rel->type eq $options->{'type'} ) {
144 return $rel;
145 } elsif( $rel ) {
63778331 146 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 147 } else {
148 $rel = Text::Tradition::Collation::Relationship->new( $options );
149 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
150 return $rel;
151 }
152}
153
154=head2 add_scoped_relationship( $rel )
155
156Keep track of relationships defined between specific readings that are scoped
157non-locally. Key on whichever reading occurs first alphabetically.
158
159=cut
160
161sub add_scoped_relationship {
162 my( $self, $rel ) = @_;
163 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
164 if( $r ) {
165 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
166 $r->type, $rel->reading_a, $rel->reading_b );
167 return;
168 }
169 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
170}
171
172=head2 scoped_relationship( $reading_a, $reading_b )
173
174Returns the general (document-level or global) relationship that has been defined
175between the two reading strings. Returns undef if there is no general relationship.
176
177=cut
178
179sub scoped_relationship {
180 my( $self, $rdga, $rdgb ) = @_;
181 my( $first, $second ) = sort( $rdga, $rdgb );
182 if( exists $self->scopedrels->{$first}->{$second} ) {
183 return $self->scopedrels->{$first}->{$second};
184 } else {
185 return undef;
186 }
187}
188
189=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
190
191Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
192for the possible options) between the readings given in $source and $target. Sets
193up a scoped relationship between $sourcetext and $targettext if the relationship is
194scoped non-locally.
195
196Returns a status boolean and a list of all reading pairs connected by the call to
197add_relationship.
198
199=cut
200
201sub add_relationship {
202 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
203
204 # Check the options
205 $options->{'scope'} = 'local' unless $options->{'scope'};
206
207 my( $is_valid, $reason ) =
208 $self->relationship_valid( $source, $target, $options->{'type'} );
209 unless( $is_valid ) {
63778331 210 throw( "Invalid relationship: $reason" );
22222af9 211 }
212
213 # Try to create the relationship object.
214 $options->{'reading_a'} = $source_rdg->text;
215 $options->{'reading_b'} = $target_rdg->text;
216 $options->{'orig_a'} = $source;
217 $options->{'orig_b'} = $target;
63778331 218 my $relationship = $self->create( $options ); # Will throw on error
22222af9 219
220 # Find all the pairs for which we need to set the relationship.
221 my @vectors = ( [ $source, $target ] );
222 if( $relationship->colocated && $relationship->nonlocal ) {
223 my $c = $self->collation;
224 # Set the same relationship everywhere we can, throughout the graph.
225 my @identical_readings = grep { $_->text eq $relationship->reading_a }
226 $c->readings;
227 foreach my $ir ( @identical_readings ) {
cf6c01be 228 next if $ir->id eq $source;
22222af9 229 # Check to see if there is a target reading with the same text at
230 # the same rank.
231 my @itarget = grep
232 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
233 $c->readings;
234 if( @itarget ) {
235 # We found a hit.
236 warn "More than one reading with text " . $target_rdg->text
237 . " at rank " . $ir->rank . "!" if @itarget > 1;
cf6c01be 238 push( @vectors, [ $ir->id, $itarget[0]->id ] );
22222af9 239 }
240 }
241 }
242
243 # Now set the relationship(s).
244 my @pairs_set;
245 foreach my $v ( @vectors ) {
3ae5e2ad 246 my $rel = $self->get_relationship( @$v );
247 if( $rel ) {
63778331 248 if( $rel->nonlocal ) {
249 throw( "Found conflicting relationship at @$v" );
250 } else {
251 warn "Not overriding local relationship set at @$v";
252 }
3ae5e2ad 253 next;
22222af9 254 }
3ae5e2ad 255 $self->_set_relationship( $relationship, @$v );
22222af9 256 push( @pairs_set, $v );
257 }
258
63778331 259 return @pairs_set;
22222af9 260}
261
ee801e17 262=head2 del_relationship( $source, $target )
263
264Removes the relationship between the given readings. If the relationship is
265non-local, removes the relationship everywhere in the graph.
266
267=cut
268
269sub del_relationship {
270 my( $self, $source, $target ) = @_;
271 my $rel = $self->get_relationship( $source, $target );
272 throw( "No relationship defined between $source and $target" ) unless $rel;
273 my @vectors = ( [ $source, $target ] );
274 $self->_remove_relationship( $source, $target );
275 if( $rel->nonlocal ) {
276 # Remove the relationship wherever it occurs.
277 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
278 $self->relationships;
279 foreach my $re ( @rel_edges ) {
280 $self->_remove_relationship( @$re );
281 push( @vectors, $re );
282 }
283 }
284 return @vectors;
285}
286
22222af9 287=head2 relationship_valid( $source, $target, $type )
288
289Checks whether a relationship of type $type may exist between the readings given
290in $source and $target. Returns a tuple of ( status, message ) where status is
291a yes/no boolean and, if the answer is no, message gives the reason why.
292
293=cut
294
295sub relationship_valid {
296 my( $self, $source, $target, $rel ) = @_;
297 my $c = $self->collation;
298 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
299 # Check that the two readings do (for a repetition) or do not (for
300 # a transposition) appear in the same witness.
301 my %seen_wits;
302 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
303 foreach my $w ( $c->reading_witnesses( $target ) ) {
304 if( $seen_wits{$w} ) {
305 return ( 0, "Readings both occur in witness $w" )
306 if $rel eq 'transposition';
307 return ( 1, "ok" ) if $rel eq 'repetition';
308 }
309 return $rel eq 'transposition' ? ( 1, "ok" )
310 : ( 0, "Readings occur only in distinct witnesses" );
311 }
312 } else {
313 # Check that linking the source and target in a relationship won't lead
a1615ee4 314 # to a path loop for any witness. If they have the same rank then fine.
315 return( 1, "ok" )
316 if $c->reading( $source )->rank == $c->reading( $target )->rank;
317
318 # Otherwise, first make a lookup table of all the
22222af9 319 # readings related to either the source or the target.
320 my @proposed_related = ( $source, $target );
321 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
322 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
323 my %pr_ids;
324 map { $pr_ids{ $_ } = 1 } @proposed_related;
325
a1615ee4 326 # The cumulative predecessors and successors of the proposed-related readings
327 # should not overlap.
328 my %all_pred;
329 my %all_succ;
22222af9 330 foreach my $pr ( keys %pr_ids ) {
a1615ee4 331 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
332 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
333 }
334 foreach my $k ( keys %all_pred ) {
335 return( 0, "Relationship would create witness loop" )
336 if exists $all_succ{$k};
337 }
338 foreach my $k ( keys %pr_ids ) {
339 return( 0, "Relationship would create witness loop" )
340 if exists $all_pred{$k} || exists $all_succ{$k};
341 }
22222af9 342 return ( 1, "ok" );
343 }
344}
345
346=head2 related_readings( $reading, $colocated_only )
347
348Returns a list of readings that are connected via relationship links to $reading.
349If $colocated_only is true, restricts the list to those readings that are in the
350same logical location (and therefore have the same rank in the collation graph.)
351
352=cut
353
354sub related_readings {
355 my( $self, $reading, $colocated ) = @_;
356 my $return_object;
357 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
358 $reading = $reading->id;
359 $return_object = 1;
360 }
c84275ff 361 my @answer;
22222af9 362 if( $colocated ) {
c84275ff 363 my %found = ( $reading => 1 );
364 my $check = [ $reading ];
365 my $iter = 0;
366 while( @$check ) {
c84275ff 367 my $more = [];
368 foreach my $r ( @$check ) {
369 foreach my $nr ( $self->graph->neighbors( $r ) ) {
3ae5e2ad 370 if( $self->get_relationship( $r, $nr )->colocated ) {
c84275ff 371 push( @$more, $nr ) unless exists $found{$nr};
372 $found{$nr} = 1;
373 }
374 }
375 }
376 $check = $more;
22222af9 377 }
c84275ff 378 @answer = keys %found;
379 } else {
380 @answer = $self->graph->all_reachable( $reading );
22222af9 381 }
382 if( $return_object ) {
383 my $c = $self->collation;
c84275ff 384 return map { $c->reading( $_ ) } @answer;
22222af9 385 } else {
c84275ff 386 return @answer;
22222af9 387 }
388}
389
390=head2 merge_readings( $kept, $deleted );
391
392Makes a best-effort merge of the relationship links between the given readings, and
393stops tracking the to-be-deleted reading.
394
395=cut
396
397sub merge_readings {
398 my( $self, $kept, $deleted, $combined ) = @_;
399 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
400 # Get the pair of kept / rel
401 my @vector = ( $kept );
402 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
403 next if $vector[0] eq $vector[1]; # Don't add a self loop
404
405 # If kept changes its text, drop the relationship.
406 next if $combined;
407
408 # If kept / rel already has a relationship, warn and keep the old
3ae5e2ad 409 my $rel = $self->get_relationship( @vector );
410 if( $rel ) {
22222af9 411 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
412 next;
413 }
414
415 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 416 $rel = $self->get_relationship( @$edge );
417 $self->_set_relationship( $rel, @vector );
22222af9 418 }
419 $self->delete_reading( $deleted );
420}
421
027d819c 422sub _as_graphml {
2626f709 423 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 424
425 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
426 $rgraph->setAttribute( 'edgedefault', 'directed' );
427 $rgraph->setAttribute( 'id', 'relationships', );
428 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
429 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
430 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
431 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
432 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
433
434 # Add the vertices according to their XML IDs
2626f709 435 my %rdg_lookup = ( reverse %$node_hash );
436 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 437 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
438 $n_el->setAttribute( 'id', $n );
2626f709 439 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 440 }
441
442 # Add the relationship edges, with their object information
443 my $edge_ctr = 0;
444 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
445 # Add an edge and fill in its relationship info.
446 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
447 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
448 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
449 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
450
3ae5e2ad 451 my $rel_obj = $self->get_relationship( @$e );
c84275ff 452 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
453 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
454 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
455 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
456 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
457 $rel_obj->non_independent ) if $rel_obj->nonind_set;
458 }
459}
460
461sub _by_xmlid {
2626f709 462 my $tmp_a = $a;
463 my $tmp_b = $b;
464 $tmp_a =~ s/\D//g;
465 $tmp_b =~ s/\D//g;
466 return $tmp_a <=> $tmp_b;
c84275ff 467}
468
469sub _add_graphml_data {
470 my( $el, $key, $value ) = @_;
471 return unless defined $value;
472 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
473 $data_el->setAttribute( 'key', $key );
474 $data_el->appendText( $value );
83d5ac3a 475}
476
63778331 477sub throw {
478 Text::Tradition::Error->throw(
479 'ident' => 'Relationship error',
480 'message' => $_[0],
481 );
482}
483
22222af9 484no Moose;
485__PACKAGE__->meta->make_immutable;
486
4871;