relationship deletions should include scoped objects
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
f8331a4d 40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
ee801e17 41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
679f17e1 44my @v2 = $c->add_relationship( 'n24', 'n23',
ee801e17 45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
679f17e1 49@v2 = $c->del_relationship( 'n12', 'n13' );
ee801e17 50is( scalar @v2, 2, "Deleted second global relationship" );
681893aa 51my @v3 = $c->del_relationship( 'n1', 'n2' );
52is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
ee801e17 53
3ae5e2ad 54=end testing
55
22222af9 56=head1 METHODS
57
58=head2 new( collation => $collation );
59
60Creates a new relationship store for the given collation.
61
62=cut
63
64has 'collation' => (
65 is => 'ro',
66 isa => 'Text::Tradition::Collation',
67 required => 1,
68 weak_ref => 1,
69 );
70
71has 'scopedrels' => (
72 is => 'ro',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
75 );
76
77has 'graph' => (
78 is => 'ro',
79 isa => 'Graph',
80 default => sub { Graph->new( undirected => 1 ) },
81 handles => {
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
85 },
86 );
87
3ae5e2ad 88=head2 get_relationship
89
90Return the relationship object, if any, that exists between two readings.
91
92=cut
93
94sub get_relationship {
4633f9e4 95 my $self = shift;
96 my @vector;
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
99 my $edge = shift;
100 @vector = @$edge;
101 } else {
102 @vector = @_;
103 }
3ae5e2ad 104 my $relationship;
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
ca6e6095 107 }
3ae5e2ad 108 return $relationship;
109}
110
111sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
115}
a1615ee4 116
22222af9 117=head2 create
118
119Create a new relationship with the given options and return it.
120Warn and return undef if the relationship cannot be created.
121
122=cut
123
124sub create {
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
3ae5e2ad 129 my $rel = $self->get_relationship( $source, $target );
130 if( $rel ) {
3d14b48e 131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
133 # one, if asked.
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
63778331 136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
22222af9 138 } else {
139 return $rel;
140 }
141 }
142
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
147 return $rel;
148 } elsif( $rel ) {
63778331 149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 150 } else {
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
153 return $rel;
154 }
155}
156
157=head2 add_scoped_relationship( $rel )
158
159Keep track of relationships defined between specific readings that are scoped
160non-locally. Key on whichever reading occurs first alphabetically.
161
162=cut
163
164sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
f222800e 166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
22222af9 169 if( $r ) {
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
f222800e 171 $r->type, $rdga, $rdgb );
22222af9 172 return;
173 }
f222800e 174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
22222af9 176}
177
178=head2 scoped_relationship( $reading_a, $reading_b )
179
180Returns the general (document-level or global) relationship that has been defined
181between the two reading strings. Returns undef if there is no general relationship.
182
183=cut
184
185sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
190 } else {
191 return undef;
192 }
193}
194
195=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
196
197Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198for the possible options) between the readings given in $source and $target. Sets
199up a scoped relationship between $sourcetext and $targettext if the relationship is
200scoped non-locally.
201
202Returns a status boolean and a list of all reading pairs connected by the call to
203add_relationship.
204
205=cut
206
207sub add_relationship {
208 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
209
ca6e6095 210 my $relationship;
211 my $thispaironly;
212 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
213 $relationship = $options;
214 $thispaironly = 1; # If existing rel, set only where asked.
215 } else {
216 # Check the options
217 $options->{'scope'} = 'local' unless $options->{'scope'};
bf6e338d 218 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
ca6e6095 219
220 my( $is_valid, $reason ) =
221 $self->relationship_valid( $source, $target, $options->{'type'} );
222 unless( $is_valid ) {
223 throw( "Invalid relationship: $reason" );
224 }
225
226 # Try to create the relationship object.
227 $options->{'reading_a'} = $source_rdg->text;
228 $options->{'reading_b'} = $target_rdg->text;
229 $options->{'orig_a'} = $source;
230 $options->{'orig_b'} = $target;
0ac5e750 231 if( $options->{'scope'} ne 'local' ) {
232 # Is there a relationship with this a & b already?
f222800e 233 # Case-insensitive for non-orthographics.
234 my $rdga = $options->{'type'} eq 'orthographic'
235 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
236 my $rdgb = $options->{'type'} eq 'orthographic'
237 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
238 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
0ac5e750 239 if( $otherrel && $otherrel->type eq $options->{type}
240 && $otherrel->scope eq $options->{scope} ) {
241 warn "Applying existing scoped relationship";
242 $relationship = $otherrel;
243 }
244 }
245 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
22222af9 246 }
ca6e6095 247
22222af9 248
249 # Find all the pairs for which we need to set the relationship.
bf6e338d 250 my @vectors = [ $source, $target ];
ca6e6095 251 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
bf6e338d 252 push( @vectors, $self->_find_applicable( $relationship ) );
f222800e 253 }
bf6e338d 254
22222af9 255 # Now set the relationship(s).
256 my @pairs_set;
257 foreach my $v ( @vectors ) {
3ae5e2ad 258 my $rel = $self->get_relationship( @$v );
ca6e6095 259 if( $rel && $rel ne $relationship ) {
63778331 260 if( $rel->nonlocal ) {
261 throw( "Found conflicting relationship at @$v" );
a2ed66b2 262 } elsif( $rel->type ne 'collated' ) {
263 # Replace a collation relationship; leave any other sort in place.
63778331 264 warn "Not overriding local relationship set at @$v";
a2ed66b2 265 next;
63778331 266 }
22222af9 267 }
3ae5e2ad 268 $self->_set_relationship( $relationship, @$v );
22222af9 269 push( @pairs_set, $v );
270 }
271
63778331 272 return @pairs_set;
22222af9 273}
274
9d829138 275=head2 del_scoped_relationship( $reading_a, $reading_b )
276
277Returns the general (document-level or global) relationship that has been defined
278between the two reading strings. Returns undef if there is no general relationship.
279
280=cut
281
282sub del_scoped_relationship {
283 my( $self, $rdga, $rdgb ) = @_;
284 my( $first, $second ) = sort( $rdga, $rdgb );
285 return delete $self->scopedrels->{$first}->{$second};
286}
287
bf6e338d 288sub _find_applicable {
289 my( $self, $rel ) = @_;
290 my $c = $self->collation;
291 # TODO Someday we might use a case sensitive language.
292 my $lang = $c->tradition->language;
293 my @vectors;
294 my @identical_readings;
295 if( $rel->type eq 'orthographic' ) {
296 @identical_readings = grep { $_->text eq $rel->reading_a }
297 $c->readings;
298 } else {
299 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
300 $c->readings;
301 }
302 foreach my $ir ( @identical_readings ) {
303 my @itarget;
304 if( $rel->type eq 'orthographic' ) {
305 @itarget = grep { $_->rank == $ir->rank
306 && $_->text eq $rel->reading_b } $c->readings;
307 } else {
308 @itarget = grep { $_->rank == $ir->rank
309 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
310 }
311 if( @itarget ) {
312 # Warn if there is more than one hit with no orth link between them.
313 my $itmain = shift @itarget;
314 if( @itarget ) {
315 my %all_targets;
316 map { $all_targets{$_} = 1 } @itarget;
317 map { delete $all_targets{$_} }
318 $self->related_readings( $itmain,
319 sub { $_[0]->type eq 'orthographic' } );
320 warn "More than one unrelated reading with text " . $itmain->text
321 . " at rank " . $ir->rank . "!" if keys %all_targets;
322 }
323 push( @vectors, [ $ir->id, $itmain->id ] );
324 }
325 }
326 return @vectors;
327}
328
ee801e17 329=head2 del_relationship( $source, $target )
330
331Removes the relationship between the given readings. If the relationship is
332non-local, removes the relationship everywhere in the graph.
333
334=cut
335
336sub del_relationship {
337 my( $self, $source, $target ) = @_;
338 my $rel = $self->get_relationship( $source, $target );
681893aa 339 return () unless $rel; # Nothing to delete; return an empty set.
ee801e17 340 my @vectors = ( [ $source, $target ] );
341 $self->_remove_relationship( $source, $target );
342 if( $rel->nonlocal ) {
343 # Remove the relationship wherever it occurs.
9d829138 344 # Remove the relationship wherever it occurs.
ee801e17 345 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
346 $self->relationships;
347 foreach my $re ( @rel_edges ) {
348 $self->_remove_relationship( @$re );
349 push( @vectors, $re );
350 }
9d829138 351 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
ee801e17 352 }
353 return @vectors;
354}
355
ca6e6095 356sub _remove_relationship {
357 my( $self, @vector ) = @_;
358 $self->graph->delete_edge( @vector );
359}
360
22222af9 361=head2 relationship_valid( $source, $target, $type )
362
363Checks whether a relationship of type $type may exist between the readings given
364in $source and $target. Returns a tuple of ( status, message ) where status is
365a yes/no boolean and, if the answer is no, message gives the reason why.
366
367=cut
368
369sub relationship_valid {
370 my( $self, $source, $target, $rel ) = @_;
371 my $c = $self->collation;
372 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
373 # Check that the two readings do (for a repetition) or do not (for
374 # a transposition) appear in the same witness.
32e95735 375 # TODO this might be called before witness paths are set...
22222af9 376 my %seen_wits;
377 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
378 foreach my $w ( $c->reading_witnesses( $target ) ) {
379 if( $seen_wits{$w} ) {
380 return ( 0, "Readings both occur in witness $w" )
381 if $rel eq 'transposition';
382 return ( 1, "ok" ) if $rel eq 'repetition';
383 }
384 return $rel eq 'transposition' ? ( 1, "ok" )
385 : ( 0, "Readings occur only in distinct witnesses" );
386 }
387 } else {
388 # Check that linking the source and target in a relationship won't lead
a1615ee4 389 # to a path loop for any witness. If they have the same rank then fine.
390 return( 1, "ok" )
84d4ca78 391 if $c->reading( $source )->has_rank
392 && $c->reading( $target )->has_rank
393 && $c->reading( $source )->rank == $c->reading( $target )->rank;
a1615ee4 394
395 # Otherwise, first make a lookup table of all the
22222af9 396 # readings related to either the source or the target.
397 my @proposed_related = ( $source, $target );
778251a6 398 # Drop the collation links of source and target, unless we want to
399 # add a collation relationship.
400 foreach my $r ( ( $source, $target ) ) {
401 $self->_drop_collations( $r ) unless $rel eq 'collated';
402 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
403 }
22222af9 404 my %pr_ids;
405 map { $pr_ids{ $_ } = 1 } @proposed_related;
406
a1615ee4 407 # The cumulative predecessors and successors of the proposed-related readings
408 # should not overlap.
409 my %all_pred;
410 my %all_succ;
22222af9 411 foreach my $pr ( keys %pr_ids ) {
a1615ee4 412 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
413 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
414 }
415 foreach my $k ( keys %all_pred ) {
416 return( 0, "Relationship would create witness loop" )
417 if exists $all_succ{$k};
418 }
419 foreach my $k ( keys %pr_ids ) {
420 return( 0, "Relationship would create witness loop" )
421 if exists $all_pred{$k} || exists $all_succ{$k};
422 }
22222af9 423 return ( 1, "ok" );
424 }
425}
426
778251a6 427sub _drop_collations {
428 my( $self, $reading ) = @_;
429 foreach my $n ( $self->graph->neighbors( $reading ) ) {
430 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
431 $self->del_relationship( $reading, $n );
432 }
433 }
434}
435
7f52eac8 436=head2 related_readings( $reading, $filter )
22222af9 437
438Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 439If $filter is set to a subroutine ref, returns only those related readings where
440$filter( $relationship ) returns a true value.
22222af9 441
442=cut
443
444sub related_readings {
7f52eac8 445 my( $self, $reading, $filter ) = @_;
22222af9 446 my $return_object;
447 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
448 $reading = $reading->id;
449 $return_object = 1;
450 }
c84275ff 451 my @answer;
7f52eac8 452 if( $filter ) {
453 # Backwards compat
454 if( $filter eq 'colocated' ) {
455 $filter = sub { $_[0]->colocated };
456 }
c84275ff 457 my %found = ( $reading => 1 );
458 my $check = [ $reading ];
459 my $iter = 0;
460 while( @$check ) {
c84275ff 461 my $more = [];
462 foreach my $r ( @$check ) {
463 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 464 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 465 push( @$more, $nr ) unless exists $found{$nr};
466 $found{$nr} = 1;
467 }
468 }
469 }
470 $check = $more;
22222af9 471 }
7f52eac8 472 delete $found{$reading};
c84275ff 473 @answer = keys %found;
474 } else {
475 @answer = $self->graph->all_reachable( $reading );
22222af9 476 }
477 if( $return_object ) {
478 my $c = $self->collation;
c84275ff 479 return map { $c->reading( $_ ) } @answer;
22222af9 480 } else {
c84275ff 481 return @answer;
22222af9 482 }
483}
484
485=head2 merge_readings( $kept, $deleted );
486
487Makes a best-effort merge of the relationship links between the given readings, and
488stops tracking the to-be-deleted reading.
489
490=cut
491
492sub merge_readings {
493 my( $self, $kept, $deleted, $combined ) = @_;
494 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
495 # Get the pair of kept / rel
496 my @vector = ( $kept );
497 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
498 next if $vector[0] eq $vector[1]; # Don't add a self loop
499
500 # If kept changes its text, drop the relationship.
501 next if $combined;
502
f222800e 503 # If kept / rel already has a relationship, just keep the old
3ae5e2ad 504 my $rel = $self->get_relationship( @vector );
f222800e 505 next if $rel;
22222af9 506
507 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 508 $rel = $self->get_relationship( @$edge );
509 $self->_set_relationship( $rel, @vector );
22222af9 510 }
511 $self->delete_reading( $deleted );
512}
513
027d819c 514sub _as_graphml {
2626f709 515 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 516
517 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
518 $rgraph->setAttribute( 'edgedefault', 'directed' );
519 $rgraph->setAttribute( 'id', 'relationships', );
520 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
521 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
522 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
523 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
524 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
525
526 # Add the vertices according to their XML IDs
2626f709 527 my %rdg_lookup = ( reverse %$node_hash );
528 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 529 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
530 $n_el->setAttribute( 'id', $n );
2626f709 531 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 532 }
533
534 # Add the relationship edges, with their object information
535 my $edge_ctr = 0;
536 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
537 # Add an edge and fill in its relationship info.
538 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
539 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
540 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
541 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
542
3ae5e2ad 543 my $rel_obj = $self->get_relationship( @$e );
bbd064a9 544 foreach my $key ( keys %$edge_keys ) {
545 my $value = $rel_obj->$key;
546 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
547 if defined $value;
548 }
c84275ff 549 }
550}
551
552sub _by_xmlid {
2626f709 553 my $tmp_a = $a;
554 my $tmp_b = $b;
555 $tmp_a =~ s/\D//g;
556 $tmp_b =~ s/\D//g;
557 return $tmp_a <=> $tmp_b;
c84275ff 558}
559
560sub _add_graphml_data {
561 my( $el, $key, $value ) = @_;
562 return unless defined $value;
563 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
564 $data_el->setAttribute( 'key', $key );
565 $data_el->appendText( $value );
83d5ac3a 566}
567
63778331 568sub throw {
569 Text::Tradition::Error->throw(
570 'ident' => 'Relationship error',
571 'message' => $_[0],
572 );
573}
574
22222af9 575no Moose;
576__PACKAGE__->meta->make_immutable;
577
5781;