drop collations on global relationship setting too
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
f8331a4d 40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
ee801e17 41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
679f17e1 44my @v2 = $c->add_relationship( 'n24', 'n23',
ee801e17 45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
679f17e1 49@v2 = $c->del_relationship( 'n12', 'n13' );
ee801e17 50is( scalar @v2, 2, "Deleted second global relationship" );
681893aa 51my @v3 = $c->del_relationship( 'n1', 'n2' );
52is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
ee801e17 53
3ae5e2ad 54=end testing
55
22222af9 56=head1 METHODS
57
58=head2 new( collation => $collation );
59
60Creates a new relationship store for the given collation.
61
62=cut
63
64has 'collation' => (
65 is => 'ro',
66 isa => 'Text::Tradition::Collation',
67 required => 1,
68 weak_ref => 1,
69 );
70
71has 'scopedrels' => (
72 is => 'ro',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
75 );
76
77has 'graph' => (
78 is => 'ro',
79 isa => 'Graph',
80 default => sub { Graph->new( undirected => 1 ) },
81 handles => {
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
85 },
86 );
87
3ae5e2ad 88=head2 get_relationship
89
90Return the relationship object, if any, that exists between two readings.
91
92=cut
93
94sub get_relationship {
4633f9e4 95 my $self = shift;
96 my @vector;
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
99 my $edge = shift;
100 @vector = @$edge;
101 } else {
102 @vector = @_;
103 }
3ae5e2ad 104 my $relationship;
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
ca6e6095 107 }
3ae5e2ad 108 return $relationship;
109}
110
111sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
115}
a1615ee4 116
22222af9 117=head2 create
118
119Create a new relationship with the given options and return it.
120Warn and return undef if the relationship cannot be created.
121
122=cut
123
124sub create {
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
3ae5e2ad 129 my $rel = $self->get_relationship( $source, $target );
130 if( $rel ) {
3d14b48e 131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
133 # one, if asked.
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
63778331 136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
22222af9 138 } else {
139 return $rel;
140 }
141 }
142
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
147 return $rel;
148 } elsif( $rel ) {
63778331 149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 150 } else {
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
153 return $rel;
154 }
155}
156
157=head2 add_scoped_relationship( $rel )
158
159Keep track of relationships defined between specific readings that are scoped
160non-locally. Key on whichever reading occurs first alphabetically.
161
162=cut
163
164sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
f222800e 166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
22222af9 169 if( $r ) {
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
f222800e 171 $r->type, $rdga, $rdgb );
22222af9 172 return;
173 }
f222800e 174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
22222af9 176}
177
178=head2 scoped_relationship( $reading_a, $reading_b )
179
180Returns the general (document-level or global) relationship that has been defined
181between the two reading strings. Returns undef if there is no general relationship.
182
183=cut
184
185sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
190 } else {
191 return undef;
192 }
193}
194
195=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
196
197Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198for the possible options) between the readings given in $source and $target. Sets
199up a scoped relationship between $sourcetext and $targettext if the relationship is
200scoped non-locally.
201
202Returns a status boolean and a list of all reading pairs connected by the call to
203add_relationship.
204
205=cut
206
207sub add_relationship {
208 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
209
ca6e6095 210 my $relationship;
211 my $thispaironly;
212 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
213 $relationship = $options;
214 $thispaironly = 1; # If existing rel, set only where asked.
215 } else {
216 # Check the options
217 $options->{'scope'} = 'local' unless $options->{'scope'};
bf6e338d 218 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
8d5c8893 219 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
ca6e6095 220
221 my( $is_valid, $reason ) =
222 $self->relationship_valid( $source, $target, $options->{'type'} );
223 unless( $is_valid ) {
224 throw( "Invalid relationship: $reason" );
225 }
226
227 # Try to create the relationship object.
228 $options->{'reading_a'} = $source_rdg->text;
229 $options->{'reading_b'} = $target_rdg->text;
230 $options->{'orig_a'} = $source;
231 $options->{'orig_b'} = $target;
0ac5e750 232 if( $options->{'scope'} ne 'local' ) {
233 # Is there a relationship with this a & b already?
f222800e 234 # Case-insensitive for non-orthographics.
235 my $rdga = $options->{'type'} eq 'orthographic'
236 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
237 my $rdgb = $options->{'type'} eq 'orthographic'
238 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
239 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
0ac5e750 240 if( $otherrel && $otherrel->type eq $options->{type}
241 && $otherrel->scope eq $options->{scope} ) {
242 warn "Applying existing scoped relationship";
243 $relationship = $otherrel;
244 }
245 }
246 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
22222af9 247 }
ca6e6095 248
22222af9 249
250 # Find all the pairs for which we need to set the relationship.
bf6e338d 251 my @vectors = [ $source, $target ];
ca6e6095 252 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
bf6e338d 253 push( @vectors, $self->_find_applicable( $relationship ) );
f222800e 254 }
bf6e338d 255
22222af9 256 # Now set the relationship(s).
257 my @pairs_set;
258 foreach my $v ( @vectors ) {
3ae5e2ad 259 my $rel = $self->get_relationship( @$v );
ca6e6095 260 if( $rel && $rel ne $relationship ) {
63778331 261 if( $rel->nonlocal ) {
262 throw( "Found conflicting relationship at @$v" );
a2ed66b2 263 } elsif( $rel->type ne 'collated' ) {
264 # Replace a collation relationship; leave any other sort in place.
63778331 265 warn "Not overriding local relationship set at @$v";
a2ed66b2 266 next;
63778331 267 }
22222af9 268 }
8d5c8893 269 map { $self->_drop_collations( $_ ) } @$v;
3ae5e2ad 270 $self->_set_relationship( $relationship, @$v );
22222af9 271 push( @pairs_set, $v );
272 }
273
63778331 274 return @pairs_set;
22222af9 275}
276
9d829138 277=head2 del_scoped_relationship( $reading_a, $reading_b )
278
279Returns the general (document-level or global) relationship that has been defined
280between the two reading strings. Returns undef if there is no general relationship.
281
282=cut
283
284sub del_scoped_relationship {
285 my( $self, $rdga, $rdgb ) = @_;
286 my( $first, $second ) = sort( $rdga, $rdgb );
287 return delete $self->scopedrels->{$first}->{$second};
288}
289
bf6e338d 290sub _find_applicable {
291 my( $self, $rel ) = @_;
292 my $c = $self->collation;
293 # TODO Someday we might use a case sensitive language.
294 my $lang = $c->tradition->language;
295 my @vectors;
296 my @identical_readings;
297 if( $rel->type eq 'orthographic' ) {
298 @identical_readings = grep { $_->text eq $rel->reading_a }
299 $c->readings;
300 } else {
301 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
302 $c->readings;
303 }
304 foreach my $ir ( @identical_readings ) {
305 my @itarget;
306 if( $rel->type eq 'orthographic' ) {
307 @itarget = grep { $_->rank == $ir->rank
308 && $_->text eq $rel->reading_b } $c->readings;
309 } else {
310 @itarget = grep { $_->rank == $ir->rank
311 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
312 }
313 if( @itarget ) {
314 # Warn if there is more than one hit with no orth link between them.
315 my $itmain = shift @itarget;
316 if( @itarget ) {
317 my %all_targets;
318 map { $all_targets{$_} = 1 } @itarget;
319 map { delete $all_targets{$_} }
320 $self->related_readings( $itmain,
321 sub { $_[0]->type eq 'orthographic' } );
322 warn "More than one unrelated reading with text " . $itmain->text
323 . " at rank " . $ir->rank . "!" if keys %all_targets;
324 }
325 push( @vectors, [ $ir->id, $itmain->id ] );
326 }
327 }
328 return @vectors;
329}
330
ee801e17 331=head2 del_relationship( $source, $target )
332
333Removes the relationship between the given readings. If the relationship is
334non-local, removes the relationship everywhere in the graph.
335
336=cut
337
338sub del_relationship {
339 my( $self, $source, $target ) = @_;
340 my $rel = $self->get_relationship( $source, $target );
681893aa 341 return () unless $rel; # Nothing to delete; return an empty set.
ee801e17 342 my @vectors = ( [ $source, $target ] );
343 $self->_remove_relationship( $source, $target );
344 if( $rel->nonlocal ) {
345 # Remove the relationship wherever it occurs.
9d829138 346 # Remove the relationship wherever it occurs.
ee801e17 347 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
348 $self->relationships;
349 foreach my $re ( @rel_edges ) {
350 $self->_remove_relationship( @$re );
351 push( @vectors, $re );
352 }
9d829138 353 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
ee801e17 354 }
355 return @vectors;
356}
357
ca6e6095 358sub _remove_relationship {
359 my( $self, @vector ) = @_;
360 $self->graph->delete_edge( @vector );
361}
362
22222af9 363=head2 relationship_valid( $source, $target, $type )
364
365Checks whether a relationship of type $type may exist between the readings given
366in $source and $target. Returns a tuple of ( status, message ) where status is
367a yes/no boolean and, if the answer is no, message gives the reason why.
368
369=cut
370
371sub relationship_valid {
372 my( $self, $source, $target, $rel ) = @_;
373 my $c = $self->collation;
374 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
375 # Check that the two readings do (for a repetition) or do not (for
376 # a transposition) appear in the same witness.
32e95735 377 # TODO this might be called before witness paths are set...
22222af9 378 my %seen_wits;
379 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
380 foreach my $w ( $c->reading_witnesses( $target ) ) {
381 if( $seen_wits{$w} ) {
382 return ( 0, "Readings both occur in witness $w" )
383 if $rel eq 'transposition';
384 return ( 1, "ok" ) if $rel eq 'repetition';
385 }
386 return $rel eq 'transposition' ? ( 1, "ok" )
387 : ( 0, "Readings occur only in distinct witnesses" );
388 }
389 } else {
390 # Check that linking the source and target in a relationship won't lead
a1615ee4 391 # to a path loop for any witness. If they have the same rank then fine.
392 return( 1, "ok" )
84d4ca78 393 if $c->reading( $source )->has_rank
394 && $c->reading( $target )->has_rank
395 && $c->reading( $source )->rank == $c->reading( $target )->rank;
a1615ee4 396
397 # Otherwise, first make a lookup table of all the
22222af9 398 # readings related to either the source or the target.
399 my @proposed_related = ( $source, $target );
778251a6 400 # Drop the collation links of source and target, unless we want to
401 # add a collation relationship.
402 foreach my $r ( ( $source, $target ) ) {
403 $self->_drop_collations( $r ) unless $rel eq 'collated';
404 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
405 }
22222af9 406 my %pr_ids;
407 map { $pr_ids{ $_ } = 1 } @proposed_related;
408
a1615ee4 409 # The cumulative predecessors and successors of the proposed-related readings
410 # should not overlap.
411 my %all_pred;
412 my %all_succ;
22222af9 413 foreach my $pr ( keys %pr_ids ) {
a1615ee4 414 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
415 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
416 }
417 foreach my $k ( keys %all_pred ) {
418 return( 0, "Relationship would create witness loop" )
419 if exists $all_succ{$k};
420 }
421 foreach my $k ( keys %pr_ids ) {
422 return( 0, "Relationship would create witness loop" )
423 if exists $all_pred{$k} || exists $all_succ{$k};
424 }
22222af9 425 return ( 1, "ok" );
426 }
427}
428
778251a6 429sub _drop_collations {
430 my( $self, $reading ) = @_;
431 foreach my $n ( $self->graph->neighbors( $reading ) ) {
432 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
433 $self->del_relationship( $reading, $n );
434 }
435 }
436}
437
7f52eac8 438=head2 related_readings( $reading, $filter )
22222af9 439
440Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 441If $filter is set to a subroutine ref, returns only those related readings where
442$filter( $relationship ) returns a true value.
22222af9 443
444=cut
445
446sub related_readings {
7f52eac8 447 my( $self, $reading, $filter ) = @_;
22222af9 448 my $return_object;
449 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
450 $reading = $reading->id;
451 $return_object = 1;
452 }
c84275ff 453 my @answer;
7f52eac8 454 if( $filter ) {
455 # Backwards compat
456 if( $filter eq 'colocated' ) {
457 $filter = sub { $_[0]->colocated };
458 }
c84275ff 459 my %found = ( $reading => 1 );
460 my $check = [ $reading ];
461 my $iter = 0;
462 while( @$check ) {
c84275ff 463 my $more = [];
464 foreach my $r ( @$check ) {
465 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 466 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 467 push( @$more, $nr ) unless exists $found{$nr};
468 $found{$nr} = 1;
469 }
470 }
471 }
472 $check = $more;
22222af9 473 }
7f52eac8 474 delete $found{$reading};
c84275ff 475 @answer = keys %found;
476 } else {
477 @answer = $self->graph->all_reachable( $reading );
22222af9 478 }
479 if( $return_object ) {
480 my $c = $self->collation;
c84275ff 481 return map { $c->reading( $_ ) } @answer;
22222af9 482 } else {
c84275ff 483 return @answer;
22222af9 484 }
485}
486
487=head2 merge_readings( $kept, $deleted );
488
489Makes a best-effort merge of the relationship links between the given readings, and
490stops tracking the to-be-deleted reading.
491
492=cut
493
494sub merge_readings {
495 my( $self, $kept, $deleted, $combined ) = @_;
496 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
497 # Get the pair of kept / rel
498 my @vector = ( $kept );
499 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
500 next if $vector[0] eq $vector[1]; # Don't add a self loop
501
502 # If kept changes its text, drop the relationship.
503 next if $combined;
504
f222800e 505 # If kept / rel already has a relationship, just keep the old
3ae5e2ad 506 my $rel = $self->get_relationship( @vector );
f222800e 507 next if $rel;
22222af9 508
509 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 510 $rel = $self->get_relationship( @$edge );
511 $self->_set_relationship( $rel, @vector );
22222af9 512 }
513 $self->delete_reading( $deleted );
514}
515
027d819c 516sub _as_graphml {
2626f709 517 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 518
519 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
520 $rgraph->setAttribute( 'edgedefault', 'directed' );
521 $rgraph->setAttribute( 'id', 'relationships', );
522 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
523 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
524 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
525 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
526 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
527
528 # Add the vertices according to their XML IDs
2626f709 529 my %rdg_lookup = ( reverse %$node_hash );
530 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 531 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
532 $n_el->setAttribute( 'id', $n );
2626f709 533 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 534 }
535
536 # Add the relationship edges, with their object information
537 my $edge_ctr = 0;
538 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
539 # Add an edge and fill in its relationship info.
540 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
541 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
542 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
543 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
544
3ae5e2ad 545 my $rel_obj = $self->get_relationship( @$e );
bbd064a9 546 foreach my $key ( keys %$edge_keys ) {
547 my $value = $rel_obj->$key;
548 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
549 if defined $value;
550 }
c84275ff 551 }
552}
553
554sub _by_xmlid {
2626f709 555 my $tmp_a = $a;
556 my $tmp_b = $b;
557 $tmp_a =~ s/\D//g;
558 $tmp_b =~ s/\D//g;
559 return $tmp_a <=> $tmp_b;
c84275ff 560}
561
562sub _add_graphml_data {
563 my( $el, $key, $value ) = @_;
564 return unless defined $value;
565 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
566 $data_el->setAttribute( 'key', $key );
567 $data_el->appendText( $value );
83d5ac3a 568}
569
63778331 570sub throw {
571 Text::Tradition::Error->throw(
572 'ident' => 'Relationship error',
573 'message' => $_[0],
574 );
575}
576
22222af9 577no Moose;
578__PACKAGE__->meta->make_immutable;
579
5801;