relationship deletions should include scoped objects
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
53
54 =end testing
55
56 =head1 METHODS
57
58 =head2 new( collation => $collation );
59
60 Creates a new relationship store for the given collation.
61
62 =cut
63
64 has 'collation' => (
65         is => 'ro',
66         isa => 'Text::Tradition::Collation',
67         required => 1,
68         weak_ref => 1,
69         );
70
71 has 'scopedrels' => (
72         is => 'ro',
73         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74         default => sub { {} },
75         );
76
77 has 'graph' => (
78         is => 'ro',
79         isa => 'Graph',
80         default => sub { Graph->new( undirected => 1 ) },
81     handles => {
82         relationships => 'edges',
83         add_reading => 'add_vertex',
84         delete_reading => 'delete_vertex',
85     },
86         );
87         
88 =head2 get_relationship
89
90 Return the relationship object, if any, that exists between two readings.
91
92 =cut
93
94 sub get_relationship {
95         my $self = shift;
96         my @vector;
97         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98                 # Dereference the edge arrayref that was passed.
99                 my $edge = shift;
100                 @vector = @$edge;
101         } else {
102                 @vector = @_;
103         }
104         my $relationship;
105         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
107         } 
108         return $relationship;
109 }
110
111 sub _set_relationship {
112         my( $self, $relationship, @vector ) = @_;
113         $self->graph->add_edge( @vector );
114         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
115 }
116
117 =head2 create
118
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
121
122 =cut
123
124 sub create {
125         my( $self, $options ) = @_;
126         # Check to see if a relationship exists between the two given readings
127         my $source = delete $options->{'orig_a'};
128         my $target = delete $options->{'orig_b'};
129         my $rel = $self->get_relationship( $source, $target );
130         if( $rel ) {
131                 if( $rel->type eq 'collated' ) {
132                         # Always replace a 'collated' relationship with a more descriptive
133                         # one, if asked.
134                         $self->del_relationship( $source, $target );
135                 } elsif( $rel->type ne $options->{'type'} ) {
136                         throw( "Another relationship of type " . $rel->type 
137                                 . " already exists between $source and $target" );
138                 } else {
139                         return $rel;
140                 }
141         }
142         
143         # Check to see if a nonlocal relationship is defined for the two readings
144         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
145                 $options->{'reading_b'} );
146         if( $rel && $rel->type eq $options->{'type'} ) {
147                 return $rel;
148         } elsif( $rel ) {
149                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
150         } else {
151                 $rel = Text::Tradition::Collation::Relationship->new( $options );
152                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
153                 return $rel;
154         }
155 }
156
157 =head2 add_scoped_relationship( $rel )
158
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally.  Key on whichever reading occurs first alphabetically.
161
162 =cut
163
164 sub add_scoped_relationship {
165         my( $self, $rel ) = @_;
166         my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167         my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );      
168         my $r = $self->scoped_relationship( $rdga, $rdgb );
169         if( $r ) {
170                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171                         $r->type, $rdga, $rdgb );
172                 return;
173         }
174         my( $first, $second ) = sort ( $rdga, $rdgb );
175         $self->scopedrels->{$first}->{$second} = $rel;
176 }
177
178 =head2 scoped_relationship( $reading_a, $reading_b )
179
180 Returns the general (document-level or global) relationship that has been defined 
181 between the two reading strings. Returns undef if there is no general relationship.
182
183 =cut
184
185 sub scoped_relationship {
186         my( $self, $rdga, $rdgb ) = @_;
187         my( $first, $second ) = sort( $rdga, $rdgb );
188         if( exists $self->scopedrels->{$first}->{$second} ) {
189                 return $self->scopedrels->{$first}->{$second};
190         } else {
191                 return undef;
192         }
193 }
194
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
196
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
198 for the possible options) between the readings given in $source and $target.  Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
200 scoped non-locally.
201
202 Returns a status boolean and a list of all reading pairs connected by the call to
203 add_relationship.
204
205 =cut
206
207 sub add_relationship {
208         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
209
210         my $relationship;
211         my $thispaironly;
212         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
213                 $relationship = $options;
214                 $thispaironly = 1;  # If existing rel, set only where asked.
215         } else {
216                 # Check the options
217                 $options->{'scope'} = 'local' unless $options->{'scope'};
218                 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
219                 
220                 my( $is_valid, $reason ) = 
221                         $self->relationship_valid( $source, $target, $options->{'type'} );
222                 unless( $is_valid ) {
223                         throw( "Invalid relationship: $reason" );
224                 }
225                 
226                 # Try to create the relationship object.
227                 $options->{'reading_a'} = $source_rdg->text;
228                 $options->{'reading_b'} = $target_rdg->text;
229                 $options->{'orig_a'} = $source;
230                 $options->{'orig_b'} = $target;
231         if( $options->{'scope'} ne 'local' ) {
232                         # Is there a relationship with this a & b already?
233                         # Case-insensitive for non-orthographics.
234                         my $rdga = $options->{'type'} eq 'orthographic' 
235                                 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
236                         my $rdgb = $options->{'type'} eq 'orthographic' 
237                                 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
238                         my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
239                         if( $otherrel && $otherrel->type eq $options->{type}
240                                 && $otherrel->scope eq $options->{scope} ) {
241                                 warn "Applying existing scoped relationship";
242                                 $relationship = $otherrel;
243                         }
244         }
245                 $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
246     }
247
248
249         # Find all the pairs for which we need to set the relationship.
250         my @vectors = [ $source, $target ];
251     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
252         push( @vectors, $self->_find_applicable( $relationship ) );
253     }
254         
255     # Now set the relationship(s).
256     my @pairs_set;
257     foreach my $v ( @vectors ) {
258                 my $rel = $self->get_relationship( @$v );
259         if( $rel && $rel ne $relationship ) {
260                 if( $rel->nonlocal ) {
261                         throw( "Found conflicting relationship at @$v" );
262                 } elsif( $rel->type ne 'collated' ) {
263                         # Replace a collation relationship; leave any other sort in place.
264                         warn "Not overriding local relationship set at @$v";
265                                 next;
266                 }
267         }
268         $self->_set_relationship( $relationship, @$v );
269         push( @pairs_set, $v );
270     }
271     
272     return @pairs_set;
273 }
274
275 =head2 del_scoped_relationship( $reading_a, $reading_b )
276
277 Returns the general (document-level or global) relationship that has been defined 
278 between the two reading strings. Returns undef if there is no general relationship.
279
280 =cut
281
282 sub del_scoped_relationship {
283         my( $self, $rdga, $rdgb ) = @_;
284         my( $first, $second ) = sort( $rdga, $rdgb );
285         return delete $self->scopedrels->{$first}->{$second};
286 }
287
288 sub _find_applicable {
289         my( $self, $rel ) = @_;
290         my $c = $self->collation;
291         # TODO Someday we might use a case sensitive language.
292         my $lang = $c->tradition->language;
293         my @vectors;
294         my @identical_readings;
295         if( $rel->type eq 'orthographic' ) {
296                 @identical_readings = grep { $_->text eq $rel->reading_a } 
297                         $c->readings;
298         } else {
299                 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
300                         $c->readings;
301         }
302         foreach my $ir ( @identical_readings ) {
303                 my @itarget;
304                 if( $rel->type eq 'orthographic' ) {
305                         @itarget = grep { $_->rank == $ir->rank 
306                                                           && $_->text eq $rel->reading_b } $c->readings;
307                 } else {
308                         @itarget = grep { $_->rank == $ir->rank 
309                                                           && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
310                 }
311                 if( @itarget ) {
312                         # Warn if there is more than one hit with no orth link between them.
313                         my $itmain = shift @itarget;
314                         if( @itarget ) {
315                                 my %all_targets;
316                                 map { $all_targets{$_} = 1 } @itarget;
317                                 map { delete $all_targets{$_} } 
318                                         $self->related_readings( $itmain, 
319                                                 sub { $_[0]->type eq 'orthographic' } );
320                         warn "More than one unrelated reading with text " . $itmain->text
321                                 . " at rank " . $ir->rank . "!" if keys %all_targets;
322                         }
323                         push( @vectors, [ $ir->id, $itmain->id ] );
324                 }
325         }
326         return @vectors;
327 }
328
329 =head2 del_relationship( $source, $target )
330
331 Removes the relationship between the given readings. If the relationship is
332 non-local, removes the relationship everywhere in the graph.
333
334 =cut
335
336 sub del_relationship {
337         my( $self, $source, $target ) = @_;
338         my $rel = $self->get_relationship( $source, $target );
339         return () unless $rel; # Nothing to delete; return an empty set.
340         my @vectors = ( [ $source, $target ] );
341         $self->_remove_relationship( $source, $target );
342         if( $rel->nonlocal ) {
343                 # Remove the relationship wherever it occurs.
344                 # Remove the relationship wherever it occurs.
345                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
346                         $self->relationships;
347                 foreach my $re ( @rel_edges ) {
348                         $self->_remove_relationship( @$re );
349                         push( @vectors, $re );
350                 }
351                 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
352         }
353         return @vectors;
354 }
355
356 sub _remove_relationship {
357         my( $self, @vector ) = @_;
358         $self->graph->delete_edge( @vector );
359 }
360         
361 =head2 relationship_valid( $source, $target, $type )
362
363 Checks whether a relationship of type $type may exist between the readings given
364 in $source and $target.  Returns a tuple of ( status, message ) where status is
365 a yes/no boolean and, if the answer is no, message gives the reason why.
366
367 =cut
368
369 sub relationship_valid {
370     my( $self, $source, $target, $rel ) = @_;
371     my $c = $self->collation;
372     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
373                 # Check that the two readings do (for a repetition) or do not (for
374                 # a transposition) appear in the same witness.
375                 # TODO this might be called before witness paths are set...
376                 my %seen_wits;
377                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
378                 foreach my $w ( $c->reading_witnesses( $target ) ) {
379                         if( $seen_wits{$w} ) {
380                                 return ( 0, "Readings both occur in witness $w" ) 
381                                         if $rel eq 'transposition';
382                                 return ( 1, "ok" ) if $rel eq 'repetition';
383                 }
384                 return $rel eq 'transposition' ? ( 1, "ok" )
385                         : ( 0, "Readings occur only in distinct witnesses" );
386                 }
387         } else {
388                 # Check that linking the source and target in a relationship won't lead
389                 # to a path loop for any witness.  If they have the same rank then fine.
390                 return( 1, "ok" ) 
391                         if $c->reading( $source )->has_rank
392                                 && $c->reading( $target )->has_rank
393                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
394                 
395                 # Otherwise, first make a lookup table of all the
396                 # readings related to either the source or the target.
397                 my @proposed_related = ( $source, $target );
398                 # Drop the collation links of source and target, unless we want to
399                 # add a collation relationship.
400                 foreach my $r ( ( $source, $target ) ) {
401                         $self->_drop_collations( $r ) unless $rel eq 'collated';
402                         push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
403                 }
404                 my %pr_ids;
405                 map { $pr_ids{ $_ } = 1 } @proposed_related;
406         
407                 # The cumulative predecessors and successors of the proposed-related readings
408                 # should not overlap.
409                 my %all_pred;
410                 my %all_succ;
411                 foreach my $pr ( keys %pr_ids ) {
412                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
413                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
414                 }
415                 foreach my $k ( keys %all_pred ) {
416                         return( 0, "Relationship would create witness loop" )
417                                 if exists $all_succ{$k};
418                 }
419                 foreach my $k ( keys %pr_ids ) {
420                         return( 0, "Relationship would create witness loop" )
421                                 if exists $all_pred{$k} || exists $all_succ{$k};
422                 }
423                 return ( 1, "ok" );
424         }
425 }
426
427 sub _drop_collations {
428         my( $self, $reading ) = @_;
429         foreach my $n ( $self->graph->neighbors( $reading ) ) {
430                 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
431                         $self->del_relationship( $reading, $n );
432                 }
433         }
434 }
435
436 =head2 related_readings( $reading, $filter )
437
438 Returns a list of readings that are connected via relationship links to $reading.
439 If $filter is set to a subroutine ref, returns only those related readings where
440 $filter( $relationship ) returns a true value.
441
442 =cut
443
444 sub related_readings {
445         my( $self, $reading, $filter ) = @_;
446         my $return_object;
447         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
448                 $reading = $reading->id;
449                 $return_object = 1;
450         }
451         my @answer;
452         if( $filter ) {
453                 # Backwards compat
454                 if( $filter eq 'colocated' ) {
455                         $filter = sub { $_[0]->colocated };
456                 }
457                 my %found = ( $reading => 1 );
458                 my $check = [ $reading ];
459                 my $iter = 0;
460                 while( @$check ) {
461                         my $more = [];
462                         foreach my $r ( @$check ) {
463                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
464                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
465                                                 push( @$more, $nr ) unless exists $found{$nr};
466                                                 $found{$nr} = 1;
467                                         }
468                                 }
469                         }
470                         $check = $more;
471                 }
472                 delete $found{$reading};
473                 @answer = keys %found;
474         } else {
475                 @answer = $self->graph->all_reachable( $reading );
476         }
477         if( $return_object ) {
478                 my $c = $self->collation;
479                 return map { $c->reading( $_ ) } @answer;
480         } else {
481                 return @answer;
482         }
483 }
484
485 =head2 merge_readings( $kept, $deleted );
486
487 Makes a best-effort merge of the relationship links between the given readings, and
488 stops tracking the to-be-deleted reading.
489
490 =cut
491
492 sub merge_readings {
493         my( $self, $kept, $deleted, $combined ) = @_;
494         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
495                 # Get the pair of kept / rel
496                 my @vector = ( $kept );
497                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
498                 next if $vector[0] eq $vector[1]; # Don't add a self loop
499                 
500                 # If kept changes its text, drop the relationship.
501                 next if $combined;
502                         
503                 # If kept / rel already has a relationship, just keep the old
504                 my $rel = $self->get_relationship( @vector );
505                 next if $rel;
506                 
507                 # Otherwise, adopt the relationship that would be deleted.
508                 $rel = $self->get_relationship( @$edge );
509                 $self->_set_relationship( $rel, @vector );
510         }
511         $self->delete_reading( $deleted );
512 }
513
514 sub _as_graphml { 
515         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
516         
517     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
518         $rgraph->setAttribute( 'edgedefault', 'directed' );
519     $rgraph->setAttribute( 'id', 'relationships', );
520     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
521     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
522     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
523     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
524     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
525     
526     # Add the vertices according to their XML IDs
527     my %rdg_lookup = ( reverse %$node_hash );
528     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
529         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
530         $n_el->setAttribute( 'id', $n );
531         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
532     }
533     
534     # Add the relationship edges, with their object information
535     my $edge_ctr = 0;
536     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
537         # Add an edge and fill in its relationship info.
538                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
539                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
540                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
541                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
542
543                 my $rel_obj = $self->get_relationship( @$e );
544                 foreach my $key ( keys %$edge_keys ) {
545                         my $value = $rel_obj->$key;
546                         _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) 
547                                 if defined $value;
548                 }
549         }
550 }
551
552 sub _by_xmlid {
553         my $tmp_a = $a;
554         my $tmp_b = $b;
555         $tmp_a =~ s/\D//g;
556         $tmp_b =~ s/\D//g;
557         return $tmp_a <=> $tmp_b;
558 }
559
560 sub _add_graphml_data {
561     my( $el, $key, $value ) = @_;
562     return unless defined $value;
563     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
564     $data_el->setAttribute( 'key', $key );
565     $data_el->appendText( $value );
566 }
567
568 sub throw {
569         Text::Tradition::Error->throw( 
570                 'ident' => 'Relationship error',
571                 'message' => $_[0],
572                 );
573 }
574
575 no Moose;
576 __PACKAGE__->meta->make_immutable;
577
578 1;