remove refs to deprecated relationship type 'meaning'
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
53
54 =end testing
55
56 =head1 METHODS
57
58 =head2 new( collation => $collation );
59
60 Creates a new relationship store for the given collation.
61
62 =cut
63
64 has 'collation' => (
65         is => 'ro',
66         isa => 'Text::Tradition::Collation',
67         required => 1,
68         weak_ref => 1,
69         );
70
71 has 'scopedrels' => (
72         is => 'ro',
73         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74         default => sub { {} },
75         );
76
77 has 'graph' => (
78         is => 'ro',
79         isa => 'Graph',
80         default => sub { Graph->new( undirected => 1 ) },
81     handles => {
82         relationships => 'edges',
83         add_reading => 'add_vertex',
84         delete_reading => 'delete_vertex',
85     },
86         );
87         
88 =head2 get_relationship
89
90 Return the relationship object, if any, that exists between two readings.
91
92 =cut
93
94 sub get_relationship {
95         my $self = shift;
96         my @vector;
97         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98                 # Dereference the edge arrayref that was passed.
99                 my $edge = shift;
100                 @vector = @$edge;
101         } else {
102                 @vector = @_;
103         }
104         my $relationship;
105         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
107         } 
108         return $relationship;
109 }
110
111 sub _set_relationship {
112         my( $self, $relationship, @vector ) = @_;
113         $self->graph->add_edge( @vector );
114         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
115 }
116
117 =head2 create
118
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
121
122 =cut
123
124 sub create {
125         my( $self, $options ) = @_;
126         # Check to see if a relationship exists between the two given readings
127         my $source = delete $options->{'orig_a'};
128         my $target = delete $options->{'orig_b'};
129         my $rel = $self->get_relationship( $source, $target );
130         if( $rel ) {
131                 if( $rel->type eq 'collated' ) {
132                         # Always replace a 'collated' relationship with a more descriptive
133                         # one, if asked.
134                         $self->del_relationship( $source, $target );
135                 } elsif( $rel->type ne $options->{'type'} ) {
136                         throw( "Another relationship of type " . $rel->type 
137                                 . " already exists between $source and $target" );
138                 } else {
139                         return $rel;
140                 }
141         }
142         
143         # Check to see if a nonlocal relationship is defined for the two readings
144         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
145                 $options->{'reading_b'} );
146         if( $rel && $rel->type eq $options->{'type'} ) {
147                 return $rel;
148         } elsif( $rel ) {
149                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
150         } else {
151                 $rel = Text::Tradition::Collation::Relationship->new( $options );
152                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
153                 return $rel;
154         }
155 }
156
157 =head2 add_scoped_relationship( $rel )
158
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally.  Key on whichever reading occurs first alphabetically.
161
162 =cut
163
164 sub add_scoped_relationship {
165         my( $self, $rel ) = @_;
166         my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167         my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );      
168         my $r = $self->scoped_relationship( $rdga, $rdgb );
169         if( $r ) {
170                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171                         $r->type, $rdga, $rdgb );
172                 return;
173         }
174         my( $first, $second ) = sort ( $rdga, $rdgb );
175         $self->scopedrels->{$first}->{$second} = $rel;
176 }
177
178 =head2 scoped_relationship( $reading_a, $reading_b )
179
180 Returns the general (document-level or global) relationship that has been defined 
181 between the two reading strings. Returns undef if there is no general relationship.
182
183 =cut
184
185 sub scoped_relationship {
186         my( $self, $rdga, $rdgb ) = @_;
187         my( $first, $second ) = sort( $rdga, $rdgb );
188         if( exists $self->scopedrels->{$first}->{$second} ) {
189                 return $self->scopedrels->{$first}->{$second};
190         } else {
191                 return undef;
192         }
193 }
194
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
196
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
198 for the possible options) between the readings given in $source and $target.  Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
200 scoped non-locally.
201
202 Returns a status boolean and a list of all reading pairs connected by the call to
203 add_relationship.
204
205 =cut
206
207 sub add_relationship {
208         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
209
210         my $relationship;
211         my $thispaironly;
212         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
213                 $relationship = $options;
214                 $thispaironly = 1;  # If existing rel, set only where asked.
215         } else {
216                 # Check the options
217                 $options->{'scope'} = 'local' unless $options->{'scope'};
218                 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
219                 
220                 my( $is_valid, $reason ) = 
221                         $self->relationship_valid( $source, $target, $options->{'type'} );
222                 unless( $is_valid ) {
223                         throw( "Invalid relationship: $reason" );
224                 }
225                 
226                 # Try to create the relationship object.
227                 $options->{'reading_a'} = $source_rdg->text;
228                 $options->{'reading_b'} = $target_rdg->text;
229                 $options->{'orig_a'} = $source;
230                 $options->{'orig_b'} = $target;
231         if( $options->{'scope'} ne 'local' ) {
232                         # Is there a relationship with this a & b already?
233                         # Case-insensitive for non-orthographics.
234                         my $rdga = $options->{'type'} eq 'orthographic' 
235                                 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
236                         my $rdgb = $options->{'type'} eq 'orthographic' 
237                                 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
238                         my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
239                         if( $otherrel && $otherrel->type eq $options->{type}
240                                 && $otherrel->scope eq $options->{scope} ) {
241                                 warn "Applying existing scoped relationship";
242                                 $relationship = $otherrel;
243                         }
244         }
245                 $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
246     }
247
248
249         # Find all the pairs for which we need to set the relationship.
250         my @vectors = [ $source, $target ];
251     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
252         push( @vectors, $self->_find_applicable( $relationship ) );
253     }
254         
255     # Now set the relationship(s).
256     my @pairs_set;
257     foreach my $v ( @vectors ) {
258                 my $rel = $self->get_relationship( @$v );
259         if( $rel && $rel ne $relationship ) {
260                 if( $rel->nonlocal ) {
261                         throw( "Found conflicting relationship at @$v" );
262                 } elsif( $rel->type ne 'collated' ) {
263                         # Replace a collation relationship; leave any other sort in place.
264                         warn "Not overriding local relationship set at @$v";
265                                 next;
266                 }
267         }
268         $self->_set_relationship( $relationship, @$v );
269         push( @pairs_set, $v );
270     }
271     
272     return @pairs_set;
273 }
274
275 sub _find_applicable {
276         my( $self, $rel ) = @_;
277         my $c = $self->collation;
278         # TODO Someday we might use a case sensitive language.
279         my $lang = $c->tradition->language;
280         my @vectors;
281         my @identical_readings;
282         if( $rel->type eq 'orthographic' ) {
283                 @identical_readings = grep { $_->text eq $rel->reading_a } 
284                         $c->readings;
285         } else {
286                 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
287                         $c->readings;
288         }
289         foreach my $ir ( @identical_readings ) {
290                 my @itarget;
291                 if( $rel->type eq 'orthographic' ) {
292                         @itarget = grep { $_->rank == $ir->rank 
293                                                           && $_->text eq $rel->reading_b } $c->readings;
294                 } else {
295                         @itarget = grep { $_->rank == $ir->rank 
296                                                           && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
297                 }
298                 if( @itarget ) {
299                         # Warn if there is more than one hit with no orth link between them.
300                         my $itmain = shift @itarget;
301                         if( @itarget ) {
302                                 my %all_targets;
303                                 map { $all_targets{$_} = 1 } @itarget;
304                                 map { delete $all_targets{$_} } 
305                                         $self->related_readings( $itmain, 
306                                                 sub { $_[0]->type eq 'orthographic' } );
307                         warn "More than one unrelated reading with text " . $itmain->text
308                                 . " at rank " . $ir->rank . "!" if keys %all_targets;
309                         }
310                         push( @vectors, [ $ir->id, $itmain->id ] );
311                 }
312         }
313         return @vectors;
314 }
315
316 =head2 del_relationship( $source, $target )
317
318 Removes the relationship between the given readings. If the relationship is
319 non-local, removes the relationship everywhere in the graph.
320
321 =cut
322
323 sub del_relationship {
324         my( $self, $source, $target ) = @_;
325         my $rel = $self->get_relationship( $source, $target );
326         return () unless $rel; # Nothing to delete; return an empty set.
327         my @vectors = ( [ $source, $target ] );
328         $self->_remove_relationship( $source, $target );
329         if( $rel->nonlocal ) {
330                 # Remove the relationship wherever it occurs.
331                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
332                         $self->relationships;
333                 foreach my $re ( @rel_edges ) {
334                         $self->_remove_relationship( @$re );
335                         push( @vectors, $re );
336                 }
337         }
338         return @vectors;
339 }
340
341 sub _remove_relationship {
342         my( $self, @vector ) = @_;
343         $self->graph->delete_edge( @vector );
344 }
345         
346 =head2 relationship_valid( $source, $target, $type )
347
348 Checks whether a relationship of type $type may exist between the readings given
349 in $source and $target.  Returns a tuple of ( status, message ) where status is
350 a yes/no boolean and, if the answer is no, message gives the reason why.
351
352 =cut
353
354 sub relationship_valid {
355     my( $self, $source, $target, $rel ) = @_;
356     my $c = $self->collation;
357     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
358                 # Check that the two readings do (for a repetition) or do not (for
359                 # a transposition) appear in the same witness.
360                 # TODO this might be called before witness paths are set...
361                 my %seen_wits;
362                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
363                 foreach my $w ( $c->reading_witnesses( $target ) ) {
364                         if( $seen_wits{$w} ) {
365                                 return ( 0, "Readings both occur in witness $w" ) 
366                                         if $rel eq 'transposition';
367                                 return ( 1, "ok" ) if $rel eq 'repetition';
368                 }
369                 return $rel eq 'transposition' ? ( 1, "ok" )
370                         : ( 0, "Readings occur only in distinct witnesses" );
371                 }
372         } else {
373                 # Check that linking the source and target in a relationship won't lead
374                 # to a path loop for any witness.  If they have the same rank then fine.
375                 return( 1, "ok" ) 
376                         if $c->reading( $source )->has_rank
377                                 && $c->reading( $target )->has_rank
378                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
379                 
380                 # Otherwise, first make a lookup table of all the
381                 # readings related to either the source or the target.
382                 my @proposed_related = ( $source, $target );
383                 # Drop the collation links of source and target, unless we want to
384                 # add a collation relationship.
385                 foreach my $r ( ( $source, $target ) ) {
386                         $self->_drop_collations( $r ) unless $rel eq 'collated';
387                         push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
388                 }
389                 my %pr_ids;
390                 map { $pr_ids{ $_ } = 1 } @proposed_related;
391         
392                 # The cumulative predecessors and successors of the proposed-related readings
393                 # should not overlap.
394                 my %all_pred;
395                 my %all_succ;
396                 foreach my $pr ( keys %pr_ids ) {
397                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
398                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
399                 }
400                 foreach my $k ( keys %all_pred ) {
401                         return( 0, "Relationship would create witness loop" )
402                                 if exists $all_succ{$k};
403                 }
404                 foreach my $k ( keys %pr_ids ) {
405                         return( 0, "Relationship would create witness loop" )
406                                 if exists $all_pred{$k} || exists $all_succ{$k};
407                 }
408                 return ( 1, "ok" );
409         }
410 }
411
412 sub _drop_collations {
413         my( $self, $reading ) = @_;
414         foreach my $n ( $self->graph->neighbors( $reading ) ) {
415                 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
416                         $self->del_relationship( $reading, $n );
417                 }
418         }
419 }
420
421 =head2 related_readings( $reading, $filter )
422
423 Returns a list of readings that are connected via relationship links to $reading.
424 If $filter is set to a subroutine ref, returns only those related readings where
425 $filter( $relationship ) returns a true value.
426
427 =cut
428
429 sub related_readings {
430         my( $self, $reading, $filter ) = @_;
431         my $return_object;
432         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
433                 $reading = $reading->id;
434                 $return_object = 1;
435         }
436         my @answer;
437         if( $filter ) {
438                 # Backwards compat
439                 if( $filter eq 'colocated' ) {
440                         $filter = sub { $_[0]->colocated };
441                 }
442                 my %found = ( $reading => 1 );
443                 my $check = [ $reading ];
444                 my $iter = 0;
445                 while( @$check ) {
446                         my $more = [];
447                         foreach my $r ( @$check ) {
448                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
449                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
450                                                 push( @$more, $nr ) unless exists $found{$nr};
451                                                 $found{$nr} = 1;
452                                         }
453                                 }
454                         }
455                         $check = $more;
456                 }
457                 delete $found{$reading};
458                 @answer = keys %found;
459         } else {
460                 @answer = $self->graph->all_reachable( $reading );
461         }
462         if( $return_object ) {
463                 my $c = $self->collation;
464                 return map { $c->reading( $_ ) } @answer;
465         } else {
466                 return @answer;
467         }
468 }
469
470 =head2 merge_readings( $kept, $deleted );
471
472 Makes a best-effort merge of the relationship links between the given readings, and
473 stops tracking the to-be-deleted reading.
474
475 =cut
476
477 sub merge_readings {
478         my( $self, $kept, $deleted, $combined ) = @_;
479         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
480                 # Get the pair of kept / rel
481                 my @vector = ( $kept );
482                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
483                 next if $vector[0] eq $vector[1]; # Don't add a self loop
484                 
485                 # If kept changes its text, drop the relationship.
486                 next if $combined;
487                         
488                 # If kept / rel already has a relationship, just keep the old
489                 my $rel = $self->get_relationship( @vector );
490                 next if $rel;
491                 
492                 # Otherwise, adopt the relationship that would be deleted.
493                 $rel = $self->get_relationship( @$edge );
494                 $self->_set_relationship( $rel, @vector );
495         }
496         $self->delete_reading( $deleted );
497 }
498
499 sub _as_graphml { 
500         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
501         
502     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
503         $rgraph->setAttribute( 'edgedefault', 'directed' );
504     $rgraph->setAttribute( 'id', 'relationships', );
505     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
506     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
507     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
508     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
509     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
510     
511     # Add the vertices according to their XML IDs
512     my %rdg_lookup = ( reverse %$node_hash );
513     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
514         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
515         $n_el->setAttribute( 'id', $n );
516         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
517     }
518     
519     # Add the relationship edges, with their object information
520     my $edge_ctr = 0;
521     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
522         # Add an edge and fill in its relationship info.
523                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
524                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
525                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
526                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
527
528                 my $rel_obj = $self->get_relationship( @$e );
529                 foreach my $key ( keys %$edge_keys ) {
530                         my $value = $rel_obj->$key;
531                         _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) 
532                                 if defined $value;
533                 }
534         }
535 }
536
537 sub _by_xmlid {
538         my $tmp_a = $a;
539         my $tmp_b = $b;
540         $tmp_a =~ s/\D//g;
541         $tmp_b =~ s/\D//g;
542         return $tmp_a <=> $tmp_b;
543 }
544
545 sub _add_graphml_data {
546     my( $el, $key, $value ) = @_;
547     return unless defined $value;
548     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
549     $data_el->setAttribute( 'key', $key );
550     $data_el->appendText( $value );
551 }
552
553 sub throw {
554         Text::Tradition::Error->throw( 
555                 'ident' => 'Relationship error',
556                 'message' => $_[0],
557                 );
558 }
559
560 no Moose;
561 __PACKAGE__->meta->make_immutable;
562
563 1;