allow case-insensitive relationships on all but orthography
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 try {
52         my @v3 = $c->del_relationship( 'n1', 'n2' );
53         ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55         like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56 }
57
58 =end testing
59
60 =head1 METHODS
61
62 =head2 new( collation => $collation );
63
64 Creates a new relationship store for the given collation.
65
66 =cut
67
68 has 'collation' => (
69         is => 'ro',
70         isa => 'Text::Tradition::Collation',
71         required => 1,
72         weak_ref => 1,
73         );
74
75 has 'scopedrels' => (
76         is => 'ro',
77         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78         default => sub { {} },
79         );
80
81 has 'graph' => (
82         is => 'ro',
83         isa => 'Graph',
84         default => sub { Graph->new( undirected => 1 ) },
85     handles => {
86         relationships => 'edges',
87         add_reading => 'add_vertex',
88         delete_reading => 'delete_vertex',
89     },
90         );
91         
92 =head2 get_relationship
93
94 Return the relationship object, if any, that exists between two readings.
95
96 =cut
97
98 sub get_relationship {
99         my $self = shift;
100         my @vector;
101         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102                 # Dereference the edge arrayref that was passed.
103                 my $edge = shift;
104                 @vector = @$edge;
105         } else {
106                 @vector = @_;
107         }
108         my $relationship;
109         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
111         } 
112         return $relationship;
113 }
114
115 sub _set_relationship {
116         my( $self, $relationship, @vector ) = @_;
117         $self->graph->add_edge( @vector );
118         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 }
120
121 =head2 create
122
123 Create a new relationship with the given options and return it.
124 Warn and return undef if the relationship cannot be created.
125
126 =cut
127
128 sub create {
129         my( $self, $options ) = @_;
130         # Check to see if a relationship exists between the two given readings
131         my $source = delete $options->{'orig_a'};
132         my $target = delete $options->{'orig_b'};
133         my $rel = $self->get_relationship( $source, $target );
134         if( $rel ) {
135                 if( $rel->type eq 'collated' ) {
136                         # Always replace a 'collated' relationship with a more descriptive
137                         # one, if asked.
138                         $self->del_relationship( $source, $target );
139                 } elsif( $rel->type ne $options->{'type'} ) {
140                         throw( "Another relationship of type " . $rel->type 
141                                 . " already exists between $source and $target" );
142                 } else {
143                         return $rel;
144                 }
145         }
146         
147         # Check to see if a nonlocal relationship is defined for the two readings
148         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
149                 $options->{'reading_b'} );
150         if( $rel && $rel->type eq $options->{'type'} ) {
151                 return $rel;
152         } elsif( $rel ) {
153                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
154         } else {
155                 $rel = Text::Tradition::Collation::Relationship->new( $options );
156                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157                 return $rel;
158         }
159 }
160
161 =head2 add_scoped_relationship( $rel )
162
163 Keep track of relationships defined between specific readings that are scoped
164 non-locally.  Key on whichever reading occurs first alphabetically.
165
166 =cut
167
168 sub add_scoped_relationship {
169         my( $self, $rel ) = @_;
170         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
171         if( $r ) {
172                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
173                         $r->type, $rel->reading_a, $rel->reading_b );
174                 return;
175         }
176         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
177 }
178
179 =head2 scoped_relationship( $reading_a, $reading_b )
180
181 Returns the general (document-level or global) relationship that has been defined 
182 between the two reading strings. Returns undef if there is no general relationship.
183
184 =cut
185
186 sub scoped_relationship {
187         my( $self, $rdga, $rdgb ) = @_;
188         my( $first, $second ) = sort( $rdga, $rdgb );
189         if( exists $self->scopedrels->{$first}->{$second} ) {
190                 return $self->scopedrels->{$first}->{$second};
191         } else {
192                 return undef;
193         }
194 }
195
196 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197
198 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
199 for the possible options) between the readings given in $source and $target.  Sets
200 up a scoped relationship between $sourcetext and $targettext if the relationship is
201 scoped non-locally.
202
203 Returns a status boolean and a list of all reading pairs connected by the call to
204 add_relationship.
205
206 =cut
207
208 sub add_relationship {
209         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
210
211         my $relationship;
212         my $thispaironly;
213         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
214                 $relationship = $options;
215                 $thispaironly = 1;  # If existing rel, set only where asked.
216         } else {
217                 # Check the options
218                 $options->{'scope'} = 'local' unless $options->{'scope'};
219                 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
220                 
221                 my( $is_valid, $reason ) = 
222                         $self->relationship_valid( $source, $target, $options->{'type'} );
223                 unless( $is_valid ) {
224                         throw( "Invalid relationship: $reason" );
225                 }
226                 
227                 # Try to create the relationship object.
228                 $options->{'reading_a'} = $source_rdg->text;
229                 $options->{'reading_b'} = $target_rdg->text;
230                 $options->{'orig_a'} = $source;
231                 $options->{'orig_b'} = $target;
232         if( $options->{'scope'} ne 'local' ) {
233                         # Is there a relationship with this a & b already?
234                         my $otherrel = $self->scoped_relationship( $options->{reading_a}, 
235                                 $options->{reading_b} );
236                         if( $otherrel && $otherrel->type eq $options->{type}
237                                 && $otherrel->scope eq $options->{scope} ) {
238                                 warn "Applying existing scoped relationship";
239                                 $relationship = $otherrel;
240                         }
241         }
242                 $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
243     }
244
245
246         # Find all the pairs for which we need to set the relationship.
247         my @vectors = [ $source, $target ];
248     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
249         push( @vectors, $self->_find_applicable( $relationship ) );
250     } 
251         
252     # Now set the relationship(s).
253     my @pairs_set;
254     foreach my $v ( @vectors ) {
255                 my $rel = $self->get_relationship( @$v );
256         if( $rel && $rel ne $relationship ) {
257                 if( $rel->nonlocal ) {
258                         throw( "Found conflicting relationship at @$v" );
259                 } else {
260                         warn "Not overriding local relationship set at @$v";
261                 }
262                 next;
263         }
264         $self->_set_relationship( $relationship, @$v );
265         push( @pairs_set, $v );
266     }
267     
268     return @pairs_set;
269 }
270
271 sub _find_applicable {
272         my( $self, $rel ) = @_;
273         my $c = $self->collation;
274         # TODO Someday we might use a case sensitive language.
275         my $lang = $c->tradition->language;
276         my @vectors;
277         my @identical_readings;
278         if( $rel->type eq 'orthographic' ) {
279                 @identical_readings = grep { $_->text eq $rel->reading_a } 
280                         $c->readings;
281         } else {
282                 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
283                         $c->readings;
284         }
285         foreach my $ir ( @identical_readings ) {
286                 my @itarget;
287                 if( $rel->type eq 'orthographic' ) {
288                         @itarget = grep { $_->rank == $ir->rank 
289                                                           && $_->text eq $rel->reading_b } $c->readings;
290                 } else {
291                         @itarget = grep { $_->rank == $ir->rank 
292                                                           && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
293                 }
294                 if( @itarget ) {
295                         # Warn if there is more than one hit with no orth link between them.
296                         my $itmain = shift @itarget;
297                         if( @itarget ) {
298                                 my %all_targets;
299                                 map { $all_targets{$_} = 1 } @itarget;
300                                 map { delete $all_targets{$_} } 
301                                         $self->related_readings( $itmain, 
302                                                 sub { $_[0]->type eq 'orthographic' } );
303                         warn "More than one unrelated reading with text " . $itmain->text
304                                 . " at rank " . $ir->rank . "!" if keys %all_targets;
305                         }
306                         push( @vectors, [ $ir->id, $itmain->id ] );
307                 }
308         }
309         return @vectors;
310 }
311
312 =head2 del_relationship( $source, $target )
313
314 Removes the relationship between the given readings. If the relationship is
315 non-local, removes the relationship everywhere in the graph.
316
317 =cut
318
319 sub del_relationship {
320         my( $self, $source, $target ) = @_;
321         my $rel = $self->get_relationship( $source, $target );
322         throw( "No relationship defined between $source and $target" ) unless $rel;
323         my @vectors = ( [ $source, $target ] );
324         $self->_remove_relationship( $source, $target );
325         if( $rel->nonlocal ) {
326                 # Remove the relationship wherever it occurs.
327                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
328                         $self->relationships;
329                 foreach my $re ( @rel_edges ) {
330                         $self->_remove_relationship( @$re );
331                         push( @vectors, $re );
332                 }
333         }
334         return @vectors;
335 }
336
337 sub _remove_relationship {
338         my( $self, @vector ) = @_;
339         $self->graph->delete_edge( @vector );
340 }
341         
342 =head2 relationship_valid( $source, $target, $type )
343
344 Checks whether a relationship of type $type may exist between the readings given
345 in $source and $target.  Returns a tuple of ( status, message ) where status is
346 a yes/no boolean and, if the answer is no, message gives the reason why.
347
348 =cut
349
350 sub relationship_valid {
351     my( $self, $source, $target, $rel ) = @_;
352     my $c = $self->collation;
353     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
354                 # Check that the two readings do (for a repetition) or do not (for
355                 # a transposition) appear in the same witness.
356                 # TODO this might be called before witness paths are set...
357                 my %seen_wits;
358                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
359                 foreach my $w ( $c->reading_witnesses( $target ) ) {
360                         if( $seen_wits{$w} ) {
361                                 return ( 0, "Readings both occur in witness $w" ) 
362                                         if $rel eq 'transposition';
363                                 return ( 1, "ok" ) if $rel eq 'repetition';
364                 }
365                 return $rel eq 'transposition' ? ( 1, "ok" )
366                         : ( 0, "Readings occur only in distinct witnesses" );
367                 }
368         } else {
369                 # Check that linking the source and target in a relationship won't lead
370                 # to a path loop for any witness.  If they have the same rank then fine.
371                 return( 1, "ok" ) 
372                         if $c->reading( $source )->has_rank
373                                 && $c->reading( $target )->has_rank
374                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
375                 
376                 # Otherwise, first make a lookup table of all the
377                 # readings related to either the source or the target.
378                 my @proposed_related = ( $source, $target );
379                 # Drop the collation links of source and target, unless we want to
380                 # add a collation relationship.
381                 foreach my $r ( ( $source, $target ) ) {
382                         $self->_drop_collations( $r ) unless $rel eq 'collated';
383                         push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
384                 }
385                 my %pr_ids;
386                 map { $pr_ids{ $_ } = 1 } @proposed_related;
387         
388                 # The cumulative predecessors and successors of the proposed-related readings
389                 # should not overlap.
390                 my %all_pred;
391                 my %all_succ;
392                 foreach my $pr ( keys %pr_ids ) {
393                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
394                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
395                 }
396                 foreach my $k ( keys %all_pred ) {
397                         return( 0, "Relationship would create witness loop" )
398                                 if exists $all_succ{$k};
399                 }
400                 foreach my $k ( keys %pr_ids ) {
401                         return( 0, "Relationship would create witness loop" )
402                                 if exists $all_pred{$k} || exists $all_succ{$k};
403                 }
404                 return ( 1, "ok" );
405         }
406 }
407
408 sub _drop_collations {
409         my( $self, $reading ) = @_;
410         foreach my $n ( $self->graph->neighbors( $reading ) ) {
411                 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
412                         $self->del_relationship( $reading, $n );
413                 }
414         }
415 }
416
417 =head2 related_readings( $reading, $filter )
418
419 Returns a list of readings that are connected via relationship links to $reading.
420 If $filter is set to a subroutine ref, returns only those related readings where
421 $filter( $relationship ) returns a true value.
422
423 =cut
424
425 sub related_readings {
426         my( $self, $reading, $filter ) = @_;
427         my $return_object;
428         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
429                 $reading = $reading->id;
430                 $return_object = 1;
431         }
432         my @answer;
433         if( $filter ) {
434                 # Backwards compat
435                 if( $filter eq 'colocated' ) {
436                         $filter = sub { $_[0]->colocated };
437                 }
438                 my %found = ( $reading => 1 );
439                 my $check = [ $reading ];
440                 my $iter = 0;
441                 while( @$check ) {
442                         my $more = [];
443                         foreach my $r ( @$check ) {
444                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
445                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
446                                                 push( @$more, $nr ) unless exists $found{$nr};
447                                                 $found{$nr} = 1;
448                                         }
449                                 }
450                         }
451                         $check = $more;
452                 }
453                 delete $found{$reading};
454                 @answer = keys %found;
455         } else {
456                 @answer = $self->graph->all_reachable( $reading );
457         }
458         if( $return_object ) {
459                 my $c = $self->collation;
460                 return map { $c->reading( $_ ) } @answer;
461         } else {
462                 return @answer;
463         }
464 }
465
466 =head2 merge_readings( $kept, $deleted );
467
468 Makes a best-effort merge of the relationship links between the given readings, and
469 stops tracking the to-be-deleted reading.
470
471 =cut
472
473 sub merge_readings {
474         my( $self, $kept, $deleted, $combined ) = @_;
475         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
476                 # Get the pair of kept / rel
477                 my @vector = ( $kept );
478                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
479                 next if $vector[0] eq $vector[1]; # Don't add a self loop
480                 
481                 # If kept changes its text, drop the relationship.
482                 next if $combined;
483                         
484                 # If kept / rel already has a relationship, warn and keep the old
485                 my $rel = $self->get_relationship( @vector );
486                 if( $rel ) {
487                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
488                         next;
489                 }
490                 
491                 # Otherwise, adopt the relationship that would be deleted.
492                 $rel = $self->get_relationship( @$edge );
493                 $self->_set_relationship( $rel, @vector );
494         }
495         $self->delete_reading( $deleted );
496 }
497
498 sub _as_graphml { 
499         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
500         
501     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
502         $rgraph->setAttribute( 'edgedefault', 'directed' );
503     $rgraph->setAttribute( 'id', 'relationships', );
504     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
505     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
506     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
507     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
508     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
509     
510     # Add the vertices according to their XML IDs
511     my %rdg_lookup = ( reverse %$node_hash );
512     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
513         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
514         $n_el->setAttribute( 'id', $n );
515         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
516     }
517     
518     # Add the relationship edges, with their object information
519     my $edge_ctr = 0;
520     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
521         # Add an edge and fill in its relationship info.
522                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
523                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
524                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
525                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
526
527                 my $rel_obj = $self->get_relationship( @$e );
528                 foreach my $key ( keys %$edge_keys ) {
529                         my $value = $rel_obj->$key;
530                         _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) 
531                                 if defined $value;
532                 }
533         }
534 }
535
536 sub _by_xmlid {
537         my $tmp_a = $a;
538         my $tmp_b = $b;
539         $tmp_a =~ s/\D//g;
540         $tmp_b =~ s/\D//g;
541         return $tmp_a <=> $tmp_b;
542 }
543
544 sub _add_graphml_data {
545     my( $el, $key, $value ) = @_;
546     return unless defined $value;
547     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
548     $data_el->setAttribute( 'key', $key );
549     $data_el->appendText( $value );
550 }
551
552 sub throw {
553         Text::Tradition::Error->throw( 
554                 'ident' => 'Relationship error',
555                 'message' => $_[0],
556                 );
557 }
558
559 no Moose;
560 __PACKAGE__->meta->make_immutable;
561
562 1;