handle some cases for existing scoped relationships etc.
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 try {
52         my @v3 = $c->del_relationship( 'n1', 'n2' );
53         ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55         like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56 }
57
58 =end testing
59
60 =head1 METHODS
61
62 =head2 new( collation => $collation );
63
64 Creates a new relationship store for the given collation.
65
66 =cut
67
68 has 'collation' => (
69         is => 'ro',
70         isa => 'Text::Tradition::Collation',
71         required => 1,
72         weak_ref => 1,
73         );
74
75 has 'scopedrels' => (
76         is => 'ro',
77         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78         default => sub { {} },
79         );
80
81 has 'graph' => (
82         is => 'ro',
83         isa => 'Graph',
84         default => sub { Graph->new( undirected => 1 ) },
85     handles => {
86         relationships => 'edges',
87         add_reading => 'add_vertex',
88         delete_reading => 'delete_vertex',
89     },
90         );
91         
92 =head2 get_relationship
93
94 Return the relationship object, if any, that exists between two readings.
95
96 =cut
97
98 sub get_relationship {
99         my $self = shift;
100         my @vector;
101         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102                 # Dereference the edge arrayref that was passed.
103                 my $edge = shift;
104                 @vector = @$edge;
105         } else {
106                 @vector = @_;
107         }
108         my $relationship;
109         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
111         } 
112         return $relationship;
113 }
114
115 sub _set_relationship {
116         my( $self, $relationship, @vector ) = @_;
117         $self->graph->add_edge( @vector );
118         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 }
120
121 =head2 create
122
123 Create a new relationship with the given options and return it.
124 Warn and return undef if the relationship cannot be created.
125
126 =cut
127
128 sub create {
129         my( $self, $options ) = @_;
130         # Check to see if a relationship exists between the two given readings
131         my $source = delete $options->{'orig_a'};
132         my $target = delete $options->{'orig_b'};
133         my $rel = $self->get_relationship( $source, $target );
134         if( $rel ) {
135                 if( $rel->type eq 'collated' ) {
136                         # Always replace a 'collated' relationship with a more descriptive
137                         # one, if asked.
138                         $self->del_relationship( $source, $target );
139                 } elsif( $rel->type ne $options->{'type'} ) {
140                         throw( "Another relationship of type " . $rel->type 
141                                 . " already exists between $source and $target" );
142                 } else {
143                         return $rel;
144                 }
145         }
146         
147         # Check to see if a nonlocal relationship is defined for the two readings
148         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
149                 $options->{'reading_b'} );
150         if( $rel && $rel->type eq $options->{'type'} ) {
151                 return $rel;
152         } elsif( $rel ) {
153                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
154         } else {
155                 $rel = Text::Tradition::Collation::Relationship->new( $options );
156                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157                 return $rel;
158         }
159 }
160
161 =head2 add_scoped_relationship( $rel )
162
163 Keep track of relationships defined between specific readings that are scoped
164 non-locally.  Key on whichever reading occurs first alphabetically.
165
166 =cut
167
168 sub add_scoped_relationship {
169         my( $self, $rel ) = @_;
170         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
171         if( $r ) {
172                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
173                         $r->type, $rel->reading_a, $rel->reading_b );
174                 return;
175         }
176         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
177 }
178
179 =head2 scoped_relationship( $reading_a, $reading_b )
180
181 Returns the general (document-level or global) relationship that has been defined 
182 between the two reading strings. Returns undef if there is no general relationship.
183
184 =cut
185
186 sub scoped_relationship {
187         my( $self, $rdga, $rdgb ) = @_;
188         my( $first, $second ) = sort( $rdga, $rdgb );
189         if( exists $self->scopedrels->{$first}->{$second} ) {
190                 return $self->scopedrels->{$first}->{$second};
191         } else {
192                 return undef;
193         }
194 }
195
196 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197
198 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
199 for the possible options) between the readings given in $source and $target.  Sets
200 up a scoped relationship between $sourcetext and $targettext if the relationship is
201 scoped non-locally.
202
203 Returns a status boolean and a list of all reading pairs connected by the call to
204 add_relationship.
205
206 =cut
207
208 sub add_relationship {
209         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
210
211         my $relationship;
212         my $thispaironly;
213         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
214                 $relationship = $options;
215                 $thispaironly = 1;  # If existing rel, set only where asked.
216         } else {
217                 # Check the options
218                 $options->{'scope'} = 'local' unless $options->{'scope'};
219                 
220                 my( $is_valid, $reason ) = 
221                         $self->relationship_valid( $source, $target, $options->{'type'} );
222                 unless( $is_valid ) {
223                         throw( "Invalid relationship: $reason" );
224                 }
225                 
226                 # Try to create the relationship object.
227                 $options->{'reading_a'} = $source_rdg->text;
228                 $options->{'reading_b'} = $target_rdg->text;
229                 $options->{'orig_a'} = $source;
230                 $options->{'orig_b'} = $target;
231                 $relationship = $self->create( $options );  # Will throw on error
232     }
233
234
235         # Find all the pairs for which we need to set the relationship.
236         my @vectors = ( [ $source, $target ] ); 
237     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
238         # Is there a relationship with this a & b already?
239         my $otherrel = $self->scoped_relationship( $relationship->reading_a, 
240                 $relationship->reading_b );
241         if( $otherrel && $otherrel->type eq $relationship->type 
242                 && $otherrel->scope eq $relationship->scope ) {
243                 warn "Applying existing scoped relationship";
244                 $relationship = $otherrel;
245         }
246         my $c = $self->collation;
247         # Set the same relationship everywhere we can, throughout the graph.
248         my @identical_readings = grep { $_->text eq $relationship->reading_a }
249                 $c->readings;
250         foreach my $ir ( @identical_readings ) {
251                 next if $ir->id eq $source;
252                 # Check to see if there is a target reading with the same text at
253                 # the same rank.
254                 my @itarget = grep 
255                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
256                         $c->readings;
257                 if( @itarget ) {
258                         # We found a hit.
259                         warn "More than one reading with text " . $target_rdg->text
260                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
261                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
262                 }
263         }       
264     }
265     
266     # Now set the relationship(s).
267     my @pairs_set;
268     foreach my $v ( @vectors ) {
269                 my $rel = $self->get_relationship( @$v );
270         if( $rel && $rel ne $relationship ) {
271                 if( $rel->nonlocal ) {
272                         $DB::single = 1;
273                         throw( "Found conflicting relationship at @$v" );
274                 } else {
275                         warn "Not overriding local relationship set at @$v";
276                 }
277                 next;
278         }
279         $self->_set_relationship( $relationship, @$v );
280         push( @pairs_set, $v );
281     }
282     
283     return @pairs_set;
284 }
285
286 =head2 del_relationship( $source, $target )
287
288 Removes the relationship between the given readings. If the relationship is
289 non-local, removes the relationship everywhere in the graph.
290
291 =cut
292
293 sub del_relationship {
294         my( $self, $source, $target ) = @_;
295         my $rel = $self->get_relationship( $source, $target );
296         throw( "No relationship defined between $source and $target" ) unless $rel;
297         my @vectors = ( [ $source, $target ] );
298         $self->_remove_relationship( $source, $target );
299         if( $rel->nonlocal ) {
300                 # Remove the relationship wherever it occurs.
301                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
302                         $self->relationships;
303                 foreach my $re ( @rel_edges ) {
304                         $self->_remove_relationship( @$re );
305                         push( @vectors, $re );
306                 }
307         }
308         return @vectors;
309 }
310
311 sub _remove_relationship {
312         my( $self, @vector ) = @_;
313         $self->graph->delete_edge( @vector );
314 }
315         
316 =head2 relationship_valid( $source, $target, $type )
317
318 Checks whether a relationship of type $type may exist between the readings given
319 in $source and $target.  Returns a tuple of ( status, message ) where status is
320 a yes/no boolean and, if the answer is no, message gives the reason why.
321
322 =cut
323
324 sub relationship_valid {
325     my( $self, $source, $target, $rel ) = @_;
326     my $c = $self->collation;
327     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
328                 # Check that the two readings do (for a repetition) or do not (for
329                 # a transposition) appear in the same witness.
330                 # TODO this might be called before witness paths are set...
331                 my %seen_wits;
332                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
333                 foreach my $w ( $c->reading_witnesses( $target ) ) {
334                         if( $seen_wits{$w} ) {
335                                 return ( 0, "Readings both occur in witness $w" ) 
336                                         if $rel eq 'transposition';
337                                 return ( 1, "ok" ) if $rel eq 'repetition';
338                 }
339                 return $rel eq 'transposition' ? ( 1, "ok" )
340                         : ( 0, "Readings occur only in distinct witnesses" );
341                 }
342         } else {
343                 # Check that linking the source and target in a relationship won't lead
344                 # to a path loop for any witness.  If they have the same rank then fine.
345                 return( 1, "ok" ) 
346                         if $c->reading( $source )->has_rank
347                                 && $c->reading( $target )->has_rank
348                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
349                 
350                 # Otherwise, first make a lookup table of all the
351                 # readings related to either the source or the target.
352                 my @proposed_related = ( $source, $target );
353                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
354                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
355                 my %pr_ids;
356                 map { $pr_ids{ $_ } = 1 } @proposed_related;
357         
358                 # The cumulative predecessors and successors of the proposed-related readings
359                 # should not overlap.
360                 my %all_pred;
361                 my %all_succ;
362                 foreach my $pr ( keys %pr_ids ) {
363                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
364                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
365                 }
366                 foreach my $k ( keys %all_pred ) {
367                         return( 0, "Relationship would create witness loop" )
368                                 if exists $all_succ{$k};
369                 }
370                 foreach my $k ( keys %pr_ids ) {
371                         return( 0, "Relationship would create witness loop" )
372                                 if exists $all_pred{$k} || exists $all_succ{$k};
373                 }
374                 return ( 1, "ok" );
375         }
376 }
377
378 =head2 related_readings( $reading, $filter )
379
380 Returns a list of readings that are connected via relationship links to $reading.
381 If $filter is set to a subroutine ref, returns only those related readings where
382 $filter( $relationship ) returns a true value.
383
384 =cut
385
386 sub related_readings {
387         my( $self, $reading, $filter ) = @_;
388         my $return_object;
389         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
390                 $reading = $reading->id;
391                 $return_object = 1;
392         }
393         my @answer;
394         if( $filter ) {
395                 # Backwards compat
396                 if( $filter eq 'colocated' ) {
397                         $filter = sub { $_[0]->colocated };
398                 }
399                 my %found = ( $reading => 1 );
400                 my $check = [ $reading ];
401                 my $iter = 0;
402                 while( @$check ) {
403                         my $more = [];
404                         foreach my $r ( @$check ) {
405                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
406                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
407                                                 push( @$more, $nr ) unless exists $found{$nr};
408                                                 $found{$nr} = 1;
409                                         }
410                                 }
411                         }
412                         $check = $more;
413                 }
414                 delete $found{$reading};
415                 @answer = keys %found;
416         } else {
417                 @answer = $self->graph->all_reachable( $reading );
418         }
419         if( $return_object ) {
420                 my $c = $self->collation;
421                 return map { $c->reading( $_ ) } @answer;
422         } else {
423                 return @answer;
424         }
425 }
426
427 =head2 merge_readings( $kept, $deleted );
428
429 Makes a best-effort merge of the relationship links between the given readings, and
430 stops tracking the to-be-deleted reading.
431
432 =cut
433
434 sub merge_readings {
435         my( $self, $kept, $deleted, $combined ) = @_;
436         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
437                 # Get the pair of kept / rel
438                 my @vector = ( $kept );
439                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
440                 next if $vector[0] eq $vector[1]; # Don't add a self loop
441                 
442                 # If kept changes its text, drop the relationship.
443                 next if $combined;
444                         
445                 # If kept / rel already has a relationship, warn and keep the old
446                 my $rel = $self->get_relationship( @vector );
447                 if( $rel ) {
448                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
449                         next;
450                 }
451                 
452                 # Otherwise, adopt the relationship that would be deleted.
453                 $rel = $self->get_relationship( @$edge );
454                 $self->_set_relationship( $rel, @vector );
455         }
456         $self->delete_reading( $deleted );
457 }
458
459 sub _as_graphml { 
460         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
461         
462     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
463         $rgraph->setAttribute( 'edgedefault', 'directed' );
464     $rgraph->setAttribute( 'id', 'relationships', );
465     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
466     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
467     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
468     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
469     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
470     
471     # Add the vertices according to their XML IDs
472     my %rdg_lookup = ( reverse %$node_hash );
473     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
474         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
475         $n_el->setAttribute( 'id', $n );
476         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
477     }
478     
479     # Add the relationship edges, with their object information
480     my $edge_ctr = 0;
481     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
482         # Add an edge and fill in its relationship info.
483                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
484                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
485                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
486                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
487
488                 my $rel_obj = $self->get_relationship( @$e );
489                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
490                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
491                 _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation );
492                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
493                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
494                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
495                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
496         }
497 }
498
499 sub _by_xmlid {
500         my $tmp_a = $a;
501         my $tmp_b = $b;
502         $tmp_a =~ s/\D//g;
503         $tmp_b =~ s/\D//g;
504         return $tmp_a <=> $tmp_b;
505 }
506
507 sub _add_graphml_data {
508     my( $el, $key, $value ) = @_;
509     return unless defined $value;
510     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
511     $data_el->setAttribute( 'key', $key );
512     $data_el->appendText( $value );
513 }
514
515 sub throw {
516         Text::Tradition::Error->throw( 
517                 'ident' => 'Relationship error',
518                 'message' => $_[0],
519                 );
520 }
521
522 no Moose;
523 __PACKAGE__->meta->make_immutable;
524
525 1;