abandon 'collated' relationship removal
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 try {
52         my @v3 = $c->del_relationship( 'n1', 'n2' );
53         ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55         like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56 }
57
58 =end testing
59
60 =head1 METHODS
61
62 =head2 new( collation => $collation );
63
64 Creates a new relationship store for the given collation.
65
66 =cut
67
68 has 'collation' => (
69         is => 'ro',
70         isa => 'Text::Tradition::Collation',
71         required => 1,
72         weak_ref => 1,
73         );
74
75 has 'scopedrels' => (
76         is => 'ro',
77         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78         default => sub { {} },
79         );
80
81 has 'graph' => (
82         is => 'ro',
83         isa => 'Graph',
84         default => sub { Graph->new( undirected => 1 ) },
85     handles => {
86         relationships => 'edges',
87         add_reading => 'add_vertex',
88         delete_reading => 'delete_vertex',
89         delete_relationship => 'delete_edge',
90     },
91         );
92         
93 around 'delete_relationship' => sub {
94         my $orig = shift;
95         my $self = shift;
96         my @vector;
97         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98                 # Dereference the edge arrayref that was passed.
99                 my $edge = shift;
100                 @vector = @$edge;
101         } else {
102                 @vector = @_;
103         }
104         return $self->$orig( @vector );
105 };
106         
107 =head2 get_relationship
108
109 Return the relationship object, if any, that exists between two readings.
110
111 =cut
112
113 sub get_relationship {
114         my $self = shift;
115         my @vector;
116         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
117                 # Dereference the edge arrayref that was passed.
118                 my $edge = shift;
119                 @vector = @$edge;
120         } else {
121                 @vector = @_;
122         }
123         my $relationship;
124         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
125                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
126         }
127         return $relationship;
128 }
129
130 sub _set_relationship {
131         my( $self, $relationship, @vector ) = @_;
132         $self->graph->add_edge( @vector );
133         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
134 }
135
136 sub _remove_relationship {
137         my( $self, @vector ) = @_;
138         $self->graph->delete_edge( @vector );
139 }
140         
141 =head2 create
142
143 Create a new relationship with the given options and return it.
144 Warn and return undef if the relationship cannot be created.
145
146 =cut
147
148 sub create {
149         my( $self, $options ) = @_;
150         # Check to see if a relationship exists between the two given readings
151         my $source = delete $options->{'orig_a'};
152         my $target = delete $options->{'orig_b'};
153         my $rel = $self->get_relationship( $source, $target );
154         if( $rel ) {
155                 if( $rel->type eq 'collated' ) {
156                         # Always replace a 'collated' relationship with a more descriptive
157                         # one, if asked.
158                         $self->del_relationship( $source, $target );
159                 } elsif( $rel->type ne $options->{'type'} ) {
160                         throw( "Another relationship of type " . $rel->type 
161                                 . " already exists between $source and $target" );
162                 } else {
163                         return $rel;
164                 }
165         }
166         
167         # Check to see if a nonlocal relationship is defined for the two readings
168         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
169                 $options->{'reading_b'} );
170         if( $rel && $rel->type eq $options->{'type'} ) {
171                 return $rel;
172         } elsif( $rel ) {
173                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
174         } else {
175                 $rel = Text::Tradition::Collation::Relationship->new( $options );
176                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
177                 return $rel;
178         }
179 }
180
181 =head2 add_scoped_relationship( $rel )
182
183 Keep track of relationships defined between specific readings that are scoped
184 non-locally.  Key on whichever reading occurs first alphabetically.
185
186 =cut
187
188 sub add_scoped_relationship {
189         my( $self, $rel ) = @_;
190         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
191         if( $r ) {
192                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
193                         $r->type, $rel->reading_a, $rel->reading_b );
194                 return;
195         }
196         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
197 }
198
199 =head2 scoped_relationship( $reading_a, $reading_b )
200
201 Returns the general (document-level or global) relationship that has been defined 
202 between the two reading strings. Returns undef if there is no general relationship.
203
204 =cut
205
206 sub scoped_relationship {
207         my( $self, $rdga, $rdgb ) = @_;
208         my( $first, $second ) = sort( $rdga, $rdgb );
209         if( exists $self->scopedrels->{$first}->{$second} ) {
210                 return $self->scopedrels->{$first}->{$second};
211         } else {
212                 return undef;
213         }
214 }
215
216 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
217
218 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
219 for the possible options) between the readings given in $source and $target.  Sets
220 up a scoped relationship between $sourcetext and $targettext if the relationship is
221 scoped non-locally.
222
223 Returns a status boolean and a list of all reading pairs connected by the call to
224 add_relationship.
225
226 =cut
227
228 sub add_relationship {
229         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
230
231         # Check the options
232         $options->{'scope'} = 'local' unless $options->{'scope'};
233         
234         my( $is_valid, $reason ) = 
235                 $self->relationship_valid( $source, $target, $options->{'type'} );
236     unless( $is_valid ) {
237         throw( "Invalid relationship: $reason" );
238     }
239     
240     # Try to create the relationship object.
241     $options->{'reading_a'} = $source_rdg->text;
242     $options->{'reading_b'} = $target_rdg->text;
243     $options->{'orig_a'} = $source;
244     $options->{'orig_b'} = $target;
245     my $relationship = $self->create( $options );  # Will throw on error
246
247         # Find all the pairs for which we need to set the relationship.
248         my @vectors = ( [ $source, $target ] ); 
249     if( $relationship->colocated && $relationship->nonlocal ) {
250         my $c = $self->collation;
251         # Set the same relationship everywhere we can, throughout the graph.
252         my @identical_readings = grep { $_->text eq $relationship->reading_a }
253                 $c->readings;
254         foreach my $ir ( @identical_readings ) {
255                 next if $ir->id eq $source;
256                 # Check to see if there is a target reading with the same text at
257                 # the same rank.
258                 my @itarget = grep 
259                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
260                         $c->readings;
261                 if( @itarget ) {
262                         # We found a hit.
263                         warn "More than one reading with text " . $target_rdg->text
264                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
265                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
266                 }
267         }       
268     }
269     
270     # Now set the relationship(s).
271     my @pairs_set;
272     foreach my $v ( @vectors ) {
273                 my $rel = $self->get_relationship( @$v );
274         if( $rel ) {
275                 if( $rel->nonlocal ) {
276                         throw( "Found conflicting relationship at @$v" );
277                 } else {
278                         warn "Not overriding local relationship set at @$v";
279                 }
280                 next;
281         }
282         $self->_set_relationship( $relationship, @$v );
283         push( @pairs_set, $v );
284     }
285     
286     return @pairs_set;
287 }
288
289 =head2 del_relationship( $source, $target )
290
291 Removes the relationship between the given readings. If the relationship is
292 non-local, removes the relationship everywhere in the graph.
293
294 =cut
295
296 sub del_relationship {
297         my( $self, $source, $target ) = @_;
298         my $rel = $self->get_relationship( $source, $target );
299         throw( "No relationship defined between $source and $target" ) unless $rel;
300         my @vectors = ( [ $source, $target ] );
301         $self->_remove_relationship( $source, $target );
302         if( $rel->nonlocal ) {
303                 # Remove the relationship wherever it occurs.
304                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
305                         $self->relationships;
306                 foreach my $re ( @rel_edges ) {
307                         $self->_remove_relationship( @$re );
308                         push( @vectors, $re );
309                 }
310         }
311         return @vectors;
312 }
313
314 =head2 relationship_valid( $source, $target, $type )
315
316 Checks whether a relationship of type $type may exist between the readings given
317 in $source and $target.  Returns a tuple of ( status, message ) where status is
318 a yes/no boolean and, if the answer is no, message gives the reason why.
319
320 =cut
321
322 sub relationship_valid {
323     my( $self, $source, $target, $rel ) = @_;
324     my $c = $self->collation;
325     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
326                 # Check that the two readings do (for a repetition) or do not (for
327                 # a transposition) appear in the same witness.
328                 # TODO this might be called before witness paths are set...
329                 my %seen_wits;
330                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
331                 foreach my $w ( $c->reading_witnesses( $target ) ) {
332                         if( $seen_wits{$w} ) {
333                                 return ( 0, "Readings both occur in witness $w" ) 
334                                         if $rel eq 'transposition';
335                                 return ( 1, "ok" ) if $rel eq 'repetition';
336                 }
337                 return $rel eq 'transposition' ? ( 1, "ok" )
338                         : ( 0, "Readings occur only in distinct witnesses" );
339                 }
340         } else {
341                 # Check that linking the source and target in a relationship won't lead
342                 # to a path loop for any witness.  If they have the same rank then fine.
343                 return( 1, "ok" ) 
344                         if $c->reading( $source )->has_rank
345                                 && $c->reading( $target )->has_rank
346                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
347                 
348                 # Otherwise, first make a lookup table of all the
349                 # readings related to either the source or the target.
350                 my @proposed_related = ( $source, $target );
351                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
352                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
353                 my %pr_ids;
354                 map { $pr_ids{ $_ } = 1 } @proposed_related;
355         
356                 # The cumulative predecessors and successors of the proposed-related readings
357                 # should not overlap.
358                 my %all_pred;
359                 my %all_succ;
360                 foreach my $pr ( keys %pr_ids ) {
361                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
362                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
363                 }
364                 foreach my $k ( keys %all_pred ) {
365                         return( 0, "Relationship would create witness loop" )
366                                 if exists $all_succ{$k};
367                 }
368                 foreach my $k ( keys %pr_ids ) {
369                         return( 0, "Relationship would create witness loop" )
370                                 if exists $all_pred{$k} || exists $all_succ{$k};
371                 }
372                 return ( 1, "ok" );
373         }
374 }
375
376 =head2 related_readings( $reading, $filter )
377
378 Returns a list of readings that are connected via relationship links to $reading.
379 If $filter is set to a subroutine ref, returns only those related readings where
380 $filter( $relationship ) returns a true value.
381
382 =cut
383
384 sub related_readings {
385         my( $self, $reading, $filter ) = @_;
386         my $return_object;
387         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
388                 $reading = $reading->id;
389                 $return_object = 1;
390         }
391         my @answer;
392         if( $filter ) {
393                 # Backwards compat
394                 if( $filter eq 'colocated' ) {
395                         $filter = sub { $_[0]->colocated };
396                 }
397                 my %found = ( $reading => 1 );
398                 my $check = [ $reading ];
399                 my $iter = 0;
400                 while( @$check ) {
401                         my $more = [];
402                         foreach my $r ( @$check ) {
403                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
404                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
405                                                 push( @$more, $nr ) unless exists $found{$nr};
406                                                 $found{$nr} = 1;
407                                         }
408                                 }
409                         }
410                         $check = $more;
411                 }
412                 delete $found{$reading};
413                 @answer = keys %found;
414         } else {
415                 @answer = $self->graph->all_reachable( $reading );
416         }
417         if( $return_object ) {
418                 my $c = $self->collation;
419                 return map { $c->reading( $_ ) } @answer;
420         } else {
421                 return @answer;
422         }
423 }
424
425 =head2 merge_readings( $kept, $deleted );
426
427 Makes a best-effort merge of the relationship links between the given readings, and
428 stops tracking the to-be-deleted reading.
429
430 =cut
431
432 sub merge_readings {
433         my( $self, $kept, $deleted, $combined ) = @_;
434         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
435                 # Get the pair of kept / rel
436                 my @vector = ( $kept );
437                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
438                 next if $vector[0] eq $vector[1]; # Don't add a self loop
439                 
440                 # If kept changes its text, drop the relationship.
441                 next if $combined;
442                         
443                 # If kept / rel already has a relationship, warn and keep the old
444                 my $rel = $self->get_relationship( @vector );
445                 if( $rel ) {
446                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
447                         next;
448                 }
449                 
450                 # Otherwise, adopt the relationship that would be deleted.
451                 $rel = $self->get_relationship( @$edge );
452                 $self->_set_relationship( $rel, @vector );
453         }
454         $self->delete_reading( $deleted );
455 }
456
457 sub _as_graphml { 
458         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
459         
460     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
461         $rgraph->setAttribute( 'edgedefault', 'directed' );
462     $rgraph->setAttribute( 'id', 'relationships', );
463     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
464     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
465     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
466     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
467     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
468     
469     # Add the vertices according to their XML IDs
470     my %rdg_lookup = ( reverse %$node_hash );
471     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
472         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
473         $n_el->setAttribute( 'id', $n );
474         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
475     }
476     
477     # Add the relationship edges, with their object information
478     my $edge_ctr = 0;
479     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
480         # Add an edge and fill in its relationship info.
481                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
482                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
483                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
484                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
485
486                 my $rel_obj = $self->get_relationship( @$e );
487                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
488                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
489                 _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation );
490                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
491                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
492                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
493                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
494         }
495 }
496
497 sub _by_xmlid {
498         my $tmp_a = $a;
499         my $tmp_b = $b;
500         $tmp_a =~ s/\D//g;
501         $tmp_b =~ s/\D//g;
502         return $tmp_a <=> $tmp_b;
503 }
504
505 sub _add_graphml_data {
506     my( $el, $key, $value ) = @_;
507     return unless defined $value;
508     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
509     $data_el->setAttribute( 'key', $key );
510     $data_el->appendText( $value );
511 }
512
513 sub throw {
514         Text::Tradition::Error->throw( 
515                 'ident' => 'Relationship error',
516                 'message' => $_[0],
517                 );
518 }
519
520 no Moose;
521 __PACKAGE__->meta->make_immutable;
522
523 1;