add facilities for removing 'collated' relationship, and for annotation
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26
27 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
28
29 =end testing
30
31 =head1 METHODS
32
33 =head2 new( collation => $collation );
34
35 Creates a new relationship store for the given collation.
36
37 =cut
38
39 has 'collation' => (
40         is => 'ro',
41         isa => 'Text::Tradition::Collation',
42         required => 1,
43         weak_ref => 1,
44         );
45
46 has 'scopedrels' => (
47         is => 'ro',
48         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
49         default => sub { {} },
50         );
51
52 has 'graph' => (
53         is => 'ro',
54         isa => 'Graph',
55         default => sub { Graph->new( undirected => 1 ) },
56     handles => {
57         relationships => 'edges',
58         add_reading => 'add_vertex',
59         delete_reading => 'delete_vertex',
60         delete_relationship => 'delete_edge',
61     },
62         );
63         
64 around 'delete_relationship' => sub {
65         my $orig = shift;
66         my $self = shift;
67         my @vector;
68         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
69                 # Dereference the edge arrayref that was passed.
70                 my $edge = shift;
71                 @vector = @$edge;
72         } else {
73                 @vector = @_;
74         }
75         return $self->$orig( @vector );
76 };
77         
78 =head2 get_relationship
79
80 Return the relationship object, if any, that exists between two readings.
81
82 =cut
83
84 sub get_relationship {
85         my $self = shift;
86         my @vector;
87         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
88                 # Dereference the edge arrayref that was passed.
89                 my $edge = shift;
90                 @vector = @$edge;
91         } else {
92                 @vector = @_;
93         }
94         my $relationship;
95         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
96                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
97         }
98         return $relationship;
99 }
100
101 sub _set_relationship {
102         my( $self, $relationship, @vector ) = @_;
103         $self->graph->add_edge( @vector );
104         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
105 }
106
107 sub _remove_relationship {
108         my( $self, @vector ) = @_;
109         $self->graph->delete_edge( @vector );
110 }
111         
112 =head2 create
113
114 Create a new relationship with the given options and return it.
115 Warn and return undef if the relationship cannot be created.
116
117 =cut
118
119 sub create {
120         my( $self, $options ) = @_;
121         # Check to see if a relationship exists between the two given readings
122         my $source = delete $options->{'orig_a'};
123         my $target = delete $options->{'orig_b'};
124         my $rel = $self->get_relationship( $source, $target );
125         if( $rel ) {
126                 if( $rel->type ne $options->{'type'} ) {
127                         throw( "Another relationship of type " . $rel->type 
128                                 . " already exists between $source and $target" );
129                 } else {
130                         return $rel;
131                 }
132         }
133         
134         # Check to see if a nonlocal relationship is defined for the two readings
135         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
136                 $options->{'reading_b'} );
137         if( $rel && $rel->type eq $options->{'type'} ) {
138                 return $rel;
139         } elsif( $rel ) {
140                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
141         } else {
142                 $rel = Text::Tradition::Collation::Relationship->new( $options );
143                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
144                 return $rel;
145         }
146 }
147
148 =head2 add_scoped_relationship( $rel )
149
150 Keep track of relationships defined between specific readings that are scoped
151 non-locally.  Key on whichever reading occurs first alphabetically.
152
153 =cut
154
155 sub add_scoped_relationship {
156         my( $self, $rel ) = @_;
157         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
158         if( $r ) {
159                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
160                         $r->type, $rel->reading_a, $rel->reading_b );
161                 return;
162         }
163         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
164 }
165
166 =head2 scoped_relationship( $reading_a, $reading_b )
167
168 Returns the general (document-level or global) relationship that has been defined 
169 between the two reading strings. Returns undef if there is no general relationship.
170
171 =cut
172
173 sub scoped_relationship {
174         my( $self, $rdga, $rdgb ) = @_;
175         my( $first, $second ) = sort( $rdga, $rdgb );
176         if( exists $self->scopedrels->{$first}->{$second} ) {
177                 return $self->scopedrels->{$first}->{$second};
178         } else {
179                 return undef;
180         }
181 }
182
183 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
184
185 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
186 for the possible options) between the readings given in $source and $target.  Sets
187 up a scoped relationship between $sourcetext and $targettext if the relationship is
188 scoped non-locally.
189
190 Returns a status boolean and a list of all reading pairs connected by the call to
191 add_relationship.
192
193 =cut
194
195 sub add_relationship {
196         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
197
198         # Check the options
199         $options->{'scope'} = 'local' unless $options->{'scope'};
200         
201         my( $is_valid, $reason ) = 
202                 $self->relationship_valid( $source, $target, $options->{'type'} );
203     unless( $is_valid ) {
204         throw( "Invalid relationship: $reason" );
205     }
206     
207     # Try to create the relationship object.
208     $options->{'reading_a'} = $source_rdg->text;
209     $options->{'reading_b'} = $target_rdg->text;
210     $options->{'orig_a'} = $source;
211     $options->{'orig_b'} = $target;
212     my $relationship = $self->create( $options );  # Will throw on error
213
214         # Find all the pairs for which we need to set the relationship.
215         my @vectors = ( [ $source, $target ] ); 
216     if( $relationship->colocated && $relationship->nonlocal ) {
217         my $c = $self->collation;
218         # Set the same relationship everywhere we can, throughout the graph.
219         my @identical_readings = grep { $_->text eq $relationship->reading_a }
220                 $c->readings;
221         foreach my $ir ( @identical_readings ) {
222                 next if $ir->id eq $source;
223                 # Check to see if there is a target reading with the same text at
224                 # the same rank.
225                 my @itarget = grep 
226                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
227                         $c->readings;
228                 if( @itarget ) {
229                         # We found a hit.
230                         warn "More than one reading with text " . $target_rdg->text
231                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
232                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
233                 }
234         }       
235     }
236     
237     # Now set the relationship(s).
238     my @pairs_set;
239     foreach my $v ( @vectors ) {
240                 my $rel = $self->get_relationship( @$v );
241         if( $rel ) {
242                 if( $rel->nonlocal ) {
243                         throw( "Found conflicting relationship at @$v" );
244                 } else {
245                         warn "Not overriding local relationship set at @$v";
246                 }
247                 next;
248         }
249         $self->_set_relationship( $relationship, @$v );
250         push( @pairs_set, $v );
251     }
252     
253     return @pairs_set;
254 }
255
256 =head2 relationship_valid( $source, $target, $type )
257
258 Checks whether a relationship of type $type may exist between the readings given
259 in $source and $target.  Returns a tuple of ( status, message ) where status is
260 a yes/no boolean and, if the answer is no, message gives the reason why.
261
262 =cut
263
264 sub relationship_valid {
265     my( $self, $source, $target, $rel ) = @_;
266     my $c = $self->collation;
267     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
268                 # Check that the two readings do (for a repetition) or do not (for
269                 # a transposition) appear in the same witness.
270                 my %seen_wits;
271                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
272                 foreach my $w ( $c->reading_witnesses( $target ) ) {
273                         if( $seen_wits{$w} ) {
274                                 return ( 0, "Readings both occur in witness $w" ) 
275                                         if $rel eq 'transposition';
276                                 return ( 1, "ok" ) if $rel eq 'repetition';
277                 }
278                 return $rel eq 'transposition' ? ( 1, "ok" )
279                         : ( 0, "Readings occur only in distinct witnesses" );
280                 }
281         } else {
282                 # Check that linking the source and target in a relationship won't lead
283                 # to a path loop for any witness.  If they have the same rank then fine.
284                 return( 1, "ok" ) 
285                         if $c->reading( $source )->has_rank
286                                 && $c->reading( $target )->has_rank
287                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
288                 
289                 # Otherwise, first make a lookup table of all the
290                 # readings related to either the source or the target.
291                 my @proposed_related = ( $source, $target );
292                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
293                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
294                 my %pr_ids;
295                 map { $pr_ids{ $_ } = 1 } @proposed_related;
296         
297                 # The cumulative predecessors and successors of the proposed-related readings
298                 # should not overlap.
299                 my %all_pred;
300                 my %all_succ;
301                 foreach my $pr ( keys %pr_ids ) {
302                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
303                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
304                 }
305                 foreach my $k ( keys %all_pred ) {
306                         return( 0, "Relationship would create witness loop" )
307                                 if exists $all_succ{$k};
308                 }
309                 foreach my $k ( keys %pr_ids ) {
310                         return( 0, "Relationship would create witness loop" )
311                                 if exists $all_pred{$k} || exists $all_succ{$k};
312                 }
313                 return ( 1, "ok" );
314         }
315 }
316
317 =head2 related_readings( $reading, $colocated_only )
318
319 Returns a list of readings that are connected via relationship links to $reading.
320 If $colocated_only is true, restricts the list to those readings that are in the
321 same logical location (and therefore have the same rank in the collation graph.)
322
323 =cut
324
325 sub related_readings {
326         my( $self, $reading, $colocated ) = @_;
327         my $return_object;
328         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
329                 $reading = $reading->id;
330                 $return_object = 1;
331         }
332         my @answer;
333         if( $colocated ) {
334                 my %found = ( $reading => 1 );
335                 my $check = [ $reading ];
336                 my $iter = 0;
337                 while( @$check ) {
338                         my $more = [];
339                         foreach my $r ( @$check ) {
340                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
341                                         if( $self->get_relationship( $r, $nr )->colocated ) {
342                                                 push( @$more, $nr ) unless exists $found{$nr};
343                                                 $found{$nr} = 1;
344                                         }
345                                 }
346                         }
347                         $check = $more;
348                 }
349                 @answer = keys %found;
350         } else {
351                 @answer = $self->graph->all_reachable( $reading );
352         }
353         if( $return_object ) {
354                 my $c = $self->collation;
355                 return map { $c->reading( $_ ) } @answer;
356         } else {
357                 return @answer;
358         }
359 }
360
361 =head2 merge_readings( $kept, $deleted );
362
363 Makes a best-effort merge of the relationship links between the given readings, and
364 stops tracking the to-be-deleted reading.
365
366 =cut
367
368 sub merge_readings {
369         my( $self, $kept, $deleted, $combined ) = @_;
370         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
371                 # Get the pair of kept / rel
372                 my @vector = ( $kept );
373                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
374                 next if $vector[0] eq $vector[1]; # Don't add a self loop
375                 
376                 # If kept changes its text, drop the relationship.
377                 next if $combined;
378                         
379                 # If kept / rel already has a relationship, warn and keep the old
380                 my $rel = $self->get_relationship( @vector );
381                 if( $rel ) {
382                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
383                         next;
384                 }
385                 
386                 # Otherwise, adopt the relationship that would be deleted.
387                 $rel = $self->get_relationship( @$edge );
388                 $self->_set_relationship( $rel, @vector );
389         }
390         $self->delete_reading( $deleted );
391 }
392
393 sub _as_graphml { 
394         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
395         
396     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
397         $rgraph->setAttribute( 'edgedefault', 'directed' );
398     $rgraph->setAttribute( 'id', 'relationships', );
399     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
400     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
401     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
402     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
403     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
404     
405     # Add the vertices according to their XML IDs
406     my %rdg_lookup = ( reverse %$node_hash );
407     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
408         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
409         $n_el->setAttribute( 'id', $n );
410         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
411     }
412     
413     # Add the relationship edges, with their object information
414     my $edge_ctr = 0;
415     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
416         # Add an edge and fill in its relationship info.
417                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
418                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
419                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
420                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
421
422                 my $rel_obj = $self->get_relationship( @$e );
423                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
424                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
425                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
426                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
427                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
428                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
429         }
430 }
431
432 sub _by_xmlid {
433         my $tmp_a = $a;
434         my $tmp_b = $b;
435         $tmp_a =~ s/\D//g;
436         $tmp_b =~ s/\D//g;
437         return $tmp_a <=> $tmp_b;
438 }
439
440 sub _add_graphml_data {
441     my( $el, $key, $value ) = @_;
442     return unless defined $value;
443     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
444     $data_el->setAttribute( 'key', $key );
445     $data_el->appendText( $value );
446 }
447
448 sub throw {
449         Text::Tradition::Error->throw( 
450                 'ident' => 'Relationship error',
451                 'message' => $_[0],
452                 );
453 }
454
455 no Moose;
456 __PACKAGE__->meta->make_immutable;
457
458 1;