keep from stripping meta readings, oops
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26
27 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
28
29 =end testing
30
31 =head1 METHODS
32
33 =head2 new( collation => $collation );
34
35 Creates a new relationship store for the given collation.
36
37 =cut
38
39 has 'collation' => (
40         is => 'ro',
41         isa => 'Text::Tradition::Collation',
42         required => 1,
43         weak_ref => 1,
44         );
45
46 has 'scopedrels' => (
47         is => 'ro',
48         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
49         default => sub { {} },
50         );
51
52 has 'graph' => (
53         is => 'ro',
54         isa => 'Graph',
55         default => sub { Graph->new( undirected => 1 ) },
56     handles => {
57         relationships => 'edges',
58         add_reading => 'add_vertex',
59         delete_reading => 'delete_vertex',
60         delete_relationship => 'delete_edge',
61     },
62         );
63         
64 around 'delete_relationship' => sub {
65         my $orig = shift;
66         my $self = shift;
67         my @vector;
68         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
69                 # Dereference the edge arrayref that was passed.
70                 my $edge = shift;
71                 @vector = @$edge;
72         } else {
73                 @vector = @_;
74         }
75         return $self->$orig( @vector );
76 };
77         
78 =head2 get_relationship
79
80 Return the relationship object, if any, that exists between two readings.
81
82 =cut
83
84 sub get_relationship {
85         my $self = shift;
86         my @vector;
87         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
88                 # Dereference the edge arrayref that was passed.
89                 my $edge = shift;
90                 @vector = @$edge;
91         } else {
92                 @vector = @_;
93         }
94         my $relationship;
95         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
96                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
97         }
98         return $relationship;
99 }
100
101 sub _set_relationship {
102         my( $self, $relationship, @vector ) = @_;
103         $self->graph->add_edge( @vector );
104         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
105 }
106
107 sub _remove_relationship {
108         my( $self, @vector ) = @_;
109         $self->graph->delete_edge( @vector );
110 }
111         
112 =head2 create
113
114 Create a new relationship with the given options and return it.
115 Warn and return undef if the relationship cannot be created.
116
117 =cut
118
119 sub create {
120         my( $self, $options ) = @_;
121         # Check to see if a relationship exists between the two given readings
122         my $source = delete $options->{'orig_a'};
123         my $target = delete $options->{'orig_b'};
124         my $rel = $self->get_relationship( $source, $target );
125         if( $rel ) {
126                 if( $rel->type ne $options->{'type'} ) {
127                         throw( "Another relationship of type " . $rel->type 
128                                 . " already exists between $source and $target" );
129                 } else {
130                         return $rel;
131                 }
132         }
133         
134         # Check to see if a nonlocal relationship is defined for the two readings
135         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
136                 $options->{'reading_b'} );
137         if( $rel && $rel->type eq $options->{'type'} ) {
138                 return $rel;
139         } elsif( $rel ) {
140                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
141         } else {
142                 $rel = Text::Tradition::Collation::Relationship->new( $options );
143                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
144                 return $rel;
145         }
146 }
147
148 =head2 add_scoped_relationship( $rel )
149
150 Keep track of relationships defined between specific readings that are scoped
151 non-locally.  Key on whichever reading occurs first alphabetically.
152
153 =cut
154
155 sub add_scoped_relationship {
156         my( $self, $rel ) = @_;
157         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
158         if( $r ) {
159                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
160                         $r->type, $rel->reading_a, $rel->reading_b );
161                 return;
162         }
163         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
164 }
165
166 =head2 scoped_relationship( $reading_a, $reading_b )
167
168 Returns the general (document-level or global) relationship that has been defined 
169 between the two reading strings. Returns undef if there is no general relationship.
170
171 =cut
172
173 sub scoped_relationship {
174         my( $self, $rdga, $rdgb ) = @_;
175         my( $first, $second ) = sort( $rdga, $rdgb );
176         if( exists $self->scopedrels->{$first}->{$second} ) {
177                 return $self->scopedrels->{$first}->{$second};
178         } else {
179                 return undef;
180         }
181 }
182
183 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
184
185 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
186 for the possible options) between the readings given in $source and $target.  Sets
187 up a scoped relationship between $sourcetext and $targettext if the relationship is
188 scoped non-locally.
189
190 Returns a status boolean and a list of all reading pairs connected by the call to
191 add_relationship.
192
193 =cut
194
195 sub add_relationship {
196         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
197
198         # Check the options
199         $options->{'scope'} = 'local' unless $options->{'scope'};
200         
201         my( $is_valid, $reason ) = 
202                 $self->relationship_valid( $source, $target, $options->{'type'} );
203     unless( $is_valid ) {
204         throw( "Invalid relationship: $reason" );
205     }
206     
207     # Try to create the relationship object.
208     $options->{'reading_a'} = $source_rdg->text;
209     $options->{'reading_b'} = $target_rdg->text;
210     $options->{'orig_a'} = $source;
211     $options->{'orig_b'} = $target;
212     my $relationship = $self->create( $options );  # Will throw on error
213
214         # Find all the pairs for which we need to set the relationship.
215         my @vectors = ( [ $source, $target ] ); 
216     if( $relationship->colocated && $relationship->nonlocal ) {
217         my $c = $self->collation;
218         # Set the same relationship everywhere we can, throughout the graph.
219         my @identical_readings = grep { $_->text eq $relationship->reading_a }
220                 $c->readings;
221         foreach my $ir ( @identical_readings ) {
222                 next if $ir->id eq $source;
223                 # Check to see if there is a target reading with the same text at
224                 # the same rank.
225                 my @itarget = grep 
226                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
227                         $c->readings;
228                 if( @itarget ) {
229                         # We found a hit.
230                         warn "More than one reading with text " . $target_rdg->text
231                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
232                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
233                 }
234         }       
235     }
236     
237     # Now set the relationship(s).
238     my @pairs_set;
239     foreach my $v ( @vectors ) {
240                 my $rel = $self->get_relationship( @$v );
241         if( $rel ) {
242                 if( $rel->nonlocal ) {
243                         throw( "Found conflicting relationship at @$v" );
244                 } else {
245                         warn "Not overriding local relationship set at @$v";
246                 }
247                 next;
248         }
249         $self->_set_relationship( $relationship, @$v );
250         push( @pairs_set, $v );
251     }
252     
253     return @pairs_set;
254 }
255
256 =head2 relationship_valid( $source, $target, $type )
257
258 Checks whether a relationship of type $type may exist between the readings given
259 in $source and $target.  Returns a tuple of ( status, message ) where status is
260 a yes/no boolean and, if the answer is no, message gives the reason why.
261
262 =cut
263
264 sub relationship_valid {
265     my( $self, $source, $target, $rel ) = @_;
266     my $c = $self->collation;
267     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
268                 # Check that the two readings do (for a repetition) or do not (for
269                 # a transposition) appear in the same witness.
270                 # TODO this might be called before witness paths are set...
271                 my %seen_wits;
272                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
273                 foreach my $w ( $c->reading_witnesses( $target ) ) {
274                         if( $seen_wits{$w} ) {
275                                 return ( 0, "Readings both occur in witness $w" ) 
276                                         if $rel eq 'transposition';
277                                 return ( 1, "ok" ) if $rel eq 'repetition';
278                 }
279                 return $rel eq 'transposition' ? ( 1, "ok" )
280                         : ( 0, "Readings occur only in distinct witnesses" );
281                 }
282         } else {
283                 # Check that linking the source and target in a relationship won't lead
284                 # to a path loop for any witness.  If they have the same rank then fine.
285                 return( 1, "ok" ) 
286                         if $c->reading( $source )->has_rank
287                                 && $c->reading( $target )->has_rank
288                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
289                 
290                 # Otherwise, first make a lookup table of all the
291                 # readings related to either the source or the target.
292                 my @proposed_related = ( $source, $target );
293                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
294                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
295                 my %pr_ids;
296                 map { $pr_ids{ $_ } = 1 } @proposed_related;
297         
298                 # The cumulative predecessors and successors of the proposed-related readings
299                 # should not overlap.
300                 my %all_pred;
301                 my %all_succ;
302                 foreach my $pr ( keys %pr_ids ) {
303                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
304                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
305                 }
306                 foreach my $k ( keys %all_pred ) {
307                         return( 0, "Relationship would create witness loop" )
308                                 if exists $all_succ{$k};
309                 }
310                 foreach my $k ( keys %pr_ids ) {
311                         return( 0, "Relationship would create witness loop" )
312                                 if exists $all_pred{$k} || exists $all_succ{$k};
313                 }
314                 return ( 1, "ok" );
315         }
316 }
317
318 =head2 related_readings( $reading, $colocated_only )
319
320 Returns a list of readings that are connected via relationship links to $reading.
321 If $colocated_only is true, restricts the list to those readings that are in the
322 same logical location (and therefore have the same rank in the collation graph.)
323
324 =cut
325
326 sub related_readings {
327         my( $self, $reading, $colocated ) = @_;
328         my $return_object;
329         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
330                 $reading = $reading->id;
331                 $return_object = 1;
332         }
333         my @answer;
334         if( $colocated ) {
335                 my %found = ( $reading => 1 );
336                 my $check = [ $reading ];
337                 my $iter = 0;
338                 while( @$check ) {
339                         my $more = [];
340                         foreach my $r ( @$check ) {
341                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
342                                         if( $self->get_relationship( $r, $nr )->colocated ) {
343                                                 push( @$more, $nr ) unless exists $found{$nr};
344                                                 $found{$nr} = 1;
345                                         }
346                                 }
347                         }
348                         $check = $more;
349                 }
350                 @answer = keys %found;
351         } else {
352                 @answer = $self->graph->all_reachable( $reading );
353         }
354         if( $return_object ) {
355                 my $c = $self->collation;
356                 return map { $c->reading( $_ ) } @answer;
357         } else {
358                 return @answer;
359         }
360 }
361
362 =head2 merge_readings( $kept, $deleted );
363
364 Makes a best-effort merge of the relationship links between the given readings, and
365 stops tracking the to-be-deleted reading.
366
367 =cut
368
369 sub merge_readings {
370         my( $self, $kept, $deleted, $combined ) = @_;
371         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
372                 # Get the pair of kept / rel
373                 my @vector = ( $kept );
374                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
375                 next if $vector[0] eq $vector[1]; # Don't add a self loop
376                 
377                 # If kept changes its text, drop the relationship.
378                 next if $combined;
379                         
380                 # If kept / rel already has a relationship, warn and keep the old
381                 my $rel = $self->get_relationship( @vector );
382                 if( $rel ) {
383                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
384                         next;
385                 }
386                 
387                 # Otherwise, adopt the relationship that would be deleted.
388                 $rel = $self->get_relationship( @$edge );
389                 $self->_set_relationship( $rel, @vector );
390         }
391         $self->delete_reading( $deleted );
392 }
393
394 sub _as_graphml { 
395         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
396         
397     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
398         $rgraph->setAttribute( 'edgedefault', 'directed' );
399     $rgraph->setAttribute( 'id', 'relationships', );
400     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
401     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
402     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
403     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
404     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
405     
406     # Add the vertices according to their XML IDs
407     my %rdg_lookup = ( reverse %$node_hash );
408     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
409         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
410         $n_el->setAttribute( 'id', $n );
411         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
412     }
413     
414     # Add the relationship edges, with their object information
415     my $edge_ctr = 0;
416     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
417         # Add an edge and fill in its relationship info.
418                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
419                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
420                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
421                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
422
423                 my $rel_obj = $self->get_relationship( @$e );
424                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
425                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
426                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
427                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
428                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
429                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
430         }
431 }
432
433 sub _by_xmlid {
434         my $tmp_a = $a;
435         my $tmp_b = $b;
436         $tmp_a =~ s/\D//g;
437         $tmp_b =~ s/\D//g;
438         return $tmp_a <=> $tmp_b;
439 }
440
441 sub _add_graphml_data {
442     my( $el, $key, $value ) = @_;
443     return unless defined $value;
444     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
445     $data_el->setAttribute( 'key', $key );
446     $data_el->appendText( $value );
447 }
448
449 sub throw {
450         Text::Tradition::Error->throw( 
451                 'ident' => 'Relationship error',
452                 'message' => $_[0],
453                 );
454 }
455
456 no Moose;
457 __PACKAGE__->meta->make_immutable;
458
459 1;