a74c890733dfd9ed60c9cfd40aea82e92645297e
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26
27 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
28
29 =end testing
30
31 =head1 METHODS
32
33 =head2 new( collation => $collation );
34
35 Creates a new relationship store for the given collation.
36
37 =cut
38
39 has 'collation' => (
40         is => 'ro',
41         isa => 'Text::Tradition::Collation',
42         required => 1,
43         weak_ref => 1,
44         );
45
46 has 'scopedrels' => (
47         is => 'ro',
48         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
49         default => sub { {} },
50         );
51
52 has 'graph' => (
53         is => 'ro',
54         isa => 'Graph',
55         default => sub { Graph->new( undirected => 1 ) },
56     handles => {
57         relationships => 'edges',
58         add_reading => 'add_vertex',
59         delete_reading => 'delete_vertex',
60     },
61         );
62         
63 =head2 get_relationship
64
65 Return the relationship object, if any, that exists between two readings.
66
67 =cut
68
69 sub get_relationship {
70         my( $self, @vector ) = @_;
71         my $relationship;
72         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
73                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
74         }
75         return $relationship;
76 }
77
78 sub _set_relationship {
79         my( $self, $relationship, @vector ) = @_;
80         $self->graph->add_edge( @vector );
81         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
82 }
83
84 sub _remove_relationship {
85         my( $self, @vector ) = @_;
86         $self->graph->delete_edge( @vector );
87 }
88         
89 =head2 create
90
91 Create a new relationship with the given options and return it.
92 Warn and return undef if the relationship cannot be created.
93
94 =cut
95
96 sub create {
97         my( $self, $options ) = @_;
98         # Check to see if a relationship exists between the two given readings
99         my $source = delete $options->{'orig_a'};
100         my $target = delete $options->{'orig_b'};
101         my $rel = $self->get_relationship( $source, $target );
102         if( $rel ) {
103                 if( $rel->type ne $options->{'type'} ) {
104                         throw( "Another relationship of type " . $rel->type 
105                                 . " already exists between $source and $target" );
106                 } else {
107                         return $rel;
108                 }
109         }
110         
111         # Check to see if a nonlocal relationship is defined for the two readings
112         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
113                 $options->{'reading_b'} );
114         if( $rel && $rel->type eq $options->{'type'} ) {
115                 return $rel;
116         } elsif( $rel ) {
117                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
118         } else {
119                 $rel = Text::Tradition::Collation::Relationship->new( $options );
120                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
121                 return $rel;
122         }
123 }
124
125 =head2 add_scoped_relationship( $rel )
126
127 Keep track of relationships defined between specific readings that are scoped
128 non-locally.  Key on whichever reading occurs first alphabetically.
129
130 =cut
131
132 sub add_scoped_relationship {
133         my( $self, $rel ) = @_;
134         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
135         if( $r ) {
136                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
137                         $r->type, $rel->reading_a, $rel->reading_b );
138                 return;
139         }
140         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
141 }
142
143 =head2 scoped_relationship( $reading_a, $reading_b )
144
145 Returns the general (document-level or global) relationship that has been defined 
146 between the two reading strings. Returns undef if there is no general relationship.
147
148 =cut
149
150 sub scoped_relationship {
151         my( $self, $rdga, $rdgb ) = @_;
152         my( $first, $second ) = sort( $rdga, $rdgb );
153         if( exists $self->scopedrels->{$first}->{$second} ) {
154                 return $self->scopedrels->{$first}->{$second};
155         } else {
156                 return undef;
157         }
158 }
159
160 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
161
162 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
163 for the possible options) between the readings given in $source and $target.  Sets
164 up a scoped relationship between $sourcetext and $targettext if the relationship is
165 scoped non-locally.
166
167 Returns a status boolean and a list of all reading pairs connected by the call to
168 add_relationship.
169
170 =cut
171
172 sub add_relationship {
173         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
174
175         # Check the options
176         $options->{'scope'} = 'local' unless $options->{'scope'};
177         
178         my( $is_valid, $reason ) = 
179                 $self->relationship_valid( $source, $target, $options->{'type'} );
180     unless( $is_valid ) {
181         throw( "Invalid relationship: $reason" );
182     }
183     
184     # Try to create the relationship object.
185     $options->{'reading_a'} = $source_rdg->text;
186     $options->{'reading_b'} = $target_rdg->text;
187     $options->{'orig_a'} = $source;
188     $options->{'orig_b'} = $target;
189     my $relationship = $self->create( $options );  # Will throw on error
190
191         # Find all the pairs for which we need to set the relationship.
192         my @vectors = ( [ $source, $target ] ); 
193     if( $relationship->colocated && $relationship->nonlocal ) {
194         my $c = $self->collation;
195         # Set the same relationship everywhere we can, throughout the graph.
196         my @identical_readings = grep { $_->text eq $relationship->reading_a }
197                 $c->readings;
198         foreach my $ir ( @identical_readings ) {
199                 next if $ir->id eq $source;
200                 # Check to see if there is a target reading with the same text at
201                 # the same rank.
202                 my @itarget = grep 
203                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
204                         $c->readings;
205                 if( @itarget ) {
206                         # We found a hit.
207                         warn "More than one reading with text " . $target_rdg->text
208                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
209                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
210                 }
211         }       
212     }
213     
214     # Now set the relationship(s).
215     my @pairs_set;
216     foreach my $v ( @vectors ) {
217                 my $rel = $self->get_relationship( @$v );
218         if( $rel ) {
219                 if( $rel->nonlocal ) {
220                         throw( "Found conflicting relationship at @$v" );
221                 } else {
222                         warn "Not overriding local relationship set at @$v";
223                 }
224                 next;
225         }
226         $self->_set_relationship( $relationship, @$v );
227         push( @pairs_set, $v );
228     }
229     
230     return @pairs_set;
231 }
232
233 =head2 relationship_valid( $source, $target, $type )
234
235 Checks whether a relationship of type $type may exist between the readings given
236 in $source and $target.  Returns a tuple of ( status, message ) where status is
237 a yes/no boolean and, if the answer is no, message gives the reason why.
238
239 =cut
240
241 sub relationship_valid {
242     my( $self, $source, $target, $rel ) = @_;
243     my $c = $self->collation;
244     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
245                 # Check that the two readings do (for a repetition) or do not (for
246                 # a transposition) appear in the same witness.
247                 my %seen_wits;
248                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
249                 foreach my $w ( $c->reading_witnesses( $target ) ) {
250                         if( $seen_wits{$w} ) {
251                                 return ( 0, "Readings both occur in witness $w" ) 
252                                         if $rel eq 'transposition';
253                                 return ( 1, "ok" ) if $rel eq 'repetition';
254                 }
255                 return $rel eq 'transposition' ? ( 1, "ok" )
256                         : ( 0, "Readings occur only in distinct witnesses" );
257                 }
258         } else {
259                 # Check that linking the source and target in a relationship won't lead
260                 # to a path loop for any witness.  If they have the same rank then fine.
261                 return( 1, "ok" ) 
262                         if $c->reading( $source )->has_rank
263                                 && $c->reading( $target )->has_rank
264                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
265                 
266                 # Otherwise, first make a lookup table of all the
267                 # readings related to either the source or the target.
268                 my @proposed_related = ( $source, $target );
269                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
270                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
271                 my %pr_ids;
272                 map { $pr_ids{ $_ } = 1 } @proposed_related;
273         
274                 # The cumulative predecessors and successors of the proposed-related readings
275                 # should not overlap.
276                 my %all_pred;
277                 my %all_succ;
278                 foreach my $pr ( keys %pr_ids ) {
279                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
280                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
281                 }
282                 foreach my $k ( keys %all_pred ) {
283                         return( 0, "Relationship would create witness loop" )
284                                 if exists $all_succ{$k};
285                 }
286                 foreach my $k ( keys %pr_ids ) {
287                         return( 0, "Relationship would create witness loop" )
288                                 if exists $all_pred{$k} || exists $all_succ{$k};
289                 }
290                 return ( 1, "ok" );
291         }
292 }
293
294 =head2 related_readings( $reading, $colocated_only )
295
296 Returns a list of readings that are connected via relationship links to $reading.
297 If $colocated_only is true, restricts the list to those readings that are in the
298 same logical location (and therefore have the same rank in the collation graph.)
299
300 =cut
301
302 sub related_readings {
303         my( $self, $reading, $colocated ) = @_;
304         my $return_object;
305         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
306                 $reading = $reading->id;
307                 $return_object = 1;
308         }
309         my @answer;
310         if( $colocated ) {
311                 my %found = ( $reading => 1 );
312                 my $check = [ $reading ];
313                 my $iter = 0;
314                 while( @$check ) {
315                         my $more = [];
316                         foreach my $r ( @$check ) {
317                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
318                                         if( $self->get_relationship( $r, $nr )->colocated ) {
319                                                 push( @$more, $nr ) unless exists $found{$nr};
320                                                 $found{$nr} = 1;
321                                         }
322                                 }
323                         }
324                         $check = $more;
325                 }
326                 @answer = keys %found;
327         } else {
328                 @answer = $self->graph->all_reachable( $reading );
329         }
330         if( $return_object ) {
331                 my $c = $self->collation;
332                 return map { $c->reading( $_ ) } @answer;
333         } else {
334                 return @answer;
335         }
336 }
337
338 =head2 merge_readings( $kept, $deleted );
339
340 Makes a best-effort merge of the relationship links between the given readings, and
341 stops tracking the to-be-deleted reading.
342
343 =cut
344
345 sub merge_readings {
346         my( $self, $kept, $deleted, $combined ) = @_;
347         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
348                 # Get the pair of kept / rel
349                 my @vector = ( $kept );
350                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
351                 next if $vector[0] eq $vector[1]; # Don't add a self loop
352                 
353                 # If kept changes its text, drop the relationship.
354                 next if $combined;
355                         
356                 # If kept / rel already has a relationship, warn and keep the old
357                 my $rel = $self->get_relationship( @vector );
358                 if( $rel ) {
359                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
360                         next;
361                 }
362                 
363                 # Otherwise, adopt the relationship that would be deleted.
364                 $rel = $self->get_relationship( @$edge );
365                 $self->_set_relationship( $rel, @vector );
366         }
367         $self->delete_reading( $deleted );
368 }
369
370 sub _as_graphml { 
371         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
372         
373     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
374         $rgraph->setAttribute( 'edgedefault', 'directed' );
375     $rgraph->setAttribute( 'id', 'relationships', );
376     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
377     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
378     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
379     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
380     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
381     
382     # Add the vertices according to their XML IDs
383     my %rdg_lookup = ( reverse %$node_hash );
384     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
385         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
386         $n_el->setAttribute( 'id', $n );
387         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
388     }
389     
390     # Add the relationship edges, with their object information
391     my $edge_ctr = 0;
392     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
393         # Add an edge and fill in its relationship info.
394                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
395                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
396                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
397                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
398
399                 my $rel_obj = $self->get_relationship( @$e );
400                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
401                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
402                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
403                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
404                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
405                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
406         }
407 }
408
409 sub _by_xmlid {
410         my $tmp_a = $a;
411         my $tmp_b = $b;
412         $tmp_a =~ s/\D//g;
413         $tmp_b =~ s/\D//g;
414         return $tmp_a <=> $tmp_b;
415 }
416
417 sub _add_graphml_data {
418     my( $el, $key, $value ) = @_;
419     return unless defined $value;
420     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
421     $data_el->setAttribute( 'key', $key );
422     $data_el->appendText( $value );
423 }
424
425 sub throw {
426         Text::Tradition::Error->throw( 
427                 'ident' => 'Relationship error',
428                 'message' => $_[0],
429                 );
430 }
431
432 no Moose;
433 __PACKAGE__->meta->make_immutable;
434
435 1;