f33d12db2ee82c250e9d7a3b776ddf60843f1cba
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Collation::Relationship;
6
7 use Moose;
8
9 =head1 NAME
10
11 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
12 between readings in a given collation
13     
14 =head1 DESCRIPTION
15
16 Text::Tradition is a library for representation and analysis of collated
17 texts, particularly medieval ones.  The RelationshipStore is an internal object
18 of the collation, to keep track of the defined relationships (both specific and
19 general) between readings.
20
21 =begin testing
22
23 use Text::Tradition;
24
25 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
26
27 =end testing
28
29 =head1 METHODS
30
31 =head2 new( collation => $collation );
32
33 Creates a new relationship store for the given collation.
34
35 =cut
36
37 has 'collation' => (
38         is => 'ro',
39         isa => 'Text::Tradition::Collation',
40         required => 1,
41         weak_ref => 1,
42         );
43
44 has 'scopedrels' => (
45         is => 'ro',
46         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
47         default => sub { {} },
48         );
49
50 has 'graph' => (
51         is => 'ro',
52         isa => 'Graph',
53         default => sub { Graph->new( undirected => 1 ) },
54     handles => {
55         relationships => 'edges',
56         add_reading => 'add_vertex',
57         delete_reading => 'delete_vertex',
58     },
59         );
60         
61 =head2 get_relationship
62
63 Return the relationship object, if any, that exists between two readings.
64
65 =cut
66
67 sub get_relationship {
68         my( $self, @vector ) = @_;
69         my $relationship;
70         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
71                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
72         }
73         return $relationship;
74 }
75
76 sub _set_relationship {
77         my( $self, $relationship, @vector ) = @_;
78         $self->graph->add_edge( @vector );
79         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
80 }
81         
82 =head2 create
83
84 Create a new relationship with the given options and return it.
85 Warn and return undef if the relationship cannot be created.
86
87 =cut
88
89 sub create {
90         my( $self, $options ) = @_;
91         # Check to see if a relationship exists between the two given readings
92         my $source = delete $options->{'orig_a'};
93         my $target = delete $options->{'orig_b'};
94         my $rel = $self->get_relationship( $source, $target );
95         if( $rel ) {
96                 if( $rel->type ne $options->{'type'} ) {
97                         warn "Another relationship of type " . $rel->type 
98                                 . " already exists between $source and $target";
99                         return;
100                 } else {
101                         return $rel;
102                 }
103         }
104         
105         # Check to see if a nonlocal relationship is defined for the two readings
106         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
107                 $options->{'reading_b'} );
108         if( $rel && $rel->type eq $options->{'type'} ) {
109                 return $rel;
110         } elsif( $rel ) {
111                 warn sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} );
112                 return;
113         } else {
114                 $rel = Text::Tradition::Collation::Relationship->new( $options );
115                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
116                 return $rel;
117         }
118 }
119
120 =head2 add_scoped_relationship( $rel )
121
122 Keep track of relationships defined between specific readings that are scoped
123 non-locally.  Key on whichever reading occurs first alphabetically.
124
125 =cut
126
127 sub add_scoped_relationship {
128         my( $self, $rel ) = @_;
129         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
130         if( $r ) {
131                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
132                         $r->type, $rel->reading_a, $rel->reading_b );
133                 return;
134         }
135         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
136 }
137
138 =head2 scoped_relationship( $reading_a, $reading_b )
139
140 Returns the general (document-level or global) relationship that has been defined 
141 between the two reading strings. Returns undef if there is no general relationship.
142
143 =cut
144
145 sub scoped_relationship {
146         my( $self, $rdga, $rdgb ) = @_;
147         my( $first, $second ) = sort( $rdga, $rdgb );
148         if( exists $self->scopedrels->{$first}->{$second} ) {
149                 return $self->scopedrels->{$first}->{$second};
150         } else {
151                 return undef;
152         }
153 }
154
155 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
156
157 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
158 for the possible options) between the readings given in $source and $target.  Sets
159 up a scoped relationship between $sourcetext and $targettext if the relationship is
160 scoped non-locally.
161
162 Returns a status boolean and a list of all reading pairs connected by the call to
163 add_relationship.
164
165 =cut
166
167 sub add_relationship {
168         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
169
170         # Check the options
171         $options->{'scope'} = 'local' unless $options->{'scope'};
172         
173         my( $is_valid, $reason ) = 
174                 $self->relationship_valid( $source, $target, $options->{'type'} );
175     unless( $is_valid ) {
176         return ( undef, $reason );
177     }
178     
179     # Try to create the relationship object.
180     $options->{'reading_a'} = $source_rdg->text;
181     $options->{'reading_b'} = $target_rdg->text;
182     $options->{'orig_a'} = $source;
183     $options->{'orig_b'} = $target;
184     my $relationship = $self->create( $options );
185         return( undef, "Relationship creation failed" ) unless $relationship;
186
187         # Find all the pairs for which we need to set the relationship.
188         my @vectors = ( [ $source, $target ] ); 
189     if( $relationship->colocated && $relationship->nonlocal ) {
190         my $c = $self->collation;
191         # Set the same relationship everywhere we can, throughout the graph.
192         my @identical_readings = grep { $_->text eq $relationship->reading_a }
193                 $c->readings;
194         foreach my $ir ( @identical_readings ) {
195                 next if $ir->id eq $source;
196                 # Check to see if there is a target reading with the same text at
197                 # the same rank.
198                 my @itarget = grep 
199                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
200                         $c->readings;
201                 if( @itarget ) {
202                         # We found a hit.
203                         warn "More than one reading with text " . $target_rdg->text
204                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
205                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
206                 }
207         }       
208     }
209     
210     # Now set the relationship(s).
211     my @pairs_set;
212     foreach my $v ( @vectors ) {
213                 my $rel = $self->get_relationship( @$v );
214         if( $rel ) {
215                 my $warning = $rel->nonlocal
216                         ? "Found conflicting relationship at @$v"
217                         : "Not overriding local relationship set at @$v";
218                 warn $warning;
219                 next;
220         }
221         $self->_set_relationship( $relationship, @$v );
222         push( @pairs_set, $v );
223     }
224     
225     return( 1, @pairs_set );
226 }
227
228 =head2 relationship_valid( $source, $target, $type )
229
230 Checks whether a relationship of type $type may exist between the readings given
231 in $source and $target.  Returns a tuple of ( status, message ) where status is
232 a yes/no boolean and, if the answer is no, message gives the reason why.
233
234 =cut
235
236 sub relationship_valid {
237     my( $self, $source, $target, $rel ) = @_;
238     my $c = $self->collation;
239     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
240                 # Check that the two readings do (for a repetition) or do not (for
241                 # a transposition) appear in the same witness.
242                 my %seen_wits;
243                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
244                 foreach my $w ( $c->reading_witnesses( $target ) ) {
245                         if( $seen_wits{$w} ) {
246                                 return ( 0, "Readings both occur in witness $w" ) 
247                                         if $rel eq 'transposition';
248                                 return ( 1, "ok" ) if $rel eq 'repetition';
249                 }
250                 return $rel eq 'transposition' ? ( 1, "ok" )
251                         : ( 0, "Readings occur only in distinct witnesses" );
252                 }
253         } else {
254                 # Check that linking the source and target in a relationship won't lead
255                 # to a path loop for any witness.  First make a lookup table of all the
256                 # readings related to either the source or the target.
257                 my @proposed_related = ( $source, $target );
258                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
259                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
260                 my %pr_ids;
261                 map { $pr_ids{ $_ } = 1 } @proposed_related;
262         
263                 # None of these proposed related readings should have a neighbor that
264                 # is also in proposed_related.
265                 foreach my $pr ( keys %pr_ids ) {
266                         foreach my $neighbor( $c->sequence->neighbors( $pr ) ) {
267                                 return( 0, "Would relate neighboring readings $pr and $neighbor" )
268                                         if exists $pr_ids{$neighbor};
269                         }
270                 }               
271                 return ( 1, "ok" );
272         }
273 }
274
275 =head2 related_readings( $reading, $colocated_only )
276
277 Returns a list of readings that are connected via relationship links to $reading.
278 If $colocated_only is true, restricts the list to those readings that are in the
279 same logical location (and therefore have the same rank in the collation graph.)
280
281 =cut
282
283 sub related_readings {
284         my( $self, $reading, $colocated ) = @_;
285         my $return_object;
286         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
287                 $reading = $reading->id;
288                 $return_object = 1;
289         }
290         my @answer;
291         if( $colocated ) {
292                 my %found = ( $reading => 1 );
293                 my $check = [ $reading ];
294                 my $iter = 0;
295                 while( @$check ) {
296                         my $more = [];
297                         foreach my $r ( @$check ) {
298                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
299                                         if( $self->get_relationship( $r, $nr )->colocated ) {
300                                                 push( @$more, $nr ) unless exists $found{$nr};
301                                                 $found{$nr} = 1;
302                                         }
303                                 }
304                         }
305                         $check = $more;
306                 }
307                 @answer = keys %found;
308         } else {
309                 @answer = $self->graph->all_reachable( $reading );
310         }
311         if( $return_object ) {
312                 my $c = $self->collation;
313                 return map { $c->reading( $_ ) } @answer;
314         } else {
315                 return @answer;
316         }
317 }
318
319 =head2 merge_readings( $kept, $deleted );
320
321 Makes a best-effort merge of the relationship links between the given readings, and
322 stops tracking the to-be-deleted reading.
323
324 =cut
325
326 sub merge_readings {
327         my( $self, $kept, $deleted, $combined ) = @_;
328         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
329                 # Get the pair of kept / rel
330                 my @vector = ( $kept );
331                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
332                 next if $vector[0] eq $vector[1]; # Don't add a self loop
333                 
334                 # If kept changes its text, drop the relationship.
335                 next if $combined;
336                         
337                 # If kept / rel already has a relationship, warn and keep the old
338                 my $rel = $self->get_relationship( @vector );
339                 if( $rel ) {
340                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
341                         next;
342                 }
343                 
344                 # Otherwise, adopt the relationship that would be deleted.
345                 $rel = $self->get_relationship( @$edge );
346                 $self->_set_relationship( $rel, @vector );
347         }
348         $self->delete_reading( $deleted );
349 }
350
351 sub as_graphml { 
352         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
353         
354     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
355         $rgraph->setAttribute( 'edgedefault', 'directed' );
356     $rgraph->setAttribute( 'id', 'relationships', );
357     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
358     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
359     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
360     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
361     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
362     
363     # Add the vertices according to their XML IDs
364     my %rdg_lookup = ( reverse %$node_hash );
365     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
366         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
367         $n_el->setAttribute( 'id', $n );
368         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
369     }
370     
371     # Add the relationship edges, with their object information
372     my $edge_ctr = 0;
373     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
374         # Add an edge and fill in its relationship info.
375                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
376                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
377                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
378                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
379
380                 my $rel_obj = $self->get_relationship( @$e );
381                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
382                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
383                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
384                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
385                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
386                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
387         }
388 }
389
390 sub _by_xmlid {
391         my $tmp_a = $a;
392         my $tmp_b = $b;
393         $tmp_a =~ s/\D//g;
394         $tmp_b =~ s/\D//g;
395         return $tmp_a <=> $tmp_b;
396 }
397
398 sub _add_graphml_data {
399     my( $el, $key, $value ) = @_;
400     return unless defined $value;
401     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
402     $data_el->setAttribute( 'key', $key );
403     $data_el->appendText( $value );
404 }
405
406 no Moose;
407 __PACKAGE__->meta->make_immutable;
408
409 1;