detect and mark common readings
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7
8 use Moose;
9
10 =head1 NAME
11
12 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
13 between readings in a given collation
14     
15 =head1 DESCRIPTION
16
17 Text::Tradition is a library for representation and analysis of collated
18 texts, particularly medieval ones.  The RelationshipStore is an internal object
19 of the collation, to keep track of the defined relationships (both specific and
20 general) between readings.
21
22 =begin testing
23
24 use Text::Tradition;
25
26 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
27
28 =end testing
29
30 =head1 METHODS
31
32 =head2 new( collation => $collation );
33
34 Creates a new relationship store for the given collation.
35
36 =cut
37
38 has 'collation' => (
39         is => 'ro',
40         isa => 'Text::Tradition::Collation',
41         required => 1,
42         weak_ref => 1,
43         );
44
45 has 'scopedrels' => (
46         is => 'ro',
47         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
48         default => sub { {} },
49         );
50
51 has 'graph' => (
52         is => 'ro',
53         isa => 'Graph',
54         default => sub { Graph->new( undirected => 1 ) },
55     handles => {
56         relationships => 'edges',
57         add_reading => 'add_vertex',
58         delete_reading => 'delete_vertex',
59     },
60         );
61         
62 =head2 get_relationship
63
64 Return the relationship object, if any, that exists between two readings.
65
66 =cut
67
68 sub get_relationship {
69         my( $self, @vector ) = @_;
70         my $relationship;
71         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
72                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
73         }
74         return $relationship;
75 }
76
77 sub _set_relationship {
78         my( $self, $relationship, @vector ) = @_;
79         $self->graph->add_edge( @vector );
80         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
81 }
82         
83 =head2 create
84
85 Create a new relationship with the given options and return it.
86 Warn and return undef if the relationship cannot be created.
87
88 =cut
89
90 sub create {
91         my( $self, $options ) = @_;
92         # Check to see if a relationship exists between the two given readings
93         my $source = delete $options->{'orig_a'};
94         my $target = delete $options->{'orig_b'};
95         my $rel = $self->get_relationship( $source, $target );
96         if( $rel ) {
97                 if( $rel->type ne $options->{'type'} ) {
98                         throw( "Another relationship of type " . $rel->type 
99                                 . " already exists between $source and $target" );
100                 } else {
101                         return $rel;
102                 }
103         }
104         
105         # Check to see if a nonlocal relationship is defined for the two readings
106         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
107                 $options->{'reading_b'} );
108         if( $rel && $rel->type eq $options->{'type'} ) {
109                 return $rel;
110         } elsif( $rel ) {
111                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
112         } else {
113                 $rel = Text::Tradition::Collation::Relationship->new( $options );
114                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
115                 return $rel;
116         }
117 }
118
119 =head2 add_scoped_relationship( $rel )
120
121 Keep track of relationships defined between specific readings that are scoped
122 non-locally.  Key on whichever reading occurs first alphabetically.
123
124 =cut
125
126 sub add_scoped_relationship {
127         my( $self, $rel ) = @_;
128         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
129         if( $r ) {
130                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
131                         $r->type, $rel->reading_a, $rel->reading_b );
132                 return;
133         }
134         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
135 }
136
137 =head2 scoped_relationship( $reading_a, $reading_b )
138
139 Returns the general (document-level or global) relationship that has been defined 
140 between the two reading strings. Returns undef if there is no general relationship.
141
142 =cut
143
144 sub scoped_relationship {
145         my( $self, $rdga, $rdgb ) = @_;
146         my( $first, $second ) = sort( $rdga, $rdgb );
147         if( exists $self->scopedrels->{$first}->{$second} ) {
148                 return $self->scopedrels->{$first}->{$second};
149         } else {
150                 return undef;
151         }
152 }
153
154 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
155
156 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
157 for the possible options) between the readings given in $source and $target.  Sets
158 up a scoped relationship between $sourcetext and $targettext if the relationship is
159 scoped non-locally.
160
161 Returns a status boolean and a list of all reading pairs connected by the call to
162 add_relationship.
163
164 =cut
165
166 sub add_relationship {
167         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
168
169         # Check the options
170         $options->{'scope'} = 'local' unless $options->{'scope'};
171         
172         my( $is_valid, $reason ) = 
173                 $self->relationship_valid( $source, $target, $options->{'type'} );
174     unless( $is_valid ) {
175         throw( "Invalid relationship: $reason" );
176     }
177     
178     # Try to create the relationship object.
179     $options->{'reading_a'} = $source_rdg->text;
180     $options->{'reading_b'} = $target_rdg->text;
181     $options->{'orig_a'} = $source;
182     $options->{'orig_b'} = $target;
183     my $relationship = $self->create( $options );  # Will throw on error
184
185         # Find all the pairs for which we need to set the relationship.
186         my @vectors = ( [ $source, $target ] ); 
187     if( $relationship->colocated && $relationship->nonlocal ) {
188         my $c = $self->collation;
189         # Set the same relationship everywhere we can, throughout the graph.
190         my @identical_readings = grep { $_->text eq $relationship->reading_a }
191                 $c->readings;
192         foreach my $ir ( @identical_readings ) {
193                 next if $ir->id eq $source;
194                 # Check to see if there is a target reading with the same text at
195                 # the same rank.
196                 my @itarget = grep 
197                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
198                         $c->readings;
199                 if( @itarget ) {
200                         # We found a hit.
201                         warn "More than one reading with text " . $target_rdg->text
202                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
203                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
204                 }
205         }       
206     }
207     
208     # Now set the relationship(s).
209     my @pairs_set;
210     foreach my $v ( @vectors ) {
211                 my $rel = $self->get_relationship( @$v );
212         if( $rel ) {
213                 if( $rel->nonlocal ) {
214                         throw( "Found conflicting relationship at @$v" );
215                 } else {
216                         warn "Not overriding local relationship set at @$v";
217                 }
218                 next;
219         }
220         $self->_set_relationship( $relationship, @$v );
221         push( @pairs_set, $v );
222     }
223     
224     return @pairs_set;
225 }
226
227 =head2 relationship_valid( $source, $target, $type )
228
229 Checks whether a relationship of type $type may exist between the readings given
230 in $source and $target.  Returns a tuple of ( status, message ) where status is
231 a yes/no boolean and, if the answer is no, message gives the reason why.
232
233 =cut
234
235 sub relationship_valid {
236     my( $self, $source, $target, $rel ) = @_;
237     my $c = $self->collation;
238     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
239                 # Check that the two readings do (for a repetition) or do not (for
240                 # a transposition) appear in the same witness.
241                 my %seen_wits;
242                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
243                 foreach my $w ( $c->reading_witnesses( $target ) ) {
244                         if( $seen_wits{$w} ) {
245                                 return ( 0, "Readings both occur in witness $w" ) 
246                                         if $rel eq 'transposition';
247                                 return ( 1, "ok" ) if $rel eq 'repetition';
248                 }
249                 return $rel eq 'transposition' ? ( 1, "ok" )
250                         : ( 0, "Readings occur only in distinct witnesses" );
251                 }
252         } else {
253                 # Check that linking the source and target in a relationship won't lead
254                 # to a path loop for any witness.  First make a lookup table of all the
255                 # readings related to either the source or the target.
256                 my @proposed_related = ( $source, $target );
257                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
258                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
259                 my %pr_ids;
260                 map { $pr_ids{ $_ } = 1 } @proposed_related;
261         
262                 # None of these proposed related readings should have a neighbor that
263                 # is also in proposed_related.
264                 foreach my $pr ( keys %pr_ids ) {
265                         foreach my $neighbor( $c->sequence->neighbors( $pr ) ) {
266                                 return( 0, "Would relate neighboring readings $pr and $neighbor" )
267                                         if exists $pr_ids{$neighbor};
268                         }
269                 }               
270                 return ( 1, "ok" );
271         }
272 }
273
274 =head2 related_readings( $reading, $colocated_only )
275
276 Returns a list of readings that are connected via relationship links to $reading.
277 If $colocated_only is true, restricts the list to those readings that are in the
278 same logical location (and therefore have the same rank in the collation graph.)
279
280 =cut
281
282 sub related_readings {
283         my( $self, $reading, $colocated ) = @_;
284         my $return_object;
285         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286                 $reading = $reading->id;
287                 $return_object = 1;
288         }
289         my @answer;
290         if( $colocated ) {
291                 my %found = ( $reading => 1 );
292                 my $check = [ $reading ];
293                 my $iter = 0;
294                 while( @$check ) {
295                         my $more = [];
296                         foreach my $r ( @$check ) {
297                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
298                                         if( $self->get_relationship( $r, $nr )->colocated ) {
299                                                 push( @$more, $nr ) unless exists $found{$nr};
300                                                 $found{$nr} = 1;
301                                         }
302                                 }
303                         }
304                         $check = $more;
305                 }
306                 @answer = keys %found;
307         } else {
308                 @answer = $self->graph->all_reachable( $reading );
309         }
310         if( $return_object ) {
311                 my $c = $self->collation;
312                 return map { $c->reading( $_ ) } @answer;
313         } else {
314                 return @answer;
315         }
316 }
317
318 =head2 merge_readings( $kept, $deleted );
319
320 Makes a best-effort merge of the relationship links between the given readings, and
321 stops tracking the to-be-deleted reading.
322
323 =cut
324
325 sub merge_readings {
326         my( $self, $kept, $deleted, $combined ) = @_;
327         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
328                 # Get the pair of kept / rel
329                 my @vector = ( $kept );
330                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
331                 next if $vector[0] eq $vector[1]; # Don't add a self loop
332                 
333                 # If kept changes its text, drop the relationship.
334                 next if $combined;
335                         
336                 # If kept / rel already has a relationship, warn and keep the old
337                 my $rel = $self->get_relationship( @vector );
338                 if( $rel ) {
339                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
340                         next;
341                 }
342                 
343                 # Otherwise, adopt the relationship that would be deleted.
344                 $rel = $self->get_relationship( @$edge );
345                 $self->_set_relationship( $rel, @vector );
346         }
347         $self->delete_reading( $deleted );
348 }
349
350 sub as_graphml { 
351         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
352         
353     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
354         $rgraph->setAttribute( 'edgedefault', 'directed' );
355     $rgraph->setAttribute( 'id', 'relationships', );
356     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
357     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
358     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
359     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
360     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
361     
362     # Add the vertices according to their XML IDs
363     my %rdg_lookup = ( reverse %$node_hash );
364     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
365         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
366         $n_el->setAttribute( 'id', $n );
367         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
368     }
369     
370     # Add the relationship edges, with their object information
371     my $edge_ctr = 0;
372     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
373         # Add an edge and fill in its relationship info.
374                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
375                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
376                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
377                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
378
379                 my $rel_obj = $self->get_relationship( @$e );
380                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
381                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
382                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
383                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
384                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
385                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
386         }
387 }
388
389 sub _by_xmlid {
390         my $tmp_a = $a;
391         my $tmp_b = $b;
392         $tmp_a =~ s/\D//g;
393         $tmp_b =~ s/\D//g;
394         return $tmp_a <=> $tmp_b;
395 }
396
397 sub _add_graphml_data {
398     my( $el, $key, $value ) = @_;
399     return unless defined $value;
400     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
401     $data_el->setAttribute( 'key', $key );
402     $data_el->appendText( $value );
403 }
404
405 sub throw {
406         Text::Tradition::Error->throw( 
407                 'ident' => 'Relationship error',
408                 'message' => $_[0],
409                 );
410 }
411
412 no Moose;
413 __PACKAGE__->meta->make_immutable;
414
415 1;