oops, is not an object
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Collation::Relationship;
6
7 use Moose;
8
9 =head1 NAME
10
11 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
12 between readings in a given collation
13     
14 =head1 DESCRIPTION
15
16 Text::Tradition is a library for representation and analysis of collated
17 texts, particularly medieval ones.  The RelationshipStore is an internal object
18 of the collation, to keep track of the defined relationships (both specific and
19 general) between readings.
20
21 =head1 METHODS
22
23 =head2 new( collation => $collation );
24
25 Creates a new relationship store for the given collation.
26
27 =cut
28
29 has 'collation' => (
30         is => 'ro',
31         isa => 'Text::Tradition::Collation',
32         required => 1,
33         weak_ref => 1,
34         );
35
36 has 'scopedrels' => (
37         is => 'ro',
38         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
39         default => sub { {} },
40         );
41
42 has 'graph' => (
43         is => 'ro',
44         isa => 'Graph',
45         default => sub { Graph->new( undirected => 1 ) },
46     handles => {
47         relationships => 'edges',
48         add_reading => 'add_vertex',
49         delete_reading => 'delete_vertex',
50     },
51         );
52         
53 =head2 create
54
55 Create a new relationship with the given options and return it.
56 Warn and return undef if the relationship cannot be created.
57
58 =cut
59
60 sub create {
61         my( $self, $options ) = @_;
62         # Check to see if a relationship exists between the two given readings
63         my $source = delete $options->{'orig_a'};
64         my $target = delete $options->{'orig_b'};
65         my $rel;
66         if( $self->graph->has_edge( $source, $target ) ) {
67                 $rel = $self->graph->get_edge_attribute( $source, $target, 'object' );
68                 if( $rel->type ne $options->{'type'} ) {
69                         warn "Another relationship of type " . $rel->type 
70                                 . " already exists between $source and $target";
71                         return;
72                 } else {
73                         return $rel;
74                 }
75         }
76         
77         # Check to see if a nonlocal relationship is defined for the two readings
78         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
79                 $options->{'reading_b'} );
80         if( $rel && $rel->type eq $options->{'type'} ) {
81                 return $rel;
82         } elsif( $rel ) {
83                 warn sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} );
84                 return;
85         } else {
86                 $rel = Text::Tradition::Collation::Relationship->new( $options );
87                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
88                 return $rel;
89         }
90 }
91
92 =head2 add_scoped_relationship( $rel )
93
94 Keep track of relationships defined between specific readings that are scoped
95 non-locally.  Key on whichever reading occurs first alphabetically.
96
97 =cut
98
99 sub add_scoped_relationship {
100         my( $self, $rel ) = @_;
101         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
102         if( $r ) {
103                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
104                         $r->type, $rel->reading_a, $rel->reading_b );
105                 return;
106         }
107         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
108 }
109
110 =head2 scoped_relationship( $reading_a, $reading_b )
111
112 Returns the general (document-level or global) relationship that has been defined 
113 between the two reading strings. Returns undef if there is no general relationship.
114
115 =cut
116
117 sub scoped_relationship {
118         my( $self, $rdga, $rdgb ) = @_;
119         my( $first, $second ) = sort( $rdga, $rdgb );
120         if( exists $self->scopedrels->{$first}->{$second} ) {
121                 return $self->scopedrels->{$first}->{$second};
122         } else {
123                 return undef;
124         }
125 }
126
127 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
128
129 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
130 for the possible options) between the readings given in $source and $target.  Sets
131 up a scoped relationship between $sourcetext and $targettext if the relationship is
132 scoped non-locally.
133
134 Returns a status boolean and a list of all reading pairs connected by the call to
135 add_relationship.
136
137 =cut
138
139 sub add_relationship {
140         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
141
142         # Check the options
143         $options->{'scope'} = 'local' unless $options->{'scope'};
144         
145         my( $is_valid, $reason ) = 
146                 $self->relationship_valid( $source, $target, $options->{'type'} );
147     unless( $is_valid ) {
148         return ( undef, $reason );
149     }
150     
151     # Try to create the relationship object.
152     $options->{'reading_a'} = $source_rdg->text;
153     $options->{'reading_b'} = $target_rdg->text;
154     $options->{'orig_a'} = $source;
155     $options->{'orig_b'} = $target;
156     my $relationship = $self->create( $options );
157         return( undef, "Relationship creation failed" ) unless $relationship;
158
159         # Find all the pairs for which we need to set the relationship.
160         my @vectors = ( [ $source, $target ] ); 
161     if( $relationship->colocated && $relationship->nonlocal ) {
162         my $c = $self->collation;
163         # Set the same relationship everywhere we can, throughout the graph.
164         my @identical_readings = grep { $_->text eq $relationship->reading_a }
165                 $c->readings;
166         foreach my $ir ( @identical_readings ) {
167                 next if $ir->id eq $source;
168                 # Check to see if there is a target reading with the same text at
169                 # the same rank.
170                 my @itarget = grep 
171                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
172                         $c->readings;
173                 if( @itarget ) {
174                         # We found a hit.
175                         warn "More than one reading with text " . $target_rdg->text
176                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
177                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
178                 }
179         }       
180     }
181     
182     # Now set the relationship(s).
183     my @pairs_set;
184     foreach my $v ( @vectors ) {
185         if( $self->graph->has_edge( @$v ) ) {
186                 # Is it locally scoped?
187                 my $rel = $self->graph->get_edge_attribute( @$v, 'object' );
188                 if( $rel->nonlocal ) {
189                         # TODO I think we should not be able to get here.
190                         warn "Found conflicting relationship at @$v";
191                 } else {
192                         warn "Not overriding local relationship set at @$v";
193                         next;
194                 }
195         }
196         $self->graph->add_edge( @$v );
197         $self->graph->set_edge_attribute( @$v, 'object', $relationship );
198         push( @pairs_set, $v );
199     }
200     
201     return( 1, @pairs_set );
202 }
203
204 =head2 relationship_valid( $source, $target, $type )
205
206 Checks whether a relationship of type $type may exist between the readings given
207 in $source and $target.  Returns a tuple of ( status, message ) where status is
208 a yes/no boolean and, if the answer is no, message gives the reason why.
209
210 =cut
211
212 sub relationship_valid {
213     my( $self, $source, $target, $rel ) = @_;
214     my $c = $self->collation;
215     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
216                 # Check that the two readings do (for a repetition) or do not (for
217                 # a transposition) appear in the same witness.
218                 my %seen_wits;
219                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
220                 foreach my $w ( $c->reading_witnesses( $target ) ) {
221                         if( $seen_wits{$w} ) {
222                                 return ( 0, "Readings both occur in witness $w" ) 
223                                         if $rel eq 'transposition';
224                                 return ( 1, "ok" ) if $rel eq 'repetition';
225                 }
226                 return $rel eq 'transposition' ? ( 1, "ok" )
227                         : ( 0, "Readings occur only in distinct witnesses" );
228                 }
229         } else {
230                 # Check that linking the source and target in a relationship won't lead
231                 # to a path loop for any witness.  First make a lookup table of all the
232                 # readings related to either the source or the target.
233                 my @proposed_related = ( $source, $target );
234                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
235                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
236                 my %pr_ids;
237                 map { $pr_ids{ $_ } = 1 } @proposed_related;
238         
239                 # None of these proposed related readings should have a neighbor that
240                 # is also in proposed_related.
241                 foreach my $pr ( keys %pr_ids ) {
242                         foreach my $neighbor( $c->sequence->neighbors( $pr ) ) {
243                                 return( 0, "Would relate neighboring readings $pr and $neighbor" )
244                                         if exists $pr_ids{$neighbor};
245                         }
246                 }               
247                 return ( 1, "ok" );
248         }
249 }
250
251 =head2 related_readings( $reading, $colocated_only )
252
253 Returns a list of readings that are connected via relationship links to $reading.
254 If $colocated_only is true, restricts the list to those readings that are in the
255 same logical location (and therefore have the same rank in the collation graph.)
256
257 =cut
258
259 sub related_readings {
260         my( $self, $reading, $colocated ) = @_;
261         my $return_object;
262         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
263                 $reading = $reading->id;
264                 $return_object = 1;
265         }
266         my @answer;
267         if( $colocated ) {
268                 my %found = ( $reading => 1 );
269                 my $check = [ $reading ];
270                 my $iter = 0;
271                 while( @$check ) {
272                         my $more = [];
273                         foreach my $r ( @$check ) {
274                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
275                                         if( $self->graph->get_edge_attribute( $r, $nr, 'object' )->colocated ) {
276                                                 push( @$more, $nr ) unless exists $found{$nr};
277                                                 $found{$nr} = 1;
278                                         }
279                                 }
280                         }
281                         $check = $more;
282                 }
283                 @answer = keys %found;
284         } else {
285                 @answer = $self->graph->all_reachable( $reading );
286         }
287         if( $return_object ) {
288                 my $c = $self->collation;
289                 return map { $c->reading( $_ ) } @answer;
290         } else {
291                 return @answer;
292         }
293 }
294
295 =head2 merge_readings( $kept, $deleted );
296
297 Makes a best-effort merge of the relationship links between the given readings, and
298 stops tracking the to-be-deleted reading.
299
300 =cut
301
302 sub merge_readings {
303         my( $self, $kept, $deleted, $combined ) = @_;
304         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
305                 # Get the pair of kept / rel
306                 my @vector = ( $kept );
307                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
308                 next if $vector[0] eq $vector[1]; # Don't add a self loop
309                 
310                 # If kept changes its text, drop the relationship.
311                 next if $combined;
312                         
313                 # If kept / rel already has a relationship, warn and keep the old
314                 if( $self->graph->has_edge( @vector ) ) {
315                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
316                         next;
317                 }
318                 
319                 # Otherwise, adopt the relationship that would be deleted.
320                 my $rel = $self->graph->get_edge_attribute( @$edge, 'object' );
321                 $self->graph->add_edge( @vector );
322                 $self->graph->set_edge_attribute( @vector, 'object', $rel );
323         }
324         $self->delete_reading( $deleted );
325 }
326
327 sub as_graphml { 
328         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
329         
330     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
331         $rgraph->setAttribute( 'edgedefault', 'directed' );
332     $rgraph->setAttribute( 'id', 'relationships', );
333     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
334     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
335     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
336     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
337     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
338     
339     # Add the vertices according to their XML IDs
340     my %rdg_lookup = ( reverse %$node_hash );
341     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
342         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
343         $n_el->setAttribute( 'id', $n );
344         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
345     }
346     
347     # Add the relationship edges, with their object information
348     my $edge_ctr = 0;
349     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
350         # Add an edge and fill in its relationship info.
351                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
352                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
353                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
354                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
355
356                 my $rel_obj = $self->graph->get_edge_attribute( @$e, 'object' );
357                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
358                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
359                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
360                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
361                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
362                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
363         }
364 }
365
366 sub _by_xmlid {
367         my $tmp_a = $a;
368         my $tmp_b = $b;
369         $tmp_a =~ s/\D//g;
370         $tmp_b =~ s/\D//g;
371         return $tmp_a <=> $tmp_b;
372 }
373
374 sub _add_graphml_data {
375     my( $el, $key, $value ) = @_;
376     return unless defined $value;
377     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
378     $data_el->setAttribute( 'key', $key );
379     $data_el->appendText( $value );
380 }
381
382 no Moose;
383 __PACKAGE__->meta->make_immutable;
384
385 1;