add exceptions to the rest of the Tradition library
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
7
8use Moose;
9
10=head1 NAME
11
2626f709 12Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
13between readings in a given collation
22222af9 14
15=head1 DESCRIPTION
16
17Text::Tradition is a library for representation and analysis of collated
18texts, particularly medieval ones. The RelationshipStore is an internal object
19of the collation, to keep track of the defined relationships (both specific and
20general) between readings.
21
3ae5e2ad 22=begin testing
23
24use Text::Tradition;
25
26use_ok( 'Text::Tradition::Collation::RelationshipStore' );
27
28=end testing
29
22222af9 30=head1 METHODS
31
32=head2 new( collation => $collation );
33
34Creates a new relationship store for the given collation.
35
36=cut
37
38has 'collation' => (
39 is => 'ro',
40 isa => 'Text::Tradition::Collation',
41 required => 1,
42 weak_ref => 1,
43 );
44
45has 'scopedrels' => (
46 is => 'ro',
47 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
48 default => sub { {} },
49 );
50
51has 'graph' => (
52 is => 'ro',
53 isa => 'Graph',
54 default => sub { Graph->new( undirected => 1 ) },
55 handles => {
56 relationships => 'edges',
57 add_reading => 'add_vertex',
58 delete_reading => 'delete_vertex',
59 },
60 );
61
3ae5e2ad 62=head2 get_relationship
63
64Return the relationship object, if any, that exists between two readings.
65
66=cut
67
68sub get_relationship {
69 my( $self, @vector ) = @_;
70 my $relationship;
71 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
72 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
73 }
74 return $relationship;
75}
76
77sub _set_relationship {
78 my( $self, $relationship, @vector ) = @_;
79 $self->graph->add_edge( @vector );
80 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
81}
82
22222af9 83=head2 create
84
85Create a new relationship with the given options and return it.
86Warn and return undef if the relationship cannot be created.
87
88=cut
89
90sub create {
91 my( $self, $options ) = @_;
92 # Check to see if a relationship exists between the two given readings
93 my $source = delete $options->{'orig_a'};
94 my $target = delete $options->{'orig_b'};
3ae5e2ad 95 my $rel = $self->get_relationship( $source, $target );
96 if( $rel ) {
a7037072 97 if( $rel->type ne $options->{'type'} ) {
63778331 98 throw( "Another relationship of type " . $rel->type
99 . " already exists between $source and $target" );
22222af9 100 } else {
101 return $rel;
102 }
103 }
104
105 # Check to see if a nonlocal relationship is defined for the two readings
106 $rel = $self->scoped_relationship( $options->{'reading_a'},
107 $options->{'reading_b'} );
108 if( $rel && $rel->type eq $options->{'type'} ) {
109 return $rel;
110 } elsif( $rel ) {
63778331 111 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 112 } else {
113 $rel = Text::Tradition::Collation::Relationship->new( $options );
114 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
115 return $rel;
116 }
117}
118
119=head2 add_scoped_relationship( $rel )
120
121Keep track of relationships defined between specific readings that are scoped
122non-locally. Key on whichever reading occurs first alphabetically.
123
124=cut
125
126sub add_scoped_relationship {
127 my( $self, $rel ) = @_;
128 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
129 if( $r ) {
130 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
131 $r->type, $rel->reading_a, $rel->reading_b );
132 return;
133 }
134 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
135}
136
137=head2 scoped_relationship( $reading_a, $reading_b )
138
139Returns the general (document-level or global) relationship that has been defined
140between the two reading strings. Returns undef if there is no general relationship.
141
142=cut
143
144sub scoped_relationship {
145 my( $self, $rdga, $rdgb ) = @_;
146 my( $first, $second ) = sort( $rdga, $rdgb );
147 if( exists $self->scopedrels->{$first}->{$second} ) {
148 return $self->scopedrels->{$first}->{$second};
149 } else {
150 return undef;
151 }
152}
153
154=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
155
156Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
157for the possible options) between the readings given in $source and $target. Sets
158up a scoped relationship between $sourcetext and $targettext if the relationship is
159scoped non-locally.
160
161Returns a status boolean and a list of all reading pairs connected by the call to
162add_relationship.
163
164=cut
165
166sub add_relationship {
167 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
168
169 # Check the options
170 $options->{'scope'} = 'local' unless $options->{'scope'};
171
172 my( $is_valid, $reason ) =
173 $self->relationship_valid( $source, $target, $options->{'type'} );
174 unless( $is_valid ) {
63778331 175 throw( "Invalid relationship: $reason" );
22222af9 176 }
177
178 # Try to create the relationship object.
179 $options->{'reading_a'} = $source_rdg->text;
180 $options->{'reading_b'} = $target_rdg->text;
181 $options->{'orig_a'} = $source;
182 $options->{'orig_b'} = $target;
63778331 183 my $relationship = $self->create( $options ); # Will throw on error
22222af9 184
185 # Find all the pairs for which we need to set the relationship.
186 my @vectors = ( [ $source, $target ] );
187 if( $relationship->colocated && $relationship->nonlocal ) {
188 my $c = $self->collation;
189 # Set the same relationship everywhere we can, throughout the graph.
190 my @identical_readings = grep { $_->text eq $relationship->reading_a }
191 $c->readings;
192 foreach my $ir ( @identical_readings ) {
cf6c01be 193 next if $ir->id eq $source;
22222af9 194 # Check to see if there is a target reading with the same text at
195 # the same rank.
196 my @itarget = grep
197 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
198 $c->readings;
199 if( @itarget ) {
200 # We found a hit.
201 warn "More than one reading with text " . $target_rdg->text
202 . " at rank " . $ir->rank . "!" if @itarget > 1;
cf6c01be 203 push( @vectors, [ $ir->id, $itarget[0]->id ] );
22222af9 204 }
205 }
206 }
207
208 # Now set the relationship(s).
209 my @pairs_set;
210 foreach my $v ( @vectors ) {
3ae5e2ad 211 my $rel = $self->get_relationship( @$v );
212 if( $rel ) {
63778331 213 if( $rel->nonlocal ) {
214 throw( "Found conflicting relationship at @$v" );
215 } else {
216 warn "Not overriding local relationship set at @$v";
217 }
3ae5e2ad 218 next;
22222af9 219 }
3ae5e2ad 220 $self->_set_relationship( $relationship, @$v );
22222af9 221 push( @pairs_set, $v );
222 }
223
63778331 224 return @pairs_set;
22222af9 225}
226
227=head2 relationship_valid( $source, $target, $type )
228
229Checks whether a relationship of type $type may exist between the readings given
230in $source and $target. Returns a tuple of ( status, message ) where status is
231a yes/no boolean and, if the answer is no, message gives the reason why.
232
233=cut
234
235sub relationship_valid {
236 my( $self, $source, $target, $rel ) = @_;
237 my $c = $self->collation;
238 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
239 # Check that the two readings do (for a repetition) or do not (for
240 # a transposition) appear in the same witness.
241 my %seen_wits;
242 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
243 foreach my $w ( $c->reading_witnesses( $target ) ) {
244 if( $seen_wits{$w} ) {
245 return ( 0, "Readings both occur in witness $w" )
246 if $rel eq 'transposition';
247 return ( 1, "ok" ) if $rel eq 'repetition';
248 }
249 return $rel eq 'transposition' ? ( 1, "ok" )
250 : ( 0, "Readings occur only in distinct witnesses" );
251 }
252 } else {
253 # Check that linking the source and target in a relationship won't lead
254 # to a path loop for any witness. First make a lookup table of all the
255 # readings related to either the source or the target.
256 my @proposed_related = ( $source, $target );
257 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
258 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
259 my %pr_ids;
260 map { $pr_ids{ $_ } = 1 } @proposed_related;
261
262 # None of these proposed related readings should have a neighbor that
263 # is also in proposed_related.
264 foreach my $pr ( keys %pr_ids ) {
265 foreach my $neighbor( $c->sequence->neighbors( $pr ) ) {
266 return( 0, "Would relate neighboring readings $pr and $neighbor" )
267 if exists $pr_ids{$neighbor};
268 }
269 }
270 return ( 1, "ok" );
271 }
272}
273
274=head2 related_readings( $reading, $colocated_only )
275
276Returns a list of readings that are connected via relationship links to $reading.
277If $colocated_only is true, restricts the list to those readings that are in the
278same logical location (and therefore have the same rank in the collation graph.)
279
280=cut
281
282sub related_readings {
283 my( $self, $reading, $colocated ) = @_;
284 my $return_object;
285 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
286 $reading = $reading->id;
287 $return_object = 1;
288 }
c84275ff 289 my @answer;
22222af9 290 if( $colocated ) {
c84275ff 291 my %found = ( $reading => 1 );
292 my $check = [ $reading ];
293 my $iter = 0;
294 while( @$check ) {
c84275ff 295 my $more = [];
296 foreach my $r ( @$check ) {
297 foreach my $nr ( $self->graph->neighbors( $r ) ) {
3ae5e2ad 298 if( $self->get_relationship( $r, $nr )->colocated ) {
c84275ff 299 push( @$more, $nr ) unless exists $found{$nr};
300 $found{$nr} = 1;
301 }
302 }
303 }
304 $check = $more;
22222af9 305 }
c84275ff 306 @answer = keys %found;
307 } else {
308 @answer = $self->graph->all_reachable( $reading );
22222af9 309 }
310 if( $return_object ) {
311 my $c = $self->collation;
c84275ff 312 return map { $c->reading( $_ ) } @answer;
22222af9 313 } else {
c84275ff 314 return @answer;
22222af9 315 }
316}
317
318=head2 merge_readings( $kept, $deleted );
319
320Makes a best-effort merge of the relationship links between the given readings, and
321stops tracking the to-be-deleted reading.
322
323=cut
324
325sub merge_readings {
326 my( $self, $kept, $deleted, $combined ) = @_;
327 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
328 # Get the pair of kept / rel
329 my @vector = ( $kept );
330 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
331 next if $vector[0] eq $vector[1]; # Don't add a self loop
332
333 # If kept changes its text, drop the relationship.
334 next if $combined;
335
336 # If kept / rel already has a relationship, warn and keep the old
3ae5e2ad 337 my $rel = $self->get_relationship( @vector );
338 if( $rel ) {
22222af9 339 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
340 next;
341 }
342
343 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 344 $rel = $self->get_relationship( @$edge );
345 $self->_set_relationship( $rel, @vector );
22222af9 346 }
347 $self->delete_reading( $deleted );
348}
349
c84275ff 350sub as_graphml {
2626f709 351 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 352
353 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
354 $rgraph->setAttribute( 'edgedefault', 'directed' );
355 $rgraph->setAttribute( 'id', 'relationships', );
356 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
357 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
358 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
359 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
360 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
361
362 # Add the vertices according to their XML IDs
2626f709 363 my %rdg_lookup = ( reverse %$node_hash );
364 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 365 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
366 $n_el->setAttribute( 'id', $n );
2626f709 367 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 368 }
369
370 # Add the relationship edges, with their object information
371 my $edge_ctr = 0;
372 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
373 # Add an edge and fill in its relationship info.
374 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
375 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
376 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
377 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
378
3ae5e2ad 379 my $rel_obj = $self->get_relationship( @$e );
c84275ff 380 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
381 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
382 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
383 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
384 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
385 $rel_obj->non_independent ) if $rel_obj->nonind_set;
386 }
387}
388
389sub _by_xmlid {
2626f709 390 my $tmp_a = $a;
391 my $tmp_b = $b;
392 $tmp_a =~ s/\D//g;
393 $tmp_b =~ s/\D//g;
394 return $tmp_a <=> $tmp_b;
c84275ff 395}
396
397sub _add_graphml_data {
398 my( $el, $key, $value ) = @_;
399 return unless defined $value;
400 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
401 $data_el->setAttribute( 'key', $key );
402 $data_el->appendText( $value );
83d5ac3a 403}
404
63778331 405sub throw {
406 Text::Tradition::Error->throw(
407 'ident' => 'Relationship error',
408 'message' => $_[0],
409 );
410}
411
22222af9 412no Moose;
413__PACKAGE__->meta->make_immutable;
414
4151;