Commit | Line | Data |
22222af9 |
1 | package Text::Tradition::Collation::RelationshipStore; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Text::Tradition::Collation::Relationship; |
6 | |
7 | use Moose; |
8 | |
9 | =head1 NAME |
10 | |
2626f709 |
11 | Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships |
12 | between readings in a given collation |
22222af9 |
13 | |
14 | =head1 DESCRIPTION |
15 | |
16 | Text::Tradition is a library for representation and analysis of collated |
17 | texts, particularly medieval ones. The RelationshipStore is an internal object |
18 | of the collation, to keep track of the defined relationships (both specific and |
19 | general) between readings. |
20 | |
3ae5e2ad |
21 | =begin testing |
22 | |
23 | use Text::Tradition; |
24 | |
25 | use_ok( 'Text::Tradition::Collation::RelationshipStore' ); |
26 | |
27 | =end testing |
28 | |
22222af9 |
29 | =head1 METHODS |
30 | |
31 | =head2 new( collation => $collation ); |
32 | |
33 | Creates a new relationship store for the given collation. |
34 | |
35 | =cut |
36 | |
37 | has 'collation' => ( |
38 | is => 'ro', |
39 | isa => 'Text::Tradition::Collation', |
40 | required => 1, |
41 | weak_ref => 1, |
42 | ); |
43 | |
44 | has 'scopedrels' => ( |
45 | is => 'ro', |
46 | isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]', |
47 | default => sub { {} }, |
48 | ); |
49 | |
50 | has 'graph' => ( |
51 | is => 'ro', |
52 | isa => 'Graph', |
53 | default => sub { Graph->new( undirected => 1 ) }, |
54 | handles => { |
55 | relationships => 'edges', |
56 | add_reading => 'add_vertex', |
57 | delete_reading => 'delete_vertex', |
58 | }, |
59 | ); |
60 | |
3ae5e2ad |
61 | =head2 get_relationship |
62 | |
63 | Return the relationship object, if any, that exists between two readings. |
64 | |
65 | =cut |
66 | |
67 | sub get_relationship { |
68 | my( $self, @vector ) = @_; |
69 | my $relationship; |
70 | if( $self->graph->has_edge_attribute( @vector, 'object' ) ) { |
71 | $relationship = $self->graph->get_edge_attribute( @vector, 'object' ); |
72 | } |
73 | return $relationship; |
74 | } |
75 | |
76 | sub _set_relationship { |
77 | my( $self, $relationship, @vector ) = @_; |
78 | $self->graph->add_edge( @vector ); |
79 | $self->graph->set_edge_attribute( @vector, 'object', $relationship ); |
80 | } |
81 | |
22222af9 |
82 | =head2 create |
83 | |
84 | Create a new relationship with the given options and return it. |
85 | Warn and return undef if the relationship cannot be created. |
86 | |
87 | =cut |
88 | |
89 | sub create { |
90 | my( $self, $options ) = @_; |
91 | # Check to see if a relationship exists between the two given readings |
92 | my $source = delete $options->{'orig_a'}; |
93 | my $target = delete $options->{'orig_b'}; |
3ae5e2ad |
94 | my $rel = $self->get_relationship( $source, $target ); |
95 | if( $rel ) { |
a7037072 |
96 | if( $rel->type ne $options->{'type'} ) { |
2626f709 |
97 | warn "Another relationship of type " . $rel->type |
a7037072 |
98 | . " already exists between $source and $target"; |
22222af9 |
99 | return; |
100 | } else { |
101 | return $rel; |
102 | } |
103 | } |
104 | |
105 | # Check to see if a nonlocal relationship is defined for the two readings |
106 | $rel = $self->scoped_relationship( $options->{'reading_a'}, |
107 | $options->{'reading_b'} ); |
108 | if( $rel && $rel->type eq $options->{'type'} ) { |
109 | return $rel; |
110 | } elsif( $rel ) { |
111 | warn sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ); |
112 | return; |
113 | } else { |
114 | $rel = Text::Tradition::Collation::Relationship->new( $options ); |
115 | $self->add_scoped_relationship( $rel ) if $rel->nonlocal; |
116 | return $rel; |
117 | } |
118 | } |
119 | |
120 | =head2 add_scoped_relationship( $rel ) |
121 | |
122 | Keep track of relationships defined between specific readings that are scoped |
123 | non-locally. Key on whichever reading occurs first alphabetically. |
124 | |
125 | =cut |
126 | |
127 | sub add_scoped_relationship { |
128 | my( $self, $rel ) = @_; |
129 | my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b ); |
130 | if( $r ) { |
131 | warn sprintf( "Scoped relationship of type %s already exists between %s and %s", |
132 | $r->type, $rel->reading_a, $rel->reading_b ); |
133 | return; |
134 | } |
135 | $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel; |
136 | } |
137 | |
138 | =head2 scoped_relationship( $reading_a, $reading_b ) |
139 | |
140 | Returns the general (document-level or global) relationship that has been defined |
141 | between the two reading strings. Returns undef if there is no general relationship. |
142 | |
143 | =cut |
144 | |
145 | sub scoped_relationship { |
146 | my( $self, $rdga, $rdgb ) = @_; |
147 | my( $first, $second ) = sort( $rdga, $rdgb ); |
148 | if( exists $self->scopedrels->{$first}->{$second} ) { |
149 | return $self->scopedrels->{$first}->{$second}; |
150 | } else { |
151 | return undef; |
152 | } |
153 | } |
154 | |
155 | =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts ) |
156 | |
157 | Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship |
158 | for the possible options) between the readings given in $source and $target. Sets |
159 | up a scoped relationship between $sourcetext and $targettext if the relationship is |
160 | scoped non-locally. |
161 | |
162 | Returns a status boolean and a list of all reading pairs connected by the call to |
163 | add_relationship. |
164 | |
165 | =cut |
166 | |
167 | sub add_relationship { |
168 | my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_; |
169 | |
170 | # Check the options |
171 | $options->{'scope'} = 'local' unless $options->{'scope'}; |
172 | |
173 | my( $is_valid, $reason ) = |
174 | $self->relationship_valid( $source, $target, $options->{'type'} ); |
175 | unless( $is_valid ) { |
176 | return ( undef, $reason ); |
177 | } |
178 | |
179 | # Try to create the relationship object. |
180 | $options->{'reading_a'} = $source_rdg->text; |
181 | $options->{'reading_b'} = $target_rdg->text; |
182 | $options->{'orig_a'} = $source; |
183 | $options->{'orig_b'} = $target; |
184 | my $relationship = $self->create( $options ); |
185 | return( undef, "Relationship creation failed" ) unless $relationship; |
186 | |
187 | # Find all the pairs for which we need to set the relationship. |
188 | my @vectors = ( [ $source, $target ] ); |
189 | if( $relationship->colocated && $relationship->nonlocal ) { |
190 | my $c = $self->collation; |
191 | # Set the same relationship everywhere we can, throughout the graph. |
192 | my @identical_readings = grep { $_->text eq $relationship->reading_a } |
193 | $c->readings; |
194 | foreach my $ir ( @identical_readings ) { |
cf6c01be |
195 | next if $ir->id eq $source; |
22222af9 |
196 | # Check to see if there is a target reading with the same text at |
197 | # the same rank. |
198 | my @itarget = grep |
199 | { $_->rank == $ir->rank && $_->text eq $relationship->reading_b } |
200 | $c->readings; |
201 | if( @itarget ) { |
202 | # We found a hit. |
203 | warn "More than one reading with text " . $target_rdg->text |
204 | . " at rank " . $ir->rank . "!" if @itarget > 1; |
cf6c01be |
205 | push( @vectors, [ $ir->id, $itarget[0]->id ] ); |
22222af9 |
206 | } |
207 | } |
208 | } |
209 | |
210 | # Now set the relationship(s). |
211 | my @pairs_set; |
212 | foreach my $v ( @vectors ) { |
3ae5e2ad |
213 | my $rel = $self->get_relationship( @$v ); |
214 | if( $rel ) { |
215 | my $warning = $rel->nonlocal |
216 | ? "Found conflicting relationship at @$v" |
217 | : "Not overriding local relationship set at @$v"; |
218 | warn $warning; |
219 | next; |
22222af9 |
220 | } |
3ae5e2ad |
221 | $self->_set_relationship( $relationship, @$v ); |
22222af9 |
222 | push( @pairs_set, $v ); |
223 | } |
224 | |
225 | return( 1, @pairs_set ); |
226 | } |
227 | |
228 | =head2 relationship_valid( $source, $target, $type ) |
229 | |
230 | Checks whether a relationship of type $type may exist between the readings given |
231 | in $source and $target. Returns a tuple of ( status, message ) where status is |
232 | a yes/no boolean and, if the answer is no, message gives the reason why. |
233 | |
234 | =cut |
235 | |
236 | sub relationship_valid { |
237 | my( $self, $source, $target, $rel ) = @_; |
238 | my $c = $self->collation; |
239 | if ( $rel eq 'transposition' || $rel eq 'repetition' ) { |
240 | # Check that the two readings do (for a repetition) or do not (for |
241 | # a transposition) appear in the same witness. |
242 | my %seen_wits; |
243 | map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source ); |
244 | foreach my $w ( $c->reading_witnesses( $target ) ) { |
245 | if( $seen_wits{$w} ) { |
246 | return ( 0, "Readings both occur in witness $w" ) |
247 | if $rel eq 'transposition'; |
248 | return ( 1, "ok" ) if $rel eq 'repetition'; |
249 | } |
250 | return $rel eq 'transposition' ? ( 1, "ok" ) |
251 | : ( 0, "Readings occur only in distinct witnesses" ); |
252 | } |
253 | } else { |
254 | # Check that linking the source and target in a relationship won't lead |
255 | # to a path loop for any witness. First make a lookup table of all the |
256 | # readings related to either the source or the target. |
257 | my @proposed_related = ( $source, $target ); |
258 | push( @proposed_related, $self->related_readings( $source, 'colocated' ) ); |
259 | push( @proposed_related, $self->related_readings( $target, 'colocated' ) ); |
260 | my %pr_ids; |
261 | map { $pr_ids{ $_ } = 1 } @proposed_related; |
262 | |
263 | # None of these proposed related readings should have a neighbor that |
264 | # is also in proposed_related. |
265 | foreach my $pr ( keys %pr_ids ) { |
266 | foreach my $neighbor( $c->sequence->neighbors( $pr ) ) { |
267 | return( 0, "Would relate neighboring readings $pr and $neighbor" ) |
268 | if exists $pr_ids{$neighbor}; |
269 | } |
270 | } |
271 | return ( 1, "ok" ); |
272 | } |
273 | } |
274 | |
275 | =head2 related_readings( $reading, $colocated_only ) |
276 | |
277 | Returns a list of readings that are connected via relationship links to $reading. |
278 | If $colocated_only is true, restricts the list to those readings that are in the |
279 | same logical location (and therefore have the same rank in the collation graph.) |
280 | |
281 | =cut |
282 | |
283 | sub related_readings { |
284 | my( $self, $reading, $colocated ) = @_; |
285 | my $return_object; |
286 | if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) { |
287 | $reading = $reading->id; |
288 | $return_object = 1; |
289 | } |
c84275ff |
290 | my @answer; |
22222af9 |
291 | if( $colocated ) { |
c84275ff |
292 | my %found = ( $reading => 1 ); |
293 | my $check = [ $reading ]; |
294 | my $iter = 0; |
295 | while( @$check ) { |
c84275ff |
296 | my $more = []; |
297 | foreach my $r ( @$check ) { |
298 | foreach my $nr ( $self->graph->neighbors( $r ) ) { |
3ae5e2ad |
299 | if( $self->get_relationship( $r, $nr )->colocated ) { |
c84275ff |
300 | push( @$more, $nr ) unless exists $found{$nr}; |
301 | $found{$nr} = 1; |
302 | } |
303 | } |
304 | } |
305 | $check = $more; |
22222af9 |
306 | } |
c84275ff |
307 | @answer = keys %found; |
308 | } else { |
309 | @answer = $self->graph->all_reachable( $reading ); |
22222af9 |
310 | } |
311 | if( $return_object ) { |
312 | my $c = $self->collation; |
c84275ff |
313 | return map { $c->reading( $_ ) } @answer; |
22222af9 |
314 | } else { |
c84275ff |
315 | return @answer; |
22222af9 |
316 | } |
317 | } |
318 | |
319 | =head2 merge_readings( $kept, $deleted ); |
320 | |
321 | Makes a best-effort merge of the relationship links between the given readings, and |
322 | stops tracking the to-be-deleted reading. |
323 | |
324 | =cut |
325 | |
326 | sub merge_readings { |
327 | my( $self, $kept, $deleted, $combined ) = @_; |
328 | foreach my $edge ( $self->graph->edges_at( $deleted ) ) { |
329 | # Get the pair of kept / rel |
330 | my @vector = ( $kept ); |
331 | push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] ); |
332 | next if $vector[0] eq $vector[1]; # Don't add a self loop |
333 | |
334 | # If kept changes its text, drop the relationship. |
335 | next if $combined; |
336 | |
337 | # If kept / rel already has a relationship, warn and keep the old |
3ae5e2ad |
338 | my $rel = $self->get_relationship( @vector ); |
339 | if( $rel ) { |
22222af9 |
340 | warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted ); |
341 | next; |
342 | } |
343 | |
344 | # Otherwise, adopt the relationship that would be deleted. |
3ae5e2ad |
345 | $rel = $self->get_relationship( @$edge ); |
346 | $self->_set_relationship( $rel, @vector ); |
22222af9 |
347 | } |
348 | $self->delete_reading( $deleted ); |
349 | } |
350 | |
c84275ff |
351 | sub as_graphml { |
2626f709 |
352 | my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_; |
c84275ff |
353 | |
354 | my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' ); |
355 | $rgraph->setAttribute( 'edgedefault', 'directed' ); |
356 | $rgraph->setAttribute( 'id', 'relationships', ); |
357 | $rgraph->setAttribute( 'parse.edgeids', 'canonical' ); |
358 | $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) ); |
359 | $rgraph->setAttribute( 'parse.nodeids', 'canonical' ); |
360 | $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) ); |
361 | $rgraph->setAttribute( 'parse.order', 'nodesfirst' ); |
362 | |
363 | # Add the vertices according to their XML IDs |
2626f709 |
364 | my %rdg_lookup = ( reverse %$node_hash ); |
365 | foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) { |
c84275ff |
366 | my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' ); |
367 | $n_el->setAttribute( 'id', $n ); |
2626f709 |
368 | _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} ); |
c84275ff |
369 | } |
370 | |
371 | # Add the relationship edges, with their object information |
372 | my $edge_ctr = 0; |
373 | foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) { |
374 | # Add an edge and fill in its relationship info. |
375 | my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' ); |
376 | $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} ); |
377 | $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} ); |
378 | $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ ); |
379 | |
3ae5e2ad |
380 | my $rel_obj = $self->get_relationship( @$e ); |
c84275ff |
381 | _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type ); |
382 | _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope ); |
383 | _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, |
384 | $rel_obj->non_correctable ) if $rel_obj->noncorr_set; |
385 | _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, |
386 | $rel_obj->non_independent ) if $rel_obj->nonind_set; |
387 | } |
388 | } |
389 | |
390 | sub _by_xmlid { |
2626f709 |
391 | my $tmp_a = $a; |
392 | my $tmp_b = $b; |
393 | $tmp_a =~ s/\D//g; |
394 | $tmp_b =~ s/\D//g; |
395 | return $tmp_a <=> $tmp_b; |
c84275ff |
396 | } |
397 | |
398 | sub _add_graphml_data { |
399 | my( $el, $key, $value ) = @_; |
400 | return unless defined $value; |
401 | my $data_el = $el->addNewChild( $el->namespaceURI, 'data' ); |
402 | $data_el->setAttribute( 'key', $key ); |
403 | $data_el->appendText( $value ); |
83d5ac3a |
404 | } |
405 | |
22222af9 |
406 | no Moose; |
407 | __PACKAGE__->meta->make_immutable; |
408 | |
409 | 1; |