restore total count; get rid of benchmarking
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
5use Text::Tradition::Collation::Relationship;
6
7use Moose;
8
9=head1 NAME
10
2626f709 11Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
12between readings in a given collation
22222af9 13
14=head1 DESCRIPTION
15
16Text::Tradition is a library for representation and analysis of collated
17texts, particularly medieval ones. The RelationshipStore is an internal object
18of the collation, to keep track of the defined relationships (both specific and
19general) between readings.
20
3ae5e2ad 21=begin testing
22
23use Text::Tradition;
24
25use_ok( 'Text::Tradition::Collation::RelationshipStore' );
26
27=end testing
28
22222af9 29=head1 METHODS
30
31=head2 new( collation => $collation );
32
33Creates a new relationship store for the given collation.
34
35=cut
36
37has 'collation' => (
38 is => 'ro',
39 isa => 'Text::Tradition::Collation',
40 required => 1,
41 weak_ref => 1,
42 );
43
44has 'scopedrels' => (
45 is => 'ro',
46 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
47 default => sub { {} },
48 );
49
50has 'graph' => (
51 is => 'ro',
52 isa => 'Graph',
53 default => sub { Graph->new( undirected => 1 ) },
54 handles => {
55 relationships => 'edges',
56 add_reading => 'add_vertex',
57 delete_reading => 'delete_vertex',
58 },
59 );
60
3ae5e2ad 61=head2 get_relationship
62
63Return the relationship object, if any, that exists between two readings.
64
65=cut
66
67sub get_relationship {
68 my( $self, @vector ) = @_;
69 my $relationship;
70 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
71 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
72 }
73 return $relationship;
74}
75
76sub _set_relationship {
77 my( $self, $relationship, @vector ) = @_;
78 $self->graph->add_edge( @vector );
79 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
80}
81
22222af9 82=head2 create
83
84Create a new relationship with the given options and return it.
85Warn and return undef if the relationship cannot be created.
86
87=cut
88
89sub create {
90 my( $self, $options ) = @_;
91 # Check to see if a relationship exists between the two given readings
92 my $source = delete $options->{'orig_a'};
93 my $target = delete $options->{'orig_b'};
3ae5e2ad 94 my $rel = $self->get_relationship( $source, $target );
95 if( $rel ) {
a7037072 96 if( $rel->type ne $options->{'type'} ) {
2626f709 97 warn "Another relationship of type " . $rel->type
a7037072 98 . " already exists between $source and $target";
22222af9 99 return;
100 } else {
101 return $rel;
102 }
103 }
104
105 # Check to see if a nonlocal relationship is defined for the two readings
106 $rel = $self->scoped_relationship( $options->{'reading_a'},
107 $options->{'reading_b'} );
108 if( $rel && $rel->type eq $options->{'type'} ) {
109 return $rel;
110 } elsif( $rel ) {
111 warn sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} );
112 return;
113 } else {
114 $rel = Text::Tradition::Collation::Relationship->new( $options );
115 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
116 return $rel;
117 }
118}
119
120=head2 add_scoped_relationship( $rel )
121
122Keep track of relationships defined between specific readings that are scoped
123non-locally. Key on whichever reading occurs first alphabetically.
124
125=cut
126
127sub add_scoped_relationship {
128 my( $self, $rel ) = @_;
129 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
130 if( $r ) {
131 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
132 $r->type, $rel->reading_a, $rel->reading_b );
133 return;
134 }
135 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
136}
137
138=head2 scoped_relationship( $reading_a, $reading_b )
139
140Returns the general (document-level or global) relationship that has been defined
141between the two reading strings. Returns undef if there is no general relationship.
142
143=cut
144
145sub scoped_relationship {
146 my( $self, $rdga, $rdgb ) = @_;
147 my( $first, $second ) = sort( $rdga, $rdgb );
148 if( exists $self->scopedrels->{$first}->{$second} ) {
149 return $self->scopedrels->{$first}->{$second};
150 } else {
151 return undef;
152 }
153}
154
155=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
156
157Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
158for the possible options) between the readings given in $source and $target. Sets
159up a scoped relationship between $sourcetext and $targettext if the relationship is
160scoped non-locally.
161
162Returns a status boolean and a list of all reading pairs connected by the call to
163add_relationship.
164
165=cut
166
167sub add_relationship {
168 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
169
170 # Check the options
171 $options->{'scope'} = 'local' unless $options->{'scope'};
172
173 my( $is_valid, $reason ) =
174 $self->relationship_valid( $source, $target, $options->{'type'} );
175 unless( $is_valid ) {
176 return ( undef, $reason );
177 }
178
179 # Try to create the relationship object.
180 $options->{'reading_a'} = $source_rdg->text;
181 $options->{'reading_b'} = $target_rdg->text;
182 $options->{'orig_a'} = $source;
183 $options->{'orig_b'} = $target;
184 my $relationship = $self->create( $options );
185 return( undef, "Relationship creation failed" ) unless $relationship;
186
187 # Find all the pairs for which we need to set the relationship.
188 my @vectors = ( [ $source, $target ] );
189 if( $relationship->colocated && $relationship->nonlocal ) {
190 my $c = $self->collation;
191 # Set the same relationship everywhere we can, throughout the graph.
192 my @identical_readings = grep { $_->text eq $relationship->reading_a }
193 $c->readings;
194 foreach my $ir ( @identical_readings ) {
cf6c01be 195 next if $ir->id eq $source;
22222af9 196 # Check to see if there is a target reading with the same text at
197 # the same rank.
198 my @itarget = grep
199 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
200 $c->readings;
201 if( @itarget ) {
202 # We found a hit.
203 warn "More than one reading with text " . $target_rdg->text
204 . " at rank " . $ir->rank . "!" if @itarget > 1;
cf6c01be 205 push( @vectors, [ $ir->id, $itarget[0]->id ] );
22222af9 206 }
207 }
208 }
209
210 # Now set the relationship(s).
211 my @pairs_set;
212 foreach my $v ( @vectors ) {
3ae5e2ad 213 my $rel = $self->get_relationship( @$v );
214 if( $rel ) {
215 my $warning = $rel->nonlocal
216 ? "Found conflicting relationship at @$v"
217 : "Not overriding local relationship set at @$v";
218 warn $warning;
219 next;
22222af9 220 }
3ae5e2ad 221 $self->_set_relationship( $relationship, @$v );
22222af9 222 push( @pairs_set, $v );
223 }
224
225 return( 1, @pairs_set );
226}
227
228=head2 relationship_valid( $source, $target, $type )
229
230Checks whether a relationship of type $type may exist between the readings given
231in $source and $target. Returns a tuple of ( status, message ) where status is
232a yes/no boolean and, if the answer is no, message gives the reason why.
233
234=cut
235
236sub relationship_valid {
237 my( $self, $source, $target, $rel ) = @_;
238 my $c = $self->collation;
239 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
240 # Check that the two readings do (for a repetition) or do not (for
241 # a transposition) appear in the same witness.
242 my %seen_wits;
243 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
244 foreach my $w ( $c->reading_witnesses( $target ) ) {
245 if( $seen_wits{$w} ) {
246 return ( 0, "Readings both occur in witness $w" )
247 if $rel eq 'transposition';
248 return ( 1, "ok" ) if $rel eq 'repetition';
249 }
250 return $rel eq 'transposition' ? ( 1, "ok" )
251 : ( 0, "Readings occur only in distinct witnesses" );
252 }
253 } else {
254 # Check that linking the source and target in a relationship won't lead
255 # to a path loop for any witness. First make a lookup table of all the
256 # readings related to either the source or the target.
257 my @proposed_related = ( $source, $target );
258 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
259 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
260 my %pr_ids;
261 map { $pr_ids{ $_ } = 1 } @proposed_related;
262
263 # None of these proposed related readings should have a neighbor that
264 # is also in proposed_related.
265 foreach my $pr ( keys %pr_ids ) {
266 foreach my $neighbor( $c->sequence->neighbors( $pr ) ) {
267 return( 0, "Would relate neighboring readings $pr and $neighbor" )
268 if exists $pr_ids{$neighbor};
269 }
270 }
271 return ( 1, "ok" );
272 }
273}
274
275=head2 related_readings( $reading, $colocated_only )
276
277Returns a list of readings that are connected via relationship links to $reading.
278If $colocated_only is true, restricts the list to those readings that are in the
279same logical location (and therefore have the same rank in the collation graph.)
280
281=cut
282
283sub related_readings {
284 my( $self, $reading, $colocated ) = @_;
285 my $return_object;
286 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
287 $reading = $reading->id;
288 $return_object = 1;
289 }
c84275ff 290 my @answer;
22222af9 291 if( $colocated ) {
c84275ff 292 my %found = ( $reading => 1 );
293 my $check = [ $reading ];
294 my $iter = 0;
295 while( @$check ) {
c84275ff 296 my $more = [];
297 foreach my $r ( @$check ) {
298 foreach my $nr ( $self->graph->neighbors( $r ) ) {
3ae5e2ad 299 if( $self->get_relationship( $r, $nr )->colocated ) {
c84275ff 300 push( @$more, $nr ) unless exists $found{$nr};
301 $found{$nr} = 1;
302 }
303 }
304 }
305 $check = $more;
22222af9 306 }
c84275ff 307 @answer = keys %found;
308 } else {
309 @answer = $self->graph->all_reachable( $reading );
22222af9 310 }
311 if( $return_object ) {
312 my $c = $self->collation;
c84275ff 313 return map { $c->reading( $_ ) } @answer;
22222af9 314 } else {
c84275ff 315 return @answer;
22222af9 316 }
317}
318
319=head2 merge_readings( $kept, $deleted );
320
321Makes a best-effort merge of the relationship links between the given readings, and
322stops tracking the to-be-deleted reading.
323
324=cut
325
326sub merge_readings {
327 my( $self, $kept, $deleted, $combined ) = @_;
328 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
329 # Get the pair of kept / rel
330 my @vector = ( $kept );
331 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
332 next if $vector[0] eq $vector[1]; # Don't add a self loop
333
334 # If kept changes its text, drop the relationship.
335 next if $combined;
336
337 # If kept / rel already has a relationship, warn and keep the old
3ae5e2ad 338 my $rel = $self->get_relationship( @vector );
339 if( $rel ) {
22222af9 340 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
341 next;
342 }
343
344 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 345 $rel = $self->get_relationship( @$edge );
346 $self->_set_relationship( $rel, @vector );
22222af9 347 }
348 $self->delete_reading( $deleted );
349}
350
c84275ff 351sub as_graphml {
2626f709 352 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 353
354 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
355 $rgraph->setAttribute( 'edgedefault', 'directed' );
356 $rgraph->setAttribute( 'id', 'relationships', );
357 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
358 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
359 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
360 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
361 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
362
363 # Add the vertices according to their XML IDs
2626f709 364 my %rdg_lookup = ( reverse %$node_hash );
365 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 366 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
367 $n_el->setAttribute( 'id', $n );
2626f709 368 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 369 }
370
371 # Add the relationship edges, with their object information
372 my $edge_ctr = 0;
373 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
374 # Add an edge and fill in its relationship info.
375 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
376 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
377 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
378 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
379
3ae5e2ad 380 my $rel_obj = $self->get_relationship( @$e );
c84275ff 381 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
382 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
383 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
384 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
385 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
386 $rel_obj->non_independent ) if $rel_obj->nonind_set;
387 }
388}
389
390sub _by_xmlid {
2626f709 391 my $tmp_a = $a;
392 my $tmp_b = $b;
393 $tmp_a =~ s/\D//g;
394 $tmp_b =~ s/\D//g;
395 return $tmp_a <=> $tmp_b;
c84275ff 396}
397
398sub _add_graphml_data {
399 my( $el, $key, $value ) = @_;
400 return unless defined $value;
401 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
402 $data_el->setAttribute( 'key', $key );
403 $data_el->appendText( $value );
83d5ac3a 404}
405
22222af9 406no Moose;
407__PACKAGE__->meta->make_immutable;
408
4091;