oops, is not an object
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
5use Text::Tradition::Collation::Relationship;
6
7use Moose;
8
9=head1 NAME
10
2626f709 11Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
12between readings in a given collation
22222af9 13
14=head1 DESCRIPTION
15
16Text::Tradition is a library for representation and analysis of collated
17texts, particularly medieval ones. The RelationshipStore is an internal object
18of the collation, to keep track of the defined relationships (both specific and
19general) between readings.
20
21=head1 METHODS
22
23=head2 new( collation => $collation );
24
25Creates a new relationship store for the given collation.
26
27=cut
28
29has 'collation' => (
30 is => 'ro',
31 isa => 'Text::Tradition::Collation',
32 required => 1,
33 weak_ref => 1,
34 );
35
36has 'scopedrels' => (
37 is => 'ro',
38 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
39 default => sub { {} },
40 );
41
42has 'graph' => (
43 is => 'ro',
44 isa => 'Graph',
45 default => sub { Graph->new( undirected => 1 ) },
46 handles => {
47 relationships => 'edges',
48 add_reading => 'add_vertex',
49 delete_reading => 'delete_vertex',
50 },
51 );
52
53=head2 create
54
55Create a new relationship with the given options and return it.
56Warn and return undef if the relationship cannot be created.
57
58=cut
59
60sub create {
61 my( $self, $options ) = @_;
62 # Check to see if a relationship exists between the two given readings
63 my $source = delete $options->{'orig_a'};
64 my $target = delete $options->{'orig_b'};
65 my $rel;
66 if( $self->graph->has_edge( $source, $target ) ) {
67 $rel = $self->graph->get_edge_attribute( $source, $target, 'object' );
a7037072 68 if( $rel->type ne $options->{'type'} ) {
2626f709 69 warn "Another relationship of type " . $rel->type
a7037072 70 . " already exists between $source and $target";
22222af9 71 return;
72 } else {
73 return $rel;
74 }
75 }
76
77 # Check to see if a nonlocal relationship is defined for the two readings
78 $rel = $self->scoped_relationship( $options->{'reading_a'},
79 $options->{'reading_b'} );
80 if( $rel && $rel->type eq $options->{'type'} ) {
81 return $rel;
82 } elsif( $rel ) {
83 warn sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} );
84 return;
85 } else {
86 $rel = Text::Tradition::Collation::Relationship->new( $options );
87 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
88 return $rel;
89 }
90}
91
92=head2 add_scoped_relationship( $rel )
93
94Keep track of relationships defined between specific readings that are scoped
95non-locally. Key on whichever reading occurs first alphabetically.
96
97=cut
98
99sub add_scoped_relationship {
100 my( $self, $rel ) = @_;
101 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
102 if( $r ) {
103 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
104 $r->type, $rel->reading_a, $rel->reading_b );
105 return;
106 }
107 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
108}
109
110=head2 scoped_relationship( $reading_a, $reading_b )
111
112Returns the general (document-level or global) relationship that has been defined
113between the two reading strings. Returns undef if there is no general relationship.
114
115=cut
116
117sub scoped_relationship {
118 my( $self, $rdga, $rdgb ) = @_;
119 my( $first, $second ) = sort( $rdga, $rdgb );
120 if( exists $self->scopedrels->{$first}->{$second} ) {
121 return $self->scopedrels->{$first}->{$second};
122 } else {
123 return undef;
124 }
125}
126
127=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
128
129Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
130for the possible options) between the readings given in $source and $target. Sets
131up a scoped relationship between $sourcetext and $targettext if the relationship is
132scoped non-locally.
133
134Returns a status boolean and a list of all reading pairs connected by the call to
135add_relationship.
136
137=cut
138
139sub add_relationship {
140 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
141
142 # Check the options
143 $options->{'scope'} = 'local' unless $options->{'scope'};
144
145 my( $is_valid, $reason ) =
146 $self->relationship_valid( $source, $target, $options->{'type'} );
147 unless( $is_valid ) {
148 return ( undef, $reason );
149 }
150
151 # Try to create the relationship object.
152 $options->{'reading_a'} = $source_rdg->text;
153 $options->{'reading_b'} = $target_rdg->text;
154 $options->{'orig_a'} = $source;
155 $options->{'orig_b'} = $target;
156 my $relationship = $self->create( $options );
157 return( undef, "Relationship creation failed" ) unless $relationship;
158
159 # Find all the pairs for which we need to set the relationship.
160 my @vectors = ( [ $source, $target ] );
161 if( $relationship->colocated && $relationship->nonlocal ) {
162 my $c = $self->collation;
163 # Set the same relationship everywhere we can, throughout the graph.
164 my @identical_readings = grep { $_->text eq $relationship->reading_a }
165 $c->readings;
166 foreach my $ir ( @identical_readings ) {
cf6c01be 167 next if $ir->id eq $source;
22222af9 168 # Check to see if there is a target reading with the same text at
169 # the same rank.
170 my @itarget = grep
171 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
172 $c->readings;
173 if( @itarget ) {
174 # We found a hit.
175 warn "More than one reading with text " . $target_rdg->text
176 . " at rank " . $ir->rank . "!" if @itarget > 1;
cf6c01be 177 push( @vectors, [ $ir->id, $itarget[0]->id ] );
22222af9 178 }
179 }
180 }
181
182 # Now set the relationship(s).
183 my @pairs_set;
184 foreach my $v ( @vectors ) {
185 if( $self->graph->has_edge( @$v ) ) {
186 # Is it locally scoped?
c84275ff 187 my $rel = $self->graph->get_edge_attribute( @$v, 'object' );
22222af9 188 if( $rel->nonlocal ) {
189 # TODO I think we should not be able to get here.
190 warn "Found conflicting relationship at @$v";
191 } else {
192 warn "Not overriding local relationship set at @$v";
193 next;
194 }
195 }
196 $self->graph->add_edge( @$v );
197 $self->graph->set_edge_attribute( @$v, 'object', $relationship );
198 push( @pairs_set, $v );
199 }
200
201 return( 1, @pairs_set );
202}
203
204=head2 relationship_valid( $source, $target, $type )
205
206Checks whether a relationship of type $type may exist between the readings given
207in $source and $target. Returns a tuple of ( status, message ) where status is
208a yes/no boolean and, if the answer is no, message gives the reason why.
209
210=cut
211
212sub relationship_valid {
213 my( $self, $source, $target, $rel ) = @_;
214 my $c = $self->collation;
215 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
216 # Check that the two readings do (for a repetition) or do not (for
217 # a transposition) appear in the same witness.
218 my %seen_wits;
219 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
220 foreach my $w ( $c->reading_witnesses( $target ) ) {
221 if( $seen_wits{$w} ) {
222 return ( 0, "Readings both occur in witness $w" )
223 if $rel eq 'transposition';
224 return ( 1, "ok" ) if $rel eq 'repetition';
225 }
226 return $rel eq 'transposition' ? ( 1, "ok" )
227 : ( 0, "Readings occur only in distinct witnesses" );
228 }
229 } else {
230 # Check that linking the source and target in a relationship won't lead
231 # to a path loop for any witness. First make a lookup table of all the
232 # readings related to either the source or the target.
233 my @proposed_related = ( $source, $target );
234 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
235 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
236 my %pr_ids;
237 map { $pr_ids{ $_ } = 1 } @proposed_related;
238
239 # None of these proposed related readings should have a neighbor that
240 # is also in proposed_related.
241 foreach my $pr ( keys %pr_ids ) {
242 foreach my $neighbor( $c->sequence->neighbors( $pr ) ) {
243 return( 0, "Would relate neighboring readings $pr and $neighbor" )
244 if exists $pr_ids{$neighbor};
245 }
246 }
247 return ( 1, "ok" );
248 }
249}
250
251=head2 related_readings( $reading, $colocated_only )
252
253Returns a list of readings that are connected via relationship links to $reading.
254If $colocated_only is true, restricts the list to those readings that are in the
255same logical location (and therefore have the same rank in the collation graph.)
256
257=cut
258
259sub related_readings {
260 my( $self, $reading, $colocated ) = @_;
261 my $return_object;
262 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
263 $reading = $reading->id;
264 $return_object = 1;
265 }
c84275ff 266 my @answer;
22222af9 267 if( $colocated ) {
c84275ff 268 my %found = ( $reading => 1 );
269 my $check = [ $reading ];
270 my $iter = 0;
271 while( @$check ) {
c84275ff 272 my $more = [];
273 foreach my $r ( @$check ) {
274 foreach my $nr ( $self->graph->neighbors( $r ) ) {
275 if( $self->graph->get_edge_attribute( $r, $nr, 'object' )->colocated ) {
276 push( @$more, $nr ) unless exists $found{$nr};
277 $found{$nr} = 1;
278 }
279 }
280 }
281 $check = $more;
22222af9 282 }
c84275ff 283 @answer = keys %found;
284 } else {
285 @answer = $self->graph->all_reachable( $reading );
22222af9 286 }
287 if( $return_object ) {
288 my $c = $self->collation;
c84275ff 289 return map { $c->reading( $_ ) } @answer;
22222af9 290 } else {
c84275ff 291 return @answer;
22222af9 292 }
293}
294
295=head2 merge_readings( $kept, $deleted );
296
297Makes a best-effort merge of the relationship links between the given readings, and
298stops tracking the to-be-deleted reading.
299
300=cut
301
302sub merge_readings {
303 my( $self, $kept, $deleted, $combined ) = @_;
304 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
305 # Get the pair of kept / rel
306 my @vector = ( $kept );
307 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
308 next if $vector[0] eq $vector[1]; # Don't add a self loop
309
310 # If kept changes its text, drop the relationship.
311 next if $combined;
312
313 # If kept / rel already has a relationship, warn and keep the old
314 if( $self->graph->has_edge( @vector ) ) {
315 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
316 next;
317 }
318
319 # Otherwise, adopt the relationship that would be deleted.
320 my $rel = $self->graph->get_edge_attribute( @$edge, 'object' );
321 $self->graph->add_edge( @vector );
322 $self->graph->set_edge_attribute( @vector, 'object', $rel );
323 }
324 $self->delete_reading( $deleted );
325}
326
c84275ff 327sub as_graphml {
2626f709 328 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 329
330 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
331 $rgraph->setAttribute( 'edgedefault', 'directed' );
332 $rgraph->setAttribute( 'id', 'relationships', );
333 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
334 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
335 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
336 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
337 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
338
339 # Add the vertices according to their XML IDs
2626f709 340 my %rdg_lookup = ( reverse %$node_hash );
341 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 342 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
343 $n_el->setAttribute( 'id', $n );
2626f709 344 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 345 }
346
347 # Add the relationship edges, with their object information
348 my $edge_ctr = 0;
349 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
350 # Add an edge and fill in its relationship info.
351 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
352 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
353 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
354 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
355
356 my $rel_obj = $self->graph->get_edge_attribute( @$e, 'object' );
357 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
358 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
359 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
360 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
361 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
362 $rel_obj->non_independent ) if $rel_obj->nonind_set;
363 }
364}
365
366sub _by_xmlid {
2626f709 367 my $tmp_a = $a;
368 my $tmp_b = $b;
369 $tmp_a =~ s/\D//g;
370 $tmp_b =~ s/\D//g;
371 return $tmp_a <=> $tmp_b;
c84275ff 372}
373
374sub _add_graphml_data {
375 my( $el, $key, $value ) = @_;
376 return unless defined $value;
377 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
378 $data_el->setAttribute( 'key', $key );
379 $data_el->appendText( $value );
83d5ac3a 380}
381
22222af9 382no Moose;
383__PACKAGE__->meta->make_immutable;
384
3851;