keep from stripping meta readings, oops
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
26
27use_ok( 'Text::Tradition::Collation::RelationshipStore' );
28
29=end testing
30
22222af9 31=head1 METHODS
32
33=head2 new( collation => $collation );
34
35Creates a new relationship store for the given collation.
36
37=cut
38
39has 'collation' => (
40 is => 'ro',
41 isa => 'Text::Tradition::Collation',
42 required => 1,
43 weak_ref => 1,
44 );
45
46has 'scopedrels' => (
47 is => 'ro',
48 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
49 default => sub { {} },
50 );
51
52has 'graph' => (
53 is => 'ro',
54 isa => 'Graph',
55 default => sub { Graph->new( undirected => 1 ) },
56 handles => {
57 relationships => 'edges',
58 add_reading => 'add_vertex',
59 delete_reading => 'delete_vertex',
4633f9e4 60 delete_relationship => 'delete_edge',
22222af9 61 },
62 );
63
4633f9e4 64around 'delete_relationship' => sub {
65 my $orig = shift;
66 my $self = shift;
67 my @vector;
68 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
69 # Dereference the edge arrayref that was passed.
70 my $edge = shift;
71 @vector = @$edge;
72 } else {
73 @vector = @_;
74 }
75 return $self->$orig( @vector );
76};
77
3ae5e2ad 78=head2 get_relationship
79
80Return the relationship object, if any, that exists between two readings.
81
82=cut
83
84sub get_relationship {
4633f9e4 85 my $self = shift;
86 my @vector;
87 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
88 # Dereference the edge arrayref that was passed.
89 my $edge = shift;
90 @vector = @$edge;
91 } else {
92 @vector = @_;
93 }
3ae5e2ad 94 my $relationship;
95 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
96 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
97 }
98 return $relationship;
99}
100
101sub _set_relationship {
102 my( $self, $relationship, @vector ) = @_;
103 $self->graph->add_edge( @vector );
104 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
105}
a1615ee4 106
107sub _remove_relationship {
108 my( $self, @vector ) = @_;
109 $self->graph->delete_edge( @vector );
110}
3ae5e2ad 111
22222af9 112=head2 create
113
114Create a new relationship with the given options and return it.
115Warn and return undef if the relationship cannot be created.
116
117=cut
118
119sub create {
120 my( $self, $options ) = @_;
121 # Check to see if a relationship exists between the two given readings
122 my $source = delete $options->{'orig_a'};
123 my $target = delete $options->{'orig_b'};
3ae5e2ad 124 my $rel = $self->get_relationship( $source, $target );
125 if( $rel ) {
a7037072 126 if( $rel->type ne $options->{'type'} ) {
63778331 127 throw( "Another relationship of type " . $rel->type
128 . " already exists between $source and $target" );
22222af9 129 } else {
130 return $rel;
131 }
132 }
133
134 # Check to see if a nonlocal relationship is defined for the two readings
135 $rel = $self->scoped_relationship( $options->{'reading_a'},
136 $options->{'reading_b'} );
137 if( $rel && $rel->type eq $options->{'type'} ) {
138 return $rel;
139 } elsif( $rel ) {
63778331 140 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 141 } else {
142 $rel = Text::Tradition::Collation::Relationship->new( $options );
143 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
144 return $rel;
145 }
146}
147
148=head2 add_scoped_relationship( $rel )
149
150Keep track of relationships defined between specific readings that are scoped
151non-locally. Key on whichever reading occurs first alphabetically.
152
153=cut
154
155sub add_scoped_relationship {
156 my( $self, $rel ) = @_;
157 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
158 if( $r ) {
159 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
160 $r->type, $rel->reading_a, $rel->reading_b );
161 return;
162 }
163 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
164}
165
166=head2 scoped_relationship( $reading_a, $reading_b )
167
168Returns the general (document-level or global) relationship that has been defined
169between the two reading strings. Returns undef if there is no general relationship.
170
171=cut
172
173sub scoped_relationship {
174 my( $self, $rdga, $rdgb ) = @_;
175 my( $first, $second ) = sort( $rdga, $rdgb );
176 if( exists $self->scopedrels->{$first}->{$second} ) {
177 return $self->scopedrels->{$first}->{$second};
178 } else {
179 return undef;
180 }
181}
182
183=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
184
185Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
186for the possible options) between the readings given in $source and $target. Sets
187up a scoped relationship between $sourcetext and $targettext if the relationship is
188scoped non-locally.
189
190Returns a status boolean and a list of all reading pairs connected by the call to
191add_relationship.
192
193=cut
194
195sub add_relationship {
196 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
197
198 # Check the options
199 $options->{'scope'} = 'local' unless $options->{'scope'};
200
201 my( $is_valid, $reason ) =
202 $self->relationship_valid( $source, $target, $options->{'type'} );
203 unless( $is_valid ) {
63778331 204 throw( "Invalid relationship: $reason" );
22222af9 205 }
206
207 # Try to create the relationship object.
208 $options->{'reading_a'} = $source_rdg->text;
209 $options->{'reading_b'} = $target_rdg->text;
210 $options->{'orig_a'} = $source;
211 $options->{'orig_b'} = $target;
63778331 212 my $relationship = $self->create( $options ); # Will throw on error
22222af9 213
214 # Find all the pairs for which we need to set the relationship.
215 my @vectors = ( [ $source, $target ] );
216 if( $relationship->colocated && $relationship->nonlocal ) {
217 my $c = $self->collation;
218 # Set the same relationship everywhere we can, throughout the graph.
219 my @identical_readings = grep { $_->text eq $relationship->reading_a }
220 $c->readings;
221 foreach my $ir ( @identical_readings ) {
cf6c01be 222 next if $ir->id eq $source;
22222af9 223 # Check to see if there is a target reading with the same text at
224 # the same rank.
225 my @itarget = grep
226 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
227 $c->readings;
228 if( @itarget ) {
229 # We found a hit.
230 warn "More than one reading with text " . $target_rdg->text
231 . " at rank " . $ir->rank . "!" if @itarget > 1;
cf6c01be 232 push( @vectors, [ $ir->id, $itarget[0]->id ] );
22222af9 233 }
234 }
235 }
236
237 # Now set the relationship(s).
238 my @pairs_set;
239 foreach my $v ( @vectors ) {
3ae5e2ad 240 my $rel = $self->get_relationship( @$v );
241 if( $rel ) {
63778331 242 if( $rel->nonlocal ) {
243 throw( "Found conflicting relationship at @$v" );
244 } else {
245 warn "Not overriding local relationship set at @$v";
246 }
3ae5e2ad 247 next;
22222af9 248 }
3ae5e2ad 249 $self->_set_relationship( $relationship, @$v );
22222af9 250 push( @pairs_set, $v );
251 }
252
63778331 253 return @pairs_set;
22222af9 254}
255
256=head2 relationship_valid( $source, $target, $type )
257
258Checks whether a relationship of type $type may exist between the readings given
259in $source and $target. Returns a tuple of ( status, message ) where status is
260a yes/no boolean and, if the answer is no, message gives the reason why.
261
262=cut
263
264sub relationship_valid {
265 my( $self, $source, $target, $rel ) = @_;
266 my $c = $self->collation;
267 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
268 # Check that the two readings do (for a repetition) or do not (for
269 # a transposition) appear in the same witness.
32e95735 270 # TODO this might be called before witness paths are set...
22222af9 271 my %seen_wits;
272 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
273 foreach my $w ( $c->reading_witnesses( $target ) ) {
274 if( $seen_wits{$w} ) {
275 return ( 0, "Readings both occur in witness $w" )
276 if $rel eq 'transposition';
277 return ( 1, "ok" ) if $rel eq 'repetition';
278 }
279 return $rel eq 'transposition' ? ( 1, "ok" )
280 : ( 0, "Readings occur only in distinct witnesses" );
281 }
282 } else {
283 # Check that linking the source and target in a relationship won't lead
a1615ee4 284 # to a path loop for any witness. If they have the same rank then fine.
285 return( 1, "ok" )
84d4ca78 286 if $c->reading( $source )->has_rank
287 && $c->reading( $target )->has_rank
288 && $c->reading( $source )->rank == $c->reading( $target )->rank;
a1615ee4 289
290 # Otherwise, first make a lookup table of all the
22222af9 291 # readings related to either the source or the target.
292 my @proposed_related = ( $source, $target );
293 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
294 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
295 my %pr_ids;
296 map { $pr_ids{ $_ } = 1 } @proposed_related;
297
a1615ee4 298 # The cumulative predecessors and successors of the proposed-related readings
299 # should not overlap.
300 my %all_pred;
301 my %all_succ;
22222af9 302 foreach my $pr ( keys %pr_ids ) {
a1615ee4 303 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
304 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
305 }
306 foreach my $k ( keys %all_pred ) {
307 return( 0, "Relationship would create witness loop" )
308 if exists $all_succ{$k};
309 }
310 foreach my $k ( keys %pr_ids ) {
311 return( 0, "Relationship would create witness loop" )
312 if exists $all_pred{$k} || exists $all_succ{$k};
313 }
22222af9 314 return ( 1, "ok" );
315 }
316}
317
318=head2 related_readings( $reading, $colocated_only )
319
320Returns a list of readings that are connected via relationship links to $reading.
321If $colocated_only is true, restricts the list to those readings that are in the
322same logical location (and therefore have the same rank in the collation graph.)
323
324=cut
325
326sub related_readings {
327 my( $self, $reading, $colocated ) = @_;
328 my $return_object;
329 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
330 $reading = $reading->id;
331 $return_object = 1;
332 }
c84275ff 333 my @answer;
22222af9 334 if( $colocated ) {
c84275ff 335 my %found = ( $reading => 1 );
336 my $check = [ $reading ];
337 my $iter = 0;
338 while( @$check ) {
c84275ff 339 my $more = [];
340 foreach my $r ( @$check ) {
341 foreach my $nr ( $self->graph->neighbors( $r ) ) {
3ae5e2ad 342 if( $self->get_relationship( $r, $nr )->colocated ) {
c84275ff 343 push( @$more, $nr ) unless exists $found{$nr};
344 $found{$nr} = 1;
345 }
346 }
347 }
348 $check = $more;
22222af9 349 }
c84275ff 350 @answer = keys %found;
351 } else {
352 @answer = $self->graph->all_reachable( $reading );
22222af9 353 }
354 if( $return_object ) {
355 my $c = $self->collation;
c84275ff 356 return map { $c->reading( $_ ) } @answer;
22222af9 357 } else {
c84275ff 358 return @answer;
22222af9 359 }
360}
361
362=head2 merge_readings( $kept, $deleted );
363
364Makes a best-effort merge of the relationship links between the given readings, and
365stops tracking the to-be-deleted reading.
366
367=cut
368
369sub merge_readings {
370 my( $self, $kept, $deleted, $combined ) = @_;
371 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
372 # Get the pair of kept / rel
373 my @vector = ( $kept );
374 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
375 next if $vector[0] eq $vector[1]; # Don't add a self loop
376
377 # If kept changes its text, drop the relationship.
378 next if $combined;
379
380 # If kept / rel already has a relationship, warn and keep the old
3ae5e2ad 381 my $rel = $self->get_relationship( @vector );
382 if( $rel ) {
22222af9 383 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
384 next;
385 }
386
387 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 388 $rel = $self->get_relationship( @$edge );
389 $self->_set_relationship( $rel, @vector );
22222af9 390 }
391 $self->delete_reading( $deleted );
392}
393
027d819c 394sub _as_graphml {
2626f709 395 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 396
397 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
398 $rgraph->setAttribute( 'edgedefault', 'directed' );
399 $rgraph->setAttribute( 'id', 'relationships', );
400 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
401 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
402 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
403 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
404 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
405
406 # Add the vertices according to their XML IDs
2626f709 407 my %rdg_lookup = ( reverse %$node_hash );
408 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 409 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
410 $n_el->setAttribute( 'id', $n );
2626f709 411 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 412 }
413
414 # Add the relationship edges, with their object information
415 my $edge_ctr = 0;
416 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
417 # Add an edge and fill in its relationship info.
418 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
419 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
420 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
421 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
422
3ae5e2ad 423 my $rel_obj = $self->get_relationship( @$e );
c84275ff 424 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
425 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
426 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
427 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
428 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
429 $rel_obj->non_independent ) if $rel_obj->nonind_set;
430 }
431}
432
433sub _by_xmlid {
2626f709 434 my $tmp_a = $a;
435 my $tmp_b = $b;
436 $tmp_a =~ s/\D//g;
437 $tmp_b =~ s/\D//g;
438 return $tmp_a <=> $tmp_b;
c84275ff 439}
440
441sub _add_graphml_data {
442 my( $el, $key, $value ) = @_;
443 return unless defined $value;
444 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
445 $data_el->setAttribute( 'key', $key );
446 $data_el->appendText( $value );
83d5ac3a 447}
448
63778331 449sub throw {
450 Text::Tradition::Error->throw(
451 'ident' => 'Relationship error',
452 'message' => $_[0],
453 );
454}
455
22222af9 456no Moose;
457__PACKAGE__->meta->make_immutable;
458
4591;