refactored Analysis module with associated changes
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
44my @v2 = $c->add_relationship( 'n9', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
49@v2 = $c->del_relationship( 'n8', 'n13' );
50is( scalar @v2, 2, "Deleted second global relationship" );
51try {
52 my @v3 = $c->del_relationship( 'n1', 'n2' );
53 ok( 0, "Should have errored on non-existent relationship" );
54} catch( Text::Tradition::Error $e ) {
55 like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56}
57
3ae5e2ad 58=end testing
59
22222af9 60=head1 METHODS
61
62=head2 new( collation => $collation );
63
64Creates a new relationship store for the given collation.
65
66=cut
67
68has 'collation' => (
69 is => 'ro',
70 isa => 'Text::Tradition::Collation',
71 required => 1,
72 weak_ref => 1,
73 );
74
75has 'scopedrels' => (
76 is => 'ro',
77 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78 default => sub { {} },
79 );
80
81has 'graph' => (
82 is => 'ro',
83 isa => 'Graph',
84 default => sub { Graph->new( undirected => 1 ) },
85 handles => {
86 relationships => 'edges',
87 add_reading => 'add_vertex',
88 delete_reading => 'delete_vertex',
4633f9e4 89 delete_relationship => 'delete_edge',
22222af9 90 },
91 );
92
4633f9e4 93around 'delete_relationship' => sub {
94 my $orig = shift;
95 my $self = shift;
96 my @vector;
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
99 my $edge = shift;
100 @vector = @$edge;
101 } else {
102 @vector = @_;
103 }
104 return $self->$orig( @vector );
105};
106
3ae5e2ad 107=head2 get_relationship
108
109Return the relationship object, if any, that exists between two readings.
110
111=cut
112
113sub get_relationship {
4633f9e4 114 my $self = shift;
115 my @vector;
116 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
117 # Dereference the edge arrayref that was passed.
118 my $edge = shift;
119 @vector = @$edge;
120 } else {
121 @vector = @_;
122 }
3ae5e2ad 123 my $relationship;
124 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
125 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
126 }
127 return $relationship;
128}
129
130sub _set_relationship {
131 my( $self, $relationship, @vector ) = @_;
132 $self->graph->add_edge( @vector );
133 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
134}
a1615ee4 135
136sub _remove_relationship {
137 my( $self, @vector ) = @_;
138 $self->graph->delete_edge( @vector );
139}
3ae5e2ad 140
22222af9 141=head2 create
142
143Create a new relationship with the given options and return it.
144Warn and return undef if the relationship cannot be created.
145
146=cut
147
148sub create {
149 my( $self, $options ) = @_;
150 # Check to see if a relationship exists between the two given readings
151 my $source = delete $options->{'orig_a'};
152 my $target = delete $options->{'orig_b'};
3ae5e2ad 153 my $rel = $self->get_relationship( $source, $target );
154 if( $rel ) {
a7037072 155 if( $rel->type ne $options->{'type'} ) {
63778331 156 throw( "Another relationship of type " . $rel->type
157 . " already exists between $source and $target" );
22222af9 158 } else {
159 return $rel;
160 }
161 }
162
163 # Check to see if a nonlocal relationship is defined for the two readings
164 $rel = $self->scoped_relationship( $options->{'reading_a'},
165 $options->{'reading_b'} );
166 if( $rel && $rel->type eq $options->{'type'} ) {
167 return $rel;
168 } elsif( $rel ) {
63778331 169 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 170 } else {
171 $rel = Text::Tradition::Collation::Relationship->new( $options );
172 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
173 return $rel;
174 }
175}
176
177=head2 add_scoped_relationship( $rel )
178
179Keep track of relationships defined between specific readings that are scoped
180non-locally. Key on whichever reading occurs first alphabetically.
181
182=cut
183
184sub add_scoped_relationship {
185 my( $self, $rel ) = @_;
186 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
187 if( $r ) {
188 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
189 $r->type, $rel->reading_a, $rel->reading_b );
190 return;
191 }
192 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
193}
194
195=head2 scoped_relationship( $reading_a, $reading_b )
196
197Returns the general (document-level or global) relationship that has been defined
198between the two reading strings. Returns undef if there is no general relationship.
199
200=cut
201
202sub scoped_relationship {
203 my( $self, $rdga, $rdgb ) = @_;
204 my( $first, $second ) = sort( $rdga, $rdgb );
205 if( exists $self->scopedrels->{$first}->{$second} ) {
206 return $self->scopedrels->{$first}->{$second};
207 } else {
208 return undef;
209 }
210}
211
212=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
213
214Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
215for the possible options) between the readings given in $source and $target. Sets
216up a scoped relationship between $sourcetext and $targettext if the relationship is
217scoped non-locally.
218
219Returns a status boolean and a list of all reading pairs connected by the call to
220add_relationship.
221
222=cut
223
224sub add_relationship {
225 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
226
227 # Check the options
228 $options->{'scope'} = 'local' unless $options->{'scope'};
229
230 my( $is_valid, $reason ) =
231 $self->relationship_valid( $source, $target, $options->{'type'} );
232 unless( $is_valid ) {
63778331 233 throw( "Invalid relationship: $reason" );
22222af9 234 }
235
236 # Try to create the relationship object.
237 $options->{'reading_a'} = $source_rdg->text;
238 $options->{'reading_b'} = $target_rdg->text;
239 $options->{'orig_a'} = $source;
240 $options->{'orig_b'} = $target;
63778331 241 my $relationship = $self->create( $options ); # Will throw on error
22222af9 242
243 # Find all the pairs for which we need to set the relationship.
244 my @vectors = ( [ $source, $target ] );
245 if( $relationship->colocated && $relationship->nonlocal ) {
246 my $c = $self->collation;
247 # Set the same relationship everywhere we can, throughout the graph.
248 my @identical_readings = grep { $_->text eq $relationship->reading_a }
249 $c->readings;
250 foreach my $ir ( @identical_readings ) {
cf6c01be 251 next if $ir->id eq $source;
22222af9 252 # Check to see if there is a target reading with the same text at
253 # the same rank.
254 my @itarget = grep
255 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
256 $c->readings;
257 if( @itarget ) {
258 # We found a hit.
259 warn "More than one reading with text " . $target_rdg->text
260 . " at rank " . $ir->rank . "!" if @itarget > 1;
cf6c01be 261 push( @vectors, [ $ir->id, $itarget[0]->id ] );
22222af9 262 }
263 }
264 }
265
266 # Now set the relationship(s).
267 my @pairs_set;
268 foreach my $v ( @vectors ) {
3ae5e2ad 269 my $rel = $self->get_relationship( @$v );
270 if( $rel ) {
63778331 271 if( $rel->nonlocal ) {
272 throw( "Found conflicting relationship at @$v" );
273 } else {
274 warn "Not overriding local relationship set at @$v";
275 }
3ae5e2ad 276 next;
22222af9 277 }
3ae5e2ad 278 $self->_set_relationship( $relationship, @$v );
22222af9 279 push( @pairs_set, $v );
280 }
281
63778331 282 return @pairs_set;
22222af9 283}
284
ee801e17 285=head2 del_relationship( $source, $target )
286
287Removes the relationship between the given readings. If the relationship is
288non-local, removes the relationship everywhere in the graph.
289
290=cut
291
292sub del_relationship {
293 my( $self, $source, $target ) = @_;
294 my $rel = $self->get_relationship( $source, $target );
295 throw( "No relationship defined between $source and $target" ) unless $rel;
296 my @vectors = ( [ $source, $target ] );
297 $self->_remove_relationship( $source, $target );
298 if( $rel->nonlocal ) {
299 # Remove the relationship wherever it occurs.
300 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
301 $self->relationships;
302 foreach my $re ( @rel_edges ) {
303 $self->_remove_relationship( @$re );
304 push( @vectors, $re );
305 }
306 }
307 return @vectors;
308}
309
22222af9 310=head2 relationship_valid( $source, $target, $type )
311
312Checks whether a relationship of type $type may exist between the readings given
313in $source and $target. Returns a tuple of ( status, message ) where status is
314a yes/no boolean and, if the answer is no, message gives the reason why.
315
316=cut
317
318sub relationship_valid {
319 my( $self, $source, $target, $rel ) = @_;
320 my $c = $self->collation;
321 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
322 # Check that the two readings do (for a repetition) or do not (for
323 # a transposition) appear in the same witness.
32e95735 324 # TODO this might be called before witness paths are set...
22222af9 325 my %seen_wits;
326 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
327 foreach my $w ( $c->reading_witnesses( $target ) ) {
328 if( $seen_wits{$w} ) {
329 return ( 0, "Readings both occur in witness $w" )
330 if $rel eq 'transposition';
331 return ( 1, "ok" ) if $rel eq 'repetition';
332 }
333 return $rel eq 'transposition' ? ( 1, "ok" )
334 : ( 0, "Readings occur only in distinct witnesses" );
335 }
336 } else {
337 # Check that linking the source and target in a relationship won't lead
a1615ee4 338 # to a path loop for any witness. If they have the same rank then fine.
339 return( 1, "ok" )
84d4ca78 340 if $c->reading( $source )->has_rank
341 && $c->reading( $target )->has_rank
342 && $c->reading( $source )->rank == $c->reading( $target )->rank;
a1615ee4 343
344 # Otherwise, first make a lookup table of all the
22222af9 345 # readings related to either the source or the target.
346 my @proposed_related = ( $source, $target );
347 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
348 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
349 my %pr_ids;
350 map { $pr_ids{ $_ } = 1 } @proposed_related;
351
a1615ee4 352 # The cumulative predecessors and successors of the proposed-related readings
353 # should not overlap.
354 my %all_pred;
355 my %all_succ;
22222af9 356 foreach my $pr ( keys %pr_ids ) {
a1615ee4 357 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
358 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
359 }
360 foreach my $k ( keys %all_pred ) {
361 return( 0, "Relationship would create witness loop" )
362 if exists $all_succ{$k};
363 }
364 foreach my $k ( keys %pr_ids ) {
365 return( 0, "Relationship would create witness loop" )
366 if exists $all_pred{$k} || exists $all_succ{$k};
367 }
22222af9 368 return ( 1, "ok" );
369 }
370}
371
7f52eac8 372=head2 related_readings( $reading, $filter )
22222af9 373
374Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 375If $filter is set to a subroutine ref, returns only those related readings where
376$filter( $relationship ) returns a true value.
22222af9 377
378=cut
379
380sub related_readings {
7f52eac8 381 my( $self, $reading, $filter ) = @_;
22222af9 382 my $return_object;
383 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
384 $reading = $reading->id;
385 $return_object = 1;
386 }
c84275ff 387 my @answer;
7f52eac8 388 if( $filter ) {
389 # Backwards compat
390 if( $filter eq 'colocated' ) {
391 $filter = sub { $_[0]->colocated };
392 }
c84275ff 393 my %found = ( $reading => 1 );
394 my $check = [ $reading ];
395 my $iter = 0;
396 while( @$check ) {
c84275ff 397 my $more = [];
398 foreach my $r ( @$check ) {
399 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 400 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 401 push( @$more, $nr ) unless exists $found{$nr};
402 $found{$nr} = 1;
403 }
404 }
405 }
406 $check = $more;
22222af9 407 }
7f52eac8 408 delete $found{$reading};
c84275ff 409 @answer = keys %found;
410 } else {
411 @answer = $self->graph->all_reachable( $reading );
22222af9 412 }
413 if( $return_object ) {
414 my $c = $self->collation;
c84275ff 415 return map { $c->reading( $_ ) } @answer;
22222af9 416 } else {
c84275ff 417 return @answer;
22222af9 418 }
419}
420
421=head2 merge_readings( $kept, $deleted );
422
423Makes a best-effort merge of the relationship links between the given readings, and
424stops tracking the to-be-deleted reading.
425
426=cut
427
428sub merge_readings {
429 my( $self, $kept, $deleted, $combined ) = @_;
430 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
431 # Get the pair of kept / rel
432 my @vector = ( $kept );
433 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
434 next if $vector[0] eq $vector[1]; # Don't add a self loop
435
436 # If kept changes its text, drop the relationship.
437 next if $combined;
438
439 # If kept / rel already has a relationship, warn and keep the old
3ae5e2ad 440 my $rel = $self->get_relationship( @vector );
441 if( $rel ) {
22222af9 442 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
443 next;
444 }
445
446 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 447 $rel = $self->get_relationship( @$edge );
448 $self->_set_relationship( $rel, @vector );
22222af9 449 }
450 $self->delete_reading( $deleted );
451}
452
027d819c 453sub _as_graphml {
2626f709 454 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 455
456 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
457 $rgraph->setAttribute( 'edgedefault', 'directed' );
458 $rgraph->setAttribute( 'id', 'relationships', );
459 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
460 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
461 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
462 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
463 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
464
465 # Add the vertices according to their XML IDs
2626f709 466 my %rdg_lookup = ( reverse %$node_hash );
467 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 468 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
469 $n_el->setAttribute( 'id', $n );
2626f709 470 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 471 }
472
473 # Add the relationship edges, with their object information
474 my $edge_ctr = 0;
475 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
476 # Add an edge and fill in its relationship info.
477 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
478 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
479 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
480 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
481
3ae5e2ad 482 my $rel_obj = $self->get_relationship( @$e );
c84275ff 483 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
484 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
485 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
486 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
487 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
488 $rel_obj->non_independent ) if $rel_obj->nonind_set;
489 }
490}
491
492sub _by_xmlid {
2626f709 493 my $tmp_a = $a;
494 my $tmp_b = $b;
495 $tmp_a =~ s/\D//g;
496 $tmp_b =~ s/\D//g;
497 return $tmp_a <=> $tmp_b;
c84275ff 498}
499
500sub _add_graphml_data {
501 my( $el, $key, $value ) = @_;
502 return unless defined $value;
503 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
504 $data_el->setAttribute( 'key', $key );
505 $data_el->appendText( $value );
83d5ac3a 506}
507
63778331 508sub throw {
509 Text::Tradition::Error->throw(
510 'ident' => 'Relationship error',
511 'message' => $_[0],
512 );
513}
514
22222af9 515no Moose;
516__PACKAGE__->meta->make_immutable;
517
5181;