replace collation relationships whenever we can
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
44my @v2 = $c->add_relationship( 'n9', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
49@v2 = $c->del_relationship( 'n8', 'n13' );
50is( scalar @v2, 2, "Deleted second global relationship" );
681893aa 51my @v3 = $c->del_relationship( 'n1', 'n2' );
52is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
ee801e17 53
3ae5e2ad 54=end testing
55
22222af9 56=head1 METHODS
57
58=head2 new( collation => $collation );
59
60Creates a new relationship store for the given collation.
61
62=cut
63
# The collation this store belongs to. Required at construction time and
# held as a weak reference.
has collation => (
    isa      => 'Text::Tradition::Collation',
    is       => 'ro',
    weak_ref => 1,
    required => 1,
);

# Two-level lookup of relationship objects, keyed on a pair of reading
# strings (see add_scoped_relationship for how the keys are chosen).
has scopedrels => (
    isa     => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
    is      => 'ro',
    default => sub { return {}; },
);

# Undirected graph whose vertices are readings and whose edges carry the
# relationship objects. A few graph methods are re-exported under
# store-specific names.
has graph => (
    isa     => 'Graph',
    is      => 'ro',
    default => sub { return Graph->new( undirected => 1 ); },
    handles => {
        relationships  => 'edges',
        add_reading    => 'add_vertex',
        delete_reading => 'delete_vertex',
    },
);
87
3ae5e2ad 88=head2 get_relationship
89
90Return the relationship object, if any, that exists between two readings.
91
92=cut
93
sub get_relationship {
    my ( $self, @args ) = @_;

    # A single array reference is taken to be an edge and is unpacked into
    # its endpoints; any other argument list is used as the endpoints as-is.
    my @endpoints = @args;
    if ( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
        @endpoints = @{ $args[0] };
    }

    # The relationship object lives in the 'object' attribute of the edge.
    # An explicit undef is returned when the attribute is not set.
    my $graph = $self->graph;
    return undef unless $graph->has_edge_attribute( @endpoints, 'object' );
    my $found = $graph->get_edge_attribute( @endpoints, 'object' );
    return $found;
}
110
# Attach $relationship to the edge between the given endpoints, adding
# the edge to the graph first.
sub _set_relationship {
    my $self         = shift;
    my $relationship = shift;
    my @endpoints    = @_;

    my $graph = $self->graph;
    $graph->add_edge( @endpoints );
    $graph->set_edge_attribute( @endpoints, 'object', $relationship );
}
a1615ee4 116
22222af9 117=head2 create
118
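Create a new relationship with the given options and return it. If a relationship
of the same type already exists between the readings, return that one instead.
Throws an exception if a conflicting relationship is already defined.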
121
122=cut
123
sub create {
    my ( $self, $options ) = @_;

    # The reading IDs are removed from the caller's options hash here; the
    # remaining options are what is passed to the Relationship constructor.
    my $source = delete $options->{'orig_a'};
    my $target = delete $options->{'orig_b'};

    # First look for a relationship already set on this exact pair.
    my $existing = $self->get_relationship( $source, $target );
    if ( $existing ) {
        if ( $existing->type ne 'collated' ) {
            # Same type: nothing to create, hand back what is there.
            return $existing if $existing->type eq $options->{'type'};
            # Different type: refuse.
            throw( "Another relationship of type " . $existing->type
                . " already exists between $source and $target" );
        }
        # Always replace a 'collated' relationship with a more descriptive
        # one, if asked.
        $self->del_relationship( $source, $target );
    }

    # Then look for a nonlocal relationship defined on the reading texts.
    # NOTE(review): add_scoped_relationship lowercases the texts of
    # non-orthographic relationships before storing them, whereas this
    # lookup uses the texts as given - confirm that is intended.
    my $scoped = $self->scoped_relationship( $options->{'reading_a'},
        $options->{'reading_b'} );
    if ( $scoped ) {
        return $scoped if $scoped->type eq $options->{'type'};
        throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $scoped->type, $scoped->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
    }

    # Nothing in the way: build the object, and register it by reading
    # text if it applies beyond this one pair.
    my $created = Text::Tradition::Collation::Relationship->new( $options );
    $self->add_scoped_relationship( $created ) if $created->nonlocal;
    return $created;
}
156
157=head2 add_scoped_relationship( $rel )
158
159Keep track of relationships defined between specific readings that are scoped
160non-locally. Key on whichever reading occurs first alphabetically.
161
162=cut
163
sub add_scoped_relationship {
    my ( $self, $rel ) = @_;

    # Orthographic relationships are keyed on the exact reading texts;
    # every other type is keyed case-insensitively.
    my @texts = ( $rel->reading_a, $rel->reading_b );
    @texts = map { lc( $_ ) } @texts unless $rel->type eq 'orthographic';

    # Only one scoped relationship may be registered per pair of texts.
    if ( my $known = $self->scoped_relationship( @texts ) ) {
        warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
            $known->type, $texts[0], $texts[1] );
        return;
    }

    # Store under the text that sorts first, then the one that sorts second.
    my ( $first, $second ) = sort @texts;
    $self->scopedrels->{$first}->{$second} = $rel;
}
177
178=head2 scoped_relationship( $reading_a, $reading_b )
179
180Returns the general (document-level or global) relationship that has been defined
181between the two reading strings. Returns undef if there is no general relationship.
182
183=cut
184
sub scoped_relationship {
    my ( $self, $rdga, $rdgb ) = @_;

    # Entries are stored under the text that sorts first, then the other.
    my ( $first, $second ) = sort( $rdga, $rdgb );
    my $scoped = $self->scopedrels;

    # Test each level separately. Asking for the inner key directly would
    # autovivify an empty hash under $first on every miss, leaving stray
    # entries in the scopedrels attribute.
    # The explicit undef (rather than a bare return) is kept so that
    # list-context callers still receive exactly one value.
    return undef unless exists $scoped->{$first};
    return undef unless exists $scoped->{$first}->{$second};
    return $scoped->{$first}->{$second};
}
194
195=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
196
197Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198for the possible options) between the readings given in $source and $target. Sets
199up a scoped relationship between $sourcetext and $targettext if the relationship is
200scoped non-locally.
201
Returns a list of all reading pairs connected by the call to add_relationship.
Throws an exception if the relationship is invalid or conflicts with an existing one.
204
205=cut
206
# Set a relationship between $source and $target (reading IDs).
#   $source_rdg, $target_rdg - objects whose ->text supplies the reading texts
#   $options                 - either a hashref of relationship options, or an
#                              existing Relationship object to apply to this
#                              pair only
# Returns the list of [ id, id ] pairs on which the relationship was set;
# each pair appears once. Throws if the relationship is invalid or clashes
# with a nonlocal relationship already in place.
# Note that an $options hashref is modified in place (scope, reading_a/b,
# orig_a/b are written into it).
sub add_relationship {
    my ( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;

    my $relationship;
    my $thispaironly;
    if ( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
        $relationship = $options;
        $thispaironly = 1;    # If existing rel, set only where asked.
    }
    else {
        # Check the options. Collations are always local.
        $options->{'scope'} = 'local' unless $options->{'scope'};
        $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';

        my ( $is_valid, $reason )
            = $self->relationship_valid( $source, $target, $options->{'type'} );
        unless ( $is_valid ) {
            throw( "Invalid relationship: $reason" );
        }

        # Try to create the relationship object.
        $options->{'reading_a'} = $source_rdg->text;
        $options->{'reading_b'} = $target_rdg->text;
        $options->{'orig_a'}    = $source;
        $options->{'orig_b'}    = $target;
        if ( $options->{'scope'} ne 'local' ) {
            # Is there a relationship with this a & b already?
            # Case-insensitive for non-orthographics.
            my $rdga = $options->{'type'} eq 'orthographic'
                ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
            my $rdgb = $options->{'type'} eq 'orthographic'
                ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
            my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
            if ( $otherrel && $otherrel->type eq $options->{type}
                && $otherrel->scope eq $options->{scope} ) {
                warn "Applying existing scoped relationship";
                $relationship = $otherrel;
            }
        }
        # Will throw on error
        $relationship = $self->create( $options ) unless $relationship;
    }

    # Find all the pairs for which we need to set the relationship.
    my @vectors = ( [ $source, $target ] );
    if ( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
        push( @vectors, $self->_find_applicable( $relationship ) );
    }

    # Now set the relationship(s).
    my @pairs_set;
    my %handled;
    foreach my $v ( @vectors ) {
        # The search for applicable pairs also turns up the pair we were
        # asked about. The graph is undirected, so treat ( a, b ) and
        # ( b, a ) as the same pair and process each pair only once;
        # otherwise it would be set and reported twice.
        my $pair_key = join( "\0", sort @$v );
        next if $handled{$pair_key}++;

        my $rel = $self->get_relationship( @$v );
        if ( $rel && $rel ne $relationship ) {
            if ( $rel->nonlocal ) {
                throw( "Found conflicting relationship at @$v" );
            }
            elsif ( $rel->type ne 'collated' ) {
                # Replace a collation relationship; leave any other sort in place.
                warn "Not overriding local relationship set at @$v";
                next;
            }
        }
        $self->_set_relationship( $relationship, @$v );
        push( @pairs_set, $v );
    }

    return @pairs_set;
}
274
# Find every pair of readings to which the nonlocal relationship $rel
# applies: a reading whose text matches reading_a, together with a reading
# at the same rank whose text matches reading_b. Texts are compared exactly
# for orthographic relationships and case-insensitively otherwise.
# Returns a list of [ id_a, id_b ] array references.
sub _find_applicable {
    my ( $self, $rel ) = @_;
    my $c = $self->collation;

    # TODO Someday we might use a case sensitive language.
    # Decide once how texts are to be compared, and normalise the two
    # wanted texts up front instead of on every comparison.
    my $normalise = $rel->type eq 'orthographic'
        ? sub { return $_[0] }
        : sub { return lc( $_[0] ) };
    my $wanted_a = $normalise->( $rel->reading_a );
    my $wanted_b = $normalise->( $rel->reading_b );

    # Fetch the reading list once; it is scanned again for each candidate.
    my @readings = $c->readings;

    my @vectors;
    foreach my $ir ( grep { $normalise->( $_->text ) eq $wanted_a } @readings ) {
        my $rank    = $ir->rank;
        my @itarget = grep { $_->rank == $rank
                && $normalise->( $_->text ) eq $wanted_b } @readings;
        next unless @itarget;

        # Warn if there is more than one hit with no orth link between them.
        my $itmain = shift @itarget;
        if ( @itarget ) {
            my %all_targets = map { $_ => 1 } @itarget;
            delete $all_targets{$_}
                for $self->related_readings( $itmain,
                    sub { $_[0]->type eq 'orthographic' } );
            warn "More than one unrelated reading with text " . $itmain->text
                . " at rank " . $ir->rank . "!" if keys %all_targets;
        }
        push( @vectors, [ $ir->id, $itmain->id ] );
    }
    return @vectors;
}
315
ee801e17 316=head2 del_relationship( $source, $target )
317
318Removes the relationship between the given readings. If the relationship is
319non-local, removes the relationship everywhere in the graph.
320
321=cut
322
sub del_relationship {
    my ( $self, $source, $target ) = @_;
    my $rel = $self->get_relationship( $source, $target );
    return () unless $rel;    # Nothing to delete; return an empty set.

    my @vectors = ( [ $source, $target ] );
    $self->_remove_relationship( $source, $target );

    if ( $rel->nonlocal ) {
        # Remove the relationship wherever it occurs.
        foreach my $edge ( $self->relationships ) {
            my $other = $self->get_relationship( @$edge );
            next unless defined $other && $other == $rel;
            $self->_remove_relationship( @$edge );
            push( @vectors, $edge );
        }

        # Also forget the by-text registration of this relationship, using
        # the same key that add_scoped_relationship stores it under
        # (lowercased unless orthographic, sorted). Otherwise the deleted
        # relationship would still block a different type of relationship
        # from being defined between the same reading texts.
        my @texts = ( $rel->reading_a, $rel->reading_b );
        @texts = map { lc( $_ ) } @texts unless $rel->type eq 'orthographic';
        my ( $first, $second ) = sort @texts;
        my $scoped = $self->scopedrels;
        if ( exists $scoped->{$first}
            && exists $scoped->{$first}->{$second}
            && $scoped->{$first}->{$second} == $rel ) {
            delete $scoped->{$first}->{$second};
            # Do not leave an empty inner hash behind.
            delete $scoped->{$first} unless %{ $scoped->{$first} };
        }
    }
    return @vectors;
}
340
# Drop the edge between the given endpoints from the relationship graph.
sub _remove_relationship {
    my $self      = shift;
    my @endpoints = @_;
    $self->graph->delete_edge( @endpoints );
}
345
22222af9 346=head2 relationship_valid( $source, $target, $type )
347
348Checks whether a relationship of type $type may exist between the readings given
349in $source and $target. Returns a tuple of ( status, message ) where status is
350a yes/no boolean and, if the answer is no, message gives the reason why.
351
352=cut
353
# Decide whether a relationship of type $rel may link $source and $target.
# Returns ( 1, "ok" ) or ( 0, $reason ).
# Side effect: for every type other than 'transposition', 'repetition' and
# 'collated', any 'collated' links on $source and $target are dropped while
# checking (unless the two readings share a rank).
sub relationship_valid {
    my ( $self, $source, $target, $rel ) = @_;
    my $c = $self->collation;

    if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
        # Check that the two readings do (for a repetition) or do not (for
        # a transposition) appear in the same witness.
        # TODO this might be called before witness paths are set...
        my %seen_wits;
        $seen_wits{$_} = 1 for $c->reading_witnesses( $source );
        foreach my $w ( $c->reading_witnesses( $target ) ) {
            next unless $seen_wits{$w};
            return ( 0, "Readings both occur in witness $w" )
                if $rel eq 'transposition';
            return ( 1, "ok" );    # a repetition needs one shared witness
        }
        # Only once ALL of the target's witnesses have been examined do we
        # know that the readings share none. Deciding inside the loop would
        # look at the first witness only.
        return $rel eq 'transposition'
            ? ( 1, "ok" )
            : ( 0, "Readings occur only in distinct witnesses" );
    }
    else {
        # Check that linking the source and target in a relationship won't lead
        # to a path loop for any witness. If they have the same rank then fine.
        return ( 1, "ok" )
            if $c->reading( $source )->has_rank
            && $c->reading( $target )->has_rank
            && $c->reading( $source )->rank == $c->reading( $target )->rank;

        # Otherwise, first make a lookup table of all the
        # readings related to either the source or the target.
        my @proposed_related = ( $source, $target );
        # Drop the collation links of source and target, unless we want to
        # add a collation relationship.
        foreach my $r ( $source, $target ) {
            $self->_drop_collations( $r ) unless $rel eq 'collated';
            push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
        }
        my %pr_ids = map { $_ => 1 } @proposed_related;

        # The cumulative predecessors and successors of the proposed-related
        # readings should not overlap.
        my %all_pred;
        my %all_succ;
        foreach my $pr ( keys %pr_ids ) {
            $all_pred{$_} = 1 for $c->sequence->all_predecessors( $pr );
            $all_succ{$_} = 1 for $c->sequence->all_successors( $pr );
        }
        foreach my $k ( keys %all_pred ) {
            return ( 0, "Relationship would create witness loop" )
                if exists $all_succ{$k};
        }
        # Nor may a proposed-related reading itself precede or follow
        # another one.
        foreach my $k ( keys %pr_ids ) {
            return ( 0, "Relationship would create witness loop" )
                if exists $all_pred{$k} || exists $all_succ{$k};
        }
        return ( 1, "ok" );
    }
}
411
# Delete every relationship of type 'collated' that links $reading to one
# of its neighbours in the relationship graph.
sub _drop_collations {
    my $self    = shift;
    my $reading = shift;

    my @adjacent = $self->graph->neighbors( $reading );
    for my $neighbour ( @adjacent ) {
        my $link = $self->get_relationship( $reading, $neighbour );
        next unless $link->type eq 'collated';
        $self->del_relationship( $reading, $neighbour );
    }
}
420
7f52eac8 421=head2 related_readings( $reading, $filter )
22222af9 422
423Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 424If $filter is set to a subroutine ref, returns only those related readings where
425$filter( $relationship ) returns a true value.
22222af9 426
427=cut
428
sub related_readings {
    my ( $self, $reading, $filter ) = @_;

    # Accept either a reading ID or a Reading object; answer in kind.
    my $want_objects = ref( $reading ) eq 'Text::Tradition::Collation::Reading';
    $reading = $reading->id if $want_objects;

    my @answer;
    if ( $filter ) {
        # Backwards compat: the string 'colocated' stands for a filter that
        # accepts colocated relationships.
        $filter = sub { $_[0]->colocated } if $filter eq 'colocated';

        # Walk outwards from $reading one layer at a time, following only
        # those edges whose relationship passes the filter.
        my %found    = ( $reading => 1 );
        my @frontier = ( $reading );
        while ( @frontier ) {
            my @next_layer;
            for my $r ( @frontier ) {
                for my $nr ( $self->graph->neighbors( $r ) ) {
                    next unless $filter->( $self->get_relationship( $r, $nr ) );
                    push( @next_layer, $nr ) unless exists $found{$nr};
                    $found{$nr} = 1;
                }
            }
            @frontier = @next_layer;
        }
        # The starting reading is not related to itself.
        delete $found{$reading};
        @answer = keys %found;
    }
    else {
        # No filter: everything reachable through any relationship.
        @answer = $self->graph->all_reachable( $reading );
    }

    return @answer unless $want_objects;
    my $c = $self->collation;
    return map { $c->reading( $_ ) } @answer;
}
469
470=head2 merge_readings( $kept, $deleted );
471
472Makes a best-effort merge of the relationship links between the given readings, and
473stops tracking the to-be-deleted reading.
474
475=cut
476
sub merge_readings {
    my ( $self, $kept, $deleted, $combined ) = @_;

    foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
        # Identify the reading at the far end of this edge.
        my $other = $edge->[0] eq $deleted ? $edge->[1] : $edge->[0];

        # Don't add a self loop
        next if $kept eq $other;

        # If kept changes its text, drop the relationship.
        next if $combined;

        # If kept / other already has a relationship, just keep the old
        my $current = $self->get_relationship( $kept, $other );
        next if $current;

        # Otherwise, adopt the relationship that would be deleted.
        my $inherited = $self->get_relationship( @$edge );
        $self->_set_relationship( $inherited, $kept, $other );
    }

    # Stop tracking the reading that is going away.
    $self->delete_reading( $deleted );
}
498
# Write the relationship graph as a <graph> child of $xmlroot.
#   $graphml_ns - namespace passed to addNewChild for each new element
#   $node_hash  - maps reading IDs to XML node IDs
#   $nodeid_key - data key under which each node records its reading ID
#   $edge_keys  - maps relationship method names to edge data keys
sub _as_graphml {
    my ( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;

    my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );

    # Graph-level attributes, set in this order.
    my @graph_attributes = (
        [ 'edgedefault',   'directed' ],
        [ 'id',            'relationships' ],
        [ 'parse.edgeids', 'canonical' ],
        [ 'parse.edges',   scalar( $self->graph->edges ) ],
        [ 'parse.nodeids', 'canonical' ],
        [ 'parse.nodes',   scalar( $self->graph->vertices ) ],
        [ 'parse.order',   'nodesfirst' ],
    );
    $rgraph->setAttribute( @$_ ) for @graph_attributes;

    # Add the vertices according to their XML IDs
    my %rdg_lookup = reverse %$node_hash;
    foreach my $xmlid ( sort _by_xmlid keys %rdg_lookup ) {
        my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
        $n_el->setAttribute( 'id', $xmlid );
        _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$xmlid} );
    }

    # Add the relationship edges, with their object information
    my $edge_ctr = 0;
    foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
        # Add an edge and fill in its relationship info.
        my ( $from, $to ) = ( $e->[0], $e->[1] );
        my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
        $edge_el->setAttribute( 'source', $node_hash->{$from} );
        $edge_el->setAttribute( 'target', $node_hash->{$to} );
        $edge_el->setAttribute( 'id', 'e' . $edge_ctr++ );

        # Each key names a method on the relationship object; only
        # defined values are written out.
        my $rel_obj = $self->get_relationship( @$e );
        foreach my $key ( keys %$edge_keys ) {
            my $value = $rel_obj->$key;
            _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
                if defined $value;
        }
    }
}
536
# Sort comparator: order two XML IDs numerically by the digits they
# contain, ignoring every non-digit character.
sub _by_xmlid {
    ( my $left  = $a ) =~ s/\D//g;
    ( my $right = $b ) =~ s/\D//g;
    return $left <=> $right;
}
544
# Append a <data key="$key">$value</data> child to $el, in the same
# namespace as $el. Does nothing when $value is undefined.
sub _add_graphml_data {
    my $el    = shift;
    my $key   = shift;
    my $value = shift;
    return unless defined $value;

    my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
    $data_el->setAttribute( 'key', $key );
    $data_el->appendText( $value );
}
552
# Raise a Text::Tradition::Error identified as a relationship error,
# carrying the given message.
sub throw {
    my ( $message ) = @_;
    Text::Tradition::Error->throw(
        'ident'   => 'Relationship error',
        'message' => $message,
    );
}
559
22222af9 560no Moose;
561__PACKAGE__->meta->make_immutable;
562
5631;