Merge branch 'master' of github.com:tla/stemmatology
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
44my @v2 = $c->add_relationship( 'n9', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
49@v2 = $c->del_relationship( 'n8', 'n13' );
50is( scalar @v2, 2, "Deleted second global relationship" );
51try {
52 my @v3 = $c->del_relationship( 'n1', 'n2' );
53 ok( 0, "Should have errored on non-existent relationship" );
54} catch( Text::Tradition::Error $e ) {
55 like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56}
57
3ae5e2ad 58=end testing
59
22222af9 60=head1 METHODS
61
62=head2 new( collation => $collation );
63
64Creates a new relationship store for the given collation.
65
66=cut
67
68has 'collation' => (
69 is => 'ro',
70 isa => 'Text::Tradition::Collation',
71 required => 1,
72 weak_ref => 1,
73 );
74
75has 'scopedrels' => (
76 is => 'ro',
77 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78 default => sub { {} },
79 );
80
81has 'graph' => (
82 is => 'ro',
83 isa => 'Graph',
84 default => sub { Graph->new( undirected => 1 ) },
85 handles => {
86 relationships => 'edges',
87 add_reading => 'add_vertex',
88 delete_reading => 'delete_vertex',
89 },
90 );
91
3ae5e2ad 92=head2 get_relationship
93
94Return the relationship object, if any, that exists between two readings.
95
96=cut
97
98sub get_relationship {
4633f9e4 99 my $self = shift;
100 my @vector;
101 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102 # Dereference the edge arrayref that was passed.
103 my $edge = shift;
104 @vector = @$edge;
105 } else {
106 @vector = @_;
107 }
3ae5e2ad 108 my $relationship;
109 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
ca6e6095 111 }
3ae5e2ad 112 return $relationship;
113}
114
115sub _set_relationship {
116 my( $self, $relationship, @vector ) = @_;
117 $self->graph->add_edge( @vector );
118 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119}
a1615ee4 120
22222af9 121=head2 create
122
123Create a new relationship with the given options and return it.
124Warn and return undef if the relationship cannot be created.
125
126=cut
127
128sub create {
129 my( $self, $options ) = @_;
130 # Check to see if a relationship exists between the two given readings
131 my $source = delete $options->{'orig_a'};
132 my $target = delete $options->{'orig_b'};
3ae5e2ad 133 my $rel = $self->get_relationship( $source, $target );
134 if( $rel ) {
3d14b48e 135 if( $rel->type eq 'collated' ) {
136 # Always replace a 'collated' relationship with a more descriptive
137 # one, if asked.
138 $self->del_relationship( $source, $target );
139 } elsif( $rel->type ne $options->{'type'} ) {
63778331 140 throw( "Another relationship of type " . $rel->type
141 . " already exists between $source and $target" );
22222af9 142 } else {
143 return $rel;
144 }
145 }
146
147 # Check to see if a nonlocal relationship is defined for the two readings
148 $rel = $self->scoped_relationship( $options->{'reading_a'},
149 $options->{'reading_b'} );
150 if( $rel && $rel->type eq $options->{'type'} ) {
151 return $rel;
152 } elsif( $rel ) {
63778331 153 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 154 } else {
155 $rel = Text::Tradition::Collation::Relationship->new( $options );
156 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157 return $rel;
158 }
159}
160
161=head2 add_scoped_relationship( $rel )
162
163Keep track of relationships defined between specific readings that are scoped
164non-locally. Key on whichever reading occurs first alphabetically.
165
166=cut
167
168sub add_scoped_relationship {
169 my( $self, $rel ) = @_;
f222800e 170 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
171 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
172 my $r = $self->scoped_relationship( $rdga, $rdgb );
22222af9 173 if( $r ) {
174 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
f222800e 175 $r->type, $rdga, $rdgb );
22222af9 176 return;
177 }
f222800e 178 my( $first, $second ) = sort ( $rdga, $rdgb );
179 $self->scopedrels->{$first}->{$second} = $rel;
22222af9 180}
181
182=head2 scoped_relationship( $reading_a, $reading_b )
183
184Returns the general (document-level or global) relationship that has been defined
185between the two reading strings. Returns undef if there is no general relationship.
186
187=cut
188
189sub scoped_relationship {
190 my( $self, $rdga, $rdgb ) = @_;
191 my( $first, $second ) = sort( $rdga, $rdgb );
192 if( exists $self->scopedrels->{$first}->{$second} ) {
193 return $self->scopedrels->{$first}->{$second};
194 } else {
195 return undef;
196 }
197}
198
199=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
200
201Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
202for the possible options) between the readings given in $source and $target. Sets
203up a scoped relationship between $sourcetext and $targettext if the relationship is
204scoped non-locally.
205
206Returns a status boolean and a list of all reading pairs connected by the call to
207add_relationship.
208
209=cut
210
211sub add_relationship {
212 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
213
ca6e6095 214 my $relationship;
215 my $thispaironly;
216 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
217 $relationship = $options;
218 $thispaironly = 1; # If existing rel, set only where asked.
219 } else {
220 # Check the options
221 $options->{'scope'} = 'local' unless $options->{'scope'};
bf6e338d 222 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
ca6e6095 223
224 my( $is_valid, $reason ) =
225 $self->relationship_valid( $source, $target, $options->{'type'} );
226 unless( $is_valid ) {
227 throw( "Invalid relationship: $reason" );
228 }
229
230 # Try to create the relationship object.
231 $options->{'reading_a'} = $source_rdg->text;
232 $options->{'reading_b'} = $target_rdg->text;
233 $options->{'orig_a'} = $source;
234 $options->{'orig_b'} = $target;
0ac5e750 235 if( $options->{'scope'} ne 'local' ) {
236 # Is there a relationship with this a & b already?
f222800e 237 # Case-insensitive for non-orthographics.
238 my $rdga = $options->{'type'} eq 'orthographic'
239 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
240 my $rdgb = $options->{'type'} eq 'orthographic'
241 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
242 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
0ac5e750 243 if( $otherrel && $otherrel->type eq $options->{type}
244 && $otherrel->scope eq $options->{scope} ) {
245 warn "Applying existing scoped relationship";
246 $relationship = $otherrel;
247 }
248 }
249 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
22222af9 250 }
ca6e6095 251
22222af9 252
253 # Find all the pairs for which we need to set the relationship.
bf6e338d 254 my @vectors = [ $source, $target ];
ca6e6095 255 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
bf6e338d 256 push( @vectors, $self->_find_applicable( $relationship ) );
f222800e 257 }
258 $DB::single = 1 if grep { $_->[0] eq 'w494' || $_->[1] eq 'w494' } @vectors;
bf6e338d 259
22222af9 260 # Now set the relationship(s).
261 my @pairs_set;
262 foreach my $v ( @vectors ) {
3ae5e2ad 263 my $rel = $self->get_relationship( @$v );
ca6e6095 264 if( $rel && $rel ne $relationship ) {
63778331 265 if( $rel->nonlocal ) {
266 throw( "Found conflicting relationship at @$v" );
267 } else {
268 warn "Not overriding local relationship set at @$v";
269 }
3ae5e2ad 270 next;
22222af9 271 }
3ae5e2ad 272 $self->_set_relationship( $relationship, @$v );
22222af9 273 push( @pairs_set, $v );
274 }
275
63778331 276 return @pairs_set;
22222af9 277}
278
bf6e338d 279sub _find_applicable {
280 my( $self, $rel ) = @_;
281 my $c = $self->collation;
282 # TODO Someday we might use a case sensitive language.
283 my $lang = $c->tradition->language;
284 my @vectors;
285 my @identical_readings;
286 if( $rel->type eq 'orthographic' ) {
287 @identical_readings = grep { $_->text eq $rel->reading_a }
288 $c->readings;
289 } else {
290 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
291 $c->readings;
292 }
293 foreach my $ir ( @identical_readings ) {
294 my @itarget;
295 if( $rel->type eq 'orthographic' ) {
296 @itarget = grep { $_->rank == $ir->rank
297 && $_->text eq $rel->reading_b } $c->readings;
298 } else {
299 @itarget = grep { $_->rank == $ir->rank
300 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
301 }
302 if( @itarget ) {
303 # Warn if there is more than one hit with no orth link between them.
304 my $itmain = shift @itarget;
305 if( @itarget ) {
306 my %all_targets;
307 map { $all_targets{$_} = 1 } @itarget;
308 map { delete $all_targets{$_} }
309 $self->related_readings( $itmain,
310 sub { $_[0]->type eq 'orthographic' } );
311 warn "More than one unrelated reading with text " . $itmain->text
312 . " at rank " . $ir->rank . "!" if keys %all_targets;
313 }
314 push( @vectors, [ $ir->id, $itmain->id ] );
315 }
316 }
317 return @vectors;
318}
319
ee801e17 320=head2 del_relationship( $source, $target )
321
322Removes the relationship between the given readings. If the relationship is
323non-local, removes the relationship everywhere in the graph.
324
325=cut
326
327sub del_relationship {
328 my( $self, $source, $target ) = @_;
329 my $rel = $self->get_relationship( $source, $target );
330 throw( "No relationship defined between $source and $target" ) unless $rel;
331 my @vectors = ( [ $source, $target ] );
332 $self->_remove_relationship( $source, $target );
333 if( $rel->nonlocal ) {
334 # Remove the relationship wherever it occurs.
335 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
336 $self->relationships;
337 foreach my $re ( @rel_edges ) {
338 $self->_remove_relationship( @$re );
339 push( @vectors, $re );
340 }
341 }
342 return @vectors;
343}
344
ca6e6095 345sub _remove_relationship {
346 my( $self, @vector ) = @_;
347 $self->graph->delete_edge( @vector );
348}
349
22222af9 350=head2 relationship_valid( $source, $target, $type )
351
352Checks whether a relationship of type $type may exist between the readings given
353in $source and $target. Returns a tuple of ( status, message ) where status is
354a yes/no boolean and, if the answer is no, message gives the reason why.
355
356=cut
357
358sub relationship_valid {
359 my( $self, $source, $target, $rel ) = @_;
360 my $c = $self->collation;
361 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
362 # Check that the two readings do (for a repetition) or do not (for
363 # a transposition) appear in the same witness.
32e95735 364 # TODO this might be called before witness paths are set...
22222af9 365 my %seen_wits;
366 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
367 foreach my $w ( $c->reading_witnesses( $target ) ) {
368 if( $seen_wits{$w} ) {
369 return ( 0, "Readings both occur in witness $w" )
370 if $rel eq 'transposition';
371 return ( 1, "ok" ) if $rel eq 'repetition';
372 }
373 return $rel eq 'transposition' ? ( 1, "ok" )
374 : ( 0, "Readings occur only in distinct witnesses" );
375 }
376 } else {
377 # Check that linking the source and target in a relationship won't lead
a1615ee4 378 # to a path loop for any witness. If they have the same rank then fine.
379 return( 1, "ok" )
84d4ca78 380 if $c->reading( $source )->has_rank
381 && $c->reading( $target )->has_rank
382 && $c->reading( $source )->rank == $c->reading( $target )->rank;
a1615ee4 383
384 # Otherwise, first make a lookup table of all the
22222af9 385 # readings related to either the source or the target.
386 my @proposed_related = ( $source, $target );
778251a6 387 # Drop the collation links of source and target, unless we want to
388 # add a collation relationship.
389 foreach my $r ( ( $source, $target ) ) {
390 $self->_drop_collations( $r ) unless $rel eq 'collated';
391 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
392 }
22222af9 393 my %pr_ids;
394 map { $pr_ids{ $_ } = 1 } @proposed_related;
395
a1615ee4 396 # The cumulative predecessors and successors of the proposed-related readings
397 # should not overlap.
398 my %all_pred;
399 my %all_succ;
22222af9 400 foreach my $pr ( keys %pr_ids ) {
a1615ee4 401 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
402 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
403 }
404 foreach my $k ( keys %all_pred ) {
405 return( 0, "Relationship would create witness loop" )
406 if exists $all_succ{$k};
407 }
408 foreach my $k ( keys %pr_ids ) {
409 return( 0, "Relationship would create witness loop" )
410 if exists $all_pred{$k} || exists $all_succ{$k};
411 }
22222af9 412 return ( 1, "ok" );
413 }
414}
415
778251a6 416sub _drop_collations {
417 my( $self, $reading ) = @_;
418 foreach my $n ( $self->graph->neighbors( $reading ) ) {
419 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
420 $self->del_relationship( $reading, $n );
421 }
422 }
423}
424
7f52eac8 425=head2 related_readings( $reading, $filter )
22222af9 426
427Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 428If $filter is set to a subroutine ref, returns only those related readings where
429$filter( $relationship ) returns a true value.
22222af9 430
431=cut
432
433sub related_readings {
7f52eac8 434 my( $self, $reading, $filter ) = @_;
22222af9 435 my $return_object;
436 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
437 $reading = $reading->id;
438 $return_object = 1;
439 }
c84275ff 440 my @answer;
7f52eac8 441 if( $filter ) {
442 # Backwards compat
443 if( $filter eq 'colocated' ) {
444 $filter = sub { $_[0]->colocated };
445 }
c84275ff 446 my %found = ( $reading => 1 );
447 my $check = [ $reading ];
448 my $iter = 0;
449 while( @$check ) {
c84275ff 450 my $more = [];
451 foreach my $r ( @$check ) {
452 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 453 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 454 push( @$more, $nr ) unless exists $found{$nr};
455 $found{$nr} = 1;
456 }
457 }
458 }
459 $check = $more;
22222af9 460 }
7f52eac8 461 delete $found{$reading};
c84275ff 462 @answer = keys %found;
463 } else {
464 @answer = $self->graph->all_reachable( $reading );
22222af9 465 }
466 if( $return_object ) {
467 my $c = $self->collation;
c84275ff 468 return map { $c->reading( $_ ) } @answer;
22222af9 469 } else {
c84275ff 470 return @answer;
22222af9 471 }
472}
473
474=head2 merge_readings( $kept, $deleted );
475
476Makes a best-effort merge of the relationship links between the given readings, and
477stops tracking the to-be-deleted reading.
478
479=cut
480
481sub merge_readings {
482 my( $self, $kept, $deleted, $combined ) = @_;
483 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
484 # Get the pair of kept / rel
485 my @vector = ( $kept );
486 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
487 next if $vector[0] eq $vector[1]; # Don't add a self loop
488
489 # If kept changes its text, drop the relationship.
490 next if $combined;
491
f222800e 492 # If kept / rel already has a relationship, just keep the old
3ae5e2ad 493 my $rel = $self->get_relationship( @vector );
f222800e 494 next if $rel;
22222af9 495
496 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 497 $rel = $self->get_relationship( @$edge );
498 $self->_set_relationship( $rel, @vector );
22222af9 499 }
500 $self->delete_reading( $deleted );
501}
502
027d819c 503sub _as_graphml {
2626f709 504 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 505
506 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
507 $rgraph->setAttribute( 'edgedefault', 'directed' );
508 $rgraph->setAttribute( 'id', 'relationships', );
509 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
510 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
511 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
512 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
513 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
514
515 # Add the vertices according to their XML IDs
2626f709 516 my %rdg_lookup = ( reverse %$node_hash );
517 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 518 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
519 $n_el->setAttribute( 'id', $n );
2626f709 520 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 521 }
522
523 # Add the relationship edges, with their object information
524 my $edge_ctr = 0;
525 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
526 # Add an edge and fill in its relationship info.
527 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
528 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
529 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
530 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
531
3ae5e2ad 532 my $rel_obj = $self->get_relationship( @$e );
bbd064a9 533 foreach my $key ( keys %$edge_keys ) {
534 my $value = $rel_obj->$key;
535 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
536 if defined $value;
537 }
c84275ff 538 }
539}
540
541sub _by_xmlid {
2626f709 542 my $tmp_a = $a;
543 my $tmp_b = $b;
544 $tmp_a =~ s/\D//g;
545 $tmp_b =~ s/\D//g;
546 return $tmp_a <=> $tmp_b;
c84275ff 547}
548
549sub _add_graphml_data {
550 my( $el, $key, $value ) = @_;
551 return unless defined $value;
552 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
553 $data_el->setAttribute( 'key', $key );
554 $data_el->appendText( $value );
83d5ac3a 555}
556
63778331 557sub throw {
558 Text::Tradition::Error->throw(
559 'ident' => 'Relationship error',
560 'message' => $_[0],
561 );
562}
563
22222af9 564no Moose;
565__PACKAGE__->meta->make_immutable;
566
5671;