add -from and -to options; TODO rationalize these across outputs
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
f8331a4d 40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
ee801e17 41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
679f17e1 44my @v2 = $c->add_relationship( 'n24', 'n23',
ee801e17 45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
679f17e1 49@v2 = $c->del_relationship( 'n12', 'n13' );
ee801e17 50is( scalar @v2, 2, "Deleted second global relationship" );
681893aa 51my @v3 = $c->del_relationship( 'n1', 'n2' );
52is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
ee801e17 53
3ae5e2ad 54=end testing
55
22222af9 56=head1 METHODS
57
58=head2 new( collation => $collation );
59
60Creates a new relationship store for the given collation.
61
62=cut
63
64has 'collation' => (
65 is => 'ro',
66 isa => 'Text::Tradition::Collation',
67 required => 1,
68 weak_ref => 1,
69 );
70
71has 'scopedrels' => (
72 is => 'ro',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
75 );
76
77has 'graph' => (
78 is => 'ro',
79 isa => 'Graph',
80 default => sub { Graph->new( undirected => 1 ) },
81 handles => {
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
85 },
86 );
87
3ae5e2ad 88=head2 get_relationship
89
90Return the relationship object, if any, that exists between two readings.
91
92=cut
93
94sub get_relationship {
4633f9e4 95 my $self = shift;
96 my @vector;
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
99 my $edge = shift;
100 @vector = @$edge;
101 } else {
102 @vector = @_;
103 }
3ae5e2ad 104 my $relationship;
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
ca6e6095 107 }
3ae5e2ad 108 return $relationship;
109}
110
111sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
115}
a1615ee4 116
22222af9 117=head2 create
118
119Create a new relationship with the given options and return it.
120Warn and return undef if the relationship cannot be created.
121
122=cut
123
124sub create {
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
3ae5e2ad 129 my $rel = $self->get_relationship( $source, $target );
130 if( $rel ) {
3d14b48e 131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
133 # one, if asked.
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
63778331 136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
22222af9 138 } else {
139 return $rel;
140 }
141 }
142
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
147 return $rel;
148 } elsif( $rel ) {
63778331 149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 150 } else {
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
153 return $rel;
154 }
155}
156
157=head2 add_scoped_relationship( $rel )
158
159Keep track of relationships defined between specific readings that are scoped
160non-locally. Key on whichever reading occurs first alphabetically.
161
162=cut
163
164sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
f222800e 166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
22222af9 169 if( $r ) {
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
f222800e 171 $r->type, $rdga, $rdgb );
22222af9 172 return;
173 }
f222800e 174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
22222af9 176}
177
178=head2 scoped_relationship( $reading_a, $reading_b )
179
180Returns the general (document-level or global) relationship that has been defined
181between the two reading strings. Returns undef if there is no general relationship.
182
183=cut
184
185sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
190 } else {
191 return undef;
192 }
193}
194
195=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
196
197Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198for the possible options) between the readings given in $source and $target. Sets
199up a scoped relationship between $sourcetext and $targettext if the relationship is
200scoped non-locally.
201
202Returns a status boolean and a list of all reading pairs connected by the call to
203add_relationship.
204
205=cut
206
207sub add_relationship {
208 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
209
ca6e6095 210 my $relationship;
211 my $thispaironly;
212 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
213 $relationship = $options;
214 $thispaironly = 1; # If existing rel, set only where asked.
215 } else {
216 # Check the options
217 $options->{'scope'} = 'local' unless $options->{'scope'};
bf6e338d 218 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
8d5c8893 219 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
ca6e6095 220
221 my( $is_valid, $reason ) =
222 $self->relationship_valid( $source, $target, $options->{'type'} );
223 unless( $is_valid ) {
224 throw( "Invalid relationship: $reason" );
225 }
226
227 # Try to create the relationship object.
228 $options->{'reading_a'} = $source_rdg->text;
229 $options->{'reading_b'} = $target_rdg->text;
230 $options->{'orig_a'} = $source;
231 $options->{'orig_b'} = $target;
0ac5e750 232 if( $options->{'scope'} ne 'local' ) {
233 # Is there a relationship with this a & b already?
f222800e 234 # Case-insensitive for non-orthographics.
235 my $rdga = $options->{'type'} eq 'orthographic'
236 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
237 my $rdgb = $options->{'type'} eq 'orthographic'
238 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
239 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
0ac5e750 240 if( $otherrel && $otherrel->type eq $options->{type}
241 && $otherrel->scope eq $options->{scope} ) {
242 warn "Applying existing scoped relationship";
243 $relationship = $otherrel;
244 }
245 }
246 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
22222af9 247 }
ca6e6095 248
22222af9 249
250 # Find all the pairs for which we need to set the relationship.
bf6e338d 251 my @vectors = [ $source, $target ];
ca6e6095 252 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
bf6e338d 253 push( @vectors, $self->_find_applicable( $relationship ) );
f222800e 254 }
bf6e338d 255
22222af9 256 # Now set the relationship(s).
257 my @pairs_set;
258 foreach my $v ( @vectors ) {
3ae5e2ad 259 my $rel = $self->get_relationship( @$v );
ca6e6095 260 if( $rel && $rel ne $relationship ) {
63778331 261 if( $rel->nonlocal ) {
262 throw( "Found conflicting relationship at @$v" );
a2ed66b2 263 } elsif( $rel->type ne 'collated' ) {
264 # Replace a collation relationship; leave any other sort in place.
13e893dc 265 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
266 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
267 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
268 warn sprintf( "Not overriding local relationship %s with global %s "
269 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
270 @$v, $rel->reading_a, $rel->reading_b );
271 next;
272 }
63778331 273 }
22222af9 274 }
8d5c8893 275 map { $self->_drop_collations( $_ ) } @$v;
3ae5e2ad 276 $self->_set_relationship( $relationship, @$v );
22222af9 277 push( @pairs_set, $v );
278 }
279
63778331 280 return @pairs_set;
22222af9 281}
282
9d829138 283=head2 del_scoped_relationship( $reading_a, $reading_b )
284
285Returns the general (document-level or global) relationship that has been defined
286between the two reading strings. Returns undef if there is no general relationship.
287
288=cut
289
290sub del_scoped_relationship {
291 my( $self, $rdga, $rdgb ) = @_;
292 my( $first, $second ) = sort( $rdga, $rdgb );
293 return delete $self->scopedrels->{$first}->{$second};
294}
295
bf6e338d 296sub _find_applicable {
297 my( $self, $rel ) = @_;
298 my $c = $self->collation;
299 # TODO Someday we might use a case sensitive language.
300 my $lang = $c->tradition->language;
301 my @vectors;
302 my @identical_readings;
303 if( $rel->type eq 'orthographic' ) {
304 @identical_readings = grep { $_->text eq $rel->reading_a }
305 $c->readings;
306 } else {
307 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
308 $c->readings;
309 }
310 foreach my $ir ( @identical_readings ) {
311 my @itarget;
312 if( $rel->type eq 'orthographic' ) {
313 @itarget = grep { $_->rank == $ir->rank
314 && $_->text eq $rel->reading_b } $c->readings;
315 } else {
316 @itarget = grep { $_->rank == $ir->rank
317 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
318 }
319 if( @itarget ) {
320 # Warn if there is more than one hit with no orth link between them.
321 my $itmain = shift @itarget;
322 if( @itarget ) {
323 my %all_targets;
324 map { $all_targets{$_} = 1 } @itarget;
325 map { delete $all_targets{$_} }
326 $self->related_readings( $itmain,
327 sub { $_[0]->type eq 'orthographic' } );
328 warn "More than one unrelated reading with text " . $itmain->text
329 . " at rank " . $ir->rank . "!" if keys %all_targets;
330 }
331 push( @vectors, [ $ir->id, $itmain->id ] );
332 }
333 }
334 return @vectors;
335}
336
ee801e17 337=head2 del_relationship( $source, $target )
338
339Removes the relationship between the given readings. If the relationship is
340non-local, removes the relationship everywhere in the graph.
341
342=cut
343
344sub del_relationship {
345 my( $self, $source, $target ) = @_;
346 my $rel = $self->get_relationship( $source, $target );
681893aa 347 return () unless $rel; # Nothing to delete; return an empty set.
ee801e17 348 my @vectors = ( [ $source, $target ] );
349 $self->_remove_relationship( $source, $target );
350 if( $rel->nonlocal ) {
351 # Remove the relationship wherever it occurs.
9d829138 352 # Remove the relationship wherever it occurs.
ee801e17 353 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
354 $self->relationships;
355 foreach my $re ( @rel_edges ) {
356 $self->_remove_relationship( @$re );
357 push( @vectors, $re );
358 }
9d829138 359 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
ee801e17 360 }
361 return @vectors;
362}
363
ca6e6095 364sub _remove_relationship {
365 my( $self, @vector ) = @_;
366 $self->graph->delete_edge( @vector );
367}
368
22222af9 369=head2 relationship_valid( $source, $target, $type )
370
371Checks whether a relationship of type $type may exist between the readings given
372in $source and $target. Returns a tuple of ( status, message ) where status is
373a yes/no boolean and, if the answer is no, message gives the reason why.
374
375=cut
376
377sub relationship_valid {
378 my( $self, $source, $target, $rel ) = @_;
379 my $c = $self->collation;
380 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
381 # Check that the two readings do (for a repetition) or do not (for
382 # a transposition) appear in the same witness.
2f39215b 383 # TODO this might be called before witness paths are set...
22222af9 384 my %seen_wits;
385 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
386 foreach my $w ( $c->reading_witnesses( $target ) ) {
387 if( $seen_wits{$w} ) {
388 return ( 0, "Readings both occur in witness $w" )
389 if $rel eq 'transposition';
390 return ( 1, "ok" ) if $rel eq 'repetition';
d6936dea 391 }
22222af9 392 }
393 return $rel eq 'transposition' ? ( 1, "ok" )
394 : ( 0, "Readings occur only in distinct witnesses" );
2f39215b 395 } else {
22222af9 396 # Check that linking the source and target in a relationship won't lead
2f39215b 397 # to a path loop for any witness. If they have the same rank then fine.
398 return( 1, "ok" )
399 if $c->reading( $source )->has_rank
400 && $c->reading( $target )->has_rank
401 && $c->reading( $source )->rank == $c->reading( $target )->rank;
a1615ee4 402
403 # Otherwise, first make a lookup table of all the
22222af9 404 # readings related to either the source or the target.
405 my @proposed_related = ( $source, $target );
778251a6 406 # Drop the collation links of source and target, unless we want to
407 # add a collation relationship.
408 foreach my $r ( ( $source, $target ) ) {
2f39215b 409 $self->_drop_collations( $r ) unless $rel eq 'collated';
778251a6 410 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
411 }
22222af9 412 my %pr_ids;
413 map { $pr_ids{ $_ } = 1 } @proposed_related;
414
a1615ee4 415 # The cumulative predecessors and successors of the proposed-related readings
416 # should not overlap.
417 my %all_pred;
418 my %all_succ;
22222af9 419 foreach my $pr ( keys %pr_ids ) {
a1615ee4 420 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
421 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
422 }
423 foreach my $k ( keys %all_pred ) {
2f39215b 424 return( 0, "Relationship would create witness loop" )
425 if exists $all_succ{$k};
a1615ee4 426 }
427 foreach my $k ( keys %pr_ids ) {
2f39215b 428 return( 0, "Relationship would create witness loop" )
429 if exists $all_pred{$k} || exists $all_succ{$k};
a1615ee4 430 }
22222af9 431 return ( 1, "ok" );
432 }
433}
434
778251a6 435sub _drop_collations {
436 my( $self, $reading ) = @_;
437 foreach my $n ( $self->graph->neighbors( $reading ) ) {
438 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
439 $self->del_relationship( $reading, $n );
440 }
441 }
442}
443
7f52eac8 444=head2 related_readings( $reading, $filter )
22222af9 445
446Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 447If $filter is set to a subroutine ref, returns only those related readings where
448$filter( $relationship ) returns a true value.
22222af9 449
450=cut
451
452sub related_readings {
7f52eac8 453 my( $self, $reading, $filter ) = @_;
22222af9 454 my $return_object;
455 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
456 $reading = $reading->id;
457 $return_object = 1;
458 }
c84275ff 459 my @answer;
7f52eac8 460 if( $filter ) {
461 # Backwards compat
462 if( $filter eq 'colocated' ) {
463 $filter = sub { $_[0]->colocated };
464 }
c84275ff 465 my %found = ( $reading => 1 );
466 my $check = [ $reading ];
467 my $iter = 0;
468 while( @$check ) {
c84275ff 469 my $more = [];
470 foreach my $r ( @$check ) {
471 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 472 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 473 push( @$more, $nr ) unless exists $found{$nr};
474 $found{$nr} = 1;
475 }
476 }
477 }
478 $check = $more;
22222af9 479 }
7f52eac8 480 delete $found{$reading};
c84275ff 481 @answer = keys %found;
482 } else {
483 @answer = $self->graph->all_reachable( $reading );
22222af9 484 }
485 if( $return_object ) {
486 my $c = $self->collation;
c84275ff 487 return map { $c->reading( $_ ) } @answer;
22222af9 488 } else {
c84275ff 489 return @answer;
22222af9 490 }
491}
492
493=head2 merge_readings( $kept, $deleted );
494
495Makes a best-effort merge of the relationship links between the given readings, and
496stops tracking the to-be-deleted reading.
497
498=cut
499
500sub merge_readings {
501 my( $self, $kept, $deleted, $combined ) = @_;
502 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
503 # Get the pair of kept / rel
504 my @vector = ( $kept );
505 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
506 next if $vector[0] eq $vector[1]; # Don't add a self loop
507
508 # If kept changes its text, drop the relationship.
509 next if $combined;
510
f222800e 511 # If kept / rel already has a relationship, just keep the old
3ae5e2ad 512 my $rel = $self->get_relationship( @vector );
f222800e 513 next if $rel;
22222af9 514
515 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 516 $rel = $self->get_relationship( @$edge );
517 $self->_set_relationship( $rel, @vector );
22222af9 518 }
519 $self->delete_reading( $deleted );
520}
521
027d819c 522sub _as_graphml {
2626f709 523 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 524
525 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
526 $rgraph->setAttribute( 'edgedefault', 'directed' );
527 $rgraph->setAttribute( 'id', 'relationships', );
528 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
529 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
530 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
531 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
532 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
533
534 # Add the vertices according to their XML IDs
2626f709 535 my %rdg_lookup = ( reverse %$node_hash );
536 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 537 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
538 $n_el->setAttribute( 'id', $n );
2626f709 539 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 540 }
541
542 # Add the relationship edges, with their object information
543 my $edge_ctr = 0;
544 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
545 # Add an edge and fill in its relationship info.
a30ca502 546 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
c84275ff 547 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
548 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
549 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
550 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
551
3ae5e2ad 552 my $rel_obj = $self->get_relationship( @$e );
bbd064a9 553 foreach my $key ( keys %$edge_keys ) {
554 my $value = $rel_obj->$key;
555 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
556 if defined $value;
557 }
c84275ff 558 }
559}
560
561sub _by_xmlid {
2626f709 562 my $tmp_a = $a;
563 my $tmp_b = $b;
564 $tmp_a =~ s/\D//g;
565 $tmp_b =~ s/\D//g;
566 return $tmp_a <=> $tmp_b;
c84275ff 567}
568
569sub _add_graphml_data {
570 my( $el, $key, $value ) = @_;
571 return unless defined $value;
572 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
573 $data_el->setAttribute( 'key', $key );
574 $data_el->appendText( $value );
83d5ac3a 575}
576
63778331 577sub throw {
578 Text::Tradition::Error->throw(
579 'ident' => 'Relationship error',
580 'message' => $_[0],
581 );
582}
583
22222af9 584no Moose;
585__PACKAGE__->meta->make_immutable;
586
5871;