try not to let collations interfere with relationship mapping
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 try {
52         my @v3 = $c->del_relationship( 'n1', 'n2' );
53         ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55         like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56 }
57
58 =end testing
59
60 =head1 METHODS
61
62 =head2 new( collation => $collation );
63
64 Creates a new relationship store for the given collation.
65
66 =cut
67
68 has 'collation' => (
69         is => 'ro',
70         isa => 'Text::Tradition::Collation',
71         required => 1,
72         weak_ref => 1,
73         );
74
75 has 'scopedrels' => (
76         is => 'ro',
77         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78         default => sub { {} },
79         );
80
81 has 'graph' => (
82         is => 'ro',
83         isa => 'Graph',
84         default => sub { Graph->new( undirected => 1 ) },
85     handles => {
86         relationships => 'edges',
87         add_reading => 'add_vertex',
88         delete_reading => 'delete_vertex',
89     },
90         );
91         
92 =head2 get_relationship
93
94 Return the relationship object, if any, that exists between two readings.
95
96 =cut
97
98 sub get_relationship {
99         my $self = shift;
100         my @vector;
101         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102                 # Dereference the edge arrayref that was passed.
103                 my $edge = shift;
104                 @vector = @$edge;
105         } else {
106                 @vector = @_;
107         }
108         my $relationship;
109         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
111         } 
112         return $relationship;
113 }
114
115 sub _set_relationship {
116         my( $self, $relationship, @vector ) = @_;
117         $self->graph->add_edge( @vector );
118         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 }
120
121 =head2 create
122
123 Create a new relationship with the given options and return it.
124 Warn and return undef if the relationship cannot be created.
125
126 =cut
127
128 sub create {
129         my( $self, $options ) = @_;
130         # Check to see if a relationship exists between the two given readings
131         my $source = delete $options->{'orig_a'};
132         my $target = delete $options->{'orig_b'};
133         my $rel = $self->get_relationship( $source, $target );
134         if( $rel ) {
135                 if( $rel->type eq 'collated' ) {
136                         # Always replace a 'collated' relationship with a more descriptive
137                         # one, if asked.
138                         $self->del_relationship( $source, $target );
139                 } elsif( $rel->type ne $options->{'type'} ) {
140                         throw( "Another relationship of type " . $rel->type 
141                                 . " already exists between $source and $target" );
142                 } else {
143                         return $rel;
144                 }
145         }
146         
147         # Check to see if a nonlocal relationship is defined for the two readings
148         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
149                 $options->{'reading_b'} );
150         if( $rel && $rel->type eq $options->{'type'} ) {
151                 return $rel;
152         } elsif( $rel ) {
153                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
154         } else {
155                 $rel = Text::Tradition::Collation::Relationship->new( $options );
156                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157                 return $rel;
158         }
159 }
160
161 =head2 add_scoped_relationship( $rel )
162
163 Keep track of relationships defined between specific readings that are scoped
164 non-locally.  Key on whichever reading occurs first alphabetically.
165
166 =cut
167
168 sub add_scoped_relationship {
169         my( $self, $rel ) = @_;
170         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
171         if( $r ) {
172                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
173                         $r->type, $rel->reading_a, $rel->reading_b );
174                 return;
175         }
176         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
177 }
178
179 =head2 scoped_relationship( $reading_a, $reading_b )
180
181 Returns the general (document-level or global) relationship that has been defined 
182 between the two reading strings. Returns undef if there is no general relationship.
183
184 =cut
185
186 sub scoped_relationship {
187         my( $self, $rdga, $rdgb ) = @_;
188         my( $first, $second ) = sort( $rdga, $rdgb );
189         if( exists $self->scopedrels->{$first}->{$second} ) {
190                 return $self->scopedrels->{$first}->{$second};
191         } else {
192                 return undef;
193         }
194 }
195
196 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197
198 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
199 for the possible options) between the readings given in $source and $target.  Sets
200 up a scoped relationship between $sourcetext and $targettext if the relationship is
201 scoped non-locally.
202
203 Returns a status boolean and a list of all reading pairs connected by the call to
204 add_relationship.
205
206 =cut
207
208 sub add_relationship {
209         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
210
211         my $relationship;
212         my $thispaironly;
213         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
214                 $relationship = $options;
215                 $thispaironly = 1;  # If existing rel, set only where asked.
216         } else {
217                 # Check the options
218                 $options->{'scope'} = 'local' unless $options->{'scope'};
219                 
220                 my( $is_valid, $reason ) = 
221                         $self->relationship_valid( $source, $target, $options->{'type'} );
222                 unless( $is_valid ) {
223                         throw( "Invalid relationship: $reason" );
224                 }
225                 
226                 # Try to create the relationship object.
227                 $options->{'reading_a'} = $source_rdg->text;
228                 $options->{'reading_b'} = $target_rdg->text;
229                 $options->{'orig_a'} = $source;
230                 $options->{'orig_b'} = $target;
231         if( $options->{'scope'} ne 'local' ) {
232                         # Is there a relationship with this a & b already?
233                         my $otherrel = $self->scoped_relationship( $options->{reading_a}, 
234                                 $options->{reading_b} );
235                         if( $otherrel && $otherrel->type eq $options->{type}
236                                 && $otherrel->scope eq $options->{scope} ) {
237                                 warn "Applying existing scoped relationship";
238                                 $relationship = $otherrel;
239                         }
240         }
241                 $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
242     }
243
244
245         # Find all the pairs for which we need to set the relationship.
246         my @vectors = ( [ $source, $target ] ); 
247     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
248         my $c = $self->collation;
249         # Set the same relationship everywhere we can, throughout the graph.
250         my @identical_readings = grep { $_->text eq $relationship->reading_a }
251                 $c->readings;
252         foreach my $ir ( @identical_readings ) {
253                 next if $ir->id eq $source;
254                 # Check to see if there is a target reading with the same text at
255                 # the same rank.
256                 my @itarget = grep 
257                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
258                         $c->readings;
259                 if( @itarget ) {
260                         # We found a hit.
261                         warn "More than one reading with text " . $target_rdg->text
262                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
263                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
264                 }
265         }       
266     }
267     
268     # Now set the relationship(s).
269     my @pairs_set;
270     foreach my $v ( @vectors ) {
271                 my $rel = $self->get_relationship( @$v );
272         if( $rel && $rel ne $relationship ) {
273                 if( $rel->nonlocal ) {
274                         throw( "Found conflicting relationship at @$v" );
275                 } else {
276                         warn "Not overriding local relationship set at @$v";
277                 }
278                 next;
279         }
280         $self->_set_relationship( $relationship, @$v );
281         push( @pairs_set, $v );
282     }
283     
284     return @pairs_set;
285 }
286
287 =head2 del_relationship( $source, $target )
288
289 Removes the relationship between the given readings. If the relationship is
290 non-local, removes the relationship everywhere in the graph.
291
292 =cut
293
294 sub del_relationship {
295         my( $self, $source, $target ) = @_;
296         my $rel = $self->get_relationship( $source, $target );
297         throw( "No relationship defined between $source and $target" ) unless $rel;
298         my @vectors = ( [ $source, $target ] );
299         $self->_remove_relationship( $source, $target );
300         if( $rel->nonlocal ) {
301                 # Remove the relationship wherever it occurs.
302                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
303                         $self->relationships;
304                 foreach my $re ( @rel_edges ) {
305                         $self->_remove_relationship( @$re );
306                         push( @vectors, $re );
307                 }
308         }
309         return @vectors;
310 }
311
312 sub _remove_relationship {
313         my( $self, @vector ) = @_;
314         $self->graph->delete_edge( @vector );
315 }
316         
317 =head2 relationship_valid( $source, $target, $type )
318
319 Checks whether a relationship of type $type may exist between the readings given
320 in $source and $target.  Returns a tuple of ( status, message ) where status is
321 a yes/no boolean and, if the answer is no, message gives the reason why.
322
323 =cut
324
325 sub relationship_valid {
326     my( $self, $source, $target, $rel ) = @_;
327     my $c = $self->collation;
328     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
329                 # Check that the two readings do (for a repetition) or do not (for
330                 # a transposition) appear in the same witness.
331                 # TODO this might be called before witness paths are set...
332                 my %seen_wits;
333                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
334                 foreach my $w ( $c->reading_witnesses( $target ) ) {
335                         if( $seen_wits{$w} ) {
336                                 return ( 0, "Readings both occur in witness $w" ) 
337                                         if $rel eq 'transposition';
338                                 return ( 1, "ok" ) if $rel eq 'repetition';
339                 }
340                 return $rel eq 'transposition' ? ( 1, "ok" )
341                         : ( 0, "Readings occur only in distinct witnesses" );
342                 }
343         } else {
344                 # Check that linking the source and target in a relationship won't lead
345                 # to a path loop for any witness.  If they have the same rank then fine.
346                 return( 1, "ok" ) 
347                         if $c->reading( $source )->has_rank
348                                 && $c->reading( $target )->has_rank
349                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
350                 
351                 # Otherwise, first make a lookup table of all the
352                 # readings related to either the source or the target.
353                 my @proposed_related = ( $source, $target );
354                 # Drop the collation links of source and target, unless we want to
355                 # add a collation relationship.
356                 foreach my $r ( ( $source, $target ) ) {
357                         $self->_drop_collations( $r ) unless $rel eq 'collated';
358                         push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
359                 }
360                 my %pr_ids;
361                 map { $pr_ids{ $_ } = 1 } @proposed_related;
362         
363                 # The cumulative predecessors and successors of the proposed-related readings
364                 # should not overlap.
365                 my %all_pred;
366                 my %all_succ;
367                 foreach my $pr ( keys %pr_ids ) {
368                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
369                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
370                 }
371                 foreach my $k ( keys %all_pred ) {
372                         return( 0, "Relationship would create witness loop" )
373                                 if exists $all_succ{$k};
374                 }
375                 foreach my $k ( keys %pr_ids ) {
376                         return( 0, "Relationship would create witness loop" )
377                                 if exists $all_pred{$k} || exists $all_succ{$k};
378                 }
379                 return ( 1, "ok" );
380         }
381 }
382
383 sub _drop_collations {
384         my( $self, $reading ) = @_;
385         foreach my $n ( $self->graph->neighbors( $reading ) ) {
386                 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
387                         $self->del_relationship( $reading, $n );
388                 }
389         }
390 }
391
392 =head2 related_readings( $reading, $filter )
393
394 Returns a list of readings that are connected via relationship links to $reading.
395 If $filter is set to a subroutine ref, returns only those related readings where
396 $filter( $relationship ) returns a true value.
397
398 =cut
399
400 sub related_readings {
401         my( $self, $reading, $filter ) = @_;
402         my $return_object;
403         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
404                 $reading = $reading->id;
405                 $return_object = 1;
406         }
407         my @answer;
408         if( $filter ) {
409                 # Backwards compat
410                 if( $filter eq 'colocated' ) {
411                         $filter = sub { $_[0]->colocated };
412                 }
413                 my %found = ( $reading => 1 );
414                 my $check = [ $reading ];
415                 my $iter = 0;
416                 while( @$check ) {
417                         my $more = [];
418                         foreach my $r ( @$check ) {
419                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
420                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
421                                                 push( @$more, $nr ) unless exists $found{$nr};
422                                                 $found{$nr} = 1;
423                                         }
424                                 }
425                         }
426                         $check = $more;
427                 }
428                 delete $found{$reading};
429                 @answer = keys %found;
430         } else {
431                 @answer = $self->graph->all_reachable( $reading );
432         }
433         if( $return_object ) {
434                 my $c = $self->collation;
435                 return map { $c->reading( $_ ) } @answer;
436         } else {
437                 return @answer;
438         }
439 }
440
441 =head2 merge_readings( $kept, $deleted );
442
443 Makes a best-effort merge of the relationship links between the given readings, and
444 stops tracking the to-be-deleted reading.
445
446 =cut
447
448 sub merge_readings {
449         my( $self, $kept, $deleted, $combined ) = @_;
450         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
451                 # Get the pair of kept / rel
452                 my @vector = ( $kept );
453                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
454                 next if $vector[0] eq $vector[1]; # Don't add a self loop
455                 
456                 # If kept changes its text, drop the relationship.
457                 next if $combined;
458                         
459                 # If kept / rel already has a relationship, warn and keep the old
460                 my $rel = $self->get_relationship( @vector );
461                 if( $rel ) {
462                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
463                         next;
464                 }
465                 
466                 # Otherwise, adopt the relationship that would be deleted.
467                 $rel = $self->get_relationship( @$edge );
468                 $self->_set_relationship( $rel, @vector );
469         }
470         $self->delete_reading( $deleted );
471 }
472
473 sub _as_graphml { 
474         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
475         
476     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
477         $rgraph->setAttribute( 'edgedefault', 'directed' );
478     $rgraph->setAttribute( 'id', 'relationships', );
479     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
480     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
481     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
482     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
483     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
484     
485     # Add the vertices according to their XML IDs
486     my %rdg_lookup = ( reverse %$node_hash );
487     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
488         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
489         $n_el->setAttribute( 'id', $n );
490         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
491     }
492     
493     # Add the relationship edges, with their object information
494     my $edge_ctr = 0;
495     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
496         # Add an edge and fill in its relationship info.
497                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
498                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
499                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
500                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
501
502                 my $rel_obj = $self->get_relationship( @$e );
503                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
504                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
505                 _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation );
506                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
507                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
508                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
509                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
510         }
511 }
512
513 sub _by_xmlid {
514         my $tmp_a = $a;
515         my $tmp_b = $b;
516         $tmp_a =~ s/\D//g;
517         $tmp_b =~ s/\D//g;
518         return $tmp_a <=> $tmp_b;
519 }
520
521 sub _add_graphml_data {
522     my( $el, $key, $value ) = @_;
523     return unless defined $value;
524     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
525     $data_el->setAttribute( 'key', $key );
526     $data_el->appendText( $value );
527 }
528
529 sub throw {
530         Text::Tradition::Error->throw( 
531                 'ident' => 'Relationship error',
532                 'message' => $_[0],
533                 );
534 }
535
536 no Moose;
537 __PACKAGE__->meta->make_immutable;
538
539 1;