refactored Analysis module with associated changes
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 try {
52         my @v3 = $c->del_relationship( 'n1', 'n2' );
53         ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55         like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56 }
57
58 =end testing
59
60 =head1 METHODS
61
62 =head2 new( collation => $collation );
63
64 Creates a new relationship store for the given collation.
65
66 =cut
67
68 has 'collation' => (
69         is => 'ro',
70         isa => 'Text::Tradition::Collation',
71         required => 1,
72         weak_ref => 1,
73         );
74
75 has 'scopedrels' => (
76         is => 'ro',
77         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78         default => sub { {} },
79         );
80
81 has 'graph' => (
82         is => 'ro',
83         isa => 'Graph',
84         default => sub { Graph->new( undirected => 1 ) },
85     handles => {
86         relationships => 'edges',
87         add_reading => 'add_vertex',
88         delete_reading => 'delete_vertex',
89         delete_relationship => 'delete_edge',
90     },
91         );
92         
93 around 'delete_relationship' => sub {
94         my $orig = shift;
95         my $self = shift;
96         my @vector;
97         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98                 # Dereference the edge arrayref that was passed.
99                 my $edge = shift;
100                 @vector = @$edge;
101         } else {
102                 @vector = @_;
103         }
104         return $self->$orig( @vector );
105 };
106         
107 =head2 get_relationship
108
109 Return the relationship object, if any, that exists between two readings.
110
111 =cut
112
113 sub get_relationship {
114         my $self = shift;
115         my @vector;
116         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
117                 # Dereference the edge arrayref that was passed.
118                 my $edge = shift;
119                 @vector = @$edge;
120         } else {
121                 @vector = @_;
122         }
123         my $relationship;
124         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
125                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
126         }
127         return $relationship;
128 }
129
130 sub _set_relationship {
131         my( $self, $relationship, @vector ) = @_;
132         $self->graph->add_edge( @vector );
133         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
134 }
135
136 sub _remove_relationship {
137         my( $self, @vector ) = @_;
138         $self->graph->delete_edge( @vector );
139 }
140         
141 =head2 create
142
143 Create a new relationship with the given options and return it.
144 Warn and return undef if the relationship cannot be created.
145
146 =cut
147
148 sub create {
149         my( $self, $options ) = @_;
150         # Check to see if a relationship exists between the two given readings
151         my $source = delete $options->{'orig_a'};
152         my $target = delete $options->{'orig_b'};
153         my $rel = $self->get_relationship( $source, $target );
154         if( $rel ) {
155                 if( $rel->type ne $options->{'type'} ) {
156                         throw( "Another relationship of type " . $rel->type 
157                                 . " already exists between $source and $target" );
158                 } else {
159                         return $rel;
160                 }
161         }
162         
163         # Check to see if a nonlocal relationship is defined for the two readings
164         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
165                 $options->{'reading_b'} );
166         if( $rel && $rel->type eq $options->{'type'} ) {
167                 return $rel;
168         } elsif( $rel ) {
169                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
170         } else {
171                 $rel = Text::Tradition::Collation::Relationship->new( $options );
172                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
173                 return $rel;
174         }
175 }
176
177 =head2 add_scoped_relationship( $rel )
178
179 Keep track of relationships defined between specific readings that are scoped
180 non-locally.  Key on whichever reading occurs first alphabetically.
181
182 =cut
183
184 sub add_scoped_relationship {
185         my( $self, $rel ) = @_;
186         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
187         if( $r ) {
188                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
189                         $r->type, $rel->reading_a, $rel->reading_b );
190                 return;
191         }
192         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
193 }
194
195 =head2 scoped_relationship( $reading_a, $reading_b )
196
197 Returns the general (document-level or global) relationship that has been defined 
198 between the two reading strings. Returns undef if there is no general relationship.
199
200 =cut
201
202 sub scoped_relationship {
203         my( $self, $rdga, $rdgb ) = @_;
204         my( $first, $second ) = sort( $rdga, $rdgb );
205         if( exists $self->scopedrels->{$first}->{$second} ) {
206                 return $self->scopedrels->{$first}->{$second};
207         } else {
208                 return undef;
209         }
210 }
211
212 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
213
214 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
215 for the possible options) between the readings given in $source and $target.  Sets
216 up a scoped relationship between $sourcetext and $targettext if the relationship is
217 scoped non-locally.
218
219 Returns a status boolean and a list of all reading pairs connected by the call to
220 add_relationship.
221
222 =cut
223
224 sub add_relationship {
225         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
226
227         # Check the options
228         $options->{'scope'} = 'local' unless $options->{'scope'};
229         
230         my( $is_valid, $reason ) = 
231                 $self->relationship_valid( $source, $target, $options->{'type'} );
232     unless( $is_valid ) {
233         throw( "Invalid relationship: $reason" );
234     }
235     
236     # Try to create the relationship object.
237     $options->{'reading_a'} = $source_rdg->text;
238     $options->{'reading_b'} = $target_rdg->text;
239     $options->{'orig_a'} = $source;
240     $options->{'orig_b'} = $target;
241     my $relationship = $self->create( $options );  # Will throw on error
242
243         # Find all the pairs for which we need to set the relationship.
244         my @vectors = ( [ $source, $target ] ); 
245     if( $relationship->colocated && $relationship->nonlocal ) {
246         my $c = $self->collation;
247         # Set the same relationship everywhere we can, throughout the graph.
248         my @identical_readings = grep { $_->text eq $relationship->reading_a }
249                 $c->readings;
250         foreach my $ir ( @identical_readings ) {
251                 next if $ir->id eq $source;
252                 # Check to see if there is a target reading with the same text at
253                 # the same rank.
254                 my @itarget = grep 
255                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
256                         $c->readings;
257                 if( @itarget ) {
258                         # We found a hit.
259                         warn "More than one reading with text " . $target_rdg->text
260                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
261                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
262                 }
263         }       
264     }
265     
266     # Now set the relationship(s).
267     my @pairs_set;
268     foreach my $v ( @vectors ) {
269                 my $rel = $self->get_relationship( @$v );
270         if( $rel ) {
271                 if( $rel->nonlocal ) {
272                         throw( "Found conflicting relationship at @$v" );
273                 } else {
274                         warn "Not overriding local relationship set at @$v";
275                 }
276                 next;
277         }
278         $self->_set_relationship( $relationship, @$v );
279         push( @pairs_set, $v );
280     }
281     
282     return @pairs_set;
283 }
284
285 =head2 del_relationship( $source, $target )
286
287 Removes the relationship between the given readings. If the relationship is
288 non-local, removes the relationship everywhere in the graph.
289
290 =cut
291
292 sub del_relationship {
293         my( $self, $source, $target ) = @_;
294         my $rel = $self->get_relationship( $source, $target );
295         throw( "No relationship defined between $source and $target" ) unless $rel;
296         my @vectors = ( [ $source, $target ] );
297         $self->_remove_relationship( $source, $target );
298         if( $rel->nonlocal ) {
299                 # Remove the relationship wherever it occurs.
300                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
301                         $self->relationships;
302                 foreach my $re ( @rel_edges ) {
303                         $self->_remove_relationship( @$re );
304                         push( @vectors, $re );
305                 }
306         }
307         return @vectors;
308 }
309
310 =head2 relationship_valid( $source, $target, $type )
311
312 Checks whether a relationship of type $type may exist between the readings given
313 in $source and $target.  Returns a tuple of ( status, message ) where status is
314 a yes/no boolean and, if the answer is no, message gives the reason why.
315
316 =cut
317
318 sub relationship_valid {
319     my( $self, $source, $target, $rel ) = @_;
320     my $c = $self->collation;
321     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
322                 # Check that the two readings do (for a repetition) or do not (for
323                 # a transposition) appear in the same witness.
324                 # TODO this might be called before witness paths are set...
325                 my %seen_wits;
326                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
327                 foreach my $w ( $c->reading_witnesses( $target ) ) {
328                         if( $seen_wits{$w} ) {
329                                 return ( 0, "Readings both occur in witness $w" ) 
330                                         if $rel eq 'transposition';
331                                 return ( 1, "ok" ) if $rel eq 'repetition';
332                 }
333                 return $rel eq 'transposition' ? ( 1, "ok" )
334                         : ( 0, "Readings occur only in distinct witnesses" );
335                 }
336         } else {
337                 # Check that linking the source and target in a relationship won't lead
338                 # to a path loop for any witness.  If they have the same rank then fine.
339                 return( 1, "ok" ) 
340                         if $c->reading( $source )->has_rank
341                                 && $c->reading( $target )->has_rank
342                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
343                 
344                 # Otherwise, first make a lookup table of all the
345                 # readings related to either the source or the target.
346                 my @proposed_related = ( $source, $target );
347                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
348                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
349                 my %pr_ids;
350                 map { $pr_ids{ $_ } = 1 } @proposed_related;
351         
352                 # The cumulative predecessors and successors of the proposed-related readings
353                 # should not overlap.
354                 my %all_pred;
355                 my %all_succ;
356                 foreach my $pr ( keys %pr_ids ) {
357                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
358                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
359                 }
360                 foreach my $k ( keys %all_pred ) {
361                         return( 0, "Relationship would create witness loop" )
362                                 if exists $all_succ{$k};
363                 }
364                 foreach my $k ( keys %pr_ids ) {
365                         return( 0, "Relationship would create witness loop" )
366                                 if exists $all_pred{$k} || exists $all_succ{$k};
367                 }
368                 return ( 1, "ok" );
369         }
370 }
371
372 =head2 related_readings( $reading, $filter )
373
374 Returns a list of readings that are connected via relationship links to $reading.
375 If $filter is set to a subroutine ref, returns only those related readings where
376 $filter( $relationship ) returns a true value.
377
378 =cut
379
380 sub related_readings {
381         my( $self, $reading, $filter ) = @_;
382         my $return_object;
383         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
384                 $reading = $reading->id;
385                 $return_object = 1;
386         }
387         my @answer;
388         if( $filter ) {
389                 # Backwards compat
390                 if( $filter eq 'colocated' ) {
391                         $filter = sub { $_[0]->colocated };
392                 }
393                 my %found = ( $reading => 1 );
394                 my $check = [ $reading ];
395                 my $iter = 0;
396                 while( @$check ) {
397                         my $more = [];
398                         foreach my $r ( @$check ) {
399                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
400                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
401                                                 push( @$more, $nr ) unless exists $found{$nr};
402                                                 $found{$nr} = 1;
403                                         }
404                                 }
405                         }
406                         $check = $more;
407                 }
408                 delete $found{$reading};
409                 @answer = keys %found;
410         } else {
411                 @answer = $self->graph->all_reachable( $reading );
412         }
413         if( $return_object ) {
414                 my $c = $self->collation;
415                 return map { $c->reading( $_ ) } @answer;
416         } else {
417                 return @answer;
418         }
419 }
420
421 =head2 merge_readings( $kept, $deleted );
422
423 Makes a best-effort merge of the relationship links between the given readings, and
424 stops tracking the to-be-deleted reading.
425
426 =cut
427
428 sub merge_readings {
429         my( $self, $kept, $deleted, $combined ) = @_;
430         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
431                 # Get the pair of kept / rel
432                 my @vector = ( $kept );
433                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
434                 next if $vector[0] eq $vector[1]; # Don't add a self loop
435                 
436                 # If kept changes its text, drop the relationship.
437                 next if $combined;
438                         
439                 # If kept / rel already has a relationship, warn and keep the old
440                 my $rel = $self->get_relationship( @vector );
441                 if( $rel ) {
442                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
443                         next;
444                 }
445                 
446                 # Otherwise, adopt the relationship that would be deleted.
447                 $rel = $self->get_relationship( @$edge );
448                 $self->_set_relationship( $rel, @vector );
449         }
450         $self->delete_reading( $deleted );
451 }
452
453 sub _as_graphml { 
454         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
455         
456     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
457         $rgraph->setAttribute( 'edgedefault', 'directed' );
458     $rgraph->setAttribute( 'id', 'relationships', );
459     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
460     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
461     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
462     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
463     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
464     
465     # Add the vertices according to their XML IDs
466     my %rdg_lookup = ( reverse %$node_hash );
467     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
468         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
469         $n_el->setAttribute( 'id', $n );
470         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
471     }
472     
473     # Add the relationship edges, with their object information
474     my $edge_ctr = 0;
475     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
476         # Add an edge and fill in its relationship info.
477                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
478                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
479                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
480                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
481
482                 my $rel_obj = $self->get_relationship( @$e );
483                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
484                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
485                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
486                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
487                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
488                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
489         }
490 }
491
492 sub _by_xmlid {
493         my $tmp_a = $a;
494         my $tmp_b = $b;
495         $tmp_a =~ s/\D//g;
496         $tmp_b =~ s/\D//g;
497         return $tmp_a <=> $tmp_b;
498 }
499
500 sub _add_graphml_data {
501     my( $el, $key, $value ) = @_;
502     return unless defined $value;
503     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
504     $data_el->setAttribute( 'key', $key );
505     $data_el->appendText( $value );
506 }
507
508 sub throw {
509         Text::Tradition::Error->throw( 
510                 'ident' => 'Relationship error',
511                 'message' => $_[0],
512                 );
513 }
514
515 no Moose;
516 __PACKAGE__->meta->make_immutable;
517
518 1;