47d384ef4b03c092e4173ed444b7e76086f56df1
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 try {
52         my @v3 = $c->del_relationship( 'n1', 'n2' );
53         ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55         like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56 }
57
58 =end testing
59
60 =head1 METHODS
61
62 =head2 new( collation => $collation );
63
64 Creates a new relationship store for the given collation.
65
66 =cut
67
68 has 'collation' => (
69         is => 'ro',
70         isa => 'Text::Tradition::Collation',
71         required => 1,
72         weak_ref => 1,
73         );
74
75 has 'scopedrels' => (
76         is => 'ro',
77         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78         default => sub { {} },
79         );
80
81 has 'graph' => (
82         is => 'ro',
83         isa => 'Graph',
84         default => sub { Graph->new( undirected => 1 ) },
85     handles => {
86         relationships => 'edges',
87         add_reading => 'add_vertex',
88         delete_reading => 'delete_vertex',
89     },
90         );
91         
92 =head2 get_relationship
93
94 Return the relationship object, if any, that exists between two readings.
95
96 =cut
97
98 sub get_relationship {
99         my $self = shift;
100         my @vector;
101         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102                 # Dereference the edge arrayref that was passed.
103                 my $edge = shift;
104                 @vector = @$edge;
105         } else {
106                 @vector = @_;
107         }
108         my $relationship;
109         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
111         } 
112         return $relationship;
113 }
114
115 sub _set_relationship {
116         my( $self, $relationship, @vector ) = @_;
117         $self->graph->add_edge( @vector );
118         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 }
120
121 =head2 create
122
123 Create a new relationship with the given options and return it.
124 Warn and return undef if the relationship cannot be created.
125
126 =cut
127
128 sub create {
129         my( $self, $options ) = @_;
130         # Check to see if a relationship exists between the two given readings
131         my $source = delete $options->{'orig_a'};
132         my $target = delete $options->{'orig_b'};
133         my $rel = $self->get_relationship( $source, $target );
134         if( $rel ) {
135                 if( $rel->type eq 'collated' ) {
136                         # Always replace a 'collated' relationship with a more descriptive
137                         # one, if asked.
138                         $self->del_relationship( $source, $target );
139                 } elsif( $rel->type ne $options->{'type'} ) {
140                         throw( "Another relationship of type " . $rel->type 
141                                 . " already exists between $source and $target" );
142                 } else {
143                         return $rel;
144                 }
145         }
146         
147         # Check to see if a nonlocal relationship is defined for the two readings
148         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
149                 $options->{'reading_b'} );
150         if( $rel && $rel->type eq $options->{'type'} ) {
151                 return $rel;
152         } elsif( $rel ) {
153                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
154         } else {
155                 $rel = Text::Tradition::Collation::Relationship->new( $options );
156                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157                 return $rel;
158         }
159 }
160
161 =head2 add_scoped_relationship( $rel )
162
163 Keep track of relationships defined between specific readings that are scoped
164 non-locally.  Key on whichever reading occurs first alphabetically.
165
166 =cut
167
168 sub add_scoped_relationship {
169         my( $self, $rel ) = @_;
170         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
171         if( $r ) {
172                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
173                         $r->type, $rel->reading_a, $rel->reading_b );
174                 return;
175         }
176         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
177 }
178
179 =head2 scoped_relationship( $reading_a, $reading_b )
180
181 Returns the general (document-level or global) relationship that has been defined 
182 between the two reading strings. Returns undef if there is no general relationship.
183
184 =cut
185
186 sub scoped_relationship {
187         my( $self, $rdga, $rdgb ) = @_;
188         my( $first, $second ) = sort( $rdga, $rdgb );
189         if( exists $self->scopedrels->{$first}->{$second} ) {
190                 return $self->scopedrels->{$first}->{$second};
191         } else {
192                 return undef;
193         }
194 }
195
196 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197
198 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
199 for the possible options) between the readings given in $source and $target.  Sets
200 up a scoped relationship between $sourcetext and $targettext if the relationship is
201 scoped non-locally.
202
203 Returns a status boolean and a list of all reading pairs connected by the call to
204 add_relationship.
205
206 =cut
207
208 sub add_relationship {
209         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
210
211         my $relationship;
212         my $thispaironly;
213         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
214                 $relationship = $options;
215                 $thispaironly = 1;  # If existing rel, set only where asked.
216         } else {
217                 # Check the options
218                 $options->{'scope'} = 'local' unless $options->{'scope'};
219                 
220                 my( $is_valid, $reason ) = 
221                         $self->relationship_valid( $source, $target, $options->{'type'} );
222                 unless( $is_valid ) {
223                         throw( "Invalid relationship: $reason" );
224                 }
225                 
226                 # Try to create the relationship object.
227                 $options->{'reading_a'} = $source_rdg->text;
228                 $options->{'reading_b'} = $target_rdg->text;
229                 $options->{'orig_a'} = $source;
230                 $options->{'orig_b'} = $target;
231         if( $options->{'scope'} ne 'local' ) {
232                         # Is there a relationship with this a & b already?
233                         my $otherrel = $self->scoped_relationship( $options->{reading_a}, 
234                                 $options->{reading_b} );
235                         if( $otherrel && $otherrel->type eq $options->{type}
236                                 && $otherrel->scope eq $options->{scope} ) {
237                                 warn "Applying existing scoped relationship";
238                                 $relationship = $otherrel;
239                         }
240         }
241                 $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
242     }
243
244
245         # Find all the pairs for which we need to set the relationship.
246         my @vectors = ( [ $source, $target ] ); 
247     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
248         my $c = $self->collation;
249         # Set the same relationship everywhere we can, throughout the graph.
250         my @identical_readings = grep { $_->text eq $relationship->reading_a }
251                 $c->readings;
252         foreach my $ir ( @identical_readings ) {
253                 next if $ir->id eq $source;
254                 # Check to see if there is a target reading with the same text at
255                 # the same rank.
256                 my @itarget = grep 
257                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
258                         $c->readings;
259                 if( @itarget ) {
260                         # We found a hit.
261                         warn "More than one reading with text " . $target_rdg->text
262                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
263                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
264                 }
265         }       
266     }
267     
268     # Now set the relationship(s).
269     my @pairs_set;
270     foreach my $v ( @vectors ) {
271                 my $rel = $self->get_relationship( @$v );
272         if( $rel && $rel ne $relationship ) {
273                 if( $rel->nonlocal ) {
274                         $DB::single = 1;
275                         throw( "Found conflicting relationship at @$v" );
276                 } else {
277                         warn "Not overriding local relationship set at @$v";
278                 }
279                 next;
280         }
281         $self->_set_relationship( $relationship, @$v );
282         push( @pairs_set, $v );
283     }
284     
285     return @pairs_set;
286 }
287
288 =head2 del_relationship( $source, $target )
289
290 Removes the relationship between the given readings. If the relationship is
291 non-local, removes the relationship everywhere in the graph.
292
293 =cut
294
295 sub del_relationship {
296         my( $self, $source, $target ) = @_;
297         my $rel = $self->get_relationship( $source, $target );
298         throw( "No relationship defined between $source and $target" ) unless $rel;
299         my @vectors = ( [ $source, $target ] );
300         $self->_remove_relationship( $source, $target );
301         if( $rel->nonlocal ) {
302                 # Remove the relationship wherever it occurs.
303                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
304                         $self->relationships;
305                 foreach my $re ( @rel_edges ) {
306                         $self->_remove_relationship( @$re );
307                         push( @vectors, $re );
308                 }
309         }
310         return @vectors;
311 }
312
313 sub _remove_relationship {
314         my( $self, @vector ) = @_;
315         $self->graph->delete_edge( @vector );
316 }
317         
318 =head2 relationship_valid( $source, $target, $type )
319
320 Checks whether a relationship of type $type may exist between the readings given
321 in $source and $target.  Returns a tuple of ( status, message ) where status is
322 a yes/no boolean and, if the answer is no, message gives the reason why.
323
324 =cut
325
326 sub relationship_valid {
327     my( $self, $source, $target, $rel ) = @_;
328     my $c = $self->collation;
329     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
330                 # Check that the two readings do (for a repetition) or do not (for
331                 # a transposition) appear in the same witness.
332                 # TODO this might be called before witness paths are set...
333                 my %seen_wits;
334                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
335                 foreach my $w ( $c->reading_witnesses( $target ) ) {
336                         if( $seen_wits{$w} ) {
337                                 return ( 0, "Readings both occur in witness $w" ) 
338                                         if $rel eq 'transposition';
339                                 return ( 1, "ok" ) if $rel eq 'repetition';
340                 }
341                 return $rel eq 'transposition' ? ( 1, "ok" )
342                         : ( 0, "Readings occur only in distinct witnesses" );
343                 }
344         } else {
345                 # Check that linking the source and target in a relationship won't lead
346                 # to a path loop for any witness.  If they have the same rank then fine.
347                 return( 1, "ok" ) 
348                         if $c->reading( $source )->has_rank
349                                 && $c->reading( $target )->has_rank
350                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
351                 
352                 # Otherwise, first make a lookup table of all the
353                 # readings related to either the source or the target.
354                 my @proposed_related = ( $source, $target );
355                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
356                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
357                 my %pr_ids;
358                 map { $pr_ids{ $_ } = 1 } @proposed_related;
359         
360                 # The cumulative predecessors and successors of the proposed-related readings
361                 # should not overlap.
362                 my %all_pred;
363                 my %all_succ;
364                 foreach my $pr ( keys %pr_ids ) {
365                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
366                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
367                 }
368                 foreach my $k ( keys %all_pred ) {
369                         return( 0, "Relationship would create witness loop" )
370                                 if exists $all_succ{$k};
371                 }
372                 foreach my $k ( keys %pr_ids ) {
373                         return( 0, "Relationship would create witness loop" )
374                                 if exists $all_pred{$k} || exists $all_succ{$k};
375                 }
376                 return ( 1, "ok" );
377         }
378 }
379
380 =head2 related_readings( $reading, $filter )
381
382 Returns a list of readings that are connected via relationship links to $reading.
383 If $filter is set to a subroutine ref, returns only those related readings where
384 $filter( $relationship ) returns a true value.
385
386 =cut
387
388 sub related_readings {
389         my( $self, $reading, $filter ) = @_;
390         my $return_object;
391         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
392                 $reading = $reading->id;
393                 $return_object = 1;
394         }
395         my @answer;
396         if( $filter ) {
397                 # Backwards compat
398                 if( $filter eq 'colocated' ) {
399                         $filter = sub { $_[0]->colocated };
400                 }
401                 my %found = ( $reading => 1 );
402                 my $check = [ $reading ];
403                 my $iter = 0;
404                 while( @$check ) {
405                         my $more = [];
406                         foreach my $r ( @$check ) {
407                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
408                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
409                                                 push( @$more, $nr ) unless exists $found{$nr};
410                                                 $found{$nr} = 1;
411                                         }
412                                 }
413                         }
414                         $check = $more;
415                 }
416                 delete $found{$reading};
417                 @answer = keys %found;
418         } else {
419                 @answer = $self->graph->all_reachable( $reading );
420         }
421         if( $return_object ) {
422                 my $c = $self->collation;
423                 return map { $c->reading( $_ ) } @answer;
424         } else {
425                 return @answer;
426         }
427 }
428
429 =head2 merge_readings( $kept, $deleted );
430
431 Makes a best-effort merge of the relationship links between the given readings, and
432 stops tracking the to-be-deleted reading.
433
434 =cut
435
436 sub merge_readings {
437         my( $self, $kept, $deleted, $combined ) = @_;
438         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
439                 # Get the pair of kept / rel
440                 my @vector = ( $kept );
441                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
442                 next if $vector[0] eq $vector[1]; # Don't add a self loop
443                 
444                 # If kept changes its text, drop the relationship.
445                 next if $combined;
446                         
447                 # If kept / rel already has a relationship, warn and keep the old
448                 my $rel = $self->get_relationship( @vector );
449                 if( $rel ) {
450                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
451                         next;
452                 }
453                 
454                 # Otherwise, adopt the relationship that would be deleted.
455                 $rel = $self->get_relationship( @$edge );
456                 $self->_set_relationship( $rel, @vector );
457         }
458         $self->delete_reading( $deleted );
459 }
460
461 sub _as_graphml { 
462         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
463         
464     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
465         $rgraph->setAttribute( 'edgedefault', 'directed' );
466     $rgraph->setAttribute( 'id', 'relationships', );
467     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
468     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
469     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
470     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
471     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
472     
473     # Add the vertices according to their XML IDs
474     my %rdg_lookup = ( reverse %$node_hash );
475     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
476         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
477         $n_el->setAttribute( 'id', $n );
478         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
479     }
480     
481     # Add the relationship edges, with their object information
482     my $edge_ctr = 0;
483     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
484         # Add an edge and fill in its relationship info.
485                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
486                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
487                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
488                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
489
490                 my $rel_obj = $self->get_relationship( @$e );
491                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
492                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
493                 _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation );
494                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
495                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
496                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
497                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
498         }
499 }
500
501 sub _by_xmlid {
502         my $tmp_a = $a;
503         my $tmp_b = $b;
504         $tmp_a =~ s/\D//g;
505         $tmp_b =~ s/\D//g;
506         return $tmp_a <=> $tmp_b;
507 }
508
509 sub _add_graphml_data {
510     my( $el, $key, $value ) = @_;
511     return unless defined $value;
512     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
513     $data_el->setAttribute( 'key', $key );
514     $data_el->appendText( $value );
515 }
516
517 sub throw {
518         Text::Tradition::Error->throw( 
519                 'ident' => 'Relationship error',
520                 'message' => $_[0],
521                 );
522 }
523
524 no Moose;
525 __PACKAGE__->meta->make_immutable;
526
527 1;