21faa93db59f4db770cd1838d0a75f253dae0a77
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 try {
52         my @v3 = $c->del_relationship( 'n1', 'n2' );
53         ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55         like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56 }
57
58 =end testing
59
60 =head1 METHODS
61
62 =head2 new( collation => $collation );
63
64 Creates a new relationship store for the given collation.
65
66 =cut
67
68 has 'collation' => (
69         is => 'ro',
70         isa => 'Text::Tradition::Collation',
71         required => 1,
72         weak_ref => 1,
73         );
74
75 has 'scopedrels' => (
76         is => 'ro',
77         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78         default => sub { {} },
79         );
80
81 has 'graph' => (
82         is => 'ro',
83         isa => 'Graph',
84         default => sub { Graph->new( undirected => 1 ) },
85     handles => {
86         relationships => 'edges',
87         add_reading => 'add_vertex',
88         delete_reading => 'delete_vertex',
89     },
90         );
91         
92 =head2 get_relationship
93
94 Return the relationship object, if any, that exists between two readings.
95
96 =cut
97
98 sub get_relationship {
99         my $self = shift;
100         my @vector;
101         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102                 # Dereference the edge arrayref that was passed.
103                 my $edge = shift;
104                 @vector = @$edge;
105         } else {
106                 @vector = @_;
107         }
108         my $relationship;
109         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
111         } 
112         return $relationship;
113 }
114
115 sub _set_relationship {
116         my( $self, $relationship, @vector ) = @_;
117         $self->graph->add_edge( @vector );
118         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 }
120
121 =head2 create
122
123 Create a new relationship with the given options and return it.
124 Warn and return undef if the relationship cannot be created.
125
126 =cut
127
128 sub create {
129         my( $self, $options ) = @_;
130         # Check to see if a relationship exists between the two given readings
131         my $source = delete $options->{'orig_a'};
132         my $target = delete $options->{'orig_b'};
133         my $rel = $self->get_relationship( $source, $target );
134         if( $rel ) {
135                 if( $rel->type eq 'collated' ) {
136                         # Always replace a 'collated' relationship with a more descriptive
137                         # one, if asked.
138                         $self->del_relationship( $source, $target );
139                 } elsif( $rel->type ne $options->{'type'} ) {
140                         throw( "Another relationship of type " . $rel->type 
141                                 . " already exists between $source and $target" );
142                 } else {
143                         return $rel;
144                 }
145         }
146         
147         # Check to see if a nonlocal relationship is defined for the two readings
148         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
149                 $options->{'reading_b'} );
150         if( $rel && $rel->type eq $options->{'type'} ) {
151                 return $rel;
152         } elsif( $rel ) {
153                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
154         } else {
155                 $rel = Text::Tradition::Collation::Relationship->new( $options );
156                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157                 return $rel;
158         }
159 }
160
161 =head2 add_scoped_relationship( $rel )
162
163 Keep track of relationships defined between specific readings that are scoped
164 non-locally.  Key on whichever reading occurs first alphabetically.
165
166 =cut
167
168 sub add_scoped_relationship {
169         my( $self, $rel ) = @_;
170         my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
171         my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );      
172         my $r = $self->scoped_relationship( $rdga, $rdgb );
173         if( $r ) {
174                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
175                         $r->type, $rdga, $rdgb );
176                 return;
177         }
178         my( $first, $second ) = sort ( $rdga, $rdgb );
179         $self->scopedrels->{$first}->{$second} = $rel;
180 }
181
182 =head2 scoped_relationship( $reading_a, $reading_b )
183
184 Returns the general (document-level or global) relationship that has been defined 
185 between the two reading strings. Returns undef if there is no general relationship.
186
187 =cut
188
189 sub scoped_relationship {
190         my( $self, $rdga, $rdgb ) = @_;
191         my( $first, $second ) = sort( $rdga, $rdgb );
192         if( exists $self->scopedrels->{$first}->{$second} ) {
193                 return $self->scopedrels->{$first}->{$second};
194         } else {
195                 return undef;
196         }
197 }
198
199 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
200
201 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
202 for the possible options) between the readings given in $source and $target.  Sets
203 up a scoped relationship between $sourcetext and $targettext if the relationship is
204 scoped non-locally.
205
206 Returns a status boolean and a list of all reading pairs connected by the call to
207 add_relationship.
208
209 =cut
210
211 sub add_relationship {
212         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
213
214         my $relationship;
215         my $thispaironly;
216         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
217                 $relationship = $options;
218                 $thispaironly = 1;  # If existing rel, set only where asked.
219         } else {
220                 # Check the options
221                 $options->{'scope'} = 'local' unless $options->{'scope'};
222                 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
223                 
224                 my( $is_valid, $reason ) = 
225                         $self->relationship_valid( $source, $target, $options->{'type'} );
226                 unless( $is_valid ) {
227                         throw( "Invalid relationship: $reason" );
228                 }
229                 
230                 # Try to create the relationship object.
231                 $options->{'reading_a'} = $source_rdg->text;
232                 $options->{'reading_b'} = $target_rdg->text;
233                 $options->{'orig_a'} = $source;
234                 $options->{'orig_b'} = $target;
235         if( $options->{'scope'} ne 'local' ) {
236                         # Is there a relationship with this a & b already?
237                         # Case-insensitive for non-orthographics.
238                         my $rdga = $options->{'type'} eq 'orthographic' 
239                                 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
240                         my $rdgb = $options->{'type'} eq 'orthographic' 
241                                 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
242                         my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
243                         if( $otherrel && $otherrel->type eq $options->{type}
244                                 && $otherrel->scope eq $options->{scope} ) {
245                                 warn "Applying existing scoped relationship";
246                                 $relationship = $otherrel;
247                         }
248         }
249                 $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
250     }
251
252
253         # Find all the pairs for which we need to set the relationship.
254         my @vectors = [ $source, $target ];
255     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
256         push( @vectors, $self->_find_applicable( $relationship ) );
257     }
258     $DB::single = 1 if grep { $_->[0] eq 'w494' || $_->[1] eq 'w494' } @vectors;
259         
260     # Now set the relationship(s).
261     my @pairs_set;
262     foreach my $v ( @vectors ) {
263                 my $rel = $self->get_relationship( @$v );
264         if( $rel && $rel ne $relationship ) {
265                 if( $rel->nonlocal ) {
266                         throw( "Found conflicting relationship at @$v" );
267                 } else {
268                         warn "Not overriding local relationship set at @$v";
269                 }
270                 next;
271         }
272         $self->_set_relationship( $relationship, @$v );
273         push( @pairs_set, $v );
274     }
275     
276     return @pairs_set;
277 }
278
279 sub _find_applicable {
280         my( $self, $rel ) = @_;
281         my $c = $self->collation;
282         # TODO Someday we might use a case sensitive language.
283         my $lang = $c->tradition->language;
284         my @vectors;
285         my @identical_readings;
286         if( $rel->type eq 'orthographic' ) {
287                 @identical_readings = grep { $_->text eq $rel->reading_a } 
288                         $c->readings;
289         } else {
290                 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
291                         $c->readings;
292         }
293         foreach my $ir ( @identical_readings ) {
294                 my @itarget;
295                 if( $rel->type eq 'orthographic' ) {
296                         @itarget = grep { $_->rank == $ir->rank 
297                                                           && $_->text eq $rel->reading_b } $c->readings;
298                 } else {
299                         @itarget = grep { $_->rank == $ir->rank 
300                                                           && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
301                 }
302                 if( @itarget ) {
303                         # Warn if there is more than one hit with no orth link between them.
304                         my $itmain = shift @itarget;
305                         if( @itarget ) {
306                                 my %all_targets;
307                                 map { $all_targets{$_} = 1 } @itarget;
308                                 map { delete $all_targets{$_} } 
309                                         $self->related_readings( $itmain, 
310                                                 sub { $_[0]->type eq 'orthographic' } );
311                         warn "More than one unrelated reading with text " . $itmain->text
312                                 . " at rank " . $ir->rank . "!" if keys %all_targets;
313                         }
314                         push( @vectors, [ $ir->id, $itmain->id ] );
315                 }
316         }
317         return @vectors;
318 }
319
320 =head2 del_relationship( $source, $target )
321
322 Removes the relationship between the given readings. If the relationship is
323 non-local, removes the relationship everywhere in the graph.
324
325 =cut
326
327 sub del_relationship {
328         my( $self, $source, $target ) = @_;
329         my $rel = $self->get_relationship( $source, $target );
330         throw( "No relationship defined between $source and $target" ) unless $rel;
331         my @vectors = ( [ $source, $target ] );
332         $self->_remove_relationship( $source, $target );
333         if( $rel->nonlocal ) {
334                 # Remove the relationship wherever it occurs.
335                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
336                         $self->relationships;
337                 foreach my $re ( @rel_edges ) {
338                         $self->_remove_relationship( @$re );
339                         push( @vectors, $re );
340                 }
341         }
342         return @vectors;
343 }
344
345 sub _remove_relationship {
346         my( $self, @vector ) = @_;
347         $self->graph->delete_edge( @vector );
348 }
349         
350 =head2 relationship_valid( $source, $target, $type )
351
352 Checks whether a relationship of type $type may exist between the readings given
353 in $source and $target.  Returns a tuple of ( status, message ) where status is
354 a yes/no boolean and, if the answer is no, message gives the reason why.
355
356 =cut
357
358 sub relationship_valid {
359     my( $self, $source, $target, $rel ) = @_;
360     my $c = $self->collation;
361     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
362                 # Check that the two readings do (for a repetition) or do not (for
363                 # a transposition) appear in the same witness.
364                 # TODO this might be called before witness paths are set...
365                 my %seen_wits;
366                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
367                 foreach my $w ( $c->reading_witnesses( $target ) ) {
368                         if( $seen_wits{$w} ) {
369                                 return ( 0, "Readings both occur in witness $w" ) 
370                                         if $rel eq 'transposition';
371                                 return ( 1, "ok" ) if $rel eq 'repetition';
372                 }
373                 return $rel eq 'transposition' ? ( 1, "ok" )
374                         : ( 0, "Readings occur only in distinct witnesses" );
375                 }
376         } else {
377                 # Check that linking the source and target in a relationship won't lead
378                 # to a path loop for any witness.  If they have the same rank then fine.
379                 return( 1, "ok" ) 
380                         if $c->reading( $source )->has_rank
381                                 && $c->reading( $target )->has_rank
382                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
383                 
384                 # Otherwise, first make a lookup table of all the
385                 # readings related to either the source or the target.
386                 my @proposed_related = ( $source, $target );
387                 # Drop the collation links of source and target, unless we want to
388                 # add a collation relationship.
389                 foreach my $r ( ( $source, $target ) ) {
390                         $self->_drop_collations( $r ) unless $rel eq 'collated';
391                         push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
392                 }
393                 my %pr_ids;
394                 map { $pr_ids{ $_ } = 1 } @proposed_related;
395         
396                 # The cumulative predecessors and successors of the proposed-related readings
397                 # should not overlap.
398                 my %all_pred;
399                 my %all_succ;
400                 foreach my $pr ( keys %pr_ids ) {
401                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
402                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
403                 }
404                 foreach my $k ( keys %all_pred ) {
405                         return( 0, "Relationship would create witness loop" )
406                                 if exists $all_succ{$k};
407                 }
408                 foreach my $k ( keys %pr_ids ) {
409                         return( 0, "Relationship would create witness loop" )
410                                 if exists $all_pred{$k} || exists $all_succ{$k};
411                 }
412                 return ( 1, "ok" );
413         }
414 }
415
416 sub _drop_collations {
417         my( $self, $reading ) = @_;
418         foreach my $n ( $self->graph->neighbors( $reading ) ) {
419                 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
420                         $self->del_relationship( $reading, $n );
421                 }
422         }
423 }
424
425 =head2 related_readings( $reading, $filter )
426
427 Returns a list of readings that are connected via relationship links to $reading.
428 If $filter is set to a subroutine ref, returns only those related readings where
429 $filter( $relationship ) returns a true value.
430
431 =cut
432
433 sub related_readings {
434         my( $self, $reading, $filter ) = @_;
435         my $return_object;
436         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
437                 $reading = $reading->id;
438                 $return_object = 1;
439         }
440         my @answer;
441         if( $filter ) {
442                 # Backwards compat
443                 if( $filter eq 'colocated' ) {
444                         $filter = sub { $_[0]->colocated };
445                 }
446                 my %found = ( $reading => 1 );
447                 my $check = [ $reading ];
448                 my $iter = 0;
449                 while( @$check ) {
450                         my $more = [];
451                         foreach my $r ( @$check ) {
452                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
453                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
454                                                 push( @$more, $nr ) unless exists $found{$nr};
455                                                 $found{$nr} = 1;
456                                         }
457                                 }
458                         }
459                         $check = $more;
460                 }
461                 delete $found{$reading};
462                 @answer = keys %found;
463         } else {
464                 @answer = $self->graph->all_reachable( $reading );
465         }
466         if( $return_object ) {
467                 my $c = $self->collation;
468                 return map { $c->reading( $_ ) } @answer;
469         } else {
470                 return @answer;
471         }
472 }
473
474 =head2 merge_readings( $kept, $deleted );
475
476 Makes a best-effort merge of the relationship links between the given readings, and
477 stops tracking the to-be-deleted reading.
478
479 =cut
480
481 sub merge_readings {
482         my( $self, $kept, $deleted, $combined ) = @_;
483         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
484                 # Get the pair of kept / rel
485                 my @vector = ( $kept );
486                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
487                 next if $vector[0] eq $vector[1]; # Don't add a self loop
488                 
489                 # If kept changes its text, drop the relationship.
490                 next if $combined;
491                         
492                 # If kept / rel already has a relationship, just keep the old
493                 my $rel = $self->get_relationship( @vector );
494                 next if $rel;
495                 
496                 # Otherwise, adopt the relationship that would be deleted.
497                 $rel = $self->get_relationship( @$edge );
498                 $self->_set_relationship( $rel, @vector );
499         }
500         $self->delete_reading( $deleted );
501 }
502
503 sub _as_graphml { 
504         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
505         
506     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
507         $rgraph->setAttribute( 'edgedefault', 'directed' );
508     $rgraph->setAttribute( 'id', 'relationships', );
509     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
510     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
511     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
512     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
513     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
514     
515     # Add the vertices according to their XML IDs
516     my %rdg_lookup = ( reverse %$node_hash );
517     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
518         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
519         $n_el->setAttribute( 'id', $n );
520         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
521     }
522     
523     # Add the relationship edges, with their object information
524     my $edge_ctr = 0;
525     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
526         # Add an edge and fill in its relationship info.
527                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
528                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
529                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
530                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
531
532                 my $rel_obj = $self->get_relationship( @$e );
533                 foreach my $key ( keys %$edge_keys ) {
534                         my $value = $rel_obj->$key;
535                         _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) 
536                                 if defined $value;
537                 }
538         }
539 }
540
541 sub _by_xmlid {
542         my $tmp_a = $a;
543         my $tmp_b = $b;
544         $tmp_a =~ s/\D//g;
545         $tmp_b =~ s/\D//g;
546         return $tmp_a <=> $tmp_b;
547 }
548
549 sub _add_graphml_data {
550     my( $el, $key, $value ) = @_;
551     return unless defined $value;
552     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
553     $data_el->setAttribute( 'key', $key );
554     $data_el->appendText( $value );
555 }
556
557 sub throw {
558         Text::Tradition::Error->throw( 
559                 'ident' => 'Relationship error',
560                 'message' => $_[0],
561                 );
562 }
563
564 no Moose;
565 __PACKAGE__->meta->make_immutable;
566
567 1;