try to fix encoding issues in Directory name trawl
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'meaning' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n9', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n8', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 try {
52         my @v3 = $c->del_relationship( 'n1', 'n2' );
53         ok( 0, "Should have errored on non-existent relationship" );
54 } catch( Text::Tradition::Error $e ) {
55         like( $e->message, qr/No relationship defined/, "Attempt to delete non-existent relationship errored" );
56 }
57
58 =end testing
59
60 =head1 METHODS
61
62 =head2 new( collation => $collation );
63
64 Creates a new relationship store for the given collation.
65
66 =cut
67
68 has 'collation' => (
69         is => 'ro',
70         isa => 'Text::Tradition::Collation',
71         required => 1,
72         weak_ref => 1,
73         );
74
75 has 'scopedrels' => (
76         is => 'ro',
77         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
78         default => sub { {} },
79         );
80
81 has 'graph' => (
82         is => 'ro',
83         isa => 'Graph',
84         default => sub { Graph->new( undirected => 1 ) },
85     handles => {
86         relationships => 'edges',
87         add_reading => 'add_vertex',
88         delete_reading => 'delete_vertex',
89     },
90         );
91         
92 =head2 get_relationship
93
94 Return the relationship object, if any, that exists between two readings.
95
96 =cut
97
98 sub get_relationship {
99         my $self = shift;
100         my @vector;
101         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
102                 # Dereference the edge arrayref that was passed.
103                 my $edge = shift;
104                 @vector = @$edge;
105         } else {
106                 @vector = @_;
107         }
108         my $relationship;
109         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
110                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
111         } 
112         return $relationship;
113 }
114
115 sub _set_relationship {
116         my( $self, $relationship, @vector ) = @_;
117         $self->graph->add_edge( @vector );
118         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 }
120
121 =head2 create
122
123 Create a new relationship with the given options and return it.
124 Warn and return undef if the relationship cannot be created.
125
126 =cut
127
128 sub create {
129         my( $self, $options ) = @_;
130         # Check to see if a relationship exists between the two given readings
131         my $source = delete $options->{'orig_a'};
132         my $target = delete $options->{'orig_b'};
133         my $rel = $self->get_relationship( $source, $target );
134         if( $rel ) {
135                 if( $rel->type eq 'collated' ) {
136                         # Always replace a 'collated' relationship with a more descriptive
137                         # one, if asked.
138                         $self->del_relationship( $source, $target );
139                 } elsif( $rel->type ne $options->{'type'} ) {
140                         throw( "Another relationship of type " . $rel->type 
141                                 . " already exists between $source and $target" );
142                 } else {
143                         return $rel;
144                 }
145         }
146         
147         # Check to see if a nonlocal relationship is defined for the two readings
148         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
149                 $options->{'reading_b'} );
150         if( $rel && $rel->type eq $options->{'type'} ) {
151                 return $rel;
152         } elsif( $rel ) {
153                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
154         } else {
155                 $rel = Text::Tradition::Collation::Relationship->new( $options );
156                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157                 return $rel;
158         }
159 }
160
161 =head2 add_scoped_relationship( $rel )
162
163 Keep track of relationships defined between specific readings that are scoped
164 non-locally.  Key on whichever reading occurs first alphabetically.
165
166 =cut
167
168 sub add_scoped_relationship {
169         my( $self, $rel ) = @_;
170         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
171         if( $r ) {
172                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
173                         $r->type, $rel->reading_a, $rel->reading_b );
174                 return;
175         }
176         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
177 }
178
179 =head2 scoped_relationship( $reading_a, $reading_b )
180
181 Returns the general (document-level or global) relationship that has been defined 
182 between the two reading strings. Returns undef if there is no general relationship.
183
184 =cut
185
186 sub scoped_relationship {
187         my( $self, $rdga, $rdgb ) = @_;
188         my( $first, $second ) = sort( $rdga, $rdgb );
189         if( exists $self->scopedrels->{$first}->{$second} ) {
190                 return $self->scopedrels->{$first}->{$second};
191         } else {
192                 return undef;
193         }
194 }
195
196 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197
198 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
199 for the possible options) between the readings given in $source and $target.  Sets
200 up a scoped relationship between $sourcetext and $targettext if the relationship is
201 scoped non-locally.
202
203 Returns a status boolean and a list of all reading pairs connected by the call to
204 add_relationship.
205
206 =cut
207
208 sub add_relationship {
209         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
210
211         my $relationship;
212         my $thispaironly;
213         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
214                 $relationship = $options;
215                 $thispaironly = 1;  # If existing rel, set only where asked.
216         } else {
217                 # Check the options
218                 $options->{'scope'} = 'local' unless $options->{'scope'};
219                 
220                 my( $is_valid, $reason ) = 
221                         $self->relationship_valid( $source, $target, $options->{'type'} );
222                 unless( $is_valid ) {
223                         throw( "Invalid relationship: $reason" );
224                 }
225                 
226                 # Try to create the relationship object.
227                 $options->{'reading_a'} = $source_rdg->text;
228                 $options->{'reading_b'} = $target_rdg->text;
229                 $options->{'orig_a'} = $source;
230                 $options->{'orig_b'} = $target;
231         if( $options->{'scope'} ne 'local' ) {
232                         # Is there a relationship with this a & b already?
233                         my $otherrel = $self->scoped_relationship( $options->{reading_a}, 
234                                 $options->{reading_b} );
235                         if( $otherrel && $otherrel->type eq $options->{type}
236                                 && $otherrel->scope eq $options->{scope} ) {
237                                 warn "Applying existing scoped relationship";
238                                 $relationship = $otherrel;
239                         }
240         }
241                 $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
242     }
243
244
245         # Find all the pairs for which we need to set the relationship.
246         my @vectors = ( [ $source, $target ] ); 
247     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
248         my $c = $self->collation;
249         # Set the same relationship everywhere we can, throughout the graph.
250         my @identical_readings = grep { $_->text eq $relationship->reading_a }
251                 $c->readings;
252         foreach my $ir ( @identical_readings ) {
253                 next if $ir->id eq $source;
254                 # Check to see if there is a target reading with the same text at
255                 # the same rank.
256                 my @itarget = grep 
257                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
258                         $c->readings;
259                 if( @itarget ) {
260                         # We found a hit.
261                         warn "More than one reading with text " . $target_rdg->text
262                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
263                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
264                 }
265         }       
266     }
267     
268     # Now set the relationship(s).
269     my @pairs_set;
270     foreach my $v ( @vectors ) {
271                 my $rel = $self->get_relationship( @$v );
272         if( $rel && $rel ne $relationship ) {
273                 if( $rel->nonlocal ) {
274                         throw( "Found conflicting relationship at @$v" );
275                 } else {
276                         warn "Not overriding local relationship set at @$v";
277                 }
278                 next;
279         }
280         $self->_set_relationship( $relationship, @$v );
281         push( @pairs_set, $v );
282     }
283     
284     return @pairs_set;
285 }
286
287 =head2 del_relationship( $source, $target )
288
289 Removes the relationship between the given readings. If the relationship is
290 non-local, removes the relationship everywhere in the graph.
291
292 =cut
293
294 sub del_relationship {
295         my( $self, $source, $target ) = @_;
296         my $rel = $self->get_relationship( $source, $target );
297         throw( "No relationship defined between $source and $target" ) unless $rel;
298         my @vectors = ( [ $source, $target ] );
299         $self->_remove_relationship( $source, $target );
300         if( $rel->nonlocal ) {
301                 # Remove the relationship wherever it occurs.
302                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
303                         $self->relationships;
304                 foreach my $re ( @rel_edges ) {
305                         $self->_remove_relationship( @$re );
306                         push( @vectors, $re );
307                 }
308         }
309         return @vectors;
310 }
311
312 sub _remove_relationship {
313         my( $self, @vector ) = @_;
314         $self->graph->delete_edge( @vector );
315 }
316         
317 =head2 relationship_valid( $source, $target, $type )
318
319 Checks whether a relationship of type $type may exist between the readings given
320 in $source and $target.  Returns a tuple of ( status, message ) where status is
321 a yes/no boolean and, if the answer is no, message gives the reason why.
322
323 =cut
324
325 sub relationship_valid {
326     my( $self, $source, $target, $rel ) = @_;
327     my $c = $self->collation;
328     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
329                 # Check that the two readings do (for a repetition) or do not (for
330                 # a transposition) appear in the same witness.
331                 # TODO this might be called before witness paths are set...
332                 my %seen_wits;
333                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
334                 foreach my $w ( $c->reading_witnesses( $target ) ) {
335                         if( $seen_wits{$w} ) {
336                                 return ( 0, "Readings both occur in witness $w" ) 
337                                         if $rel eq 'transposition';
338                                 return ( 1, "ok" ) if $rel eq 'repetition';
339                 }
340                 return $rel eq 'transposition' ? ( 1, "ok" )
341                         : ( 0, "Readings occur only in distinct witnesses" );
342                 }
343         } else {
344                 # Check that linking the source and target in a relationship won't lead
345                 # to a path loop for any witness.  If they have the same rank then fine.
346                 return( 1, "ok" ) 
347                         if $c->reading( $source )->has_rank
348                                 && $c->reading( $target )->has_rank
349                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
350                 
351                 # Otherwise, first make a lookup table of all the
352                 # readings related to either the source or the target.
353                 my @proposed_related = ( $source, $target );
354                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
355                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
356                 my %pr_ids;
357                 map { $pr_ids{ $_ } = 1 } @proposed_related;
358         
359                 # The cumulative predecessors and successors of the proposed-related readings
360                 # should not overlap.
361                 my %all_pred;
362                 my %all_succ;
363                 foreach my $pr ( keys %pr_ids ) {
364                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
365                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
366                 }
367                 foreach my $k ( keys %all_pred ) {
368                         return( 0, "Relationship would create witness loop" )
369                                 if exists $all_succ{$k};
370                 }
371                 foreach my $k ( keys %pr_ids ) {
372                         return( 0, "Relationship would create witness loop" )
373                                 if exists $all_pred{$k} || exists $all_succ{$k};
374                 }
375                 return ( 1, "ok" );
376         }
377 }
378
379 =head2 related_readings( $reading, $filter )
380
381 Returns a list of readings that are connected via relationship links to $reading.
382 If $filter is set to a subroutine ref, returns only those related readings where
383 $filter( $relationship ) returns a true value.
384
385 =cut
386
387 sub related_readings {
388         my( $self, $reading, $filter ) = @_;
389         my $return_object;
390         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
391                 $reading = $reading->id;
392                 $return_object = 1;
393         }
394         my @answer;
395         if( $filter ) {
396                 # Backwards compat
397                 if( $filter eq 'colocated' ) {
398                         $filter = sub { $_[0]->colocated };
399                 }
400                 my %found = ( $reading => 1 );
401                 my $check = [ $reading ];
402                 my $iter = 0;
403                 while( @$check ) {
404                         my $more = [];
405                         foreach my $r ( @$check ) {
406                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
407                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
408                                                 push( @$more, $nr ) unless exists $found{$nr};
409                                                 $found{$nr} = 1;
410                                         }
411                                 }
412                         }
413                         $check = $more;
414                 }
415                 delete $found{$reading};
416                 @answer = keys %found;
417         } else {
418                 @answer = $self->graph->all_reachable( $reading );
419         }
420         if( $return_object ) {
421                 my $c = $self->collation;
422                 return map { $c->reading( $_ ) } @answer;
423         } else {
424                 return @answer;
425         }
426 }
427
428 =head2 merge_readings( $kept, $deleted );
429
430 Makes a best-effort merge of the relationship links between the given readings, and
431 stops tracking the to-be-deleted reading.
432
433 =cut
434
435 sub merge_readings {
436         my( $self, $kept, $deleted, $combined ) = @_;
437         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
438                 # Get the pair of kept / rel
439                 my @vector = ( $kept );
440                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
441                 next if $vector[0] eq $vector[1]; # Don't add a self loop
442                 
443                 # If kept changes its text, drop the relationship.
444                 next if $combined;
445                         
446                 # If kept / rel already has a relationship, warn and keep the old
447                 my $rel = $self->get_relationship( @vector );
448                 if( $rel ) {
449                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
450                         next;
451                 }
452                 
453                 # Otherwise, adopt the relationship that would be deleted.
454                 $rel = $self->get_relationship( @$edge );
455                 $self->_set_relationship( $rel, @vector );
456         }
457         $self->delete_reading( $deleted );
458 }
459
460 sub _as_graphml { 
461         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
462         
463     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
464         $rgraph->setAttribute( 'edgedefault', 'directed' );
465     $rgraph->setAttribute( 'id', 'relationships', );
466     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
467     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
468     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
469     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
470     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
471     
472     # Add the vertices according to their XML IDs
473     my %rdg_lookup = ( reverse %$node_hash );
474     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
475         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
476         $n_el->setAttribute( 'id', $n );
477         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
478     }
479     
480     # Add the relationship edges, with their object information
481     my $edge_ctr = 0;
482     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
483         # Add an edge and fill in its relationship info.
484                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
485                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
486                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
487                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
488
489                 my $rel_obj = $self->get_relationship( @$e );
490                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
491                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
492                 _add_graphml_data( $edge_el, $edge_keys->{'annotation'}, $rel_obj->annotation );
493                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
494                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
495                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
496                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
497         }
498 }
499
500 sub _by_xmlid {
501         my $tmp_a = $a;
502         my $tmp_b = $b;
503         $tmp_a =~ s/\D//g;
504         $tmp_b =~ s/\D//g;
505         return $tmp_a <=> $tmp_b;
506 }
507
508 sub _add_graphml_data {
509     my( $el, $key, $value ) = @_;
510     return unless defined $value;
511     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
512     $data_el->setAttribute( 'key', $key );
513     $data_el->appendText( $value );
514 }
515
516 sub throw {
517         Text::Tradition::Error->throw( 
518                 'ident' => 'Relationship error',
519                 'message' => $_[0],
520                 );
521 }
522
523 no Moose;
524 __PACKAGE__->meta->make_immutable;
525
526 1;