post-processing script to get rid of punctuation
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26
27 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
28
29 =end testing
30
31 =head1 METHODS
32
33 =head2 new( collation => $collation );
34
35 Creates a new relationship store for the given collation.
36
37 =cut
38
39 has 'collation' => (
40         is => 'ro',
41         isa => 'Text::Tradition::Collation',
42         required => 1,
43         weak_ref => 1,
44         );
45
46 has 'scopedrels' => (
47         is => 'ro',
48         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
49         default => sub { {} },
50         );
51
52 has 'graph' => (
53         is => 'ro',
54         isa => 'Graph',
55         default => sub { Graph->new( undirected => 1 ) },
56     handles => {
57         relationships => 'edges',
58         add_reading => 'add_vertex',
59         delete_reading => 'delete_vertex',
60     },
61         );
62         
63 =head2 get_relationship
64
65 Return the relationship object, if any, that exists between two readings.
66
67 =cut
68
69 sub get_relationship {
70         my( $self, @vector ) = @_;
71         my $relationship;
72         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
73                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
74         }
75         return $relationship;
76 }
77
78 sub _set_relationship {
79         my( $self, $relationship, @vector ) = @_;
80         $self->graph->add_edge( @vector );
81         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
82 }
83
84 sub _remove_relationship {
85         my( $self, @vector ) = @_;
86         $self->graph->delete_edge( @vector );
87 }
88         
89 =head2 create
90
91 Create a new relationship with the given options and return it.
92 Warn and return undef if the relationship cannot be created.
93
94 =cut
95
96 sub create {
97         my( $self, $options ) = @_;
98         # Check to see if a relationship exists between the two given readings
99         my $source = delete $options->{'orig_a'};
100         my $target = delete $options->{'orig_b'};
101         my $rel = $self->get_relationship( $source, $target );
102         if( $rel ) {
103                 if( $rel->type ne $options->{'type'} ) {
104                         throw( "Another relationship of type " . $rel->type 
105                                 . " already exists between $source and $target" );
106                 } else {
107                         return $rel;
108                 }
109         }
110         
111         # Check to see if a nonlocal relationship is defined for the two readings
112         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
113                 $options->{'reading_b'} );
114         if( $rel && $rel->type eq $options->{'type'} ) {
115                 return $rel;
116         } elsif( $rel ) {
117                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
118         } else {
119                 $rel = Text::Tradition::Collation::Relationship->new( $options );
120                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
121                 return $rel;
122         }
123 }
124
125 =head2 add_scoped_relationship( $rel )
126
127 Keep track of relationships defined between specific readings that are scoped
128 non-locally.  Key on whichever reading occurs first alphabetically.
129
130 =cut
131
132 sub add_scoped_relationship {
133         my( $self, $rel ) = @_;
134         my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
135         if( $r ) {
136                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
137                         $r->type, $rel->reading_a, $rel->reading_b );
138                 return;
139         }
140         $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
141 }
142
143 =head2 scoped_relationship( $reading_a, $reading_b )
144
145 Returns the general (document-level or global) relationship that has been defined 
146 between the two reading strings. Returns undef if there is no general relationship.
147
148 =cut
149
150 sub scoped_relationship {
151         my( $self, $rdga, $rdgb ) = @_;
152         my( $first, $second ) = sort( $rdga, $rdgb );
153         if( exists $self->scopedrels->{$first}->{$second} ) {
154                 return $self->scopedrels->{$first}->{$second};
155         } else {
156                 return undef;
157         }
158 }
159
160 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
161
162 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
163 for the possible options) between the readings given in $source and $target.  Sets
164 up a scoped relationship between $sourcetext and $targettext if the relationship is
165 scoped non-locally.
166
167 Returns a status boolean and a list of all reading pairs connected by the call to
168 add_relationship.
169
170 =cut
171
172 sub add_relationship {
173         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
174
175         # Check the options
176         $options->{'scope'} = 'local' unless $options->{'scope'};
177         
178         my( $is_valid, $reason ) = 
179                 $self->relationship_valid( $source, $target, $options->{'type'} );
180     unless( $is_valid ) {
181         throw( "Invalid relationship: $reason" );
182     }
183     
184     # Try to create the relationship object.
185     $options->{'reading_a'} = $source_rdg->text;
186     $options->{'reading_b'} = $target_rdg->text;
187     $options->{'orig_a'} = $source;
188     $options->{'orig_b'} = $target;
189     my $relationship = $self->create( $options );  # Will throw on error
190
191         # Find all the pairs for which we need to set the relationship.
192         my @vectors = ( [ $source, $target ] ); 
193     if( $relationship->colocated && $relationship->nonlocal ) {
194         my $c = $self->collation;
195         # Set the same relationship everywhere we can, throughout the graph.
196         my @identical_readings = grep { $_->text eq $relationship->reading_a }
197                 $c->readings;
198         foreach my $ir ( @identical_readings ) {
199                 next if $ir->id eq $source;
200                 # Check to see if there is a target reading with the same text at
201                 # the same rank.
202                 my @itarget = grep 
203                         { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
204                         $c->readings;
205                 if( @itarget ) {
206                         # We found a hit.
207                         warn "More than one reading with text " . $target_rdg->text
208                                 . " at rank " . $ir->rank . "!" if @itarget > 1;
209                         push( @vectors, [ $ir->id, $itarget[0]->id ] );
210                 }
211         }       
212     }
213     
214     # Now set the relationship(s).
215     my @pairs_set;
216     foreach my $v ( @vectors ) {
217                 my $rel = $self->get_relationship( @$v );
218         if( $rel ) {
219                 if( $rel->nonlocal ) {
220                         throw( "Found conflicting relationship at @$v" );
221                 } else {
222                         warn "Not overriding local relationship set at @$v";
223                 }
224                 next;
225         }
226         $self->_set_relationship( $relationship, @$v );
227         push( @pairs_set, $v );
228     }
229     
230     return @pairs_set;
231 }
232
233 =head2 relationship_valid( $source, $target, $type )
234
235 Checks whether a relationship of type $type may exist between the readings given
236 in $source and $target.  Returns a tuple of ( status, message ) where status is
237 a yes/no boolean and, if the answer is no, message gives the reason why.
238
239 =cut
240
241 sub relationship_valid {
242     my( $self, $source, $target, $rel ) = @_;
243     my $c = $self->collation;
244     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
245                 # Check that the two readings do (for a repetition) or do not (for
246                 # a transposition) appear in the same witness.
247                 my %seen_wits;
248                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
249                 foreach my $w ( $c->reading_witnesses( $target ) ) {
250                         if( $seen_wits{$w} ) {
251                                 return ( 0, "Readings both occur in witness $w" ) 
252                                         if $rel eq 'transposition';
253                                 return ( 1, "ok" ) if $rel eq 'repetition';
254                 }
255                 return $rel eq 'transposition' ? ( 1, "ok" )
256                         : ( 0, "Readings occur only in distinct witnesses" );
257                 }
258         } else {
259                 # Check that linking the source and target in a relationship won't lead
260                 # to a path loop for any witness.  If they have the same rank then fine.
261                 return( 1, "ok" ) 
262                         if $c->reading( $source )->rank == $c->reading( $target )->rank;
263                 
264                 # Otherwise, first make a lookup table of all the
265                 # readings related to either the source or the target.
266                 my @proposed_related = ( $source, $target );
267                 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
268                 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
269                 my %pr_ids;
270                 map { $pr_ids{ $_ } = 1 } @proposed_related;
271         
272                 # The cumulative predecessors and successors of the proposed-related readings
273                 # should not overlap.
274                 my %all_pred;
275                 my %all_succ;
276                 foreach my $pr ( keys %pr_ids ) {
277                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
278                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
279                 }
280                 foreach my $k ( keys %all_pred ) {
281                         return( 0, "Relationship would create witness loop" )
282                                 if exists $all_succ{$k};
283                 }
284                 foreach my $k ( keys %pr_ids ) {
285                         return( 0, "Relationship would create witness loop" )
286                                 if exists $all_pred{$k} || exists $all_succ{$k};
287                 }
288                 return ( 1, "ok" );
289         }
290 }
291
292 =head2 related_readings( $reading, $colocated_only )
293
294 Returns a list of readings that are connected via relationship links to $reading.
295 If $colocated_only is true, restricts the list to those readings that are in the
296 same logical location (and therefore have the same rank in the collation graph.)
297
298 =cut
299
300 sub related_readings {
301         my( $self, $reading, $colocated ) = @_;
302         my $return_object;
303         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
304                 $reading = $reading->id;
305                 $return_object = 1;
306         }
307         my @answer;
308         if( $colocated ) {
309                 my %found = ( $reading => 1 );
310                 my $check = [ $reading ];
311                 my $iter = 0;
312                 while( @$check ) {
313                         my $more = [];
314                         foreach my $r ( @$check ) {
315                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
316                                         if( $self->get_relationship( $r, $nr )->colocated ) {
317                                                 push( @$more, $nr ) unless exists $found{$nr};
318                                                 $found{$nr} = 1;
319                                         }
320                                 }
321                         }
322                         $check = $more;
323                 }
324                 @answer = keys %found;
325         } else {
326                 @answer = $self->graph->all_reachable( $reading );
327         }
328         if( $return_object ) {
329                 my $c = $self->collation;
330                 return map { $c->reading( $_ ) } @answer;
331         } else {
332                 return @answer;
333         }
334 }
335
336 =head2 merge_readings( $kept, $deleted );
337
338 Makes a best-effort merge of the relationship links between the given readings, and
339 stops tracking the to-be-deleted reading.
340
341 =cut
342
343 sub merge_readings {
344         my( $self, $kept, $deleted, $combined ) = @_;
345         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
346                 # Get the pair of kept / rel
347                 my @vector = ( $kept );
348                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
349                 next if $vector[0] eq $vector[1]; # Don't add a self loop
350                 
351                 # If kept changes its text, drop the relationship.
352                 next if $combined;
353                         
354                 # If kept / rel already has a relationship, warn and keep the old
355                 my $rel = $self->get_relationship( @vector );
356                 if( $rel ) {
357                         warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
358                         next;
359                 }
360                 
361                 # Otherwise, adopt the relationship that would be deleted.
362                 $rel = $self->get_relationship( @$edge );
363                 $self->_set_relationship( $rel, @vector );
364         }
365         $self->delete_reading( $deleted );
366 }
367
368 sub _as_graphml { 
369         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
370         
371     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
372         $rgraph->setAttribute( 'edgedefault', 'directed' );
373     $rgraph->setAttribute( 'id', 'relationships', );
374     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
375     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
376     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
377     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
378     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
379     
380     # Add the vertices according to their XML IDs
381     my %rdg_lookup = ( reverse %$node_hash );
382     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
383         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
384         $n_el->setAttribute( 'id', $n );
385         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
386     }
387     
388     # Add the relationship edges, with their object information
389     my $edge_ctr = 0;
390     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
391         # Add an edge and fill in its relationship info.
392                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
393                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
394                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
395                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
396
397                 my $rel_obj = $self->get_relationship( @$e );
398                 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
399                 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
400                 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'}, 
401                         $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
402                 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'}, 
403                         $rel_obj->non_independent ) if $rel_obj->nonind_set;
404         }
405 }
406
407 sub _by_xmlid {
408         my $tmp_a = $a;
409         my $tmp_b = $b;
410         $tmp_a =~ s/\D//g;
411         $tmp_b =~ s/\D//g;
412         return $tmp_a <=> $tmp_b;
413 }
414
415 sub _add_graphml_data {
416     my( $el, $key, $value ) = @_;
417     return unless defined $value;
418     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
419     $data_el->setAttribute( 'key', $key );
420     $data_el->appendText( $value );
421 }
422
423 sub throw {
424         Text::Tradition::Error->throw( 
425                 'ident' => 'Relationship error',
426                 'message' => $_[0],
427                 );
428 }
429
430 no Moose;
431 __PACKAGE__->meta->make_immutable;
432
433 1;