post-processing script to get rid of punctuation
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
26
27use_ok( 'Text::Tradition::Collation::RelationshipStore' );
28
29=end testing
30
22222af9 31=head1 METHODS
32
33=head2 new( collation => $collation );
34
35Creates a new relationship store for the given collation.
36
37=cut
38
39has 'collation' => (
40 is => 'ro',
41 isa => 'Text::Tradition::Collation',
42 required => 1,
43 weak_ref => 1,
44 );
45
46has 'scopedrels' => (
47 is => 'ro',
48 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
49 default => sub { {} },
50 );
51
52has 'graph' => (
53 is => 'ro',
54 isa => 'Graph',
55 default => sub { Graph->new( undirected => 1 ) },
56 handles => {
57 relationships => 'edges',
58 add_reading => 'add_vertex',
59 delete_reading => 'delete_vertex',
60 },
61 );
62
3ae5e2ad 63=head2 get_relationship
64
65Return the relationship object, if any, that exists between two readings.
66
67=cut
68
69sub get_relationship {
70 my( $self, @vector ) = @_;
71 my $relationship;
72 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
73 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
74 }
75 return $relationship;
76}
77
78sub _set_relationship {
79 my( $self, $relationship, @vector ) = @_;
80 $self->graph->add_edge( @vector );
81 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
82}
a1615ee4 83
84sub _remove_relationship {
85 my( $self, @vector ) = @_;
86 $self->graph->delete_edge( @vector );
87}
3ae5e2ad 88
22222af9 89=head2 create
90
91Create a new relationship with the given options and return it.
92Warn and return undef if the relationship cannot be created.
93
94=cut
95
96sub create {
97 my( $self, $options ) = @_;
98 # Check to see if a relationship exists between the two given readings
99 my $source = delete $options->{'orig_a'};
100 my $target = delete $options->{'orig_b'};
3ae5e2ad 101 my $rel = $self->get_relationship( $source, $target );
102 if( $rel ) {
a7037072 103 if( $rel->type ne $options->{'type'} ) {
63778331 104 throw( "Another relationship of type " . $rel->type
105 . " already exists between $source and $target" );
22222af9 106 } else {
107 return $rel;
108 }
109 }
110
111 # Check to see if a nonlocal relationship is defined for the two readings
112 $rel = $self->scoped_relationship( $options->{'reading_a'},
113 $options->{'reading_b'} );
114 if( $rel && $rel->type eq $options->{'type'} ) {
115 return $rel;
116 } elsif( $rel ) {
63778331 117 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 118 } else {
119 $rel = Text::Tradition::Collation::Relationship->new( $options );
120 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
121 return $rel;
122 }
123}
124
125=head2 add_scoped_relationship( $rel )
126
127Keep track of relationships defined between specific readings that are scoped
128non-locally. Key on whichever reading occurs first alphabetically.
129
130=cut
131
132sub add_scoped_relationship {
133 my( $self, $rel ) = @_;
134 my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
135 if( $r ) {
136 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
137 $r->type, $rel->reading_a, $rel->reading_b );
138 return;
139 }
140 $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
141}
142
143=head2 scoped_relationship( $reading_a, $reading_b )
144
145Returns the general (document-level or global) relationship that has been defined
146between the two reading strings. Returns undef if there is no general relationship.
147
148=cut
149
150sub scoped_relationship {
151 my( $self, $rdga, $rdgb ) = @_;
152 my( $first, $second ) = sort( $rdga, $rdgb );
153 if( exists $self->scopedrels->{$first}->{$second} ) {
154 return $self->scopedrels->{$first}->{$second};
155 } else {
156 return undef;
157 }
158}
159
160=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
161
162Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
163for the possible options) between the readings given in $source and $target. Sets
164up a scoped relationship between $sourcetext and $targettext if the relationship is
165scoped non-locally.
166
167Returns a status boolean and a list of all reading pairs connected by the call to
168add_relationship.
169
170=cut
171
172sub add_relationship {
173 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
174
175 # Check the options
176 $options->{'scope'} = 'local' unless $options->{'scope'};
177
178 my( $is_valid, $reason ) =
179 $self->relationship_valid( $source, $target, $options->{'type'} );
180 unless( $is_valid ) {
63778331 181 throw( "Invalid relationship: $reason" );
22222af9 182 }
183
184 # Try to create the relationship object.
185 $options->{'reading_a'} = $source_rdg->text;
186 $options->{'reading_b'} = $target_rdg->text;
187 $options->{'orig_a'} = $source;
188 $options->{'orig_b'} = $target;
63778331 189 my $relationship = $self->create( $options ); # Will throw on error
22222af9 190
191 # Find all the pairs for which we need to set the relationship.
192 my @vectors = ( [ $source, $target ] );
193 if( $relationship->colocated && $relationship->nonlocal ) {
194 my $c = $self->collation;
195 # Set the same relationship everywhere we can, throughout the graph.
196 my @identical_readings = grep { $_->text eq $relationship->reading_a }
197 $c->readings;
198 foreach my $ir ( @identical_readings ) {
cf6c01be 199 next if $ir->id eq $source;
22222af9 200 # Check to see if there is a target reading with the same text at
201 # the same rank.
202 my @itarget = grep
203 { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
204 $c->readings;
205 if( @itarget ) {
206 # We found a hit.
207 warn "More than one reading with text " . $target_rdg->text
208 . " at rank " . $ir->rank . "!" if @itarget > 1;
cf6c01be 209 push( @vectors, [ $ir->id, $itarget[0]->id ] );
22222af9 210 }
211 }
212 }
213
214 # Now set the relationship(s).
215 my @pairs_set;
216 foreach my $v ( @vectors ) {
3ae5e2ad 217 my $rel = $self->get_relationship( @$v );
218 if( $rel ) {
63778331 219 if( $rel->nonlocal ) {
220 throw( "Found conflicting relationship at @$v" );
221 } else {
222 warn "Not overriding local relationship set at @$v";
223 }
3ae5e2ad 224 next;
22222af9 225 }
3ae5e2ad 226 $self->_set_relationship( $relationship, @$v );
22222af9 227 push( @pairs_set, $v );
228 }
229
63778331 230 return @pairs_set;
22222af9 231}
232
233=head2 relationship_valid( $source, $target, $type )
234
235Checks whether a relationship of type $type may exist between the readings given
236in $source and $target. Returns a tuple of ( status, message ) where status is
237a yes/no boolean and, if the answer is no, message gives the reason why.
238
239=cut
240
241sub relationship_valid {
242 my( $self, $source, $target, $rel ) = @_;
243 my $c = $self->collation;
244 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
245 # Check that the two readings do (for a repetition) or do not (for
246 # a transposition) appear in the same witness.
247 my %seen_wits;
248 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
249 foreach my $w ( $c->reading_witnesses( $target ) ) {
250 if( $seen_wits{$w} ) {
251 return ( 0, "Readings both occur in witness $w" )
252 if $rel eq 'transposition';
253 return ( 1, "ok" ) if $rel eq 'repetition';
254 }
255 return $rel eq 'transposition' ? ( 1, "ok" )
256 : ( 0, "Readings occur only in distinct witnesses" );
257 }
258 } else {
259 # Check that linking the source and target in a relationship won't lead
a1615ee4 260 # to a path loop for any witness. If they have the same rank then fine.
261 return( 1, "ok" )
262 if $c->reading( $source )->rank == $c->reading( $target )->rank;
263
264 # Otherwise, first make a lookup table of all the
22222af9 265 # readings related to either the source or the target.
266 my @proposed_related = ( $source, $target );
267 push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
268 push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
269 my %pr_ids;
270 map { $pr_ids{ $_ } = 1 } @proposed_related;
271
a1615ee4 272 # The cumulative predecessors and successors of the proposed-related readings
273 # should not overlap.
274 my %all_pred;
275 my %all_succ;
22222af9 276 foreach my $pr ( keys %pr_ids ) {
a1615ee4 277 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
278 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
279 }
280 foreach my $k ( keys %all_pred ) {
281 return( 0, "Relationship would create witness loop" )
282 if exists $all_succ{$k};
283 }
284 foreach my $k ( keys %pr_ids ) {
285 return( 0, "Relationship would create witness loop" )
286 if exists $all_pred{$k} || exists $all_succ{$k};
287 }
22222af9 288 return ( 1, "ok" );
289 }
290}
291
292=head2 related_readings( $reading, $colocated_only )
293
294Returns a list of readings that are connected via relationship links to $reading.
295If $colocated_only is true, restricts the list to those readings that are in the
296same logical location (and therefore have the same rank in the collation graph.)
297
298=cut
299
300sub related_readings {
301 my( $self, $reading, $colocated ) = @_;
302 my $return_object;
303 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
304 $reading = $reading->id;
305 $return_object = 1;
306 }
c84275ff 307 my @answer;
22222af9 308 if( $colocated ) {
c84275ff 309 my %found = ( $reading => 1 );
310 my $check = [ $reading ];
311 my $iter = 0;
312 while( @$check ) {
c84275ff 313 my $more = [];
314 foreach my $r ( @$check ) {
315 foreach my $nr ( $self->graph->neighbors( $r ) ) {
3ae5e2ad 316 if( $self->get_relationship( $r, $nr )->colocated ) {
c84275ff 317 push( @$more, $nr ) unless exists $found{$nr};
318 $found{$nr} = 1;
319 }
320 }
321 }
322 $check = $more;
22222af9 323 }
c84275ff 324 @answer = keys %found;
325 } else {
326 @answer = $self->graph->all_reachable( $reading );
22222af9 327 }
328 if( $return_object ) {
329 my $c = $self->collation;
c84275ff 330 return map { $c->reading( $_ ) } @answer;
22222af9 331 } else {
c84275ff 332 return @answer;
22222af9 333 }
334}
335
336=head2 merge_readings( $kept, $deleted );
337
338Makes a best-effort merge of the relationship links between the given readings, and
339stops tracking the to-be-deleted reading.
340
341=cut
342
343sub merge_readings {
344 my( $self, $kept, $deleted, $combined ) = @_;
345 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
346 # Get the pair of kept / rel
347 my @vector = ( $kept );
348 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
349 next if $vector[0] eq $vector[1]; # Don't add a self loop
350
351 # If kept changes its text, drop the relationship.
352 next if $combined;
353
354 # If kept / rel already has a relationship, warn and keep the old
3ae5e2ad 355 my $rel = $self->get_relationship( @vector );
356 if( $rel ) {
22222af9 357 warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
358 next;
359 }
360
361 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 362 $rel = $self->get_relationship( @$edge );
363 $self->_set_relationship( $rel, @vector );
22222af9 364 }
365 $self->delete_reading( $deleted );
366}
367
027d819c 368sub _as_graphml {
2626f709 369 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 370
371 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
372 $rgraph->setAttribute( 'edgedefault', 'directed' );
373 $rgraph->setAttribute( 'id', 'relationships', );
374 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
375 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
376 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
377 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
378 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
379
380 # Add the vertices according to their XML IDs
2626f709 381 my %rdg_lookup = ( reverse %$node_hash );
382 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
c84275ff 383 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
384 $n_el->setAttribute( 'id', $n );
2626f709 385 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 386 }
387
388 # Add the relationship edges, with their object information
389 my $edge_ctr = 0;
390 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
391 # Add an edge and fill in its relationship info.
392 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
393 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
394 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
395 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
396
3ae5e2ad 397 my $rel_obj = $self->get_relationship( @$e );
c84275ff 398 _add_graphml_data( $edge_el, $edge_keys->{'relationship'}, $rel_obj->type );
399 _add_graphml_data( $edge_el, $edge_keys->{'scope'}, $rel_obj->scope );
400 _add_graphml_data( $edge_el, $edge_keys->{'non_correctable'},
401 $rel_obj->non_correctable ) if $rel_obj->noncorr_set;
402 _add_graphml_data( $edge_el, $edge_keys->{'non_independent'},
403 $rel_obj->non_independent ) if $rel_obj->nonind_set;
404 }
405}
406
407sub _by_xmlid {
2626f709 408 my $tmp_a = $a;
409 my $tmp_b = $b;
410 $tmp_a =~ s/\D//g;
411 $tmp_b =~ s/\D//g;
412 return $tmp_a <=> $tmp_b;
c84275ff 413}
414
415sub _add_graphml_data {
416 my( $el, $key, $value ) = @_;
417 return unless defined $value;
418 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
419 $data_el->setAttribute( 'key', $key );
420 $data_el->appendText( $value );
83d5ac3a 421}
422
63778331 423sub throw {
424 Text::Tradition::Error->throw(
425 'ident' => 'Relationship error',
426 'message' => $_[0],
427 );
428}
429
22222af9 430no Moose;
431__PACKAGE__->meta->make_immutable;
432
4331;