refrain from serializing init flag
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
f8331a4d 40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
ee801e17 41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
679f17e1 44my @v2 = $c->add_relationship( 'n24', 'n23',
ee801e17 45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
679f17e1 49@v2 = $c->del_relationship( 'n12', 'n13' );
ee801e17 50is( scalar @v2, 2, "Deleted second global relationship" );
681893aa 51my @v3 = $c->del_relationship( 'n1', 'n2' );
52is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
ee801e17 53
3ae5e2ad 54=end testing
55
22222af9 56=head1 METHODS
57
58=head2 new( collation => $collation );
59
60Creates a new relationship store for the given collation.
61
62=cut
63
64has 'collation' => (
65 is => 'ro',
66 isa => 'Text::Tradition::Collation',
67 required => 1,
68 weak_ref => 1,
69 );
70
71has 'scopedrels' => (
72 is => 'ro',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
75 );
76
77has 'graph' => (
78 is => 'ro',
79 isa => 'Graph',
80 default => sub { Graph->new( undirected => 1 ) },
81 handles => {
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
85 },
86 );
87
359944f7 88=head2 equivalence_graph()
89
90Returns an equivalence graph of the collation, in which all readings
91related via a 'colocated' relationship are transformed into a single
92vertex. Can be used to determine the validity of a new relationship.
93
94=cut
95
96has 'equivalence_graph' => (
97 is => 'ro',
98 isa => 'Graph',
99 default => sub { Graph->new() },
e1083e99 100 writer => '_reset_equivalence',
359944f7 101 );
102
103has '_node_equivalences' => (
104 is => 'ro',
105 traits => ['Hash'],
106 handles => {
107 equivalence => 'get',
108 set_equivalence => 'set',
109 remove_equivalence => 'delete',
04482188 110 _clear_equivalence => 'clear',
359944f7 111 },
112 );
113
114has '_equivalence_readings' => (
115 is => 'ro',
116 traits => ['Hash'],
117 handles => {
118 eqreadings => 'get',
119 set_eqreadings => 'set',
120 remove_eqreadings => 'delete',
04482188 121 _clear_eqreadings => 'clear',
359944f7 122 },
123 );
124
125around add_reading => sub {
126 my $orig = shift;
127 my $self = shift;
128
129 $self->equivalence_graph->add_vertex( @_ );
130 $self->set_equivalence( $_[0], $_[0] );
131 $self->set_eqreadings( $_[0], [ $_[0] ] );
132 $self->$orig( @_ );
133};
134
135around delete_reading => sub {
136 my $orig = shift;
137 my $self = shift;
138
359944f7 139 $self->_remove_equivalence_node( @_ );
140 $self->$orig( @_ );
141};
142
3ae5e2ad 143=head2 get_relationship
144
145Return the relationship object, if any, that exists between two readings.
146
147=cut
148
149sub get_relationship {
4633f9e4 150 my $self = shift;
151 my @vector;
152 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
153 # Dereference the edge arrayref that was passed.
154 my $edge = shift;
155 @vector = @$edge;
156 } else {
157 @vector = @_;
158 }
3ae5e2ad 159 my $relationship;
160 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
161 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
ca6e6095 162 }
3ae5e2ad 163 return $relationship;
164}
165
166sub _set_relationship {
167 my( $self, $relationship, @vector ) = @_;
168 $self->graph->add_edge( @vector );
169 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
176badfe 170 $self->_make_equivalence( @vector ) if $relationship->colocated;
3ae5e2ad 171}
a1615ee4 172
22222af9 173=head2 create
174
175Create a new relationship with the given options and return it.
176Warn and return undef if the relationship cannot be created.
177
178=cut
179
180sub create {
181 my( $self, $options ) = @_;
182 # Check to see if a relationship exists between the two given readings
183 my $source = delete $options->{'orig_a'};
184 my $target = delete $options->{'orig_b'};
3ae5e2ad 185 my $rel = $self->get_relationship( $source, $target );
186 if( $rel ) {
3d14b48e 187 if( $rel->type eq 'collated' ) {
188 # Always replace a 'collated' relationship with a more descriptive
189 # one, if asked.
190 $self->del_relationship( $source, $target );
191 } elsif( $rel->type ne $options->{'type'} ) {
63778331 192 throw( "Another relationship of type " . $rel->type
193 . " already exists between $source and $target" );
22222af9 194 } else {
195 return $rel;
196 }
197 }
198
199 # Check to see if a nonlocal relationship is defined for the two readings
200 $rel = $self->scoped_relationship( $options->{'reading_a'},
201 $options->{'reading_b'} );
202 if( $rel && $rel->type eq $options->{'type'} ) {
203 return $rel;
204 } elsif( $rel ) {
63778331 205 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 206 } else {
207 $rel = Text::Tradition::Collation::Relationship->new( $options );
208 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
209 return $rel;
210 }
211}
212
213=head2 add_scoped_relationship( $rel )
214
215Keep track of relationships defined between specific readings that are scoped
216non-locally. Key on whichever reading occurs first alphabetically.
217
218=cut
219
220sub add_scoped_relationship {
221 my( $self, $rel ) = @_;
f222800e 222 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
223 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
224 my $r = $self->scoped_relationship( $rdga, $rdgb );
22222af9 225 if( $r ) {
226 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
f222800e 227 $r->type, $rdga, $rdgb );
22222af9 228 return;
229 }
f222800e 230 my( $first, $second ) = sort ( $rdga, $rdgb );
231 $self->scopedrels->{$first}->{$second} = $rel;
22222af9 232}
233
234=head2 scoped_relationship( $reading_a, $reading_b )
235
236Returns the general (document-level or global) relationship that has been defined
237between the two reading strings. Returns undef if there is no general relationship.
238
239=cut
240
241sub scoped_relationship {
242 my( $self, $rdga, $rdgb ) = @_;
243 my( $first, $second ) = sort( $rdga, $rdgb );
244 if( exists $self->scopedrels->{$first}->{$second} ) {
245 return $self->scopedrels->{$first}->{$second};
246 } else {
247 return undef;
248 }
249}
250
251=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
252
253Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
254for the possible options) between the readings given in $source and $target. Sets
255up a scoped relationship between $sourcetext and $targettext if the relationship is
256scoped non-locally.
257
258Returns a status boolean and a list of all reading pairs connected by the call to
259add_relationship.
260
6d381462 261=begin testing
262
263use Text::Tradition;
264use TryCatch;
265
266my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
176badfe 267# Test 1.1: try to equate nodes that are prevented with an intermediate collation
6d381462 268ok( $t1, "Parsed test fragment file" );
269my $c1 = $t1->collation;
10e4b1ac 270my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
6d381462 271is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
272 "Troublesome relationship exists" );
273is( $trel->type, 'collated', "Troublesome relationship is a collation" );
274
275# Try to make the link we want
276try {
10e4b1ac 277 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
6d381462 278 ok( 1, "Added cross-collation relationship as expected" );
176badfe 279} catch( Text::Tradition::Error $e ) {
280 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
6d381462 281}
282
283try {
284 $c1->calculate_ranks();
285 ok( 1, "Successfully calculated ranks" );
176badfe 286} catch ( Text::Tradition::Error $e ) {
287 ok( 0, "Collation now has a cycle: " . $e->message );
6d381462 288}
289
176badfe 290# Test 1.2: attempt merge of an identical reading
359944f7 291try {
10e4b1ac 292 $c1->merge_readings( 'r9.3', 'r11.5' );
359944f7 293 ok( 1, "Successfully merged reading 'pontifex'" );
294} catch ( Text::Tradition::Error $e ) {
295 ok( 0, "Merge of mergeable readings failed: $e->message" );
296
297}
298
176badfe 299# Test 1.3: attempt relationship with a meta reading (should fail)
300try {
10e4b1ac 301 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
176badfe 302 ok( 0, "Allowed a meta-reading to be used in a relationship" );
303} catch ( Text::Tradition::Error $e ) {
304 is( $e->message, 'Cannot set relationship on a meta reading',
305 "Relationship link prevented for a meta reading" );
306}
307
308# Test 2.1: try to equate nodes that are prevented with a real intermediate
6d381462 309# equivalence
6d381462 310my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
6d381462 311my $c2 = $t2->collation;
10e4b1ac 312$c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
313my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
6d381462 314is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
315 "Created blocking relationship" );
316is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
317# This time the link ought to fail
318try {
10e4b1ac 319 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
414cc046 320 ok( 0, "Added cross-equivalent bad relationship" );
176badfe 321} catch ( Text::Tradition::Error $e ) {
322 like( $e->message, qr/witness loop/,
323 "Existing equivalence blocked crossing relationship" );
6d381462 324}
325
326try {
327 $c2->calculate_ranks();
328 ok( 1, "Successfully calculated ranks" );
176badfe 329} catch ( Text::Tradition::Error $e ) {
330 ok( 0, "Collation now has a cycle: " . $e->message );
6d381462 331}
332
176badfe 333# Test 3.1: make a straightforward pair of transpositions.
cc31ebaa 334my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
335# Test 1: try to equate nodes that are prevented with an intermediate collation
336my $c3 = $t3->collation;
337try {
10e4b1ac 338 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
cc31ebaa 339 ok( 1, "Added straightforward transposition" );
176badfe 340} catch ( Text::Tradition::Error $e ) {
341 ok( 0, "Failed to add normal transposition: " . $e->message );
cc31ebaa 342}
343try {
10e4b1ac 344 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
cc31ebaa 345 ok( 1, "Added straightforward transposition complement" );
176badfe 346} catch ( Text::Tradition::Error $e ) {
347 ok( 0, "Failed to add normal transposition complement: " . $e->message );
cc31ebaa 348}
349
176badfe 350# Test 3.2: try to make a transposition that could be a parallel.
cc31ebaa 351try {
10e4b1ac 352 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
cc31ebaa 353 ok( 0, "Added bad colocated transposition" );
176badfe 354} catch ( Text::Tradition::Error $e ) {
355 like( $e->message, qr/Readings appear to be colocated/,
356 "Prevented bad colocated transposition" );
cc31ebaa 357}
358
176badfe 359# Test 3.3: make the parallel, and then make the transposition again.
cc31ebaa 360try {
10e4b1ac 361 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
cc31ebaa 362 ok( 1, "Equated identical readings for transposition" );
176badfe 363} catch ( Text::Tradition::Error $e ) {
364 ok( 0, "Failed to equate identical readings: " . $e->message );
cc31ebaa 365}
366try {
10e4b1ac 367 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
cc31ebaa 368 ok( 1, "Added straightforward transposition complement" );
176badfe 369} catch ( Text::Tradition::Error $e ) {
370 ok( 0, "Failed to add normal transposition complement: " . $e->message );
cc31ebaa 371}
372
6d381462 373=end testing
374
22222af9 375=cut
376
377sub add_relationship {
414cc046 378 my( $self, $source, $target, $options ) = @_;
379 my $c = $self->collation;
176badfe 380 my $sourceobj = $c->reading( $source );
381 my $targetobj = $c->reading( $target );
359944f7 382 throw( "Adding self relationship at $source" ) if $source eq $target;
176badfe 383 throw( "Cannot set relationship on a meta reading" )
384 if( $sourceobj->is_meta || $targetobj->is_meta );
ca6e6095 385 my $relationship;
386 my $thispaironly;
414cc046 387 my $droppedcolls = [];
ca6e6095 388 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
389 $relationship = $options;
390 $thispaironly = 1; # If existing rel, set only where asked.
391 } else {
392 # Check the options
393 $options->{'scope'} = 'local' unless $options->{'scope'};
bf6e338d 394 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
8d5c8893 395 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
ca6e6095 396
414cc046 397 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
398 $options->{'type'}, $droppedcolls );
ca6e6095 399 unless( $is_valid ) {
400 throw( "Invalid relationship: $reason" );
401 }
402
403 # Try to create the relationship object.
176badfe 404 $options->{'reading_a'} = $sourceobj->text;
405 $options->{'reading_b'} = $targetobj->text;
ca6e6095 406 $options->{'orig_a'} = $source;
407 $options->{'orig_b'} = $target;
0ac5e750 408 if( $options->{'scope'} ne 'local' ) {
409 # Is there a relationship with this a & b already?
f222800e 410 # Case-insensitive for non-orthographics.
411 my $rdga = $options->{'type'} eq 'orthographic'
412 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
413 my $rdgb = $options->{'type'} eq 'orthographic'
414 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
415 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
0ac5e750 416 if( $otherrel && $otherrel->type eq $options->{type}
417 && $otherrel->scope eq $options->{scope} ) {
418 warn "Applying existing scoped relationship";
419 $relationship = $otherrel;
420 }
421 }
422 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
22222af9 423 }
ca6e6095 424
22222af9 425
426 # Find all the pairs for which we need to set the relationship.
414cc046 427 my @vectors;
ca6e6095 428 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
bf6e338d 429 push( @vectors, $self->_find_applicable( $relationship ) );
f222800e 430 }
bf6e338d 431
22222af9 432 # Now set the relationship(s).
433 my @pairs_set;
414cc046 434 my $rel = $self->get_relationship( $source, $target );
cc31ebaa 435 my $skip;
414cc046 436 if( $rel && $rel ne $relationship ) {
437 if( $rel->nonlocal ) {
438 throw( "Found conflicting relationship at $source - $target" );
439 } elsif( $rel->type ne 'collated' ) {
440 # Replace a collation relationship; leave any other sort in place.
441 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
442 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
443 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
444 warn sprintf( "Not overriding local relationship %s with global %s "
445 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
446 $source, $target, $rel->reading_a, $rel->reading_b );
cc31ebaa 447 $skip = 1;
414cc046 448 }
449 }
450 }
cc31ebaa 451 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
414cc046 452 push( @pairs_set, [ $source, $target ] );
453
454 # Set any additional relationships that might be in @vectors.
22222af9 455 foreach my $v ( @vectors ) {
414cc046 456 next if $v->[0] eq $source && $v->[1] eq $target;
457 next if $v->[1] eq $source && $v->[0] eq $target;
458 my @added = $self->add_relationship( @$v, $relationship );
459 push( @pairs_set, @added );
22222af9 460 }
461
414cc046 462 # Finally, restore whatever collations we can, and return.
463 $self->_restore_collations( @$droppedcolls );
63778331 464 return @pairs_set;
22222af9 465}
466
9d829138 467=head2 del_scoped_relationship( $reading_a, $reading_b )
468
469Returns the general (document-level or global) relationship that has been defined
470between the two reading strings. Returns undef if there is no general relationship.
471
472=cut
473
474sub del_scoped_relationship {
475 my( $self, $rdga, $rdgb ) = @_;
476 my( $first, $second ) = sort( $rdga, $rdgb );
477 return delete $self->scopedrels->{$first}->{$second};
478}
479
bf6e338d 480sub _find_applicable {
481 my( $self, $rel ) = @_;
482 my $c = $self->collation;
483 # TODO Someday we might use a case sensitive language.
484 my $lang = $c->tradition->language;
485 my @vectors;
486 my @identical_readings;
487 if( $rel->type eq 'orthographic' ) {
488 @identical_readings = grep { $_->text eq $rel->reading_a }
489 $c->readings;
490 } else {
491 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
492 $c->readings;
493 }
494 foreach my $ir ( @identical_readings ) {
495 my @itarget;
496 if( $rel->type eq 'orthographic' ) {
497 @itarget = grep { $_->rank == $ir->rank
498 && $_->text eq $rel->reading_b } $c->readings;
499 } else {
500 @itarget = grep { $_->rank == $ir->rank
501 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
502 }
503 if( @itarget ) {
504 # Warn if there is more than one hit with no orth link between them.
505 my $itmain = shift @itarget;
506 if( @itarget ) {
507 my %all_targets;
508 map { $all_targets{$_} = 1 } @itarget;
509 map { delete $all_targets{$_} }
510 $self->related_readings( $itmain,
511 sub { $_[0]->type eq 'orthographic' } );
512 warn "More than one unrelated reading with text " . $itmain->text
513 . " at rank " . $ir->rank . "!" if keys %all_targets;
514 }
515 push( @vectors, [ $ir->id, $itmain->id ] );
516 }
517 }
518 return @vectors;
519}
520
ee801e17 521=head2 del_relationship( $source, $target )
522
523Removes the relationship between the given readings. If the relationship is
524non-local, removes the relationship everywhere in the graph.
525
526=cut
527
528sub del_relationship {
529 my( $self, $source, $target ) = @_;
530 my $rel = $self->get_relationship( $source, $target );
681893aa 531 return () unless $rel; # Nothing to delete; return an empty set.
359944f7 532 my $colo = $rel->colocated;
ee801e17 533 my @vectors = ( [ $source, $target ] );
359944f7 534 $self->_remove_relationship( $colo, $source, $target );
ee801e17 535 if( $rel->nonlocal ) {
536 # Remove the relationship wherever it occurs.
9d829138 537 # Remove the relationship wherever it occurs.
ee801e17 538 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
539 $self->relationships;
540 foreach my $re ( @rel_edges ) {
359944f7 541 $self->_remove_relationship( $colo, @$re );
ee801e17 542 push( @vectors, $re );
543 }
9d829138 544 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
ee801e17 545 }
546 return @vectors;
547}
548
ca6e6095 549sub _remove_relationship {
359944f7 550 my( $self, $equiv, @vector ) = @_;
ca6e6095 551 $self->graph->delete_edge( @vector );
176badfe 552 $self->_break_equivalence( @vector ) if $equiv;
ca6e6095 553}
554
22222af9 555=head2 relationship_valid( $source, $target, $type )
556
557Checks whether a relationship of type $type may exist between the readings given
558in $source and $target. Returns a tuple of ( status, message ) where status is
559a yes/no boolean and, if the answer is no, message gives the reason why.
560
561=cut
562
563sub relationship_valid {
414cc046 564 my( $self, $source, $target, $rel, $mustdrop ) = @_;
565 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
22222af9 566 my $c = $self->collation;
10943ab0 567 ## Assume validity is okay if we are initializing from scratch.
3579c22b 568 return ( 1, "initializing" ) unless $c->tradition->_initialized;
10943ab0 569
22222af9 570 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
571 # Check that the two readings do (for a repetition) or do not (for
572 # a transposition) appear in the same witness.
573 my %seen_wits;
574 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
575 foreach my $w ( $c->reading_witnesses( $target ) ) {
576 if( $seen_wits{$w} ) {
577 return ( 0, "Readings both occur in witness $w" )
578 if $rel eq 'transposition';
579 return ( 1, "ok" ) if $rel eq 'repetition';
d6936dea 580 }
22222af9 581 }
abadc997 582 return ( 0, "Readings occur only in distinct witnesses" )
583 if $rel eq 'repetition';
584 }
585 if ( $rel eq 'transposition' ) {
586 # We also need to check both that the readings occur in distinct
587 # witnesses, and that they are not in the same place. That is,
588 # proposing to link them should cause a witness loop.
359944f7 589 if( $self->test_equivalence( $source, $target ) ) {
abadc997 590 return ( 0, "Readings appear to be colocated, not transposed" );
359944f7 591 } else {
592 return ( 1, "ok" );
abadc997 593 }
594
595 } elsif( $rel ne 'repetition' ) {
22222af9 596 # Check that linking the source and target in a relationship won't lead
414cc046 597 # to a path loop for any witness.
598 # First, drop/stash any collations that might interfere
599 my $sourceobj = $c->reading( $source );
600 my $targetobj = $c->reading( $target );
601 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
602 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
603 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
604 push( @$mustdrop, $self->_drop_collations( $source ) );
605 push( @$mustdrop, $self->_drop_collations( $target ) );
359944f7 606 if( $c->end->has_rank ) {
176badfe 607 foreach my $rk ( $sourcerank .. $targetrank ) {
414cc046 608 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
609 $c->readings_at_rank( $rk );
610 }
611 }
a1615ee4 612 }
359944f7 613 unless( $self->test_equivalence( $source, $target ) ) {
414cc046 614 $self->_restore_collations( @$mustdrop );
615 return( 0, "Relationship would create witness loop" );
a1615ee4 616 }
22222af9 617 return ( 1, "ok" );
618 }
619}
620
778251a6 621sub _drop_collations {
622 my( $self, $reading ) = @_;
414cc046 623 my @dropped;
778251a6 624 foreach my $n ( $self->graph->neighbors( $reading ) ) {
625 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
414cc046 626 push( @dropped, [ $reading, $n ] );
778251a6 627 $self->del_relationship( $reading, $n );
359944f7 628 #print STDERR "Dropped collation $reading -> $n\n";
778251a6 629 }
630 }
414cc046 631 return @dropped;
632}
633
634sub _restore_collations {
635 my( $self, @vectors ) = @_;
636 foreach my $v ( @vectors ) {
637 try {
638 $self->add_relationship( @$v, { 'type' => 'collated' } );
359944f7 639 #print STDERR "Restored collation @$v\n";
414cc046 640 } catch {
641 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
642 }
643 }
778251a6 644}
645
cc31ebaa 646=head2 filter_collations()
647
648Utility function. Removes any redundant 'collated' relationships from the graph.
649A collated relationship is redundant if the readings in question would occupy
650the same rank regardless of the existence of the relationship.
651
652=cut
653
654sub filter_collations {
655 my $self = shift;
656 my $c = $self->collation;
657 foreach my $r ( 1 .. $c->end->rank - 1 ) {
658 my $anchor;
659 my @need_collations;
660 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
661 next if $rdg->is_meta;
662 my $ip = 0;
663 foreach my $pred ( $rdg->predecessors ) {
664 if( $pred->rank == $r - 1 ) {
665 $ip = 1;
666 $anchor = $rdg unless( $anchor );
667 last;
668 }
669 }
670 push( @need_collations, $rdg ) unless $ip;
671 $c->relations->_drop_collations( "$rdg" );
672 }
673 $anchor
46e1fe14 674 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
675 unless $c->get_relationship( $anchor, $_ ) } @need_collations
cc31ebaa 676 : warn "No anchor found at $r";
677 }
678}
679
7f52eac8 680=head2 related_readings( $reading, $filter )
22222af9 681
682Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 683If $filter is set to a subroutine ref, returns only those related readings where
684$filter( $relationship ) returns a true value.
22222af9 685
686=cut
687
688sub related_readings {
7f52eac8 689 my( $self, $reading, $filter ) = @_;
22222af9 690 my $return_object;
691 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
692 $reading = $reading->id;
693 $return_object = 1;
694 }
c84275ff 695 my @answer;
7f52eac8 696 if( $filter ) {
697 # Backwards compat
698 if( $filter eq 'colocated' ) {
699 $filter = sub { $_[0]->colocated };
700 }
c84275ff 701 my %found = ( $reading => 1 );
702 my $check = [ $reading ];
703 my $iter = 0;
704 while( @$check ) {
c84275ff 705 my $more = [];
706 foreach my $r ( @$check ) {
707 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 708 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 709 push( @$more, $nr ) unless exists $found{$nr};
710 $found{$nr} = 1;
711 }
712 }
713 }
714 $check = $more;
22222af9 715 }
7f52eac8 716 delete $found{$reading};
c84275ff 717 @answer = keys %found;
718 } else {
719 @answer = $self->graph->all_reachable( $reading );
22222af9 720 }
721 if( $return_object ) {
722 my $c = $self->collation;
c84275ff 723 return map { $c->reading( $_ ) } @answer;
22222af9 724 } else {
c84275ff 725 return @answer;
22222af9 726 }
727}
728
729=head2 merge_readings( $kept, $deleted );
730
731Makes a best-effort merge of the relationship links between the given readings, and
732stops tracking the to-be-deleted reading.
733
734=cut
735
736sub merge_readings {
737 my( $self, $kept, $deleted, $combined ) = @_;
738 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
739 # Get the pair of kept / rel
740 my @vector = ( $kept );
741 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
742 next if $vector[0] eq $vector[1]; # Don't add a self loop
743
744 # If kept changes its text, drop the relationship.
745 next if $combined;
746
f222800e 747 # If kept / rel already has a relationship, just keep the old
3ae5e2ad 748 my $rel = $self->get_relationship( @vector );
f222800e 749 next if $rel;
22222af9 750
751 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 752 $rel = $self->get_relationship( @$edge );
753 $self->_set_relationship( $rel, @vector );
22222af9 754 }
10943ab0 755 $self->_make_equivalence( $deleted, $kept, 1 );
22222af9 756}
757
359944f7 758### Equivalence logic
759
760sub _remove_equivalence_node {
761 my( $self, $node ) = @_;
762 my $group = $self->equivalence( $node );
763 my $nodelist = $self->eqreadings( $group );
764 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
3579c22b 765 $self->equivalence_graph->delete_vertex( $group );
359944f7 766 $self->remove_eqreadings( $group );
3579c22b 767 $self->remove_equivalence( $group );
359944f7 768 } elsif( @$nodelist == 1 ) {
3579c22b 769 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
770 " in group that should have only $node" );
359944f7 771 } else {
10e4b1ac 772 my @newlist = grep { $_ ne $node } @$nodelist;
359944f7 773 $self->set_eqreadings( $group, \@newlist );
774 $self->remove_equivalence( $node );
775 }
776}
777
778=head2 add_equivalence_edge
779
176badfe 780Add an edge in the equivalence graph corresponding to $source -> $target in the
781collation. Should only be called by Collation.
359944f7 782
783=cut
784
785sub add_equivalence_edge {
786 my( $self, $source, $target ) = @_;
787 my $seq = $self->equivalence( $source );
788 my $teq = $self->equivalence( $target );
359944f7 789 $self->equivalence_graph->add_edge( $seq, $teq );
790}
791
176badfe 792=head2 delete_equivalence_edge
359944f7 793
176badfe 794Remove an edge in the equivalence graph corresponding to $source -> $target in the
795collation. Should only be called by Collation.
359944f7 796
797=cut
798
799sub delete_equivalence_edge {
800 my( $self, $source, $target ) = @_;
801 my $seq = $self->equivalence( $source );
802 my $teq = $self->equivalence( $target );
359944f7 803 $self->equivalence_graph->delete_edge( $seq, $teq );
804}
805
806sub _is_disconnected {
807 my $self = shift;
808 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
809 || scalar $self->equivalence_graph->successorless_vertices > 1 );
810}
811
176badfe 812# Equate two readings in the equivalence graph
813sub _make_equivalence {
10943ab0 814 my( $self, $source, $target, $removing ) = @_;
359944f7 815 # Get the source equivalent readings
816 my $seq = $self->equivalence( $source );
817 my $teq = $self->equivalence( $target );
818 # Nothing to do if they are already equivalent...
819 return if $seq eq $teq;
10943ab0 820 # Get the readings equivalent to source
821 my @sourcepool = @{$self->eqreadings( $seq )};
822 # If we are removing the source reading entirely, don't push
823 # it into the target pool.
824 @sourcepool = grep { $_ ne $seq } @sourcepool if $removing;
359944f7 825 # and add them to the target readings.
10943ab0 826 push( @{$self->eqreadings( $teq )}, @sourcepool );
827 map { $self->set_equivalence( $_, $teq ) } @sourcepool;
359944f7 828 # Then merge the nodes in the equivalence graph.
829 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
10943ab0 830 $self->equivalence_graph->add_edge( $pred, $teq )
831 unless $teq eq $pred;
359944f7 832 }
833 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
10943ab0 834 $self->equivalence_graph->add_edge( $teq, $succ )
835 unless $teq eq $succ;
359944f7 836 }
837 $self->equivalence_graph->delete_vertex( $seq );
176badfe 838 # TODO enable this after collation parsing is done
10943ab0 839 throw( "Graph got disconnected making $source / $target equivalence" )
3579c22b 840 if $self->_is_disconnected && $self->collation->tradition->_initialized;
359944f7 841}
842
843=head2 test_equivalence
844
176badfe 845Test whether, if two readings were equated with a 'colocated' relationship,
846the graph would still be valid.
359944f7 847
848=cut
849
850sub test_equivalence {
851 my( $self, $source, $target ) = @_;
852 # Try merging the nodes in the equivalence graph; return a true value if
853 # no cycle is introduced thereby. Restore the original graph first.
854
855 # Keep track of edges we add
856 my %added_pred;
857 my %added_succ;
858 # Get the reading equivalents
859 my $seq = $self->equivalence( $source );
860 my $teq = $self->equivalence( $target );
861 # Maybe this is easy?
862 return 1 if $seq eq $teq;
863
864 # Save the first graph
865 my $checkstr = $self->equivalence_graph->stringify();
866 # Add and save relevant edges
867 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
868 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
869 $added_pred{$pred} = 0;
870 } else {
871 $self->equivalence_graph->add_edge( $pred, $teq );
872 $added_pred{$pred} = 1;
873 }
874 }
875 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
876 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
877 $added_succ{$succ} = 0;
878 } else {
879 $self->equivalence_graph->add_edge( $teq, $succ );
880 $added_succ{$succ} = 1;
881 }
882 }
883 # Delete source equivalent and test
884 $self->equivalence_graph->delete_vertex( $seq );
885 my $ret = !$self->equivalence_graph->has_a_cycle;
886
887 # Restore what we changed
888 $self->equivalence_graph->add_vertex( $seq );
889 foreach my $pred ( keys %added_pred ) {
890 $self->equivalence_graph->add_edge( $pred, $seq );
891 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
892 }
893 foreach my $succ ( keys %added_succ ) {
894 $self->equivalence_graph->add_edge( $seq, $succ );
895 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
896 }
897 unless( $self->equivalence_graph->eq( $checkstr ) ) {
898 warn "GRAPH CHANGED after testing";
899 }
900 # Return our answer
901 return $ret;
902}
903
176badfe 904# Unmake an equivalence link between two readings. Should only be called internally.
905sub _break_equivalence {
359944f7 906 my( $self, $source, $target ) = @_;
907
908 # This is the hard one. Need to reconstruct the equivalence groups without
909 # the given link.
910 my( %sng, %tng );
911 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
912 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
913 # If these groups intersect, they are still connected; do nothing.
914 foreach my $el ( keys %tng ) {
10e4b1ac 915 return if( exists $sng{$el} );
359944f7 916 }
359944f7 917 # If they don't intersect, then we split the nodes in the graph and in
918 # the hashes. First figure out which group has which name
176badfe 919 my $oldgroup = $self->equivalence( $source ); # same as $target
920 my $keepsource = $sng{$oldgroup};
921 my $newgroup = $keepsource ? $target : $source;
359944f7 922 my( $oldmembers, $newmembers );
176badfe 923 if( $keepsource ) {
359944f7 924 $oldmembers = [ keys %sng ];
925 $newmembers = [ keys %tng ];
926 } else {
927 $oldmembers = [ keys %tng ];
928 $newmembers = [ keys %sng ];
929 }
930
931 # First alter the old group in the hash
932 $self->set_eqreadings( $oldgroup, $oldmembers );
176badfe 933 foreach my $el ( @$oldmembers ) {
934 $self->set_equivalence( $el, $oldgroup );
935 }
359944f7 936
937 # then add the new group back to the hash with its new key
938 $self->set_eqreadings( $newgroup, $newmembers );
939 foreach my $el ( @$newmembers ) {
940 $self->set_equivalence( $el, $newgroup );
941 }
942
943 # Now add the new group back to the equivalence graph
944 $self->equivalence_graph->add_vertex( $newgroup );
945 # ...add the appropriate edges to the source group vertext
946 my $c = $self->collation;
947 foreach my $rdg ( @$newmembers ) {
948 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
949 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
950 }
951 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
952 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
953 }
954 }
955
956 # ...and figure out which edges on the old group vertex to delete.
957 my( %old_pred, %old_succ );
958 foreach my $rdg ( @$oldmembers ) {
959 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
960 $old_pred{$self->equivalence( $rp )} = 1;
961 }
962 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
963 $old_succ{$self->equivalence( $rs )} = 1;
964 }
965 }
966 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
967 unless( $old_pred{$p} ) {
968 $self->equivalence_graph->delete_edge( $p, $oldgroup );
969 }
970 }
971 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
972 unless( $old_succ{$s} ) {
973 $self->equivalence_graph->delete_edge( $oldgroup, $s );
974 }
975 }
176badfe 976 # TODO enable this after collation parsing is done
10943ab0 977 throw( "Graph got disconnected breaking $source / $target equivalence" )
3579c22b 978 if $self->_is_disconnected && $self->collation->tradition->_initialized;
359944f7 979}
980
981sub _find_equiv_without {
982 my( $self, $first, $second ) = @_;
983 my %found = ( $first => 1 );
984 my $check = [ $first ];
985 my $iter = 0;
986 while( @$check ) {
987 my $more = [];
988 foreach my $r ( @$check ) {
989 foreach my $nr ( $self->graph->neighbors( $r ) ) {
990 next if $r eq $second;
991 if( $self->get_relationship( $r, $nr )->colocated ) {
992 push( @$more, $nr ) unless exists $found{$nr};
993 $found{$nr} = 1;
994 }
995 }
996 }
997 $check = $more;
998 }
999 return keys %found;
1000}
1001
e1083e99 1002=head2 rebuild_equivalence
1003
1004(Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1005adds all readings and edges, then makes an equivalence for all relationships.
1006
1007=cut
1008
1009sub rebuild_equivalence {
1010 my $self = shift;
1011 my $newgraph = Graph->new();
04482188 1012 # Set this as the new equivalence graph
1013 $self->_reset_equivalence( $newgraph );
1014 # Clear out the data hashes
1015 $self->_clear_equivalence;
1016 $self->_clear_eqreadings;
1017
1018 # Add the readings
e1083e99 1019 foreach my $r ( $self->collation->readings ) {
04482188 1020 my $rid = $r->id;
1021 $newgraph->add_vertex( $rid );
1022 $self->set_equivalence( $rid, $rid );
1023 $self->set_eqreadings( $rid, [ $rid ] );
e1083e99 1024 }
04482188 1025
1026 # Now add the edges
e1083e99 1027 foreach my $e ( $self->collation->paths ) {
04482188 1028 $self->add_equivalence_edge( @$e );
e1083e99 1029 }
04482188 1030
1031 # Now equate the colocated readings. This does no testing;
1032 # it assumes that all preexisting relationships are valid.
e1083e99 1033 foreach my $rel ( $self->relationships ) {
1034 my $relobj = $self->get_relationship( $rel );
1035 next unless $relobj && $relobj->colocated;
1036 $self->_make_equivalence( @$rel );
1037 }
1038}
1039
359944f7 1040### Output logic
1041
027d819c 1042sub _as_graphml {
2626f709 1043 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 1044
1045 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1046 $rgraph->setAttribute( 'edgedefault', 'directed' );
1047 $rgraph->setAttribute( 'id', 'relationships', );
1048 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1049 $rgraph->setAttribute( 'parse.edges', 0 );
c84275ff 1050 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1051 $rgraph->setAttribute( 'parse.nodes', 0 );
c84275ff 1052 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1053
1054 # Add the vertices according to their XML IDs
2626f709 1055 my %rdg_lookup = ( reverse %$node_hash );
cc31ebaa 1056 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
826d8773 1057 my @nlist = sort keys( %rdg_lookup );
414cc046 1058 foreach my $n ( @nlist ) {
c84275ff 1059 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1060 $n_el->setAttribute( 'id', $n );
2626f709 1061 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 1062 }
cc31ebaa 1063 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
c84275ff 1064
1065 # Add the relationship edges, with their object information
1066 my $edge_ctr = 0;
1067 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1068 # Add an edge and fill in its relationship info.
a30ca502 1069 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
c84275ff 1070 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1071 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1072 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1073 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1074
3ae5e2ad 1075 my $rel_obj = $self->get_relationship( @$e );
bbd064a9 1076 foreach my $key ( keys %$edge_keys ) {
1077 my $value = $rel_obj->$key;
1078 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1079 if defined $value;
1080 }
c84275ff 1081 }
cc31ebaa 1082 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
c84275ff 1083}
1084
1085sub _by_xmlid {
2626f709 1086 my $tmp_a = $a;
1087 my $tmp_b = $b;
1088 $tmp_a =~ s/\D//g;
1089 $tmp_b =~ s/\D//g;
1090 return $tmp_a <=> $tmp_b;
c84275ff 1091}
1092
1093sub _add_graphml_data {
1094 my( $el, $key, $value ) = @_;
1095 return unless defined $value;
1096 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1097 $data_el->setAttribute( 'key', $key );
1098 $data_el->appendText( $value );
83d5ac3a 1099}
1100
63778331 1101sub throw {
1102 Text::Tradition::Error->throw(
1103 'ident' => 'Relationship error',
1104 'message' => $_[0],
1105 );
1106}
1107
22222af9 1108no Moose;
1109__PACKAGE__->meta->make_immutable;
1110
11111;