fix bugs in persistent equivalence graph implementation
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
f8331a4d 40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
ee801e17 41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
679f17e1 44my @v2 = $c->add_relationship( 'n24', 'n23',
ee801e17 45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
679f17e1 49@v2 = $c->del_relationship( 'n12', 'n13' );
ee801e17 50is( scalar @v2, 2, "Deleted second global relationship" );
681893aa 51my @v3 = $c->del_relationship( 'n1', 'n2' );
52is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
ee801e17 53
3ae5e2ad 54=end testing
55
22222af9 56=head1 METHODS
57
58=head2 new( collation => $collation );
59
60Creates a new relationship store for the given collation.
61
62=cut
63
64has 'collation' => (
65 is => 'ro',
66 isa => 'Text::Tradition::Collation',
67 required => 1,
68 weak_ref => 1,
69 );
70
71has 'scopedrels' => (
72 is => 'ro',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
75 );
76
77has 'graph' => (
78 is => 'ro',
79 isa => 'Graph',
80 default => sub { Graph->new( undirected => 1 ) },
81 handles => {
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
85 },
86 );
87
359944f7 88=head2 equivalence_graph()
89
90Returns an equivalence graph of the collation, in which all readings
91related via a 'colocated' relationship are transformed into a single
92vertex. Can be used to determine the validity of a new relationship.
93
94=cut
95
96has 'equivalence_graph' => (
97 is => 'ro',
98 isa => 'Graph',
99 default => sub { Graph->new() },
100 );
101
102has '_node_equivalences' => (
103 is => 'ro',
104 traits => ['Hash'],
105 handles => {
106 equivalence => 'get',
107 set_equivalence => 'set',
108 remove_equivalence => 'delete',
109 },
110 );
111
112has '_equivalence_readings' => (
113 is => 'ro',
114 traits => ['Hash'],
115 handles => {
116 eqreadings => 'get',
117 set_eqreadings => 'set',
118 remove_eqreadings => 'delete',
119 },
120 );
121
122around add_reading => sub {
123 my $orig = shift;
124 my $self = shift;
125
126 $self->equivalence_graph->add_vertex( @_ );
127 $self->set_equivalence( $_[0], $_[0] );
128 $self->set_eqreadings( $_[0], [ $_[0] ] );
129 $self->$orig( @_ );
130};
131
132around delete_reading => sub {
133 my $orig = shift;
134 my $self = shift;
135
359944f7 136 $self->_remove_equivalence_node( @_ );
137 $self->$orig( @_ );
138};
139
3ae5e2ad 140=head2 get_relationship
141
142Return the relationship object, if any, that exists between two readings.
143
144=cut
145
146sub get_relationship {
4633f9e4 147 my $self = shift;
148 my @vector;
149 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
150 # Dereference the edge arrayref that was passed.
151 my $edge = shift;
152 @vector = @$edge;
153 } else {
154 @vector = @_;
155 }
3ae5e2ad 156 my $relationship;
157 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
158 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
ca6e6095 159 }
3ae5e2ad 160 return $relationship;
161}
162
163sub _set_relationship {
164 my( $self, $relationship, @vector ) = @_;
165 $self->graph->add_edge( @vector );
166 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
176badfe 167 $self->_make_equivalence( @vector ) if $relationship->colocated;
3ae5e2ad 168}
a1615ee4 169
22222af9 170=head2 create
171
172Create a new relationship with the given options and return it.
173Warn and return undef if the relationship cannot be created.
174
175=cut
176
177sub create {
178 my( $self, $options ) = @_;
179 # Check to see if a relationship exists between the two given readings
180 my $source = delete $options->{'orig_a'};
181 my $target = delete $options->{'orig_b'};
3ae5e2ad 182 my $rel = $self->get_relationship( $source, $target );
183 if( $rel ) {
3d14b48e 184 if( $rel->type eq 'collated' ) {
185 # Always replace a 'collated' relationship with a more descriptive
186 # one, if asked.
187 $self->del_relationship( $source, $target );
188 } elsif( $rel->type ne $options->{'type'} ) {
63778331 189 throw( "Another relationship of type " . $rel->type
190 . " already exists between $source and $target" );
22222af9 191 } else {
192 return $rel;
193 }
194 }
195
196 # Check to see if a nonlocal relationship is defined for the two readings
197 $rel = $self->scoped_relationship( $options->{'reading_a'},
198 $options->{'reading_b'} );
199 if( $rel && $rel->type eq $options->{'type'} ) {
200 return $rel;
201 } elsif( $rel ) {
63778331 202 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 203 } else {
204 $rel = Text::Tradition::Collation::Relationship->new( $options );
205 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
206 return $rel;
207 }
208}
209
210=head2 add_scoped_relationship( $rel )
211
212Keep track of relationships defined between specific readings that are scoped
213non-locally. Key on whichever reading occurs first alphabetically.
214
215=cut
216
217sub add_scoped_relationship {
218 my( $self, $rel ) = @_;
f222800e 219 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
220 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
221 my $r = $self->scoped_relationship( $rdga, $rdgb );
22222af9 222 if( $r ) {
223 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
f222800e 224 $r->type, $rdga, $rdgb );
22222af9 225 return;
226 }
f222800e 227 my( $first, $second ) = sort ( $rdga, $rdgb );
228 $self->scopedrels->{$first}->{$second} = $rel;
22222af9 229}
230
231=head2 scoped_relationship( $reading_a, $reading_b )
232
233Returns the general (document-level or global) relationship that has been defined
234between the two reading strings. Returns undef if there is no general relationship.
235
236=cut
237
238sub scoped_relationship {
239 my( $self, $rdga, $rdgb ) = @_;
240 my( $first, $second ) = sort( $rdga, $rdgb );
241 if( exists $self->scopedrels->{$first}->{$second} ) {
242 return $self->scopedrels->{$first}->{$second};
243 } else {
244 return undef;
245 }
246}
247
248=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
249
250Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
251for the possible options) between the readings given in $source and $target. Sets
252up a scoped relationship between $sourcetext and $targettext if the relationship is
253scoped non-locally.
254
255Returns a status boolean and a list of all reading pairs connected by the call to
256add_relationship.
257
6d381462 258=begin testing
259
260use Text::Tradition;
261use TryCatch;
262
263my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
176badfe 264# Test 1.1: try to equate nodes that are prevented with an intermediate collation
6d381462 265ok( $t1, "Parsed test fragment file" );
266my $c1 = $t1->collation;
267my $trel = $c1->get_relationship( '9,2', '9,3' );
268is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
269 "Troublesome relationship exists" );
270is( $trel->type, 'collated', "Troublesome relationship is a collation" );
271
272# Try to make the link we want
273try {
274 $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
275 ok( 1, "Added cross-collation relationship as expected" );
176badfe 276} catch( Text::Tradition::Error $e ) {
277 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
6d381462 278}
279
280try {
281 $c1->calculate_ranks();
282 ok( 1, "Successfully calculated ranks" );
176badfe 283} catch ( Text::Tradition::Error $e ) {
284 ok( 0, "Collation now has a cycle: " . $e->message );
6d381462 285}
286
176badfe 287# Test 1.2: attempt merge of an identical reading
359944f7 288try {
289 $c1->merge_readings( '9,3', '11,5' );
290 ok( 1, "Successfully merged reading 'pontifex'" );
291} catch ( Text::Tradition::Error $e ) {
292 ok( 0, "Merge of mergeable readings failed: $e->message" );
293
294}
295
176badfe 296# Test 1.3: attempt relationship with a meta reading (should fail)
297try {
298 $c1->add_relationship( '8,1', '9,2', { 'type' => 'collated' } );
299 ok( 0, "Allowed a meta-reading to be used in a relationship" );
300} catch ( Text::Tradition::Error $e ) {
301 is( $e->message, 'Cannot set relationship on a meta reading',
302 "Relationship link prevented for a meta reading" );
303}
304
305# Test 2.1: try to equate nodes that are prevented with a real intermediate
6d381462 306# equivalence
6d381462 307my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
6d381462 308my $c2 = $t2->collation;
309$c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
310my $trel2 = $c2->get_relationship( '9,2', '9,3' );
311is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
312 "Created blocking relationship" );
313is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
314# This time the link ought to fail
315try {
316 $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
414cc046 317 ok( 0, "Added cross-equivalent bad relationship" );
176badfe 318} catch ( Text::Tradition::Error $e ) {
319 like( $e->message, qr/witness loop/,
320 "Existing equivalence blocked crossing relationship" );
6d381462 321}
322
323try {
324 $c2->calculate_ranks();
325 ok( 1, "Successfully calculated ranks" );
176badfe 326} catch ( Text::Tradition::Error $e ) {
327 ok( 0, "Collation now has a cycle: " . $e->message );
6d381462 328}
329
176badfe 330# Test 3.1: make a straightforward pair of transpositions.
cc31ebaa 331my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
332# Test 1: try to equate nodes that are prevented with an intermediate collation
333my $c3 = $t3->collation;
334try {
335 $c3->add_relationship( '36,4', '38,3', { 'type' => 'transposition' } );
336 ok( 1, "Added straightforward transposition" );
176badfe 337} catch ( Text::Tradition::Error $e ) {
338 ok( 0, "Failed to add normal transposition: " . $e->message );
cc31ebaa 339}
340try {
341 $c3->add_relationship( '36,3', '38,2', { 'type' => 'transposition' } );
342 ok( 1, "Added straightforward transposition complement" );
176badfe 343} catch ( Text::Tradition::Error $e ) {
344 ok( 0, "Failed to add normal transposition complement: " . $e->message );
cc31ebaa 345}
346
176badfe 347# Test 3.2: try to make a transposition that could be a parallel.
cc31ebaa 348try {
349 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
350 ok( 0, "Added bad colocated transposition" );
176badfe 351} catch ( Text::Tradition::Error $e ) {
352 like( $e->message, qr/Readings appear to be colocated/,
353 "Prevented bad colocated transposition" );
cc31ebaa 354}
355
176badfe 356# Test 3.3: make the parallel, and then make the transposition again.
cc31ebaa 357try {
358 $c3->add_relationship( '28,3', '29,3', { 'type' => 'orthographic' } );
359 ok( 1, "Equated identical readings for transposition" );
176badfe 360} catch ( Text::Tradition::Error $e ) {
361 ok( 0, "Failed to equate identical readings: " . $e->message );
cc31ebaa 362}
363try {
364 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
365 ok( 1, "Added straightforward transposition complement" );
176badfe 366} catch ( Text::Tradition::Error $e ) {
367 ok( 0, "Failed to add normal transposition complement: " . $e->message );
cc31ebaa 368}
369
6d381462 370=end testing
371
22222af9 372=cut
373
374sub add_relationship {
414cc046 375 my( $self, $source, $target, $options ) = @_;
376 my $c = $self->collation;
176badfe 377 my $sourceobj = $c->reading( $source );
378 my $targetobj = $c->reading( $target );
359944f7 379 throw( "Adding self relationship at $source" ) if $source eq $target;
176badfe 380 throw( "Cannot set relationship on a meta reading" )
381 if( $sourceobj->is_meta || $targetobj->is_meta );
ca6e6095 382 my $relationship;
383 my $thispaironly;
414cc046 384 my $droppedcolls = [];
ca6e6095 385 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
386 $relationship = $options;
387 $thispaironly = 1; # If existing rel, set only where asked.
388 } else {
389 # Check the options
390 $options->{'scope'} = 'local' unless $options->{'scope'};
bf6e338d 391 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
8d5c8893 392 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
ca6e6095 393
414cc046 394 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
395 $options->{'type'}, $droppedcolls );
ca6e6095 396 unless( $is_valid ) {
397 throw( "Invalid relationship: $reason" );
398 }
399
400 # Try to create the relationship object.
176badfe 401 $options->{'reading_a'} = $sourceobj->text;
402 $options->{'reading_b'} = $targetobj->text;
ca6e6095 403 $options->{'orig_a'} = $source;
404 $options->{'orig_b'} = $target;
0ac5e750 405 if( $options->{'scope'} ne 'local' ) {
406 # Is there a relationship with this a & b already?
f222800e 407 # Case-insensitive for non-orthographics.
408 my $rdga = $options->{'type'} eq 'orthographic'
409 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
410 my $rdgb = $options->{'type'} eq 'orthographic'
411 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
412 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
0ac5e750 413 if( $otherrel && $otherrel->type eq $options->{type}
414 && $otherrel->scope eq $options->{scope} ) {
415 warn "Applying existing scoped relationship";
416 $relationship = $otherrel;
417 }
418 }
419 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
22222af9 420 }
ca6e6095 421
22222af9 422
423 # Find all the pairs for which we need to set the relationship.
414cc046 424 my @vectors;
ca6e6095 425 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
bf6e338d 426 push( @vectors, $self->_find_applicable( $relationship ) );
f222800e 427 }
bf6e338d 428
22222af9 429 # Now set the relationship(s).
430 my @pairs_set;
414cc046 431 my $rel = $self->get_relationship( $source, $target );
cc31ebaa 432 my $skip;
414cc046 433 if( $rel && $rel ne $relationship ) {
434 if( $rel->nonlocal ) {
435 throw( "Found conflicting relationship at $source - $target" );
436 } elsif( $rel->type ne 'collated' ) {
437 # Replace a collation relationship; leave any other sort in place.
438 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
439 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
440 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
441 warn sprintf( "Not overriding local relationship %s with global %s "
442 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
443 $source, $target, $rel->reading_a, $rel->reading_b );
cc31ebaa 444 $skip = 1;
414cc046 445 }
446 }
447 }
cc31ebaa 448 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
414cc046 449 push( @pairs_set, [ $source, $target ] );
450
451 # Set any additional relationships that might be in @vectors.
22222af9 452 foreach my $v ( @vectors ) {
414cc046 453 next if $v->[0] eq $source && $v->[1] eq $target;
454 next if $v->[1] eq $source && $v->[0] eq $target;
455 my @added = $self->add_relationship( @$v, $relationship );
456 push( @pairs_set, @added );
22222af9 457 }
458
414cc046 459 # Finally, restore whatever collations we can, and return.
460 $self->_restore_collations( @$droppedcolls );
63778331 461 return @pairs_set;
22222af9 462}
463
9d829138 464=head2 del_scoped_relationship( $reading_a, $reading_b )
465
466Returns the general (document-level or global) relationship that has been defined
467between the two reading strings. Returns undef if there is no general relationship.
468
469=cut
470
471sub del_scoped_relationship {
472 my( $self, $rdga, $rdgb ) = @_;
473 my( $first, $second ) = sort( $rdga, $rdgb );
474 return delete $self->scopedrels->{$first}->{$second};
475}
476
bf6e338d 477sub _find_applicable {
478 my( $self, $rel ) = @_;
479 my $c = $self->collation;
480 # TODO Someday we might use a case sensitive language.
481 my $lang = $c->tradition->language;
482 my @vectors;
483 my @identical_readings;
484 if( $rel->type eq 'orthographic' ) {
485 @identical_readings = grep { $_->text eq $rel->reading_a }
486 $c->readings;
487 } else {
488 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
489 $c->readings;
490 }
491 foreach my $ir ( @identical_readings ) {
492 my @itarget;
493 if( $rel->type eq 'orthographic' ) {
494 @itarget = grep { $_->rank == $ir->rank
495 && $_->text eq $rel->reading_b } $c->readings;
496 } else {
497 @itarget = grep { $_->rank == $ir->rank
498 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
499 }
500 if( @itarget ) {
501 # Warn if there is more than one hit with no orth link between them.
502 my $itmain = shift @itarget;
503 if( @itarget ) {
504 my %all_targets;
505 map { $all_targets{$_} = 1 } @itarget;
506 map { delete $all_targets{$_} }
507 $self->related_readings( $itmain,
508 sub { $_[0]->type eq 'orthographic' } );
509 warn "More than one unrelated reading with text " . $itmain->text
510 . " at rank " . $ir->rank . "!" if keys %all_targets;
511 }
512 push( @vectors, [ $ir->id, $itmain->id ] );
513 }
514 }
515 return @vectors;
516}
517
ee801e17 518=head2 del_relationship( $source, $target )
519
520Removes the relationship between the given readings. If the relationship is
521non-local, removes the relationship everywhere in the graph.
522
523=cut
524
525sub del_relationship {
526 my( $self, $source, $target ) = @_;
527 my $rel = $self->get_relationship( $source, $target );
681893aa 528 return () unless $rel; # Nothing to delete; return an empty set.
359944f7 529 my $colo = $rel->colocated;
ee801e17 530 my @vectors = ( [ $source, $target ] );
359944f7 531 $self->_remove_relationship( $colo, $source, $target );
ee801e17 532 if( $rel->nonlocal ) {
533 # Remove the relationship wherever it occurs.
9d829138 534 # Remove the relationship wherever it occurs.
ee801e17 535 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
536 $self->relationships;
537 foreach my $re ( @rel_edges ) {
359944f7 538 $self->_remove_relationship( $colo, @$re );
ee801e17 539 push( @vectors, $re );
540 }
9d829138 541 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
ee801e17 542 }
543 return @vectors;
544}
545
ca6e6095 546sub _remove_relationship {
359944f7 547 my( $self, $equiv, @vector ) = @_;
ca6e6095 548 $self->graph->delete_edge( @vector );
176badfe 549 $self->_break_equivalence( @vector ) if $equiv;
ca6e6095 550}
551
22222af9 552=head2 relationship_valid( $source, $target, $type )
553
554Checks whether a relationship of type $type may exist between the readings given
555in $source and $target. Returns a tuple of ( status, message ) where status is
556a yes/no boolean and, if the answer is no, message gives the reason why.
557
558=cut
559
560sub relationship_valid {
414cc046 561 my( $self, $source, $target, $rel, $mustdrop ) = @_;
562 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
22222af9 563 my $c = $self->collation;
564 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
565 # Check that the two readings do (for a repetition) or do not (for
566 # a transposition) appear in the same witness.
2f39215b 567 # TODO this might be called before witness paths are set...
22222af9 568 my %seen_wits;
569 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
570 foreach my $w ( $c->reading_witnesses( $target ) ) {
571 if( $seen_wits{$w} ) {
572 return ( 0, "Readings both occur in witness $w" )
573 if $rel eq 'transposition';
574 return ( 1, "ok" ) if $rel eq 'repetition';
d6936dea 575 }
22222af9 576 }
abadc997 577 return ( 0, "Readings occur only in distinct witnesses" )
578 if $rel eq 'repetition';
579 }
580 if ( $rel eq 'transposition' ) {
581 # We also need to check both that the readings occur in distinct
582 # witnesses, and that they are not in the same place. That is,
583 # proposing to link them should cause a witness loop.
359944f7 584 if( $self->test_equivalence( $source, $target ) ) {
abadc997 585 return ( 0, "Readings appear to be colocated, not transposed" );
359944f7 586 } else {
587 return ( 1, "ok" );
abadc997 588 }
589
590 } elsif( $rel ne 'repetition' ) {
22222af9 591 # Check that linking the source and target in a relationship won't lead
414cc046 592 # to a path loop for any witness.
593 # First, drop/stash any collations that might interfere
594 my $sourceobj = $c->reading( $source );
595 my $targetobj = $c->reading( $target );
596 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
597 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
598 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
599 push( @$mustdrop, $self->_drop_collations( $source ) );
600 push( @$mustdrop, $self->_drop_collations( $target ) );
359944f7 601 if( $c->end->has_rank ) {
176badfe 602 foreach my $rk ( $sourcerank .. $targetrank ) {
414cc046 603 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
604 $c->readings_at_rank( $rk );
605 }
606 }
a1615ee4 607 }
359944f7 608 unless( $self->test_equivalence( $source, $target ) ) {
414cc046 609 $self->_restore_collations( @$mustdrop );
610 return( 0, "Relationship would create witness loop" );
a1615ee4 611 }
22222af9 612 return ( 1, "ok" );
613 }
614}
615
778251a6 616sub _drop_collations {
617 my( $self, $reading ) = @_;
414cc046 618 my @dropped;
778251a6 619 foreach my $n ( $self->graph->neighbors( $reading ) ) {
620 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
414cc046 621 push( @dropped, [ $reading, $n ] );
778251a6 622 $self->del_relationship( $reading, $n );
359944f7 623 #print STDERR "Dropped collation $reading -> $n\n";
778251a6 624 }
625 }
414cc046 626 return @dropped;
627}
628
629sub _restore_collations {
630 my( $self, @vectors ) = @_;
631 foreach my $v ( @vectors ) {
632 try {
633 $self->add_relationship( @$v, { 'type' => 'collated' } );
359944f7 634 #print STDERR "Restored collation @$v\n";
414cc046 635 } catch {
636 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
637 }
638 }
778251a6 639}
640
cc31ebaa 641=head2 filter_collations()
642
643Utility function. Removes any redundant 'collated' relationships from the graph.
644A collated relationship is redundant if the readings in question would occupy
645the same rank regardless of the existence of the relationship.
646
647=cut
648
649sub filter_collations {
650 my $self = shift;
651 my $c = $self->collation;
652 foreach my $r ( 1 .. $c->end->rank - 1 ) {
653 my $anchor;
654 my @need_collations;
655 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
656 next if $rdg->is_meta;
657 my $ip = 0;
658 foreach my $pred ( $rdg->predecessors ) {
659 if( $pred->rank == $r - 1 ) {
660 $ip = 1;
661 $anchor = $rdg unless( $anchor );
662 last;
663 }
664 }
665 push( @need_collations, $rdg ) unless $ip;
666 $c->relations->_drop_collations( "$rdg" );
667 }
668 $anchor
46e1fe14 669 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
670 unless $c->get_relationship( $anchor, $_ ) } @need_collations
cc31ebaa 671 : warn "No anchor found at $r";
672 }
673}
674
7f52eac8 675=head2 related_readings( $reading, $filter )
22222af9 676
677Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 678If $filter is set to a subroutine ref, returns only those related readings where
679$filter( $relationship ) returns a true value.
22222af9 680
681=cut
682
683sub related_readings {
7f52eac8 684 my( $self, $reading, $filter ) = @_;
22222af9 685 my $return_object;
686 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
687 $reading = $reading->id;
688 $return_object = 1;
689 }
c84275ff 690 my @answer;
7f52eac8 691 if( $filter ) {
692 # Backwards compat
693 if( $filter eq 'colocated' ) {
694 $filter = sub { $_[0]->colocated };
695 }
c84275ff 696 my %found = ( $reading => 1 );
697 my $check = [ $reading ];
698 my $iter = 0;
699 while( @$check ) {
c84275ff 700 my $more = [];
701 foreach my $r ( @$check ) {
702 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 703 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 704 push( @$more, $nr ) unless exists $found{$nr};
705 $found{$nr} = 1;
706 }
707 }
708 }
709 $check = $more;
22222af9 710 }
7f52eac8 711 delete $found{$reading};
c84275ff 712 @answer = keys %found;
713 } else {
714 @answer = $self->graph->all_reachable( $reading );
22222af9 715 }
716 if( $return_object ) {
717 my $c = $self->collation;
c84275ff 718 return map { $c->reading( $_ ) } @answer;
22222af9 719 } else {
c84275ff 720 return @answer;
22222af9 721 }
722}
723
724=head2 merge_readings( $kept, $deleted );
725
726Makes a best-effort merge of the relationship links between the given readings, and
727stops tracking the to-be-deleted reading.
728
729=cut
730
731sub merge_readings {
732 my( $self, $kept, $deleted, $combined ) = @_;
733 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
734 # Get the pair of kept / rel
735 my @vector = ( $kept );
736 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
737 next if $vector[0] eq $vector[1]; # Don't add a self loop
738
739 # If kept changes its text, drop the relationship.
740 next if $combined;
741
f222800e 742 # If kept / rel already has a relationship, just keep the old
3ae5e2ad 743 my $rel = $self->get_relationship( @vector );
f222800e 744 next if $rel;
22222af9 745
746 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 747 $rel = $self->get_relationship( @$edge );
748 $self->_set_relationship( $rel, @vector );
22222af9 749 }
176badfe 750 $self->_make_equivalence( $deleted, $kept );
22222af9 751}
752
359944f7 753### Equivalence logic
754
755sub _remove_equivalence_node {
756 my( $self, $node ) = @_;
757 my $group = $self->equivalence( $node );
758 my $nodelist = $self->eqreadings( $group );
759 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
176badfe 760 print STDERR "Removing equivalence $group for $node\n" if $node eq '451,2';
359944f7 761 $self->remove_eqreadings( $group );
762 } elsif( @$nodelist == 1 ) {
763 warn "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
764 " in group that should have only $node";
765 } else {
176badfe 766 print STDERR "Removing $node from equivalence $group\n" if $node eq '451,2';
359944f7 767 my @newlist = grep { $_ ne $node } @$nodelist;
768 $self->set_eqreadings( $group, \@newlist );
769 $self->remove_equivalence( $node );
770 }
771}
772
773=head2 add_equivalence_edge
774
176badfe 775Add an edge in the equivalence graph corresponding to $source -> $target in the
776collation. Should only be called by Collation.
359944f7 777
778=cut
779
780sub add_equivalence_edge {
781 my( $self, $source, $target ) = @_;
782 my $seq = $self->equivalence( $source );
783 my $teq = $self->equivalence( $target );
176badfe 784 print STDERR "Adding equivalence edge $seq -> $teq for $source -> $target\n"
785 if grep { $_ eq '451,2' } @_;
359944f7 786 $self->equivalence_graph->add_edge( $seq, $teq );
787}
788
176badfe 789=head2 delete_equivalence_edge
359944f7 790
176badfe 791Remove an edge in the equivalence graph corresponding to $source -> $target in the
792collation. Should only be called by Collation.
359944f7 793
794=cut
795
796sub delete_equivalence_edge {
797 my( $self, $source, $target ) = @_;
798 my $seq = $self->equivalence( $source );
799 my $teq = $self->equivalence( $target );
176badfe 800 print STDERR "Deleting equivalence edge $seq -> $teq for $source -> $target\n"
801 if grep { $_ eq '451,2' } @_;
359944f7 802 $self->equivalence_graph->delete_edge( $seq, $teq );
803}
804
805sub _is_disconnected {
806 my $self = shift;
807 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
808 || scalar $self->equivalence_graph->successorless_vertices > 1 );
809}
810
176badfe 811# Equate two readings in the equivalence graph
812sub _make_equivalence {
359944f7 813 my( $self, $source, $target ) = @_;
814 # Get the source equivalent readings
815 my $seq = $self->equivalence( $source );
816 my $teq = $self->equivalence( $target );
817 # Nothing to do if they are already equivalent...
818 return if $seq eq $teq;
176badfe 819 print STDERR "Making equivalence for $source -> $target\n"
820 if grep { $_ eq '451,2' } @_;
359944f7 821 my $sourcepool = $self->eqreadings( $seq );
822 # and add them to the target readings.
176badfe 823 print STDERR "Moving readings '@$sourcepool' from group $seq to $teq\n"
824 if grep { $_ eq '451,2' } @_;
359944f7 825 push( @{$self->eqreadings( $teq )}, @$sourcepool );
826 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
827 # Then merge the nodes in the equivalence graph.
828 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
829 $self->equivalence_graph->add_edge( $pred, $teq );
830 }
831 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
832 $self->equivalence_graph->add_edge( $teq, $succ );
833 }
834 $self->equivalence_graph->delete_vertex( $seq );
176badfe 835 # TODO enable this after collation parsing is done
359944f7 836# throw( "Graph got disconnected making $source / $target equivalence" )
837# if $self->_is_disconnected;
838}
839
840=head2 test_equivalence
841
176badfe 842Test whether, if two readings were equated with a 'colocated' relationship,
843the graph would still be valid.
359944f7 844
845=cut
846
847sub test_equivalence {
848 my( $self, $source, $target ) = @_;
849 # Try merging the nodes in the equivalence graph; return a true value if
850 # no cycle is introduced thereby. Restore the original graph first.
851
852 # Keep track of edges we add
853 my %added_pred;
854 my %added_succ;
855 # Get the reading equivalents
856 my $seq = $self->equivalence( $source );
857 my $teq = $self->equivalence( $target );
858 # Maybe this is easy?
859 return 1 if $seq eq $teq;
860
861 # Save the first graph
862 my $checkstr = $self->equivalence_graph->stringify();
863 # Add and save relevant edges
864 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
865 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
866 $added_pred{$pred} = 0;
867 } else {
868 $self->equivalence_graph->add_edge( $pred, $teq );
869 $added_pred{$pred} = 1;
870 }
871 }
872 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
873 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
874 $added_succ{$succ} = 0;
875 } else {
876 $self->equivalence_graph->add_edge( $teq, $succ );
877 $added_succ{$succ} = 1;
878 }
879 }
880 # Delete source equivalent and test
881 $self->equivalence_graph->delete_vertex( $seq );
882 my $ret = !$self->equivalence_graph->has_a_cycle;
883
884 # Restore what we changed
885 $self->equivalence_graph->add_vertex( $seq );
886 foreach my $pred ( keys %added_pred ) {
887 $self->equivalence_graph->add_edge( $pred, $seq );
888 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
889 }
890 foreach my $succ ( keys %added_succ ) {
891 $self->equivalence_graph->add_edge( $seq, $succ );
892 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
893 }
894 unless( $self->equivalence_graph->eq( $checkstr ) ) {
895 warn "GRAPH CHANGED after testing";
896 }
897 # Return our answer
898 return $ret;
899}
900
176badfe 901# Unmake an equivalence link between two readings. Should only be called internally.
902sub _break_equivalence {
359944f7 903 my( $self, $source, $target ) = @_;
904
905 # This is the hard one. Need to reconstruct the equivalence groups without
906 # the given link.
907 my( %sng, %tng );
908 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
909 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
910 # If these groups intersect, they are still connected; do nothing.
911 foreach my $el ( keys %tng ) {
912 if( exists $sng{$el} ) {
176badfe 913 print STDERR "Equivalence break $source / $target is a noop\n"
914 if grep { $_ eq '451,2' } @_;
359944f7 915 return;
916 }
917 }
176badfe 918 print STDERR "Breaking equivalence $source / $target\n"
919 if grep { $_ eq '451,2' } @_;
359944f7 920 # If they don't intersect, then we split the nodes in the graph and in
921 # the hashes. First figure out which group has which name
176badfe 922 my $oldgroup = $self->equivalence( $source ); # same as $target
923 my $keepsource = $sng{$oldgroup};
924 my $newgroup = $keepsource ? $target : $source;
359944f7 925 my( $oldmembers, $newmembers );
176badfe 926 if( $keepsource ) {
359944f7 927 $oldmembers = [ keys %sng ];
928 $newmembers = [ keys %tng ];
929 } else {
930 $oldmembers = [ keys %tng ];
931 $newmembers = [ keys %sng ];
932 }
933
934 # First alter the old group in the hash
935 $self->set_eqreadings( $oldgroup, $oldmembers );
176badfe 936 foreach my $el ( @$oldmembers ) {
937 $self->set_equivalence( $el, $oldgroup );
938 }
359944f7 939
940 # then add the new group back to the hash with its new key
941 $self->set_eqreadings( $newgroup, $newmembers );
942 foreach my $el ( @$newmembers ) {
943 $self->set_equivalence( $el, $newgroup );
944 }
945
946 # Now add the new group back to the equivalence graph
947 $self->equivalence_graph->add_vertex( $newgroup );
948 # ...add the appropriate edges to the source group vertext
949 my $c = $self->collation;
950 foreach my $rdg ( @$newmembers ) {
951 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
952 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
953 }
954 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
955 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
956 }
957 }
958
959 # ...and figure out which edges on the old group vertex to delete.
960 my( %old_pred, %old_succ );
961 foreach my $rdg ( @$oldmembers ) {
962 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
963 $old_pred{$self->equivalence( $rp )} = 1;
964 }
965 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
966 $old_succ{$self->equivalence( $rs )} = 1;
967 }
968 }
969 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
970 unless( $old_pred{$p} ) {
971 $self->equivalence_graph->delete_edge( $p, $oldgroup );
972 }
973 }
974 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
975 unless( $old_succ{$s} ) {
976 $self->equivalence_graph->delete_edge( $oldgroup, $s );
977 }
978 }
176badfe 979 # TODO enable this after collation parsing is done
359944f7 980# throw( "Graph got disconnected breaking $source / $target equivalence" )
981# if $self->_is_disconnected;
982}
983
984sub _find_equiv_without {
985 my( $self, $first, $second ) = @_;
986 my %found = ( $first => 1 );
987 my $check = [ $first ];
988 my $iter = 0;
989 while( @$check ) {
990 my $more = [];
991 foreach my $r ( @$check ) {
992 foreach my $nr ( $self->graph->neighbors( $r ) ) {
993 next if $r eq $second;
994 if( $self->get_relationship( $r, $nr )->colocated ) {
995 push( @$more, $nr ) unless exists $found{$nr};
996 $found{$nr} = 1;
997 }
998 }
999 }
1000 $check = $more;
1001 }
1002 return keys %found;
1003}
1004
1005### Output logic
1006
027d819c 1007sub _as_graphml {
2626f709 1008 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 1009
1010 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1011 $rgraph->setAttribute( 'edgedefault', 'directed' );
1012 $rgraph->setAttribute( 'id', 'relationships', );
1013 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1014 $rgraph->setAttribute( 'parse.edges', 0 );
c84275ff 1015 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1016 $rgraph->setAttribute( 'parse.nodes', 0 );
c84275ff 1017 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1018
1019 # Add the vertices according to their XML IDs
2626f709 1020 my %rdg_lookup = ( reverse %$node_hash );
cc31ebaa 1021 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
826d8773 1022 my @nlist = sort keys( %rdg_lookup );
414cc046 1023 foreach my $n ( @nlist ) {
c84275ff 1024 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1025 $n_el->setAttribute( 'id', $n );
2626f709 1026 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 1027 }
cc31ebaa 1028 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
c84275ff 1029
1030 # Add the relationship edges, with their object information
1031 my $edge_ctr = 0;
1032 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1033 # Add an edge and fill in its relationship info.
a30ca502 1034 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
c84275ff 1035 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1036 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1037 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1038 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1039
3ae5e2ad 1040 my $rel_obj = $self->get_relationship( @$e );
bbd064a9 1041 foreach my $key ( keys %$edge_keys ) {
1042 my $value = $rel_obj->$key;
1043 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1044 if defined $value;
1045 }
c84275ff 1046 }
cc31ebaa 1047 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
c84275ff 1048}
1049
1050sub _by_xmlid {
2626f709 1051 my $tmp_a = $a;
1052 my $tmp_b = $b;
1053 $tmp_a =~ s/\D//g;
1054 $tmp_b =~ s/\D//g;
1055 return $tmp_a <=> $tmp_b;
c84275ff 1056}
1057
1058sub _add_graphml_data {
1059 my( $el, $key, $value ) = @_;
1060 return unless defined $value;
1061 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1062 $data_el->setAttribute( 'key', $key );
1063 $data_el->appendText( $value );
83d5ac3a 1064}
1065
63778331 1066sub throw {
1067 Text::Tradition::Error->throw(
1068 'ident' => 'Relationship error',
1069 'message' => $_[0],
1070 );
1071}
1072
22222af9 1073no Moose;
1074__PACKAGE__->meta->make_immutable;
1075
10761;