add method for initializing/resetting equivalence graph
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
f8331a4d 40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
ee801e17 41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
679f17e1 44my @v2 = $c->add_relationship( 'n24', 'n23',
ee801e17 45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
679f17e1 49@v2 = $c->del_relationship( 'n12', 'n13' );
ee801e17 50is( scalar @v2, 2, "Deleted second global relationship" );
681893aa 51my @v3 = $c->del_relationship( 'n1', 'n2' );
52is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
ee801e17 53
3ae5e2ad 54=end testing
55
22222af9 56=head1 METHODS
57
58=head2 new( collation => $collation );
59
60Creates a new relationship store for the given collation.
61
62=cut
63
64has 'collation' => (
65 is => 'ro',
66 isa => 'Text::Tradition::Collation',
67 required => 1,
68 weak_ref => 1,
69 );
70
71has 'scopedrels' => (
72 is => 'ro',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
75 );
76
77has 'graph' => (
78 is => 'ro',
79 isa => 'Graph',
80 default => sub { Graph->new( undirected => 1 ) },
81 handles => {
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
85 },
86 );
87
359944f7 88=head2 equivalence_graph()
89
90Returns an equivalence graph of the collation, in which all readings
91related via a 'colocated' relationship are transformed into a single
92vertex. Can be used to determine the validity of a new relationship.
93
94=cut
95
96has 'equivalence_graph' => (
97 is => 'ro',
98 isa => 'Graph',
99 default => sub { Graph->new() },
e1083e99 100 writer => '_reset_equivalence',
359944f7 101 );
102
103has '_node_equivalences' => (
104 is => 'ro',
105 traits => ['Hash'],
106 handles => {
107 equivalence => 'get',
108 set_equivalence => 'set',
109 remove_equivalence => 'delete',
110 },
111 );
112
113has '_equivalence_readings' => (
114 is => 'ro',
115 traits => ['Hash'],
116 handles => {
117 eqreadings => 'get',
118 set_eqreadings => 'set',
119 remove_eqreadings => 'delete',
120 },
121 );
122
123around add_reading => sub {
124 my $orig = shift;
125 my $self = shift;
126
127 $self->equivalence_graph->add_vertex( @_ );
128 $self->set_equivalence( $_[0], $_[0] );
129 $self->set_eqreadings( $_[0], [ $_[0] ] );
130 $self->$orig( @_ );
131};
132
133around delete_reading => sub {
134 my $orig = shift;
135 my $self = shift;
136
359944f7 137 $self->_remove_equivalence_node( @_ );
138 $self->$orig( @_ );
139};
140
3ae5e2ad 141=head2 get_relationship
142
143Return the relationship object, if any, that exists between two readings.
144
145=cut
146
147sub get_relationship {
4633f9e4 148 my $self = shift;
149 my @vector;
150 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
151 # Dereference the edge arrayref that was passed.
152 my $edge = shift;
153 @vector = @$edge;
154 } else {
155 @vector = @_;
156 }
3ae5e2ad 157 my $relationship;
158 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
159 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
ca6e6095 160 }
3ae5e2ad 161 return $relationship;
162}
163
164sub _set_relationship {
165 my( $self, $relationship, @vector ) = @_;
166 $self->graph->add_edge( @vector );
167 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
176badfe 168 $self->_make_equivalence( @vector ) if $relationship->colocated;
3ae5e2ad 169}
a1615ee4 170
22222af9 171=head2 create
172
173Create a new relationship with the given options and return it.
174Warn and return undef if the relationship cannot be created.
175
176=cut
177
178sub create {
179 my( $self, $options ) = @_;
180 # Check to see if a relationship exists between the two given readings
181 my $source = delete $options->{'orig_a'};
182 my $target = delete $options->{'orig_b'};
3ae5e2ad 183 my $rel = $self->get_relationship( $source, $target );
184 if( $rel ) {
3d14b48e 185 if( $rel->type eq 'collated' ) {
186 # Always replace a 'collated' relationship with a more descriptive
187 # one, if asked.
188 $self->del_relationship( $source, $target );
189 } elsif( $rel->type ne $options->{'type'} ) {
63778331 190 throw( "Another relationship of type " . $rel->type
191 . " already exists between $source and $target" );
22222af9 192 } else {
193 return $rel;
194 }
195 }
196
197 # Check to see if a nonlocal relationship is defined for the two readings
198 $rel = $self->scoped_relationship( $options->{'reading_a'},
199 $options->{'reading_b'} );
200 if( $rel && $rel->type eq $options->{'type'} ) {
201 return $rel;
202 } elsif( $rel ) {
63778331 203 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 204 } else {
205 $rel = Text::Tradition::Collation::Relationship->new( $options );
206 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
207 return $rel;
208 }
209}
210
211=head2 add_scoped_relationship( $rel )
212
213Keep track of relationships defined between specific readings that are scoped
214non-locally. Key on whichever reading occurs first alphabetically.
215
216=cut
217
218sub add_scoped_relationship {
219 my( $self, $rel ) = @_;
f222800e 220 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
221 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
222 my $r = $self->scoped_relationship( $rdga, $rdgb );
22222af9 223 if( $r ) {
224 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
f222800e 225 $r->type, $rdga, $rdgb );
22222af9 226 return;
227 }
f222800e 228 my( $first, $second ) = sort ( $rdga, $rdgb );
229 $self->scopedrels->{$first}->{$second} = $rel;
22222af9 230}
231
232=head2 scoped_relationship( $reading_a, $reading_b )
233
234Returns the general (document-level or global) relationship that has been defined
235between the two reading strings. Returns undef if there is no general relationship.
236
237=cut
238
239sub scoped_relationship {
240 my( $self, $rdga, $rdgb ) = @_;
241 my( $first, $second ) = sort( $rdga, $rdgb );
242 if( exists $self->scopedrels->{$first}->{$second} ) {
243 return $self->scopedrels->{$first}->{$second};
244 } else {
245 return undef;
246 }
247}
248
249=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
250
251Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
252for the possible options) between the readings given in $source and $target. Sets
253up a scoped relationship between $sourcetext and $targettext if the relationship is
254scoped non-locally.
255
256Returns a status boolean and a list of all reading pairs connected by the call to
257add_relationship.
258
6d381462 259=begin testing
260
261use Text::Tradition;
262use TryCatch;
263
264my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
176badfe 265# Test 1.1: try to equate nodes that are prevented with an intermediate collation
6d381462 266ok( $t1, "Parsed test fragment file" );
267my $c1 = $t1->collation;
268my $trel = $c1->get_relationship( '9,2', '9,3' );
269is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
270 "Troublesome relationship exists" );
271is( $trel->type, 'collated', "Troublesome relationship is a collation" );
272
273# Try to make the link we want
274try {
275 $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
276 ok( 1, "Added cross-collation relationship as expected" );
176badfe 277} catch( Text::Tradition::Error $e ) {
278 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
6d381462 279}
280
281try {
282 $c1->calculate_ranks();
283 ok( 1, "Successfully calculated ranks" );
176badfe 284} catch ( Text::Tradition::Error $e ) {
285 ok( 0, "Collation now has a cycle: " . $e->message );
6d381462 286}
287
176badfe 288# Test 1.2: attempt merge of an identical reading
359944f7 289try {
290 $c1->merge_readings( '9,3', '11,5' );
291 ok( 1, "Successfully merged reading 'pontifex'" );
292} catch ( Text::Tradition::Error $e ) {
293 ok( 0, "Merge of mergeable readings failed: $e->message" );
294
295}
296
176badfe 297# Test 1.3: attempt relationship with a meta reading (should fail)
298try {
299 $c1->add_relationship( '8,1', '9,2', { 'type' => 'collated' } );
300 ok( 0, "Allowed a meta-reading to be used in a relationship" );
301} catch ( Text::Tradition::Error $e ) {
302 is( $e->message, 'Cannot set relationship on a meta reading',
303 "Relationship link prevented for a meta reading" );
304}
305
306# Test 2.1: try to equate nodes that are prevented with a real intermediate
6d381462 307# equivalence
6d381462 308my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
6d381462 309my $c2 = $t2->collation;
310$c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
311my $trel2 = $c2->get_relationship( '9,2', '9,3' );
312is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
313 "Created blocking relationship" );
314is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
315# This time the link ought to fail
316try {
317 $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
414cc046 318 ok( 0, "Added cross-equivalent bad relationship" );
176badfe 319} catch ( Text::Tradition::Error $e ) {
320 like( $e->message, qr/witness loop/,
321 "Existing equivalence blocked crossing relationship" );
6d381462 322}
323
324try {
325 $c2->calculate_ranks();
326 ok( 1, "Successfully calculated ranks" );
176badfe 327} catch ( Text::Tradition::Error $e ) {
328 ok( 0, "Collation now has a cycle: " . $e->message );
6d381462 329}
330
176badfe 331# Test 3.1: make a straightforward pair of transpositions.
cc31ebaa 332my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
333# Test 1: try to equate nodes that are prevented with an intermediate collation
334my $c3 = $t3->collation;
335try {
336 $c3->add_relationship( '36,4', '38,3', { 'type' => 'transposition' } );
337 ok( 1, "Added straightforward transposition" );
176badfe 338} catch ( Text::Tradition::Error $e ) {
339 ok( 0, "Failed to add normal transposition: " . $e->message );
cc31ebaa 340}
341try {
342 $c3->add_relationship( '36,3', '38,2', { 'type' => 'transposition' } );
343 ok( 1, "Added straightforward transposition complement" );
176badfe 344} catch ( Text::Tradition::Error $e ) {
345 ok( 0, "Failed to add normal transposition complement: " . $e->message );
cc31ebaa 346}
347
176badfe 348# Test 3.2: try to make a transposition that could be a parallel.
cc31ebaa 349try {
350 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
351 ok( 0, "Added bad colocated transposition" );
176badfe 352} catch ( Text::Tradition::Error $e ) {
353 like( $e->message, qr/Readings appear to be colocated/,
354 "Prevented bad colocated transposition" );
cc31ebaa 355}
356
176badfe 357# Test 3.3: make the parallel, and then make the transposition again.
cc31ebaa 358try {
359 $c3->add_relationship( '28,3', '29,3', { 'type' => 'orthographic' } );
360 ok( 1, "Equated identical readings for transposition" );
176badfe 361} catch ( Text::Tradition::Error $e ) {
362 ok( 0, "Failed to equate identical readings: " . $e->message );
cc31ebaa 363}
364try {
365 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
366 ok( 1, "Added straightforward transposition complement" );
176badfe 367} catch ( Text::Tradition::Error $e ) {
368 ok( 0, "Failed to add normal transposition complement: " . $e->message );
cc31ebaa 369}
370
6d381462 371=end testing
372
22222af9 373=cut
374
375sub add_relationship {
414cc046 376 my( $self, $source, $target, $options ) = @_;
377 my $c = $self->collation;
176badfe 378 my $sourceobj = $c->reading( $source );
379 my $targetobj = $c->reading( $target );
359944f7 380 throw( "Adding self relationship at $source" ) if $source eq $target;
176badfe 381 throw( "Cannot set relationship on a meta reading" )
382 if( $sourceobj->is_meta || $targetobj->is_meta );
ca6e6095 383 my $relationship;
384 my $thispaironly;
414cc046 385 my $droppedcolls = [];
ca6e6095 386 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
387 $relationship = $options;
388 $thispaironly = 1; # If existing rel, set only where asked.
389 } else {
390 # Check the options
391 $options->{'scope'} = 'local' unless $options->{'scope'};
bf6e338d 392 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
8d5c8893 393 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
ca6e6095 394
414cc046 395 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
396 $options->{'type'}, $droppedcolls );
ca6e6095 397 unless( $is_valid ) {
398 throw( "Invalid relationship: $reason" );
399 }
400
401 # Try to create the relationship object.
176badfe 402 $options->{'reading_a'} = $sourceobj->text;
403 $options->{'reading_b'} = $targetobj->text;
ca6e6095 404 $options->{'orig_a'} = $source;
405 $options->{'orig_b'} = $target;
0ac5e750 406 if( $options->{'scope'} ne 'local' ) {
407 # Is there a relationship with this a & b already?
f222800e 408 # Case-insensitive for non-orthographics.
409 my $rdga = $options->{'type'} eq 'orthographic'
410 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
411 my $rdgb = $options->{'type'} eq 'orthographic'
412 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
413 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
0ac5e750 414 if( $otherrel && $otherrel->type eq $options->{type}
415 && $otherrel->scope eq $options->{scope} ) {
416 warn "Applying existing scoped relationship";
417 $relationship = $otherrel;
418 }
419 }
420 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
22222af9 421 }
ca6e6095 422
22222af9 423
424 # Find all the pairs for which we need to set the relationship.
414cc046 425 my @vectors;
ca6e6095 426 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
bf6e338d 427 push( @vectors, $self->_find_applicable( $relationship ) );
f222800e 428 }
bf6e338d 429
22222af9 430 # Now set the relationship(s).
431 my @pairs_set;
414cc046 432 my $rel = $self->get_relationship( $source, $target );
cc31ebaa 433 my $skip;
414cc046 434 if( $rel && $rel ne $relationship ) {
435 if( $rel->nonlocal ) {
436 throw( "Found conflicting relationship at $source - $target" );
437 } elsif( $rel->type ne 'collated' ) {
438 # Replace a collation relationship; leave any other sort in place.
439 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
440 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
441 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
442 warn sprintf( "Not overriding local relationship %s with global %s "
443 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
444 $source, $target, $rel->reading_a, $rel->reading_b );
cc31ebaa 445 $skip = 1;
414cc046 446 }
447 }
448 }
cc31ebaa 449 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
414cc046 450 push( @pairs_set, [ $source, $target ] );
451
452 # Set any additional relationships that might be in @vectors.
22222af9 453 foreach my $v ( @vectors ) {
414cc046 454 next if $v->[0] eq $source && $v->[1] eq $target;
455 next if $v->[1] eq $source && $v->[0] eq $target;
456 my @added = $self->add_relationship( @$v, $relationship );
457 push( @pairs_set, @added );
22222af9 458 }
459
414cc046 460 # Finally, restore whatever collations we can, and return.
461 $self->_restore_collations( @$droppedcolls );
63778331 462 return @pairs_set;
22222af9 463}
464
9d829138 465=head2 del_scoped_relationship( $reading_a, $reading_b )
466
467Returns the general (document-level or global) relationship that has been defined
468between the two reading strings. Returns undef if there is no general relationship.
469
470=cut
471
472sub del_scoped_relationship {
473 my( $self, $rdga, $rdgb ) = @_;
474 my( $first, $second ) = sort( $rdga, $rdgb );
475 return delete $self->scopedrels->{$first}->{$second};
476}
477
bf6e338d 478sub _find_applicable {
479 my( $self, $rel ) = @_;
480 my $c = $self->collation;
481 # TODO Someday we might use a case sensitive language.
482 my $lang = $c->tradition->language;
483 my @vectors;
484 my @identical_readings;
485 if( $rel->type eq 'orthographic' ) {
486 @identical_readings = grep { $_->text eq $rel->reading_a }
487 $c->readings;
488 } else {
489 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
490 $c->readings;
491 }
492 foreach my $ir ( @identical_readings ) {
493 my @itarget;
494 if( $rel->type eq 'orthographic' ) {
495 @itarget = grep { $_->rank == $ir->rank
496 && $_->text eq $rel->reading_b } $c->readings;
497 } else {
498 @itarget = grep { $_->rank == $ir->rank
499 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
500 }
501 if( @itarget ) {
502 # Warn if there is more than one hit with no orth link between them.
503 my $itmain = shift @itarget;
504 if( @itarget ) {
505 my %all_targets;
506 map { $all_targets{$_} = 1 } @itarget;
507 map { delete $all_targets{$_} }
508 $self->related_readings( $itmain,
509 sub { $_[0]->type eq 'orthographic' } );
510 warn "More than one unrelated reading with text " . $itmain->text
511 . " at rank " . $ir->rank . "!" if keys %all_targets;
512 }
513 push( @vectors, [ $ir->id, $itmain->id ] );
514 }
515 }
516 return @vectors;
517}
518
ee801e17 519=head2 del_relationship( $source, $target )
520
521Removes the relationship between the given readings. If the relationship is
522non-local, removes the relationship everywhere in the graph.
523
524=cut
525
526sub del_relationship {
527 my( $self, $source, $target ) = @_;
528 my $rel = $self->get_relationship( $source, $target );
681893aa 529 return () unless $rel; # Nothing to delete; return an empty set.
359944f7 530 my $colo = $rel->colocated;
ee801e17 531 my @vectors = ( [ $source, $target ] );
359944f7 532 $self->_remove_relationship( $colo, $source, $target );
ee801e17 533 if( $rel->nonlocal ) {
534 # Remove the relationship wherever it occurs.
9d829138 535 # Remove the relationship wherever it occurs.
ee801e17 536 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
537 $self->relationships;
538 foreach my $re ( @rel_edges ) {
359944f7 539 $self->_remove_relationship( $colo, @$re );
ee801e17 540 push( @vectors, $re );
541 }
9d829138 542 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
ee801e17 543 }
544 return @vectors;
545}
546
ca6e6095 547sub _remove_relationship {
359944f7 548 my( $self, $equiv, @vector ) = @_;
ca6e6095 549 $self->graph->delete_edge( @vector );
176badfe 550 $self->_break_equivalence( @vector ) if $equiv;
ca6e6095 551}
552
22222af9 553=head2 relationship_valid( $source, $target, $type )
554
555Checks whether a relationship of type $type may exist between the readings given
556in $source and $target. Returns a tuple of ( status, message ) where status is
557a yes/no boolean and, if the answer is no, message gives the reason why.
558
559=cut
560
561sub relationship_valid {
414cc046 562 my( $self, $source, $target, $rel, $mustdrop ) = @_;
563 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
22222af9 564 my $c = $self->collation;
565 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
566 # Check that the two readings do (for a repetition) or do not (for
567 # a transposition) appear in the same witness.
2f39215b 568 # TODO this might be called before witness paths are set...
22222af9 569 my %seen_wits;
570 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
571 foreach my $w ( $c->reading_witnesses( $target ) ) {
572 if( $seen_wits{$w} ) {
573 return ( 0, "Readings both occur in witness $w" )
574 if $rel eq 'transposition';
575 return ( 1, "ok" ) if $rel eq 'repetition';
d6936dea 576 }
22222af9 577 }
abadc997 578 return ( 0, "Readings occur only in distinct witnesses" )
579 if $rel eq 'repetition';
580 }
581 if ( $rel eq 'transposition' ) {
582 # We also need to check both that the readings occur in distinct
583 # witnesses, and that they are not in the same place. That is,
584 # proposing to link them should cause a witness loop.
359944f7 585 if( $self->test_equivalence( $source, $target ) ) {
abadc997 586 return ( 0, "Readings appear to be colocated, not transposed" );
359944f7 587 } else {
588 return ( 1, "ok" );
abadc997 589 }
590
591 } elsif( $rel ne 'repetition' ) {
22222af9 592 # Check that linking the source and target in a relationship won't lead
414cc046 593 # to a path loop for any witness.
594 # First, drop/stash any collations that might interfere
595 my $sourceobj = $c->reading( $source );
596 my $targetobj = $c->reading( $target );
597 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
598 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
599 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
600 push( @$mustdrop, $self->_drop_collations( $source ) );
601 push( @$mustdrop, $self->_drop_collations( $target ) );
359944f7 602 if( $c->end->has_rank ) {
176badfe 603 foreach my $rk ( $sourcerank .. $targetrank ) {
414cc046 604 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
605 $c->readings_at_rank( $rk );
606 }
607 }
a1615ee4 608 }
359944f7 609 unless( $self->test_equivalence( $source, $target ) ) {
414cc046 610 $self->_restore_collations( @$mustdrop );
611 return( 0, "Relationship would create witness loop" );
a1615ee4 612 }
22222af9 613 return ( 1, "ok" );
614 }
615}
616
778251a6 617sub _drop_collations {
618 my( $self, $reading ) = @_;
414cc046 619 my @dropped;
778251a6 620 foreach my $n ( $self->graph->neighbors( $reading ) ) {
621 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
414cc046 622 push( @dropped, [ $reading, $n ] );
778251a6 623 $self->del_relationship( $reading, $n );
359944f7 624 #print STDERR "Dropped collation $reading -> $n\n";
778251a6 625 }
626 }
414cc046 627 return @dropped;
628}
629
630sub _restore_collations {
631 my( $self, @vectors ) = @_;
632 foreach my $v ( @vectors ) {
633 try {
634 $self->add_relationship( @$v, { 'type' => 'collated' } );
359944f7 635 #print STDERR "Restored collation @$v\n";
414cc046 636 } catch {
637 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
638 }
639 }
778251a6 640}
641
cc31ebaa 642=head2 filter_collations()
643
644Utility function. Removes any redundant 'collated' relationships from the graph.
645A collated relationship is redundant if the readings in question would occupy
646the same rank regardless of the existence of the relationship.
647
648=cut
649
650sub filter_collations {
651 my $self = shift;
652 my $c = $self->collation;
653 foreach my $r ( 1 .. $c->end->rank - 1 ) {
654 my $anchor;
655 my @need_collations;
656 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
657 next if $rdg->is_meta;
658 my $ip = 0;
659 foreach my $pred ( $rdg->predecessors ) {
660 if( $pred->rank == $r - 1 ) {
661 $ip = 1;
662 $anchor = $rdg unless( $anchor );
663 last;
664 }
665 }
666 push( @need_collations, $rdg ) unless $ip;
667 $c->relations->_drop_collations( "$rdg" );
668 }
669 $anchor
46e1fe14 670 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
671 unless $c->get_relationship( $anchor, $_ ) } @need_collations
cc31ebaa 672 : warn "No anchor found at $r";
673 }
674}
675
7f52eac8 676=head2 related_readings( $reading, $filter )
22222af9 677
678Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 679If $filter is set to a subroutine ref, returns only those related readings where
680$filter( $relationship ) returns a true value.
22222af9 681
682=cut
683
684sub related_readings {
7f52eac8 685 my( $self, $reading, $filter ) = @_;
22222af9 686 my $return_object;
687 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
688 $reading = $reading->id;
689 $return_object = 1;
690 }
c84275ff 691 my @answer;
7f52eac8 692 if( $filter ) {
693 # Backwards compat
694 if( $filter eq 'colocated' ) {
695 $filter = sub { $_[0]->colocated };
696 }
c84275ff 697 my %found = ( $reading => 1 );
698 my $check = [ $reading ];
699 my $iter = 0;
700 while( @$check ) {
c84275ff 701 my $more = [];
702 foreach my $r ( @$check ) {
703 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 704 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 705 push( @$more, $nr ) unless exists $found{$nr};
706 $found{$nr} = 1;
707 }
708 }
709 }
710 $check = $more;
22222af9 711 }
7f52eac8 712 delete $found{$reading};
c84275ff 713 @answer = keys %found;
714 } else {
715 @answer = $self->graph->all_reachable( $reading );
22222af9 716 }
717 if( $return_object ) {
718 my $c = $self->collation;
c84275ff 719 return map { $c->reading( $_ ) } @answer;
22222af9 720 } else {
c84275ff 721 return @answer;
22222af9 722 }
723}
724
725=head2 merge_readings( $kept, $deleted );
726
727Makes a best-effort merge of the relationship links between the given readings, and
728stops tracking the to-be-deleted reading.
729
730=cut
731
732sub merge_readings {
733 my( $self, $kept, $deleted, $combined ) = @_;
734 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
735 # Get the pair of kept / rel
736 my @vector = ( $kept );
737 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
738 next if $vector[0] eq $vector[1]; # Don't add a self loop
739
740 # If kept changes its text, drop the relationship.
741 next if $combined;
742
f222800e 743 # If kept / rel already has a relationship, just keep the old
3ae5e2ad 744 my $rel = $self->get_relationship( @vector );
f222800e 745 next if $rel;
22222af9 746
747 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 748 $rel = $self->get_relationship( @$edge );
749 $self->_set_relationship( $rel, @vector );
22222af9 750 }
176badfe 751 $self->_make_equivalence( $deleted, $kept );
22222af9 752}
753
359944f7 754### Equivalence logic
755
756sub _remove_equivalence_node {
757 my( $self, $node ) = @_;
758 my $group = $self->equivalence( $node );
759 my $nodelist = $self->eqreadings( $group );
760 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
176badfe 761 print STDERR "Removing equivalence $group for $node\n" if $node eq '451,2';
359944f7 762 $self->remove_eqreadings( $group );
763 } elsif( @$nodelist == 1 ) {
764 warn "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
765 " in group that should have only $node";
766 } else {
176badfe 767 print STDERR "Removing $node from equivalence $group\n" if $node eq '451,2';
359944f7 768 my @newlist = grep { $_ ne $node } @$nodelist;
769 $self->set_eqreadings( $group, \@newlist );
770 $self->remove_equivalence( $node );
771 }
772}
773
774=head2 add_equivalence_edge
775
176badfe 776Add an edge in the equivalence graph corresponding to $source -> $target in the
777collation. Should only be called by Collation.
359944f7 778
779=cut
780
781sub add_equivalence_edge {
782 my( $self, $source, $target ) = @_;
783 my $seq = $self->equivalence( $source );
784 my $teq = $self->equivalence( $target );
176badfe 785 print STDERR "Adding equivalence edge $seq -> $teq for $source -> $target\n"
786 if grep { $_ eq '451,2' } @_;
359944f7 787 $self->equivalence_graph->add_edge( $seq, $teq );
788}
789
176badfe 790=head2 delete_equivalence_edge
359944f7 791
176badfe 792Remove an edge in the equivalence graph corresponding to $source -> $target in the
793collation. Should only be called by Collation.
359944f7 794
795=cut
796
797sub delete_equivalence_edge {
798 my( $self, $source, $target ) = @_;
799 my $seq = $self->equivalence( $source );
800 my $teq = $self->equivalence( $target );
176badfe 801 print STDERR "Deleting equivalence edge $seq -> $teq for $source -> $target\n"
802 if grep { $_ eq '451,2' } @_;
359944f7 803 $self->equivalence_graph->delete_edge( $seq, $teq );
804}
805
806sub _is_disconnected {
807 my $self = shift;
808 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
809 || scalar $self->equivalence_graph->successorless_vertices > 1 );
810}
811
176badfe 812# Equate two readings in the equivalence graph
813sub _make_equivalence {
359944f7 814 my( $self, $source, $target ) = @_;
815 # Get the source equivalent readings
816 my $seq = $self->equivalence( $source );
817 my $teq = $self->equivalence( $target );
818 # Nothing to do if they are already equivalent...
819 return if $seq eq $teq;
176badfe 820 print STDERR "Making equivalence for $source -> $target\n"
821 if grep { $_ eq '451,2' } @_;
359944f7 822 my $sourcepool = $self->eqreadings( $seq );
823 # and add them to the target readings.
176badfe 824 print STDERR "Moving readings '@$sourcepool' from group $seq to $teq\n"
825 if grep { $_ eq '451,2' } @_;
359944f7 826 push( @{$self->eqreadings( $teq )}, @$sourcepool );
827 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
828 # Then merge the nodes in the equivalence graph.
829 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
830 $self->equivalence_graph->add_edge( $pred, $teq );
831 }
832 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
833 $self->equivalence_graph->add_edge( $teq, $succ );
834 }
835 $self->equivalence_graph->delete_vertex( $seq );
176badfe 836 # TODO enable this after collation parsing is done
359944f7 837# throw( "Graph got disconnected making $source / $target equivalence" )
838# if $self->_is_disconnected;
839}
840
841=head2 test_equivalence
842
176badfe 843Test whether, if two readings were equated with a 'colocated' relationship,
844the graph would still be valid.
359944f7 845
846=cut
847
848sub test_equivalence {
849 my( $self, $source, $target ) = @_;
850 # Try merging the nodes in the equivalence graph; return a true value if
851 # no cycle is introduced thereby. Restore the original graph first.
852
853 # Keep track of edges we add
854 my %added_pred;
855 my %added_succ;
856 # Get the reading equivalents
857 my $seq = $self->equivalence( $source );
858 my $teq = $self->equivalence( $target );
859 # Maybe this is easy?
860 return 1 if $seq eq $teq;
861
862 # Save the first graph
863 my $checkstr = $self->equivalence_graph->stringify();
864 # Add and save relevant edges
865 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
866 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
867 $added_pred{$pred} = 0;
868 } else {
869 $self->equivalence_graph->add_edge( $pred, $teq );
870 $added_pred{$pred} = 1;
871 }
872 }
873 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
874 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
875 $added_succ{$succ} = 0;
876 } else {
877 $self->equivalence_graph->add_edge( $teq, $succ );
878 $added_succ{$succ} = 1;
879 }
880 }
881 # Delete source equivalent and test
882 $self->equivalence_graph->delete_vertex( $seq );
883 my $ret = !$self->equivalence_graph->has_a_cycle;
884
885 # Restore what we changed
886 $self->equivalence_graph->add_vertex( $seq );
887 foreach my $pred ( keys %added_pred ) {
888 $self->equivalence_graph->add_edge( $pred, $seq );
889 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
890 }
891 foreach my $succ ( keys %added_succ ) {
892 $self->equivalence_graph->add_edge( $seq, $succ );
893 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
894 }
895 unless( $self->equivalence_graph->eq( $checkstr ) ) {
896 warn "GRAPH CHANGED after testing";
897 }
898 # Return our answer
899 return $ret;
900}
901
176badfe 902# Unmake an equivalence link between two readings. Should only be called internally.
903sub _break_equivalence {
359944f7 904 my( $self, $source, $target ) = @_;
905
906 # This is the hard one. Need to reconstruct the equivalence groups without
907 # the given link.
908 my( %sng, %tng );
909 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
910 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
911 # If these groups intersect, they are still connected; do nothing.
912 foreach my $el ( keys %tng ) {
913 if( exists $sng{$el} ) {
176badfe 914 print STDERR "Equivalence break $source / $target is a noop\n"
915 if grep { $_ eq '451,2' } @_;
359944f7 916 return;
917 }
918 }
176badfe 919 print STDERR "Breaking equivalence $source / $target\n"
920 if grep { $_ eq '451,2' } @_;
359944f7 921 # If they don't intersect, then we split the nodes in the graph and in
922 # the hashes. First figure out which group has which name
176badfe 923 my $oldgroup = $self->equivalence( $source ); # same as $target
924 my $keepsource = $sng{$oldgroup};
925 my $newgroup = $keepsource ? $target : $source;
359944f7 926 my( $oldmembers, $newmembers );
176badfe 927 if( $keepsource ) {
359944f7 928 $oldmembers = [ keys %sng ];
929 $newmembers = [ keys %tng ];
930 } else {
931 $oldmembers = [ keys %tng ];
932 $newmembers = [ keys %sng ];
933 }
934
935 # First alter the old group in the hash
936 $self->set_eqreadings( $oldgroup, $oldmembers );
176badfe 937 foreach my $el ( @$oldmembers ) {
938 $self->set_equivalence( $el, $oldgroup );
939 }
359944f7 940
941 # then add the new group back to the hash with its new key
942 $self->set_eqreadings( $newgroup, $newmembers );
943 foreach my $el ( @$newmembers ) {
944 $self->set_equivalence( $el, $newgroup );
945 }
946
947 # Now add the new group back to the equivalence graph
948 $self->equivalence_graph->add_vertex( $newgroup );
949 # ...add the appropriate edges to the source group vertext
950 my $c = $self->collation;
951 foreach my $rdg ( @$newmembers ) {
952 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
953 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
954 }
955 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
956 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
957 }
958 }
959
960 # ...and figure out which edges on the old group vertex to delete.
961 my( %old_pred, %old_succ );
962 foreach my $rdg ( @$oldmembers ) {
963 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
964 $old_pred{$self->equivalence( $rp )} = 1;
965 }
966 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
967 $old_succ{$self->equivalence( $rs )} = 1;
968 }
969 }
970 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
971 unless( $old_pred{$p} ) {
972 $self->equivalence_graph->delete_edge( $p, $oldgroup );
973 }
974 }
975 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
976 unless( $old_succ{$s} ) {
977 $self->equivalence_graph->delete_edge( $oldgroup, $s );
978 }
979 }
176badfe 980 # TODO enable this after collation parsing is done
359944f7 981# throw( "Graph got disconnected breaking $source / $target equivalence" )
982# if $self->_is_disconnected;
983}
984
985sub _find_equiv_without {
986 my( $self, $first, $second ) = @_;
987 my %found = ( $first => 1 );
988 my $check = [ $first ];
989 my $iter = 0;
990 while( @$check ) {
991 my $more = [];
992 foreach my $r ( @$check ) {
993 foreach my $nr ( $self->graph->neighbors( $r ) ) {
994 next if $r eq $second;
995 if( $self->get_relationship( $r, $nr )->colocated ) {
996 push( @$more, $nr ) unless exists $found{$nr};
997 $found{$nr} = 1;
998 }
999 }
1000 }
1001 $check = $more;
1002 }
1003 return keys %found;
1004}
1005
e1083e99 1006=head2 rebuild_equivalence
1007
1008(Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1009adds all readings and edges, then makes an equivalence for all relationships.
1010
1011=cut
1012
1013sub rebuild_equivalence {
1014 my $self = shift;
1015 my $newgraph = Graph->new();
1016 foreach my $r ( $self->collation->readings ) {
1017 $newgraph->add_vertex( $r->id );
1018 }
1019 foreach my $e ( $self->collation->paths ) {
1020 $newgraph->add_edge( @$e );
1021 }
1022 # Set this as the new equivalence graph
1023 $self->_reset_equivalence( $newgraph );
1024
1025 # Now collapse some nodes. This does no testing; it assumes that all
1026 # preexisting relationships are valid.
1027 foreach my $rel ( $self->relationships ) {
1028 my $relobj = $self->get_relationship( $rel );
1029 next unless $relobj && $relobj->colocated;
1030 $self->_make_equivalence( @$rel );
1031 }
1032}
1033
359944f7 1034### Output logic
1035
027d819c 1036sub _as_graphml {
2626f709 1037 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 1038
1039 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1040 $rgraph->setAttribute( 'edgedefault', 'directed' );
1041 $rgraph->setAttribute( 'id', 'relationships', );
1042 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1043 $rgraph->setAttribute( 'parse.edges', 0 );
c84275ff 1044 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1045 $rgraph->setAttribute( 'parse.nodes', 0 );
c84275ff 1046 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1047
1048 # Add the vertices according to their XML IDs
2626f709 1049 my %rdg_lookup = ( reverse %$node_hash );
cc31ebaa 1050 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
826d8773 1051 my @nlist = sort keys( %rdg_lookup );
414cc046 1052 foreach my $n ( @nlist ) {
c84275ff 1053 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1054 $n_el->setAttribute( 'id', $n );
2626f709 1055 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 1056 }
cc31ebaa 1057 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
c84275ff 1058
1059 # Add the relationship edges, with their object information
1060 my $edge_ctr = 0;
1061 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1062 # Add an edge and fill in its relationship info.
a30ca502 1063 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
c84275ff 1064 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1065 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1066 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1067 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1068
3ae5e2ad 1069 my $rel_obj = $self->get_relationship( @$e );
bbd064a9 1070 foreach my $key ( keys %$edge_keys ) {
1071 my $value = $rel_obj->$key;
1072 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1073 if defined $value;
1074 }
c84275ff 1075 }
cc31ebaa 1076 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
c84275ff 1077}
1078
1079sub _by_xmlid {
2626f709 1080 my $tmp_a = $a;
1081 my $tmp_b = $b;
1082 $tmp_a =~ s/\D//g;
1083 $tmp_b =~ s/\D//g;
1084 return $tmp_a <=> $tmp_b;
c84275ff 1085}
1086
1087sub _add_graphml_data {
1088 my( $el, $key, $value ) = @_;
1089 return unless defined $value;
1090 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1091 $data_el->setAttribute( 'key', $key );
1092 $data_el->appendText( $value );
83d5ac3a 1093}
1094
63778331 1095sub throw {
1096 Text::Tradition::Error->throw(
1097 'ident' => 'Relationship error',
1098 'message' => $_[0],
1099 );
1100}
1101
22222af9 1102no Moose;
1103__PACKAGE__->meta->make_immutable;
1104
11051;