implement persistent equivalence graph for relationship tracking
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
CommitLineData
22222af9 1package Text::Tradition::Collation::RelationshipStore;
2
3use strict;
4use warnings;
63778331 5use Text::Tradition::Error;
22222af9 6use Text::Tradition::Collation::Relationship;
a1615ee4 7use TryCatch;
22222af9 8
9use Moose;
10
11=head1 NAME
12
2626f709 13Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14between readings in a given collation
22222af9 15
16=head1 DESCRIPTION
17
18Text::Tradition is a library for representation and analysis of collated
19texts, particularly medieval ones. The RelationshipStore is an internal object
20of the collation, to keep track of the defined relationships (both specific and
21general) between readings.
22
3ae5e2ad 23=begin testing
24
25use Text::Tradition;
ee801e17 26use TryCatch;
3ae5e2ad 27
28use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
ee801e17 30# Add some relationships, and delete them
31
32my $cxfile = 't/data/Collatex-16.xml';
33my $t = Text::Tradition->new(
34 'name' => 'inline',
35 'input' => 'CollateX',
36 'file' => $cxfile,
37 );
38my $c = $t->collation;
39
f8331a4d 40my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
ee801e17 41is( scalar @v1, 1, "Added a single relationship" );
42is( $v1[0]->[0], 'n21', "Got correct node 1" );
43is( $v1[0]->[1], 'n22', "Got correct node 2" );
679f17e1 44my @v2 = $c->add_relationship( 'n24', 'n23',
ee801e17 45 { 'type' => 'spelling', 'scope' => 'global' } );
46is( scalar @v2, 2, "Added a global relationship with two instances" );
47@v1 = $c->del_relationship( 'n22', 'n21' );
48is( scalar @v1, 1, "Deleted first relationship" );
679f17e1 49@v2 = $c->del_relationship( 'n12', 'n13' );
ee801e17 50is( scalar @v2, 2, "Deleted second global relationship" );
681893aa 51my @v3 = $c->del_relationship( 'n1', 'n2' );
52is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
ee801e17 53
3ae5e2ad 54=end testing
55
22222af9 56=head1 METHODS
57
58=head2 new( collation => $collation );
59
60Creates a new relationship store for the given collation.
61
62=cut
63
64has 'collation' => (
65 is => 'ro',
66 isa => 'Text::Tradition::Collation',
67 required => 1,
68 weak_ref => 1,
69 );
70
71has 'scopedrels' => (
72 is => 'ro',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
75 );
76
77has 'graph' => (
78 is => 'ro',
79 isa => 'Graph',
80 default => sub { Graph->new( undirected => 1 ) },
81 handles => {
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
85 },
86 );
87
359944f7 88=head2 equivalence_graph()
89
90Returns an equivalence graph of the collation, in which all readings
91related via a 'colocated' relationship are transformed into a single
92vertex. Can be used to determine the validity of a new relationship.
93
94=cut
95
96has 'equivalence_graph' => (
97 is => 'ro',
98 isa => 'Graph',
99 default => sub { Graph->new() },
100 );
101
102has '_node_equivalences' => (
103 is => 'ro',
104 traits => ['Hash'],
105 handles => {
106 equivalence => 'get',
107 set_equivalence => 'set',
108 remove_equivalence => 'delete',
109 },
110 );
111
112has '_equivalence_readings' => (
113 is => 'ro',
114 traits => ['Hash'],
115 handles => {
116 eqreadings => 'get',
117 set_eqreadings => 'set',
118 remove_eqreadings => 'delete',
119 },
120 );
121
122around add_reading => sub {
123 my $orig = shift;
124 my $self = shift;
125
126 $self->equivalence_graph->add_vertex( @_ );
127 $self->set_equivalence( $_[0], $_[0] );
128 $self->set_eqreadings( $_[0], [ $_[0] ] );
129 $self->$orig( @_ );
130};
131
132around delete_reading => sub {
133 my $orig = shift;
134 my $self = shift;
135
136 $DB::single = 1;
137 $self->_remove_equivalence_node( @_ );
138 $self->$orig( @_ );
139};
140
3ae5e2ad 141=head2 get_relationship
142
143Return the relationship object, if any, that exists between two readings.
144
145=cut
146
147sub get_relationship {
4633f9e4 148 my $self = shift;
149 my @vector;
150 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
151 # Dereference the edge arrayref that was passed.
152 my $edge = shift;
153 @vector = @$edge;
154 } else {
155 @vector = @_;
156 }
3ae5e2ad 157 my $relationship;
158 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
159 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
ca6e6095 160 }
3ae5e2ad 161 return $relationship;
162}
163
164sub _set_relationship {
165 my( $self, $relationship, @vector ) = @_;
166 $self->graph->add_edge( @vector );
167 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
359944f7 168 $self->make_equivalence( @vector ) if $relationship->colocated;
3ae5e2ad 169}
a1615ee4 170
22222af9 171=head2 create
172
173Create a new relationship with the given options and return it.
174Warn and return undef if the relationship cannot be created.
175
176=cut
177
178sub create {
179 my( $self, $options ) = @_;
180 # Check to see if a relationship exists between the two given readings
181 my $source = delete $options->{'orig_a'};
182 my $target = delete $options->{'orig_b'};
3ae5e2ad 183 my $rel = $self->get_relationship( $source, $target );
184 if( $rel ) {
3d14b48e 185 if( $rel->type eq 'collated' ) {
186 # Always replace a 'collated' relationship with a more descriptive
187 # one, if asked.
188 $self->del_relationship( $source, $target );
189 } elsif( $rel->type ne $options->{'type'} ) {
63778331 190 throw( "Another relationship of type " . $rel->type
191 . " already exists between $source and $target" );
22222af9 192 } else {
193 return $rel;
194 }
195 }
196
197 # Check to see if a nonlocal relationship is defined for the two readings
198 $rel = $self->scoped_relationship( $options->{'reading_a'},
199 $options->{'reading_b'} );
200 if( $rel && $rel->type eq $options->{'type'} ) {
201 return $rel;
202 } elsif( $rel ) {
63778331 203 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
22222af9 204 } else {
205 $rel = Text::Tradition::Collation::Relationship->new( $options );
206 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
207 return $rel;
208 }
209}
210
211=head2 add_scoped_relationship( $rel )
212
213Keep track of relationships defined between specific readings that are scoped
214non-locally. Key on whichever reading occurs first alphabetically.
215
216=cut
217
218sub add_scoped_relationship {
219 my( $self, $rel ) = @_;
f222800e 220 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
221 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
222 my $r = $self->scoped_relationship( $rdga, $rdgb );
22222af9 223 if( $r ) {
224 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
f222800e 225 $r->type, $rdga, $rdgb );
22222af9 226 return;
227 }
f222800e 228 my( $first, $second ) = sort ( $rdga, $rdgb );
229 $self->scopedrels->{$first}->{$second} = $rel;
22222af9 230}
231
232=head2 scoped_relationship( $reading_a, $reading_b )
233
234Returns the general (document-level or global) relationship that has been defined
235between the two reading strings. Returns undef if there is no general relationship.
236
237=cut
238
239sub scoped_relationship {
240 my( $self, $rdga, $rdgb ) = @_;
241 my( $first, $second ) = sort( $rdga, $rdgb );
242 if( exists $self->scopedrels->{$first}->{$second} ) {
243 return $self->scopedrels->{$first}->{$second};
244 } else {
245 return undef;
246 }
247}
248
249=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
250
251Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
252for the possible options) between the readings given in $source and $target. Sets
253up a scoped relationship between $sourcetext and $targettext if the relationship is
254scoped non-locally.
255
256Returns a status boolean and a list of all reading pairs connected by the call to
257add_relationship.
258
6d381462 259=begin testing
260
261use Text::Tradition;
262use TryCatch;
263
264my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
265# Test 1: try to equate nodes that are prevented with an intermediate collation
266ok( $t1, "Parsed test fragment file" );
267my $c1 = $t1->collation;
268my $trel = $c1->get_relationship( '9,2', '9,3' );
269is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
270 "Troublesome relationship exists" );
271is( $trel->type, 'collated', "Troublesome relationship is a collation" );
272
273# Try to make the link we want
274try {
275 $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
276 ok( 1, "Added cross-collation relationship as expected" );
277} catch {
278 ok( 0, "Existing collation blocked equivalence relationship" );
279}
280
281try {
282 $c1->calculate_ranks();
283 ok( 1, "Successfully calculated ranks" );
284} catch {
285 ok( 0, "Collation now has a cycle" );
286}
287
359944f7 288# Now attempt merge of an identical reading
289try {
290 $c1->merge_readings( '9,3', '11,5' );
291 ok( 1, "Successfully merged reading 'pontifex'" );
292} catch ( Text::Tradition::Error $e ) {
293 ok( 0, "Merge of mergeable readings failed: $e->message" );
294
295}
296
6d381462 297# Test 2: try to equate nodes that are prevented with a real intermediate
298# equivalence
6d381462 299my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
6d381462 300my $c2 = $t2->collation;
301$c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
302my $trel2 = $c2->get_relationship( '9,2', '9,3' );
303is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
304 "Created blocking relationship" );
305is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
306# This time the link ought to fail
307try {
308 $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
414cc046 309 ok( 0, "Added cross-equivalent bad relationship" );
6d381462 310} catch {
414cc046 311 ok( 1, "Existing equivalence blocked crossing relationship" );
6d381462 312}
313
314try {
315 $c2->calculate_ranks();
316 ok( 1, "Successfully calculated ranks" );
317} catch {
318 ok( 0, "Collation now has a cycle" );
319}
320
cc31ebaa 321# Test 3: make a straightforward pair of transpositions.
322my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
323# Test 1: try to equate nodes that are prevented with an intermediate collation
324my $c3 = $t3->collation;
325try {
326 $c3->add_relationship( '36,4', '38,3', { 'type' => 'transposition' } );
327 ok( 1, "Added straightforward transposition" );
328} catch {
329 ok( 0, "Failed to add normal transposition" );
330}
331try {
332 $c3->add_relationship( '36,3', '38,2', { 'type' => 'transposition' } );
333 ok( 1, "Added straightforward transposition complement" );
334} catch {
335 ok( 0, "Failed to add normal transposition complement" );
336}
337
338# Test 4: try to make a transposition that could be a parallel.
339try {
340 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
341 ok( 0, "Added bad colocated transposition" );
342} catch {
343 ok( 1, "Prevented bad colocated transposition" );
344}
345
346# Test 5: make the parallel, and then make the transposition again.
347try {
348 $c3->add_relationship( '28,3', '29,3', { 'type' => 'orthographic' } );
349 ok( 1, "Equated identical readings for transposition" );
350} catch {
351 ok( 0, "Failed to equate identical readings" );
352}
353try {
354 $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
355 ok( 1, "Added straightforward transposition complement" );
356} catch {
357 ok( 0, "Failed to add normal transposition complement" );
358}
359
6d381462 360=end testing
361
22222af9 362=cut
363
364sub add_relationship {
414cc046 365 my( $self, $source, $target, $options ) = @_;
366 my $c = $self->collation;
22222af9 367
359944f7 368 throw( "Adding self relationship at $source" ) if $source eq $target;
ca6e6095 369 my $relationship;
370 my $thispaironly;
414cc046 371 my $droppedcolls = [];
ca6e6095 372 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
373 $relationship = $options;
374 $thispaironly = 1; # If existing rel, set only where asked.
375 } else {
376 # Check the options
377 $options->{'scope'} = 'local' unless $options->{'scope'};
bf6e338d 378 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
8d5c8893 379 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
ca6e6095 380
414cc046 381 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
382 $options->{'type'}, $droppedcolls );
ca6e6095 383 unless( $is_valid ) {
384 throw( "Invalid relationship: $reason" );
385 }
386
387 # Try to create the relationship object.
414cc046 388 $options->{'reading_a'} = $c->reading( $source )->text;
389 $options->{'reading_b'} = $c->reading( $target )->text;
ca6e6095 390 $options->{'orig_a'} = $source;
391 $options->{'orig_b'} = $target;
0ac5e750 392 if( $options->{'scope'} ne 'local' ) {
393 # Is there a relationship with this a & b already?
f222800e 394 # Case-insensitive for non-orthographics.
395 my $rdga = $options->{'type'} eq 'orthographic'
396 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
397 my $rdgb = $options->{'type'} eq 'orthographic'
398 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
399 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
0ac5e750 400 if( $otherrel && $otherrel->type eq $options->{type}
401 && $otherrel->scope eq $options->{scope} ) {
402 warn "Applying existing scoped relationship";
403 $relationship = $otherrel;
404 }
405 }
406 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
22222af9 407 }
ca6e6095 408
22222af9 409
410 # Find all the pairs for which we need to set the relationship.
414cc046 411 my @vectors;
ca6e6095 412 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
bf6e338d 413 push( @vectors, $self->_find_applicable( $relationship ) );
f222800e 414 }
bf6e338d 415
22222af9 416 # Now set the relationship(s).
417 my @pairs_set;
414cc046 418 my $rel = $self->get_relationship( $source, $target );
cc31ebaa 419 my $skip;
414cc046 420 if( $rel && $rel ne $relationship ) {
421 if( $rel->nonlocal ) {
422 throw( "Found conflicting relationship at $source - $target" );
423 } elsif( $rel->type ne 'collated' ) {
424 # Replace a collation relationship; leave any other sort in place.
425 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
426 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
427 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
428 warn sprintf( "Not overriding local relationship %s with global %s "
429 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
430 $source, $target, $rel->reading_a, $rel->reading_b );
cc31ebaa 431 $skip = 1;
414cc046 432 }
433 }
434 }
cc31ebaa 435 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
414cc046 436 push( @pairs_set, [ $source, $target ] );
437
438 # Set any additional relationships that might be in @vectors.
22222af9 439 foreach my $v ( @vectors ) {
414cc046 440 next if $v->[0] eq $source && $v->[1] eq $target;
441 next if $v->[1] eq $source && $v->[0] eq $target;
442 my @added = $self->add_relationship( @$v, $relationship );
443 push( @pairs_set, @added );
22222af9 444 }
445
414cc046 446 # Finally, restore whatever collations we can, and return.
447 $self->_restore_collations( @$droppedcolls );
63778331 448 return @pairs_set;
22222af9 449}
450
9d829138 451=head2 del_scoped_relationship( $reading_a, $reading_b )
452
453Returns the general (document-level or global) relationship that has been defined
454between the two reading strings. Returns undef if there is no general relationship.
455
456=cut
457
458sub del_scoped_relationship {
459 my( $self, $rdga, $rdgb ) = @_;
460 my( $first, $second ) = sort( $rdga, $rdgb );
461 return delete $self->scopedrels->{$first}->{$second};
462}
463
bf6e338d 464sub _find_applicable {
465 my( $self, $rel ) = @_;
466 my $c = $self->collation;
467 # TODO Someday we might use a case sensitive language.
468 my $lang = $c->tradition->language;
469 my @vectors;
470 my @identical_readings;
471 if( $rel->type eq 'orthographic' ) {
472 @identical_readings = grep { $_->text eq $rel->reading_a }
473 $c->readings;
474 } else {
475 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
476 $c->readings;
477 }
478 foreach my $ir ( @identical_readings ) {
479 my @itarget;
480 if( $rel->type eq 'orthographic' ) {
481 @itarget = grep { $_->rank == $ir->rank
482 && $_->text eq $rel->reading_b } $c->readings;
483 } else {
484 @itarget = grep { $_->rank == $ir->rank
485 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
486 }
487 if( @itarget ) {
488 # Warn if there is more than one hit with no orth link between them.
489 my $itmain = shift @itarget;
490 if( @itarget ) {
491 my %all_targets;
492 map { $all_targets{$_} = 1 } @itarget;
493 map { delete $all_targets{$_} }
494 $self->related_readings( $itmain,
495 sub { $_[0]->type eq 'orthographic' } );
496 warn "More than one unrelated reading with text " . $itmain->text
497 . " at rank " . $ir->rank . "!" if keys %all_targets;
498 }
499 push( @vectors, [ $ir->id, $itmain->id ] );
500 }
501 }
502 return @vectors;
503}
504
ee801e17 505=head2 del_relationship( $source, $target )
506
507Removes the relationship between the given readings. If the relationship is
508non-local, removes the relationship everywhere in the graph.
509
510=cut
511
512sub del_relationship {
513 my( $self, $source, $target ) = @_;
514 my $rel = $self->get_relationship( $source, $target );
681893aa 515 return () unless $rel; # Nothing to delete; return an empty set.
359944f7 516 my $colo = $rel->colocated;
ee801e17 517 my @vectors = ( [ $source, $target ] );
359944f7 518 $self->_remove_relationship( $colo, $source, $target );
ee801e17 519 if( $rel->nonlocal ) {
520 # Remove the relationship wherever it occurs.
9d829138 521 # Remove the relationship wherever it occurs.
ee801e17 522 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
523 $self->relationships;
524 foreach my $re ( @rel_edges ) {
359944f7 525 $self->_remove_relationship( $colo, @$re );
ee801e17 526 push( @vectors, $re );
527 }
9d829138 528 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
ee801e17 529 }
530 return @vectors;
531}
532
ca6e6095 533sub _remove_relationship {
359944f7 534 my( $self, $equiv, @vector ) = @_;
ca6e6095 535 $self->graph->delete_edge( @vector );
359944f7 536 $self->break_equivalence( @vector ) if $equiv;
ca6e6095 537}
538
22222af9 539=head2 relationship_valid( $source, $target, $type )
540
541Checks whether a relationship of type $type may exist between the readings given
542in $source and $target. Returns a tuple of ( status, message ) where status is
543a yes/no boolean and, if the answer is no, message gives the reason why.
544
545=cut
546
547sub relationship_valid {
414cc046 548 my( $self, $source, $target, $rel, $mustdrop ) = @_;
549 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
22222af9 550 my $c = $self->collation;
551 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
552 # Check that the two readings do (for a repetition) or do not (for
553 # a transposition) appear in the same witness.
2f39215b 554 # TODO this might be called before witness paths are set...
22222af9 555 my %seen_wits;
556 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
557 foreach my $w ( $c->reading_witnesses( $target ) ) {
558 if( $seen_wits{$w} ) {
559 return ( 0, "Readings both occur in witness $w" )
560 if $rel eq 'transposition';
561 return ( 1, "ok" ) if $rel eq 'repetition';
d6936dea 562 }
22222af9 563 }
abadc997 564 return ( 0, "Readings occur only in distinct witnesses" )
565 if $rel eq 'repetition';
566 }
567 if ( $rel eq 'transposition' ) {
568 # We also need to check both that the readings occur in distinct
569 # witnesses, and that they are not in the same place. That is,
570 # proposing to link them should cause a witness loop.
359944f7 571 if( $self->test_equivalence( $source, $target ) ) {
abadc997 572 return ( 0, "Readings appear to be colocated, not transposed" );
359944f7 573 } else {
574 return ( 1, "ok" );
abadc997 575 }
576
577 } elsif( $rel ne 'repetition' ) {
22222af9 578 # Check that linking the source and target in a relationship won't lead
414cc046 579 # to a path loop for any witness.
580 # First, drop/stash any collations that might interfere
581 my $sourceobj = $c->reading( $source );
582 my $targetobj = $c->reading( $target );
583 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
584 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
585 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
586 push( @$mustdrop, $self->_drop_collations( $source ) );
587 push( @$mustdrop, $self->_drop_collations( $target ) );
359944f7 588 if( $c->end->has_rank ) {
589 my $cpred = $c->common_predecessor( $source, $target );
590 my $csucc = $c->common_successor( $source, $target );
591 foreach my $rk ( $cpred->rank+1 .. $csucc->rank-1 ) {
414cc046 592 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
593 $c->readings_at_rank( $rk );
594 }
595 }
a1615ee4 596 }
359944f7 597 unless( $self->test_equivalence( $source, $target ) ) {
414cc046 598 $self->_restore_collations( @$mustdrop );
599 return( 0, "Relationship would create witness loop" );
a1615ee4 600 }
22222af9 601 return ( 1, "ok" );
602 }
603}
604
778251a6 605sub _drop_collations {
606 my( $self, $reading ) = @_;
414cc046 607 my @dropped;
778251a6 608 foreach my $n ( $self->graph->neighbors( $reading ) ) {
609 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
414cc046 610 push( @dropped, [ $reading, $n ] );
778251a6 611 $self->del_relationship( $reading, $n );
359944f7 612 #print STDERR "Dropped collation $reading -> $n\n";
778251a6 613 }
614 }
414cc046 615 return @dropped;
616}
617
618sub _restore_collations {
619 my( $self, @vectors ) = @_;
620 foreach my $v ( @vectors ) {
621 try {
622 $self->add_relationship( @$v, { 'type' => 'collated' } );
359944f7 623 #print STDERR "Restored collation @$v\n";
414cc046 624 } catch {
625 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
626 }
627 }
778251a6 628}
629
cc31ebaa 630=head2 filter_collations()
631
632Utility function. Removes any redundant 'collated' relationships from the graph.
633A collated relationship is redundant if the readings in question would occupy
634the same rank regardless of the existence of the relationship.
635
636=cut
637
638sub filter_collations {
639 my $self = shift;
640 my $c = $self->collation;
641 foreach my $r ( 1 .. $c->end->rank - 1 ) {
642 my $anchor;
643 my @need_collations;
644 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
645 next if $rdg->is_meta;
646 my $ip = 0;
647 foreach my $pred ( $rdg->predecessors ) {
648 if( $pred->rank == $r - 1 ) {
649 $ip = 1;
650 $anchor = $rdg unless( $anchor );
651 last;
652 }
653 }
654 push( @need_collations, $rdg ) unless $ip;
655 $c->relations->_drop_collations( "$rdg" );
656 }
657 $anchor
46e1fe14 658 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
659 unless $c->get_relationship( $anchor, $_ ) } @need_collations
cc31ebaa 660 : warn "No anchor found at $r";
661 }
662}
663
7f52eac8 664=head2 related_readings( $reading, $filter )
22222af9 665
666Returns a list of readings that are connected via relationship links to $reading.
7f52eac8 667If $filter is set to a subroutine ref, returns only those related readings where
668$filter( $relationship ) returns a true value.
22222af9 669
670=cut
671
672sub related_readings {
7f52eac8 673 my( $self, $reading, $filter ) = @_;
22222af9 674 my $return_object;
675 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
676 $reading = $reading->id;
677 $return_object = 1;
678 }
c84275ff 679 my @answer;
7f52eac8 680 if( $filter ) {
681 # Backwards compat
682 if( $filter eq 'colocated' ) {
683 $filter = sub { $_[0]->colocated };
684 }
c84275ff 685 my %found = ( $reading => 1 );
686 my $check = [ $reading ];
687 my $iter = 0;
688 while( @$check ) {
c84275ff 689 my $more = [];
690 foreach my $r ( @$check ) {
691 foreach my $nr ( $self->graph->neighbors( $r ) ) {
7f52eac8 692 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
c84275ff 693 push( @$more, $nr ) unless exists $found{$nr};
694 $found{$nr} = 1;
695 }
696 }
697 }
698 $check = $more;
22222af9 699 }
7f52eac8 700 delete $found{$reading};
c84275ff 701 @answer = keys %found;
702 } else {
703 @answer = $self->graph->all_reachable( $reading );
22222af9 704 }
705 if( $return_object ) {
706 my $c = $self->collation;
c84275ff 707 return map { $c->reading( $_ ) } @answer;
22222af9 708 } else {
c84275ff 709 return @answer;
22222af9 710 }
711}
712
713=head2 merge_readings( $kept, $deleted );
714
715Makes a best-effort merge of the relationship links between the given readings, and
716stops tracking the to-be-deleted reading.
717
718=cut
719
720sub merge_readings {
721 my( $self, $kept, $deleted, $combined ) = @_;
722 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
723 # Get the pair of kept / rel
724 my @vector = ( $kept );
725 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
726 next if $vector[0] eq $vector[1]; # Don't add a self loop
727
728 # If kept changes its text, drop the relationship.
729 next if $combined;
730
f222800e 731 # If kept / rel already has a relationship, just keep the old
3ae5e2ad 732 my $rel = $self->get_relationship( @vector );
f222800e 733 next if $rel;
22222af9 734
735 # Otherwise, adopt the relationship that would be deleted.
3ae5e2ad 736 $rel = $self->get_relationship( @$edge );
737 $self->_set_relationship( $rel, @vector );
22222af9 738 }
359944f7 739 $self->make_equivalence( $deleted, $kept );
22222af9 740}
741
359944f7 742### Equivalence logic
743
744sub _remove_equivalence_node {
745 my( $self, $node ) = @_;
746 my $group = $self->equivalence( $node );
747 my $nodelist = $self->eqreadings( $group );
748 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
749 #print STDERR "Removing equivalence $group for $node\n";
750 $self->remove_eqreadings( $group );
751 } elsif( @$nodelist == 1 ) {
752 warn "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
753 " in group that should have only $node";
754 } else {
755 #print STDERR "Removing $node from equivalence $group\n";
756 my @newlist = grep { $_ ne $node } @$nodelist;
757 $self->set_eqreadings( $group, \@newlist );
758 $self->remove_equivalence( $node );
759 }
760}
761
762=head2 add_equivalence_edge
763
764Return the relationship object, if any, that exists between two readings.
765
766=cut
767
768sub add_equivalence_edge {
769 my( $self, $source, $target ) = @_;
770 my $seq = $self->equivalence( $source );
771 my $teq = $self->equivalence( $target );
772 #print STDERR "Adding equivalence edge $seq -> $teq for $source -> $target\n";
773 $self->equivalence_graph->add_edge( $seq, $teq );
774}
775
776=head2 add_equivalence_edge
777
778Return the relationship object, if any, that exists between two readings.
779
780=cut
781
782sub delete_equivalence_edge {
783 my( $self, $source, $target ) = @_;
784 my $seq = $self->equivalence( $source );
785 my $teq = $self->equivalence( $target );
786 #print STDERR "Deleting equivalence edge $seq -> $teq for $source -> $target\n";
787 $self->equivalence_graph->delete_edge( $seq, $teq );
788}
789
790sub _is_disconnected {
791 my $self = shift;
792 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
793 || scalar $self->equivalence_graph->successorless_vertices > 1 );
794}
795
796=head2 make_equivalence
797
798Equate two readings in the equivalence graph. Should only be called internally.
799
800=cut
801
802sub make_equivalence {
803 my( $self, $source, $target ) = @_;
804 # Get the source equivalent readings
805 my $seq = $self->equivalence( $source );
806 my $teq = $self->equivalence( $target );
807 # Nothing to do if they are already equivalent...
808 return if $seq eq $teq;
809 #print STDERR "Making equivalence for $source -> $target\n";
810 my $sourcepool = $self->eqreadings( $seq );
811 # and add them to the target readings.
812 # print STDERR "Moving readings '@$sourcepool' from group $seq to $teq\n";
813 push( @{$self->eqreadings( $teq )}, @$sourcepool );
814 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
815 # Then merge the nodes in the equivalence graph.
816 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
817 $self->equivalence_graph->add_edge( $pred, $teq );
818 }
819 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
820 $self->equivalence_graph->add_edge( $teq, $succ );
821 }
822 $self->equivalence_graph->delete_vertex( $seq );
823# throw( "Graph got disconnected making $source / $target equivalence" )
824# if $self->_is_disconnected;
825}
826
827=head2 test_equivalence
828
829Test whether, if two readings were equated with a relationship, the graph would
830still be valid.
831
832=cut
833
834sub test_equivalence {
835 my( $self, $source, $target ) = @_;
836 # Try merging the nodes in the equivalence graph; return a true value if
837 # no cycle is introduced thereby. Restore the original graph first.
838
839 # Keep track of edges we add
840 my %added_pred;
841 my %added_succ;
842 # Get the reading equivalents
843 my $seq = $self->equivalence( $source );
844 my $teq = $self->equivalence( $target );
845 # Maybe this is easy?
846 return 1 if $seq eq $teq;
847
848 # Save the first graph
849 my $checkstr = $self->equivalence_graph->stringify();
850 # Add and save relevant edges
851 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
852 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
853 $added_pred{$pred} = 0;
854 } else {
855 $self->equivalence_graph->add_edge( $pred, $teq );
856 $added_pred{$pred} = 1;
857 }
858 }
859 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
860 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
861 $added_succ{$succ} = 0;
862 } else {
863 $self->equivalence_graph->add_edge( $teq, $succ );
864 $added_succ{$succ} = 1;
865 }
866 }
867 # Delete source equivalent and test
868 $self->equivalence_graph->delete_vertex( $seq );
869 my $ret = !$self->equivalence_graph->has_a_cycle;
870
871 # Restore what we changed
872 $self->equivalence_graph->add_vertex( $seq );
873 foreach my $pred ( keys %added_pred ) {
874 $self->equivalence_graph->add_edge( $pred, $seq );
875 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
876 }
877 foreach my $succ ( keys %added_succ ) {
878 $self->equivalence_graph->add_edge( $seq, $succ );
879 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
880 }
881 unless( $self->equivalence_graph->eq( $checkstr ) ) {
882 warn "GRAPH CHANGED after testing";
883 }
884 # Return our answer
885 return $ret;
886}
887
888=head2 break_equivalence
889
890Unmake an equivalence link between two readings. Should only be called internally.
891
892=cut
893
894sub break_equivalence {
895 my( $self, $source, $target ) = @_;
896
897 # This is the hard one. Need to reconstruct the equivalence groups without
898 # the given link.
899 my( %sng, %tng );
900 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
901 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
902 # If these groups intersect, they are still connected; do nothing.
903 foreach my $el ( keys %tng ) {
904 if( exists $sng{$el} ) {
905 #print STDERR "Equivalence break $source / $target is a noop\n";
906 return;
907 }
908 }
909 #print STDERR "Breaking equivalence $source / $target\n";
910 # If they don't intersect, then we split the nodes in the graph and in
911 # the hashes. First figure out which group has which name
912 my $oldgroup = $self->equivalence( $source ); # eq for $target
913 my $swapped = $oldgroup eq $source;
914 my $newgroup = $swapped ? $target : $source;
915 my( $oldmembers, $newmembers );
916 if( $swapped ) {
917 $oldmembers = [ keys %sng ];
918 $newmembers = [ keys %tng ];
919 } else {
920 $oldmembers = [ keys %tng ];
921 $newmembers = [ keys %sng ];
922 }
923
924 # First alter the old group in the hash
925 $self->set_eqreadings( $oldgroup, $oldmembers );
926
927 # then add the new group back to the hash with its new key
928 $self->set_eqreadings( $newgroup, $newmembers );
929 foreach my $el ( @$newmembers ) {
930 $self->set_equivalence( $el, $newgroup );
931 }
932
933 # Now add the new group back to the equivalence graph
934 $self->equivalence_graph->add_vertex( $newgroup );
935 # ...add the appropriate edges to the source group vertext
936 my $c = $self->collation;
937 foreach my $rdg ( @$newmembers ) {
938 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
939 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
940 }
941 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
942 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
943 }
944 }
945
946 # ...and figure out which edges on the old group vertex to delete.
947 my( %old_pred, %old_succ );
948 foreach my $rdg ( @$oldmembers ) {
949 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
950 $old_pred{$self->equivalence( $rp )} = 1;
951 }
952 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
953 $old_succ{$self->equivalence( $rs )} = 1;
954 }
955 }
956 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
957 unless( $old_pred{$p} ) {
958 $self->equivalence_graph->delete_edge( $p, $oldgroup );
959 }
960 }
961 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
962 unless( $old_succ{$s} ) {
963 $self->equivalence_graph->delete_edge( $oldgroup, $s );
964 }
965 }
966# throw( "Graph got disconnected breaking $source / $target equivalence" )
967# if $self->_is_disconnected;
968}
969
970sub _find_equiv_without {
971 my( $self, $first, $second ) = @_;
972 my %found = ( $first => 1 );
973 my $check = [ $first ];
974 my $iter = 0;
975 while( @$check ) {
976 my $more = [];
977 foreach my $r ( @$check ) {
978 foreach my $nr ( $self->graph->neighbors( $r ) ) {
979 next if $r eq $second;
980 if( $self->get_relationship( $r, $nr )->colocated ) {
981 push( @$more, $nr ) unless exists $found{$nr};
982 $found{$nr} = 1;
983 }
984 }
985 }
986 $check = $more;
987 }
988 return keys %found;
989}
990
991### Output logic
992
027d819c 993sub _as_graphml {
2626f709 994 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
c84275ff 995
996 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
997 $rgraph->setAttribute( 'edgedefault', 'directed' );
998 $rgraph->setAttribute( 'id', 'relationships', );
999 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
cc31ebaa 1000 $rgraph->setAttribute( 'parse.edges', 0 );
c84275ff 1001 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
cc31ebaa 1002 $rgraph->setAttribute( 'parse.nodes', 0 );
c84275ff 1003 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1004
1005 # Add the vertices according to their XML IDs
2626f709 1006 my %rdg_lookup = ( reverse %$node_hash );
cc31ebaa 1007 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
826d8773 1008 my @nlist = sort keys( %rdg_lookup );
414cc046 1009 foreach my $n ( @nlist ) {
c84275ff 1010 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1011 $n_el->setAttribute( 'id', $n );
2626f709 1012 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
c84275ff 1013 }
cc31ebaa 1014 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
c84275ff 1015
1016 # Add the relationship edges, with their object information
1017 my $edge_ctr = 0;
1018 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1019 # Add an edge and fill in its relationship info.
a30ca502 1020 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
c84275ff 1021 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1022 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1023 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1024 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1025
3ae5e2ad 1026 my $rel_obj = $self->get_relationship( @$e );
bbd064a9 1027 foreach my $key ( keys %$edge_keys ) {
1028 my $value = $rel_obj->$key;
1029 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1030 if defined $value;
1031 }
c84275ff 1032 }
cc31ebaa 1033 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
c84275ff 1034}
1035
1036sub _by_xmlid {
2626f709 1037 my $tmp_a = $a;
1038 my $tmp_b = $b;
1039 $tmp_a =~ s/\D//g;
1040 $tmp_b =~ s/\D//g;
1041 return $tmp_a <=> $tmp_b;
c84275ff 1042}
1043
1044sub _add_graphml_data {
1045 my( $el, $key, $value ) = @_;
1046 return unless defined $value;
1047 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1048 $data_el->setAttribute( 'key', $key );
1049 $data_el->appendText( $value );
83d5ac3a 1050}
1051
63778331 1052sub throw {
1053 Text::Tradition::Error->throw(
1054 'ident' => 'Relationship error',
1055 'message' => $_[0],
1056 );
1057}
1058
22222af9 1059no Moose;
1060__PACKAGE__->meta->make_immutable;
1061
10621;