implement persistent equivalence graph for relationship tracking
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
53
54 =end testing
55
56 =head1 METHODS
57
58 =head2 new( collation => $collation );
59
60 Creates a new relationship store for the given collation.
61
62 =cut
63
64 has 'collation' => (
65         is => 'ro',
66         isa => 'Text::Tradition::Collation',
67         required => 1,
68         weak_ref => 1,
69         );
70
71 has 'scopedrels' => (
72         is => 'ro',
73         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74         default => sub { {} },
75         );
76
77 has 'graph' => (
78         is => 'ro',
79         isa => 'Graph',
80         default => sub { Graph->new( undirected => 1 ) },
81     handles => {
82         relationships => 'edges',
83         add_reading => 'add_vertex',
84         delete_reading => 'delete_vertex',
85     },
86         );
87         
88 =head2 equivalence_graph()
89
90 Returns an equivalence graph of the collation, in which all readings
91 related via a 'colocated' relationship are transformed into a single
92 vertex. Can be used to determine the validity of a new relationship. 
93
94 =cut
95
96 has 'equivalence_graph' => (
97         is => 'ro',
98         isa => 'Graph',
99         default => sub { Graph->new() },
100         );
101         
102 has '_node_equivalences' => (
103         is => 'ro',
104         traits => ['Hash'],
105         handles => {
106                 equivalence => 'get',
107                 set_equivalence => 'set',
108                 remove_equivalence => 'delete',
109         },
110         );
111
112 has '_equivalence_readings' => (
113         is => 'ro',
114         traits => ['Hash'],
115         handles => {
116                 eqreadings => 'get',
117                 set_eqreadings => 'set',
118                 remove_eqreadings => 'delete',
119         },
120         );
121         
122 around add_reading => sub {
123         my $orig = shift;
124         my $self = shift;
125         
126         $self->equivalence_graph->add_vertex( @_ );
127         $self->set_equivalence( $_[0], $_[0] );
128         $self->set_eqreadings( $_[0], [ $_[0] ] );
129         $self->$orig( @_ );
130 };
131
132 around delete_reading => sub {
133         my $orig = shift;
134         my $self = shift;
135         
136         $DB::single = 1;
137         $self->_remove_equivalence_node( @_ );
138         $self->$orig( @_ );
139 };
140
141 =head2 get_relationship
142
143 Return the relationship object, if any, that exists between two readings.
144
145 =cut
146
147 sub get_relationship {
148         my $self = shift;
149         my @vector;
150         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
151                 # Dereference the edge arrayref that was passed.
152                 my $edge = shift;
153                 @vector = @$edge;
154         } else {
155                 @vector = @_;
156         }
157         my $relationship;
158         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
159                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
160         } 
161         return $relationship;
162 }
163
164 sub _set_relationship {
165         my( $self, $relationship, @vector ) = @_;
166         $self->graph->add_edge( @vector );
167         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
168         $self->make_equivalence( @vector ) if $relationship->colocated;
169 }
170
171 =head2 create
172
173 Create a new relationship with the given options and return it.
174 Warn and return undef if the relationship cannot be created.
175
176 =cut
177
178 sub create {
179         my( $self, $options ) = @_;
180         # Check to see if a relationship exists between the two given readings
181         my $source = delete $options->{'orig_a'};
182         my $target = delete $options->{'orig_b'};
183         my $rel = $self->get_relationship( $source, $target );
184         if( $rel ) {
185                 if( $rel->type eq 'collated' ) {
186                         # Always replace a 'collated' relationship with a more descriptive
187                         # one, if asked.
188                         $self->del_relationship( $source, $target );
189                 } elsif( $rel->type ne $options->{'type'} ) {
190                         throw( "Another relationship of type " . $rel->type 
191                                 . " already exists between $source and $target" );
192                 } else {
193                         return $rel;
194                 }
195         }
196         
197         # Check to see if a nonlocal relationship is defined for the two readings
198         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
199                 $options->{'reading_b'} );
200         if( $rel && $rel->type eq $options->{'type'} ) {
201                 return $rel;
202         } elsif( $rel ) {
203                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
204         } else {
205                 $rel = Text::Tradition::Collation::Relationship->new( $options );
206                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
207                 return $rel;
208         }
209 }
210
211 =head2 add_scoped_relationship( $rel )
212
213 Keep track of relationships defined between specific readings that are scoped
214 non-locally.  Key on whichever reading occurs first alphabetically.
215
216 =cut
217
218 sub add_scoped_relationship {
219         my( $self, $rel ) = @_;
220         my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
221         my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );      
222         my $r = $self->scoped_relationship( $rdga, $rdgb );
223         if( $r ) {
224                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
225                         $r->type, $rdga, $rdgb );
226                 return;
227         }
228         my( $first, $second ) = sort ( $rdga, $rdgb );
229         $self->scopedrels->{$first}->{$second} = $rel;
230 }
231
232 =head2 scoped_relationship( $reading_a, $reading_b )
233
234 Returns the general (document-level or global) relationship that has been defined 
235 between the two reading strings. Returns undef if there is no general relationship.
236
237 =cut
238
239 sub scoped_relationship {
240         my( $self, $rdga, $rdgb ) = @_;
241         my( $first, $second ) = sort( $rdga, $rdgb );
242         if( exists $self->scopedrels->{$first}->{$second} ) {
243                 return $self->scopedrels->{$first}->{$second};
244         } else {
245                 return undef;
246         }
247 }
248
249 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
250
251 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
252 for the possible options) between the readings given in $source and $target.  Sets
253 up a scoped relationship between $sourcetext and $targettext if the relationship is
254 scoped non-locally.
255
256 Returns a status boolean and a list of all reading pairs connected by the call to
257 add_relationship.
258
259 =begin testing
260
261 use Text::Tradition;
262 use TryCatch;
263
264 my $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
265 # Test 1: try to equate nodes that are prevented with an intermediate collation
266 ok( $t1, "Parsed test fragment file" );
267 my $c1 = $t1->collation;
268 my $trel = $c1->get_relationship( '9,2', '9,3' );
269 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
270         "Troublesome relationship exists" );
271 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
272
273 # Try to make the link we want
274 try {
275         $c1->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
276         ok( 1, "Added cross-collation relationship as expected" );
277 } catch {
278         ok( 0, "Existing collation blocked equivalence relationship" );
279 }
280
281 try {
282         $c1->calculate_ranks();
283         ok( 1, "Successfully calculated ranks" );
284 } catch {
285         ok( 0, "Collation now has a cycle" );
286 }
287
288 # Now attempt merge of an identical reading
289 try {
290         $c1->merge_readings( '9,3', '11,5' );
291         ok( 1, "Successfully merged reading 'pontifex'" );
292 } catch ( Text::Tradition::Error $e ) {
293         ok( 0, "Merge of mergeable readings failed: $e->message" );
294         
295 }
296
297 # Test 2: try to equate nodes that are prevented with a real intermediate
298 # equivalence
299 my $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
300 my $c2 = $t2->collation;
301 $c2->add_relationship( '9,2', '9,3', { 'type' => 'lexical' } );
302 my $trel2 = $c2->get_relationship( '9,2', '9,3' );
303 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
304         "Created blocking relationship" );
305 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
306 # This time the link ought to fail
307 try {
308         $c2->add_relationship( '8,6', '10,3', { 'type' => 'orthographic' } );
309         ok( 0, "Added cross-equivalent bad relationship" );
310 } catch {
311         ok( 1, "Existing equivalence blocked crossing relationship" );
312 }
313
314 try {
315         $c2->calculate_ranks();
316         ok( 1, "Successfully calculated ranks" );
317 } catch {
318         ok( 0, "Collation now has a cycle" );
319 }
320
321 # Test 3: make a straightforward pair of transpositions.
322 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
323 # Test 1: try to equate nodes that are prevented with an intermediate collation
324 my $c3 = $t3->collation;
325 try {
326         $c3->add_relationship( '36,4', '38,3', { 'type' => 'transposition' } );
327         ok( 1, "Added straightforward transposition" );
328 } catch {
329         ok( 0, "Failed to add normal transposition" );
330 }
331 try {
332         $c3->add_relationship( '36,3', '38,2', { 'type' => 'transposition' } );
333         ok( 1, "Added straightforward transposition complement" );
334 } catch {
335         ok( 0, "Failed to add normal transposition complement" );
336 }
337
338 # Test 4: try to make a transposition that could be a parallel.
339 try {
340         $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
341         ok( 0, "Added bad colocated transposition" );
342 } catch {
343         ok( 1, "Prevented bad colocated transposition" );
344 }
345
346 # Test 5: make the parallel, and then make the transposition again.
347 try {
348         $c3->add_relationship( '28,3', '29,3', { 'type' => 'orthographic' } );
349         ok( 1, "Equated identical readings for transposition" );
350 } catch {
351         ok( 0, "Failed to equate identical readings" );
352 }
353 try {
354         $c3->add_relationship( '28,2', '29,2', { 'type' => 'transposition' } );
355         ok( 1, "Added straightforward transposition complement" );
356 } catch {
357         ok( 0, "Failed to add normal transposition complement" );
358 }
359
360 =end testing
361
362 =cut
363
364 sub add_relationship {
365         my( $self, $source, $target, $options ) = @_;
366     my $c = $self->collation;
367
368         throw( "Adding self relationship at $source" ) if $source eq $target;
369         my $relationship;
370         my $thispaironly;
371         my $droppedcolls = [];
372         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
373                 $relationship = $options;
374                 $thispaironly = 1;  # If existing rel, set only where asked.
375         } else {
376                 # Check the options
377                 $options->{'scope'} = 'local' unless $options->{'scope'};
378                 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
379                 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
380                 
381                 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target, 
382                         $options->{'type'}, $droppedcolls );
383                 unless( $is_valid ) {
384                         throw( "Invalid relationship: $reason" );
385                 }
386                 
387                 # Try to create the relationship object.
388                 $options->{'reading_a'} = $c->reading( $source )->text;
389                 $options->{'reading_b'} = $c->reading( $target )->text;
390                 $options->{'orig_a'} = $source;
391                 $options->{'orig_b'} = $target;
392         if( $options->{'scope'} ne 'local' ) {
393                         # Is there a relationship with this a & b already?
394                         # Case-insensitive for non-orthographics.
395                         my $rdga = $options->{'type'} eq 'orthographic' 
396                                 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
397                         my $rdgb = $options->{'type'} eq 'orthographic' 
398                                 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
399                         my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
400                         if( $otherrel && $otherrel->type eq $options->{type}
401                                 && $otherrel->scope eq $options->{scope} ) {
402                                 warn "Applying existing scoped relationship";
403                                 $relationship = $otherrel;
404                         }
405         }
406                 $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
407     }
408
409
410         # Find all the pairs for which we need to set the relationship.
411         my @vectors;
412     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
413         push( @vectors, $self->_find_applicable( $relationship ) );
414     }
415         
416     # Now set the relationship(s).
417     my @pairs_set;
418         my $rel = $self->get_relationship( $source, $target );
419         my $skip;
420         if( $rel && $rel ne $relationship ) {
421                 if( $rel->nonlocal ) {
422                         throw( "Found conflicting relationship at $source - $target" );
423                 } elsif( $rel->type ne 'collated' ) {
424                         # Replace a collation relationship; leave any other sort in place.
425                         my $r1ann = $rel->has_annotation ? $rel->annotation : '';
426                         my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
427                         unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
428                                 warn sprintf( "Not overriding local relationship %s with global %s " 
429                                         . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
430                                         $source, $target, $rel->reading_a, $rel->reading_b );
431                                 $skip = 1;
432                         }
433                 }
434         }
435         $self->_set_relationship( $relationship, $source, $target ) unless $skip;
436         push( @pairs_set, [ $source, $target ] );
437     
438     # Set any additional relationships that might be in @vectors.
439     foreach my $v ( @vectors ) {
440         next if $v->[0] eq $source && $v->[1] eq $target;
441         next if $v->[1] eq $source && $v->[0] eq $target;
442         my @added = $self->add_relationship( @$v, $relationship );
443         push( @pairs_set, @added );
444     }
445     
446     # Finally, restore whatever collations we can, and return.
447     $self->_restore_collations( @$droppedcolls );
448     return @pairs_set;
449 }
450
451 =head2 del_scoped_relationship( $reading_a, $reading_b )
452
453 Returns the general (document-level or global) relationship that has been defined 
454 between the two reading strings. Returns undef if there is no general relationship.
455
456 =cut
457
458 sub del_scoped_relationship {
459         my( $self, $rdga, $rdgb ) = @_;
460         my( $first, $second ) = sort( $rdga, $rdgb );
461         return delete $self->scopedrels->{$first}->{$second};
462 }
463
464 sub _find_applicable {
465         my( $self, $rel ) = @_;
466         my $c = $self->collation;
467         # TODO Someday we might use a case sensitive language.
468         my $lang = $c->tradition->language;
469         my @vectors;
470         my @identical_readings;
471         if( $rel->type eq 'orthographic' ) {
472                 @identical_readings = grep { $_->text eq $rel->reading_a } 
473                         $c->readings;
474         } else {
475                 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
476                         $c->readings;
477         }
478         foreach my $ir ( @identical_readings ) {
479                 my @itarget;
480                 if( $rel->type eq 'orthographic' ) {
481                         @itarget = grep { $_->rank == $ir->rank 
482                                                           && $_->text eq $rel->reading_b } $c->readings;
483                 } else {
484                         @itarget = grep { $_->rank == $ir->rank 
485                                                           && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
486                 }
487                 if( @itarget ) {
488                         # Warn if there is more than one hit with no orth link between them.
489                         my $itmain = shift @itarget;
490                         if( @itarget ) {
491                                 my %all_targets;
492                                 map { $all_targets{$_} = 1 } @itarget;
493                                 map { delete $all_targets{$_} } 
494                                         $self->related_readings( $itmain, 
495                                                 sub { $_[0]->type eq 'orthographic' } );
496                         warn "More than one unrelated reading with text " . $itmain->text
497                                 . " at rank " . $ir->rank . "!" if keys %all_targets;
498                         }
499                         push( @vectors, [ $ir->id, $itmain->id ] );
500                 }
501         }
502         return @vectors;
503 }
504
505 =head2 del_relationship( $source, $target )
506
507 Removes the relationship between the given readings. If the relationship is
508 non-local, removes the relationship everywhere in the graph.
509
510 =cut
511
512 sub del_relationship {
513         my( $self, $source, $target ) = @_;
514         my $rel = $self->get_relationship( $source, $target );
515         return () unless $rel; # Nothing to delete; return an empty set.
516         my $colo = $rel->colocated;
517         my @vectors = ( [ $source, $target ] );
518         $self->_remove_relationship( $colo, $source, $target );
519         if( $rel->nonlocal ) {
520                 # Remove the relationship wherever it occurs.
521                 # Remove the relationship wherever it occurs.
522                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
523                         $self->relationships;
524                 foreach my $re ( @rel_edges ) {
525                         $self->_remove_relationship( $colo, @$re );
526                         push( @vectors, $re );
527                 }
528                 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
529         }
530         return @vectors;
531 }
532
533 sub _remove_relationship {
534         my( $self, $equiv, @vector ) = @_;
535         $self->graph->delete_edge( @vector );
536         $self->break_equivalence( @vector ) if $equiv;
537 }
538         
539 =head2 relationship_valid( $source, $target, $type )
540
541 Checks whether a relationship of type $type may exist between the readings given
542 in $source and $target.  Returns a tuple of ( status, message ) where status is
543 a yes/no boolean and, if the answer is no, message gives the reason why.
544
545 =cut
546
547 sub relationship_valid {
548     my( $self, $source, $target, $rel, $mustdrop ) = @_;
549     $mustdrop = [] unless $mustdrop; # in case we were passed nothing
550     my $c = $self->collation;
551     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
552                 # Check that the two readings do (for a repetition) or do not (for
553                 # a transposition) appear in the same witness.
554                 # TODO this might be called before witness paths are set...
555                 my %seen_wits;
556                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
557                 foreach my $w ( $c->reading_witnesses( $target ) ) {
558                         if( $seen_wits{$w} ) {
559                                 return ( 0, "Readings both occur in witness $w" ) 
560                                         if $rel eq 'transposition';
561                                 return ( 1, "ok" ) if $rel eq 'repetition';
562                         }
563                 }
564                 return ( 0, "Readings occur only in distinct witnesses" )
565                         if $rel eq 'repetition';
566         } 
567         if ( $rel eq 'transposition' ) {
568                 # We also need to check both that the readings occur in distinct
569                 # witnesses, and that they are not in the same place. That is,
570                 # proposing to link them should cause a witness loop.
571                 if( $self->test_equivalence( $source, $target ) ) {
572                         return ( 0, "Readings appear to be colocated, not transposed" );
573                 } else {
574                         return ( 1, "ok" );
575                 }
576                 
577         } elsif( $rel ne 'repetition' ) {
578                 # Check that linking the source and target in a relationship won't lead
579                 # to a path loop for any witness. 
580                 # First, drop/stash any collations that might interfere
581                 my $sourceobj = $c->reading( $source );
582                 my $targetobj = $c->reading( $target );
583                 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
584                 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
585                 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
586                         push( @$mustdrop, $self->_drop_collations( $source ) );
587                         push( @$mustdrop, $self->_drop_collations( $target ) );
588                         if( $c->end->has_rank ) {
589                                 my $cpred = $c->common_predecessor( $source, $target );
590                                 my $csucc = $c->common_successor( $source, $target );
591                                 foreach my $rk ( $cpred->rank+1 .. $csucc->rank-1 ) {
592                                         map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
593                                                 $c->readings_at_rank( $rk );
594                                 }
595                         }
596                 }
597                 unless( $self->test_equivalence( $source, $target ) ) {
598                         $self->_restore_collations( @$mustdrop );
599                         return( 0, "Relationship would create witness loop" );
600                 }
601                 return ( 1, "ok" );
602         }
603 }
604
605 sub _drop_collations {
606         my( $self, $reading ) = @_;
607         my @dropped;
608         foreach my $n ( $self->graph->neighbors( $reading ) ) {
609                 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
610                         push( @dropped, [ $reading, $n ] );
611                         $self->del_relationship( $reading, $n );
612                         #print STDERR "Dropped collation $reading -> $n\n";
613                 }
614         }
615         return @dropped;
616 }
617
618 sub _restore_collations {
619         my( $self, @vectors ) = @_;
620         foreach my $v ( @vectors ) {
621                 try {
622                         $self->add_relationship( @$v, { 'type' => 'collated' } );
623                         #print STDERR "Restored collation @$v\n";
624                 } catch {
625                         print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
626                 }
627         }
628 }
629
630 =head2 filter_collations()
631
632 Utility function. Removes any redundant 'collated' relationships from the graph.
633 A collated relationship is redundant if the readings in question would occupy
634 the same rank regardless of the existence of the relationship.
635
636 =cut
637
638 sub filter_collations {
639         my $self = shift;
640         my $c = $self->collation;
641         foreach my $r ( 1 .. $c->end->rank - 1 ) {
642                 my $anchor;
643                 my @need_collations;
644                 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
645                         next if $rdg->is_meta;
646                         my $ip = 0;
647                         foreach my $pred ( $rdg->predecessors ) {
648                                 if( $pred->rank == $r - 1 ) {
649                                         $ip = 1;
650                                         $anchor = $rdg unless( $anchor );
651                                         last;
652                                 }
653                         }
654                         push( @need_collations, $rdg ) unless $ip;
655                         $c->relations->_drop_collations( "$rdg" );
656                 }
657                 $anchor
658                         ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
659                                                 unless $c->get_relationship( $anchor, $_ ) } @need_collations
660                         : warn "No anchor found at $r";
661         }
662 }
663
664 =head2 related_readings( $reading, $filter )
665
666 Returns a list of readings that are connected via relationship links to $reading.
667 If $filter is set to a subroutine ref, returns only those related readings where
668 $filter( $relationship ) returns a true value.
669
670 =cut
671
672 sub related_readings {
673         my( $self, $reading, $filter ) = @_;
674         my $return_object;
675         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
676                 $reading = $reading->id;
677                 $return_object = 1;
678         }
679         my @answer;
680         if( $filter ) {
681                 # Backwards compat
682                 if( $filter eq 'colocated' ) {
683                         $filter = sub { $_[0]->colocated };
684                 }
685                 my %found = ( $reading => 1 );
686                 my $check = [ $reading ];
687                 my $iter = 0;
688                 while( @$check ) {
689                         my $more = [];
690                         foreach my $r ( @$check ) {
691                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
692                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
693                                                 push( @$more, $nr ) unless exists $found{$nr};
694                                                 $found{$nr} = 1;
695                                         }
696                                 }
697                         }
698                         $check = $more;
699                 }
700                 delete $found{$reading};
701                 @answer = keys %found;
702         } else {
703                 @answer = $self->graph->all_reachable( $reading );
704         }
705         if( $return_object ) {
706                 my $c = $self->collation;
707                 return map { $c->reading( $_ ) } @answer;
708         } else {
709                 return @answer;
710         }
711 }
712
713 =head2 merge_readings( $kept, $deleted );
714
715 Makes a best-effort merge of the relationship links between the given readings, and
716 stops tracking the to-be-deleted reading.
717
718 =cut
719
720 sub merge_readings {
721         my( $self, $kept, $deleted, $combined ) = @_;
722         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
723                 # Get the pair of kept / rel
724                 my @vector = ( $kept );
725                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
726                 next if $vector[0] eq $vector[1]; # Don't add a self loop
727                 
728                 # If kept changes its text, drop the relationship.
729                 next if $combined;
730                         
731                 # If kept / rel already has a relationship, just keep the old
732                 my $rel = $self->get_relationship( @vector );
733                 next if $rel;
734                 
735                 # Otherwise, adopt the relationship that would be deleted.
736                 $rel = $self->get_relationship( @$edge );
737                 $self->_set_relationship( $rel, @vector );
738         }
739         $self->make_equivalence( $deleted, $kept );
740 }
741
742 ### Equivalence logic
743
744 sub _remove_equivalence_node {
745         my( $self, $node ) = @_;
746         my $group = $self->equivalence( $node );
747         my $nodelist = $self->eqreadings( $group );
748         if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
749                 #print STDERR "Removing equivalence $group for $node\n";
750                 $self->remove_eqreadings( $group );
751         } elsif( @$nodelist == 1 ) {
752                 warn "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
753                         " in group that should have only $node";
754         } else {
755                 #print STDERR "Removing $node from equivalence $group\n";
756                 my @newlist = grep { $_ ne $node } @$nodelist;
757                 $self->set_eqreadings( $group, \@newlist );
758                 $self->remove_equivalence( $node );
759         }
760 }
761
762 =head2 add_equivalence_edge
763
764 Return the relationship object, if any, that exists between two readings.
765
766 =cut
767
768 sub add_equivalence_edge {
769         my( $self, $source, $target ) = @_;
770         my $seq = $self->equivalence( $source );
771         my $teq = $self->equivalence( $target );
772         #print STDERR "Adding equivalence edge $seq -> $teq for $source -> $target\n";
773         $self->equivalence_graph->add_edge( $seq, $teq );
774 }
775
776 =head2 add_equivalence_edge
777
778 Return the relationship object, if any, that exists between two readings.
779
780 =cut
781
782 sub delete_equivalence_edge {
783         my( $self, $source, $target ) = @_;
784         my $seq = $self->equivalence( $source );
785         my $teq = $self->equivalence( $target );
786         #print STDERR "Deleting equivalence edge $seq -> $teq for $source -> $target\n";
787         $self->equivalence_graph->delete_edge( $seq, $teq );
788 }
789
790 sub _is_disconnected {
791         my $self = shift;
792         return( scalar $self->equivalence_graph->predecessorless_vertices > 1
793                 || scalar $self->equivalence_graph->successorless_vertices > 1 );
794 }
795
796 =head2 make_equivalence
797
798 Equate two readings in the equivalence graph.  Should only be called internally.
799
800 =cut
801
802 sub make_equivalence {
803         my( $self, $source, $target ) = @_;
804         # Get the source equivalent readings
805         my $seq = $self->equivalence( $source );
806         my $teq = $self->equivalence( $target );
807         # Nothing to do if they are already equivalent...
808         return if $seq eq $teq;
809         #print STDERR "Making equivalence for $source -> $target\n";
810         my $sourcepool = $self->eqreadings( $seq );
811         # and add them to the target readings.
812         # print STDERR "Moving readings '@$sourcepool' from group $seq to $teq\n";
813         push( @{$self->eqreadings( $teq )}, @$sourcepool );
814         map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
815         # Then merge the nodes in the equivalence graph.
816         foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
817                 $self->equivalence_graph->add_edge( $pred, $teq );
818         }
819         foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
820                 $self->equivalence_graph->add_edge( $teq, $succ );
821         }
822         $self->equivalence_graph->delete_vertex( $seq );
823 #       throw( "Graph got disconnected making $source / $target equivalence" )
824 #               if $self->_is_disconnected;
825 }
826
827 =head2 test_equivalence
828
829 Test whether, if two readings were equated with a relationship, the graph would
830 still be valid.
831
832 =cut
833
834 sub test_equivalence {
835         my( $self, $source, $target ) = @_;
836         # Try merging the nodes in the equivalence graph; return a true value if
837         # no cycle is introduced thereby. Restore the original graph first.
838         
839         # Keep track of edges we add
840         my %added_pred;
841         my %added_succ;
842         # Get the reading equivalents
843         my $seq = $self->equivalence( $source );
844         my $teq = $self->equivalence( $target );
845         # Maybe this is easy?
846         return 1 if $seq eq $teq;
847         
848         # Save the first graph
849         my $checkstr = $self->equivalence_graph->stringify();
850         # Add and save relevant edges
851         foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
852                 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
853                         $added_pred{$pred} = 0;
854                 } else {
855                         $self->equivalence_graph->add_edge( $pred, $teq );
856                         $added_pred{$pred} = 1;
857                 }
858         }
859         foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
860                 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
861                         $added_succ{$succ} = 0;
862                 } else {
863                         $self->equivalence_graph->add_edge( $teq, $succ );
864                         $added_succ{$succ} = 1;
865                 }
866         }
867         # Delete source equivalent and test
868         $self->equivalence_graph->delete_vertex( $seq );
869         my $ret = !$self->equivalence_graph->has_a_cycle;
870         
871         # Restore what we changed
872         $self->equivalence_graph->add_vertex( $seq );
873         foreach my $pred ( keys %added_pred ) {
874                 $self->equivalence_graph->add_edge( $pred, $seq );
875                 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
876         }
877         foreach my $succ ( keys %added_succ ) {
878                 $self->equivalence_graph->add_edge( $seq, $succ );
879                 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
880         }
881         unless( $self->equivalence_graph->eq( $checkstr ) ) {
882                 warn "GRAPH CHANGED after testing";
883         }
884         # Return our answer
885         return $ret;
886 }
887
888 =head2 break_equivalence
889
890 Unmake an equivalence link between two readings. Should only be called internally.
891
892 =cut
893
894 sub break_equivalence {
895         my( $self, $source, $target ) = @_;
896         
897         # This is the hard one. Need to reconstruct the equivalence groups without
898         # the given link.
899         my( %sng, %tng );
900         map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
901         map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
902         # If these groups intersect, they are still connected; do nothing.
903         foreach my $el ( keys %tng ) {
904                 if( exists $sng{$el} ) {
905                         #print STDERR "Equivalence break $source / $target is a noop\n";
906                         return;
907                 }
908         }
909         #print STDERR "Breaking equivalence $source / $target\n";
910         # If they don't intersect, then we split the nodes in the graph and in
911         # the hashes. First figure out which group has which name
912         my $oldgroup = $self->equivalence( $source ); # eq for $target
913         my $swapped = $oldgroup eq $source;
914         my $newgroup = $swapped ? $target : $source;
915         my( $oldmembers, $newmembers );
916         if( $swapped ) {
917                 $oldmembers = [ keys %sng ];
918                 $newmembers = [ keys %tng ];
919         } else {
920                 $oldmembers = [ keys %tng ];
921                 $newmembers = [ keys %sng ];
922         }
923                 
924         # First alter the old group in the hash
925         $self->set_eqreadings( $oldgroup, $oldmembers );
926         
927         # then add the new group back to the hash with its new key
928         $self->set_eqreadings( $newgroup, $newmembers );
929         foreach my $el ( @$newmembers ) {
930                 $self->set_equivalence( $el, $newgroup );
931         }
932         
933         # Now add the new group back to the equivalence graph
934         $self->equivalence_graph->add_vertex( $newgroup );
935         # ...add the appropriate edges to the source group vertext
936         my $c = $self->collation;
937         foreach my $rdg ( @$newmembers ) {
938                 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
939                         $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
940                 }
941                 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
942                         $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
943                 }
944         }
945         
946         # ...and figure out which edges on the old group vertex to delete.
947         my( %old_pred, %old_succ );
948         foreach my $rdg ( @$oldmembers ) {
949                 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
950                         $old_pred{$self->equivalence( $rp )} = 1;
951                 }
952                 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
953                         $old_succ{$self->equivalence( $rs )} = 1;
954                 }
955         }
956         foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
957                 unless( $old_pred{$p} ) {
958                         $self->equivalence_graph->delete_edge( $p, $oldgroup );
959                 }
960         }
961         foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
962                 unless( $old_succ{$s} ) {
963                         $self->equivalence_graph->delete_edge( $oldgroup, $s );
964                 }
965         }
966 #       throw( "Graph got disconnected breaking $source / $target equivalence" )
967 #               if $self->_is_disconnected;
968 }
969
970 sub _find_equiv_without {
971         my( $self, $first, $second ) = @_;
972         my %found = ( $first => 1 );
973         my $check = [ $first ];
974         my $iter = 0;
975         while( @$check ) {
976                 my $more = [];
977                 foreach my $r ( @$check ) {
978                         foreach my $nr ( $self->graph->neighbors( $r ) ) {
979                                 next if $r eq $second;
980                                 if( $self->get_relationship( $r, $nr )->colocated ) {
981                                         push( @$more, $nr ) unless exists $found{$nr};
982                                         $found{$nr} = 1;
983                                 }
984                         }
985                 }
986                 $check = $more;
987         }
988         return keys %found;
989 }
990
991 ### Output logic
992
993 sub _as_graphml { 
994         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
995         
996     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
997         $rgraph->setAttribute( 'edgedefault', 'directed' );
998     $rgraph->setAttribute( 'id', 'relationships', );
999     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1000     $rgraph->setAttribute( 'parse.edges', 0 );
1001     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1002     $rgraph->setAttribute( 'parse.nodes', 0 );
1003     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1004     
1005     # Add the vertices according to their XML IDs
1006     my %rdg_lookup = ( reverse %$node_hash );
1007     # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1008     my @nlist = sort keys( %rdg_lookup );
1009     foreach my $n ( @nlist ) {
1010         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1011         $n_el->setAttribute( 'id', $n );
1012         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1013     }
1014         $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1015     
1016     # Add the relationship edges, with their object information
1017     my $edge_ctr = 0;
1018     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1019         # Add an edge and fill in its relationship info.
1020         next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1021                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1022                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1023                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1024                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1025
1026                 my $rel_obj = $self->get_relationship( @$e );
1027                 foreach my $key ( keys %$edge_keys ) {
1028                         my $value = $rel_obj->$key;
1029                         _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) 
1030                                 if defined $value;
1031                 }
1032         }
1033         $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1034 }
1035
1036 sub _by_xmlid {
1037         my $tmp_a = $a;
1038         my $tmp_b = $b;
1039         $tmp_a =~ s/\D//g;
1040         $tmp_b =~ s/\D//g;
1041         return $tmp_a <=> $tmp_b;
1042 }
1043
1044 sub _add_graphml_data {
1045     my( $el, $key, $value ) = @_;
1046     return unless defined $value;
1047     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1048     $data_el->setAttribute( 'key', $key );
1049     $data_el->appendText( $value );
1050 }
1051
1052 sub throw {
1053         Text::Tradition::Error->throw( 
1054                 'ident' => 'Relationship error',
1055                 'message' => $_[0],
1056                 );
1057 }
1058
1059 no Moose;
1060 __PACKAGE__->meta->make_immutable;
1061
1062 1;