enable graphml export of partial traditions
[scpubgit/stemmatology.git] / lib / Text / Tradition / Collation / RelationshipStore.pm
1 package Text::Tradition::Collation::RelationshipStore;
2
3 use strict;
4 use warnings;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
7 use TryCatch;
8
9 use Moose;
10
11 =head1 NAME
12
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
15     
16 =head1 DESCRIPTION
17
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones.  The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
22
23 =begin testing
24
25 use Text::Tradition;
26 use TryCatch;
27
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
29
30 # Add some relationships, and delete them
31
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new( 
34     'name'  => 'inline', 
35     'input' => 'CollateX',
36     'file'  => $cxfile,
37     );
38 my $c = $t->collation;
39
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23', 
45         { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
53
54 =end testing
55
56 =head1 METHODS
57
58 =head2 new( collation => $collation );
59
60 Creates a new relationship store for the given collation.
61
62 =cut
63
64 has 'collation' => (
65         is => 'ro',
66         isa => 'Text::Tradition::Collation',
67         required => 1,
68         weak_ref => 1,
69         );
70
71 has 'scopedrels' => (
72         is => 'ro',
73         isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74         default => sub { {} },
75         );
76
77 has 'graph' => (
78         is => 'ro',
79         isa => 'Graph',
80         default => sub { Graph->new( undirected => 1 ) },
81     handles => {
82         relationships => 'edges',
83         add_reading => 'add_vertex',
84         delete_reading => 'delete_vertex',
85     },
86         );
87         
88 =head2 get_relationship
89
90 Return the relationship object, if any, that exists between two readings.
91
92 =cut
93
94 sub get_relationship {
95         my $self = shift;
96         my @vector;
97         if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98                 # Dereference the edge arrayref that was passed.
99                 my $edge = shift;
100                 @vector = @$edge;
101         } else {
102                 @vector = @_;
103         }
104         my $relationship;
105         if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106                 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
107         } 
108         return $relationship;
109 }
110
111 sub _set_relationship {
112         my( $self, $relationship, @vector ) = @_;
113         $self->graph->add_edge( @vector );
114         $self->graph->set_edge_attribute( @vector, 'object', $relationship );
115 }
116
117 =head2 create
118
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
121
122 =cut
123
124 sub create {
125         my( $self, $options ) = @_;
126         # Check to see if a relationship exists between the two given readings
127         my $source = delete $options->{'orig_a'};
128         my $target = delete $options->{'orig_b'};
129         my $rel = $self->get_relationship( $source, $target );
130         if( $rel ) {
131                 if( $rel->type eq 'collated' ) {
132                         # Always replace a 'collated' relationship with a more descriptive
133                         # one, if asked.
134                         $self->del_relationship( $source, $target );
135                 } elsif( $rel->type ne $options->{'type'} ) {
136                         throw( "Another relationship of type " . $rel->type 
137                                 . " already exists between $source and $target" );
138                 } else {
139                         return $rel;
140                 }
141         }
142         
143         # Check to see if a nonlocal relationship is defined for the two readings
144         $rel = $self->scoped_relationship( $options->{'reading_a'}, 
145                 $options->{'reading_b'} );
146         if( $rel && $rel->type eq $options->{'type'} ) {
147                 return $rel;
148         } elsif( $rel ) {
149                 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
150         } else {
151                 $rel = Text::Tradition::Collation::Relationship->new( $options );
152                 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
153                 return $rel;
154         }
155 }
156
157 =head2 add_scoped_relationship( $rel )
158
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally.  Key on whichever reading occurs first alphabetically.
161
162 =cut
163
164 sub add_scoped_relationship {
165         my( $self, $rel ) = @_;
166         my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167         my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );      
168         my $r = $self->scoped_relationship( $rdga, $rdgb );
169         if( $r ) {
170                 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171                         $r->type, $rdga, $rdgb );
172                 return;
173         }
174         my( $first, $second ) = sort ( $rdga, $rdgb );
175         $self->scopedrels->{$first}->{$second} = $rel;
176 }
177
178 =head2 scoped_relationship( $reading_a, $reading_b )
179
180 Returns the general (document-level or global) relationship that has been defined 
181 between the two reading strings. Returns undef if there is no general relationship.
182
183 =cut
184
185 sub scoped_relationship {
186         my( $self, $rdga, $rdgb ) = @_;
187         my( $first, $second ) = sort( $rdga, $rdgb );
188         if( exists $self->scopedrels->{$first}->{$second} ) {
189                 return $self->scopedrels->{$first}->{$second};
190         } else {
191                 return undef;
192         }
193 }
194
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
196
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship 
198 for the possible options) between the readings given in $source and $target.  Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
200 scoped non-locally.
201
202 Returns a status boolean and a list of all reading pairs connected by the call to
203 add_relationship.
204
205 =cut
206
207 sub add_relationship {
208         my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
209
210         my $relationship;
211         my $thispaironly;
212         if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
213                 $relationship = $options;
214                 $thispaironly = 1;  # If existing rel, set only where asked.
215         } else {
216                 # Check the options
217                 $options->{'scope'} = 'local' unless $options->{'scope'};
218                 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
219                 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
220                 
221                 my( $is_valid, $reason ) = 
222                         $self->relationship_valid( $source, $target, $options->{'type'} );
223                 unless( $is_valid ) {
224                         throw( "Invalid relationship: $reason" );
225                 }
226                 
227                 # Try to create the relationship object.
228                 $options->{'reading_a'} = $source_rdg->text;
229                 $options->{'reading_b'} = $target_rdg->text;
230                 $options->{'orig_a'} = $source;
231                 $options->{'orig_b'} = $target;
232         if( $options->{'scope'} ne 'local' ) {
233                         # Is there a relationship with this a & b already?
234                         # Case-insensitive for non-orthographics.
235                         my $rdga = $options->{'type'} eq 'orthographic' 
236                                 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
237                         my $rdgb = $options->{'type'} eq 'orthographic' 
238                                 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
239                         my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
240                         if( $otherrel && $otherrel->type eq $options->{type}
241                                 && $otherrel->scope eq $options->{scope} ) {
242                                 warn "Applying existing scoped relationship";
243                                 $relationship = $otherrel;
244                         }
245         }
246                 $relationship = $self->create( $options ) unless $relationship;  # Will throw on error
247     }
248
249
250         # Find all the pairs for which we need to set the relationship.
251         my @vectors = [ $source, $target ];
252     if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
253         push( @vectors, $self->_find_applicable( $relationship ) );
254     }
255         
256     # Now set the relationship(s).
257     my @pairs_set;
258     foreach my $v ( @vectors ) {
259                 my $rel = $self->get_relationship( @$v );
260         if( $rel && $rel ne $relationship ) {
261                 if( $rel->nonlocal ) {
262                         throw( "Found conflicting relationship at @$v" );
263                 } elsif( $rel->type ne 'collated' ) {
264                         # Replace a collation relationship; leave any other sort in place.
265                         warn "Not overriding local relationship set at @$v";
266                                 next;
267                 }
268         }
269         map { $self->_drop_collations( $_ ) } @$v;
270         $self->_set_relationship( $relationship, @$v );
271         push( @pairs_set, $v );
272     }
273     
274     return @pairs_set;
275 }
276
277 =head2 del_scoped_relationship( $reading_a, $reading_b )
278
279 Returns the general (document-level or global) relationship that has been defined 
280 between the two reading strings. Returns undef if there is no general relationship.
281
282 =cut
283
284 sub del_scoped_relationship {
285         my( $self, $rdga, $rdgb ) = @_;
286         my( $first, $second ) = sort( $rdga, $rdgb );
287         return delete $self->scopedrels->{$first}->{$second};
288 }
289
290 sub _find_applicable {
291         my( $self, $rel ) = @_;
292         my $c = $self->collation;
293         # TODO Someday we might use a case sensitive language.
294         my $lang = $c->tradition->language;
295         my @vectors;
296         my @identical_readings;
297         if( $rel->type eq 'orthographic' ) {
298                 @identical_readings = grep { $_->text eq $rel->reading_a } 
299                         $c->readings;
300         } else {
301                 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
302                         $c->readings;
303         }
304         foreach my $ir ( @identical_readings ) {
305                 my @itarget;
306                 if( $rel->type eq 'orthographic' ) {
307                         @itarget = grep { $_->rank == $ir->rank 
308                                                           && $_->text eq $rel->reading_b } $c->readings;
309                 } else {
310                         @itarget = grep { $_->rank == $ir->rank 
311                                                           && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
312                 }
313                 if( @itarget ) {
314                         # Warn if there is more than one hit with no orth link between them.
315                         my $itmain = shift @itarget;
316                         if( @itarget ) {
317                                 my %all_targets;
318                                 map { $all_targets{$_} = 1 } @itarget;
319                                 map { delete $all_targets{$_} } 
320                                         $self->related_readings( $itmain, 
321                                                 sub { $_[0]->type eq 'orthographic' } );
322                         warn "More than one unrelated reading with text " . $itmain->text
323                                 . " at rank " . $ir->rank . "!" if keys %all_targets;
324                         }
325                         push( @vectors, [ $ir->id, $itmain->id ] );
326                 }
327         }
328         return @vectors;
329 }
330
331 =head2 del_relationship( $source, $target )
332
333 Removes the relationship between the given readings. If the relationship is
334 non-local, removes the relationship everywhere in the graph.
335
336 =cut
337
338 sub del_relationship {
339         my( $self, $source, $target ) = @_;
340         my $rel = $self->get_relationship( $source, $target );
341         return () unless $rel; # Nothing to delete; return an empty set.
342         my @vectors = ( [ $source, $target ] );
343         $self->_remove_relationship( $source, $target );
344         if( $rel->nonlocal ) {
345                 # Remove the relationship wherever it occurs.
346                 # Remove the relationship wherever it occurs.
347                 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
348                         $self->relationships;
349                 foreach my $re ( @rel_edges ) {
350                         $self->_remove_relationship( @$re );
351                         push( @vectors, $re );
352                 }
353                 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
354         }
355         return @vectors;
356 }
357
358 sub _remove_relationship {
359         my( $self, @vector ) = @_;
360         $self->graph->delete_edge( @vector );
361 }
362         
363 =head2 relationship_valid( $source, $target, $type )
364
365 Checks whether a relationship of type $type may exist between the readings given
366 in $source and $target.  Returns a tuple of ( status, message ) where status is
367 a yes/no boolean and, if the answer is no, message gives the reason why.
368
369 =cut
370
371 sub relationship_valid {
372     my( $self, $source, $target, $rel ) = @_;
373     my $c = $self->collation;
374     if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
375                 # Check that the two readings do (for a repetition) or do not (for
376                 # a transposition) appear in the same witness.
377                 # TODO this might be called before witness paths are set...
378                 my %seen_wits;
379                 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
380                 foreach my $w ( $c->reading_witnesses( $target ) ) {
381                         if( $seen_wits{$w} ) {
382                                 return ( 0, "Readings both occur in witness $w" ) 
383                                         if $rel eq 'transposition';
384                                 return ( 1, "ok" ) if $rel eq 'repetition';
385                         }
386                 }
387                 return $rel eq 'transposition' ? ( 1, "ok" )
388                         : ( 0, "Readings occur only in distinct witnesses" );
389         } else {
390                 # Check that linking the source and target in a relationship won't lead
391                 # to a path loop for any witness.  If they have the same rank then fine.
392                 return( 1, "ok" ) 
393                         if $c->reading( $source )->has_rank
394                                 && $c->reading( $target )->has_rank
395                                 && $c->reading( $source )->rank == $c->reading( $target )->rank;
396                 
397                 # Otherwise, first make a lookup table of all the
398                 # readings related to either the source or the target.
399                 my @proposed_related = ( $source, $target );
400                 # Drop the collation links of source and target, unless we want to
401                 # add a collation relationship.
402                 foreach my $r ( ( $source, $target ) ) {
403                         $self->_drop_collations( $r ) unless $rel eq 'collated';
404                         push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
405                 }
406                 my %pr_ids;
407                 map { $pr_ids{ $_ } = 1 } @proposed_related;
408         
409                 # The cumulative predecessors and successors of the proposed-related readings
410                 # should not overlap.
411                 my %all_pred;
412                 my %all_succ;
413                 foreach my $pr ( keys %pr_ids ) {
414                         map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
415                         map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
416                 }
417                 foreach my $k ( keys %all_pred ) {
418                         return( 0, "Relationship would create witness loop" )
419                                 if exists $all_succ{$k};
420                 }
421                 foreach my $k ( keys %pr_ids ) {
422                         return( 0, "Relationship would create witness loop" )
423                                 if exists $all_pred{$k} || exists $all_succ{$k};
424                 }
425                 return ( 1, "ok" );
426         }
427 }
428
429 sub _drop_collations {
430         my( $self, $reading ) = @_;
431         foreach my $n ( $self->graph->neighbors( $reading ) ) {
432                 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
433                         $self->del_relationship( $reading, $n );
434                 }
435         }
436 }
437
438 =head2 related_readings( $reading, $filter )
439
440 Returns a list of readings that are connected via relationship links to $reading.
441 If $filter is set to a subroutine ref, returns only those related readings where
442 $filter( $relationship ) returns a true value.
443
444 =cut
445
446 sub related_readings {
447         my( $self, $reading, $filter ) = @_;
448         my $return_object;
449         if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
450                 $reading = $reading->id;
451                 $return_object = 1;
452         }
453         my @answer;
454         if( $filter ) {
455                 # Backwards compat
456                 if( $filter eq 'colocated' ) {
457                         $filter = sub { $_[0]->colocated };
458                 }
459                 my %found = ( $reading => 1 );
460                 my $check = [ $reading ];
461                 my $iter = 0;
462                 while( @$check ) {
463                         my $more = [];
464                         foreach my $r ( @$check ) {
465                                 foreach my $nr ( $self->graph->neighbors( $r ) ) {
466                                         if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
467                                                 push( @$more, $nr ) unless exists $found{$nr};
468                                                 $found{$nr} = 1;
469                                         }
470                                 }
471                         }
472                         $check = $more;
473                 }
474                 delete $found{$reading};
475                 @answer = keys %found;
476         } else {
477                 @answer = $self->graph->all_reachable( $reading );
478         }
479         if( $return_object ) {
480                 my $c = $self->collation;
481                 return map { $c->reading( $_ ) } @answer;
482         } else {
483                 return @answer;
484         }
485 }
486
487 =head2 merge_readings( $kept, $deleted );
488
489 Makes a best-effort merge of the relationship links between the given readings, and
490 stops tracking the to-be-deleted reading.
491
492 =cut
493
494 sub merge_readings {
495         my( $self, $kept, $deleted, $combined ) = @_;
496         foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
497                 # Get the pair of kept / rel
498                 my @vector = ( $kept );
499                 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
500                 next if $vector[0] eq $vector[1]; # Don't add a self loop
501                 
502                 # If kept changes its text, drop the relationship.
503                 next if $combined;
504                         
505                 # If kept / rel already has a relationship, just keep the old
506                 my $rel = $self->get_relationship( @vector );
507                 next if $rel;
508                 
509                 # Otherwise, adopt the relationship that would be deleted.
510                 $rel = $self->get_relationship( @$edge );
511                 $self->_set_relationship( $rel, @vector );
512         }
513         $self->delete_reading( $deleted );
514 }
515
516 sub _as_graphml { 
517         my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
518         
519     my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
520         $rgraph->setAttribute( 'edgedefault', 'directed' );
521     $rgraph->setAttribute( 'id', 'relationships', );
522     $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
523     $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
524     $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
525     $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
526     $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
527     
528     # Add the vertices according to their XML IDs
529     my %rdg_lookup = ( reverse %$node_hash );
530     foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
531         my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
532         $n_el->setAttribute( 'id', $n );
533         _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
534     }
535     
536     # Add the relationship edges, with their object information
537     my $edge_ctr = 0;
538     foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
539         # Add an edge and fill in its relationship info.
540         next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
541                 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
542                 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
543                 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
544                 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
545
546                 my $rel_obj = $self->get_relationship( @$e );
547                 foreach my $key ( keys %$edge_keys ) {
548                         my $value = $rel_obj->$key;
549                         _add_graphml_data( $edge_el, $edge_keys->{$key}, $value ) 
550                                 if defined $value;
551                 }
552         }
553 }
554
555 sub _by_xmlid {
556         my $tmp_a = $a;
557         my $tmp_b = $b;
558         $tmp_a =~ s/\D//g;
559         $tmp_b =~ s/\D//g;
560         return $tmp_a <=> $tmp_b;
561 }
562
563 sub _add_graphml_data {
564     my( $el, $key, $value ) = @_;
565     return unless defined $value;
566     my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
567     $data_el->setAttribute( 'key', $key );
568     $data_el->appendText( $value );
569 }
570
571 sub throw {
572         Text::Tradition::Error->throw( 
573                 'ident' => 'Relationship error',
574                 'message' => $_[0],
575                 );
576 }
577
578 no Moose;
579 __PACKAGE__->meta->make_immutable;
580
581 1;