1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 get_relationship
90 Return the relationship object, if any, that exists between two readings.
94 sub get_relationship {
97 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
98 # Dereference the edge arrayref that was passed.
105 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
106 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
108 return $relationship;
111 sub _set_relationship {
112 my( $self, $relationship, @vector ) = @_;
113 $self->graph->add_edge( @vector );
114 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
119 Create a new relationship with the given options and return it.
120 Warn and return undef if the relationship cannot be created.
125 my( $self, $options ) = @_;
126 # Check to see if a relationship exists between the two given readings
127 my $source = delete $options->{'orig_a'};
128 my $target = delete $options->{'orig_b'};
129 my $rel = $self->get_relationship( $source, $target );
131 if( $rel->type eq 'collated' ) {
132 # Always replace a 'collated' relationship with a more descriptive
134 $self->del_relationship( $source, $target );
135 } elsif( $rel->type ne $options->{'type'} ) {
136 throw( "Another relationship of type " . $rel->type
137 . " already exists between $source and $target" );
143 # Check to see if a nonlocal relationship is defined for the two readings
144 $rel = $self->scoped_relationship( $options->{'reading_a'},
145 $options->{'reading_b'} );
146 if( $rel && $rel->type eq $options->{'type'} ) {
149 throw( sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} ) );
151 $rel = Text::Tradition::Collation::Relationship->new( $options );
152 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
157 =head2 add_scoped_relationship( $rel )
159 Keep track of relationships defined between specific readings that are scoped
160 non-locally. Key on whichever reading occurs first alphabetically.
164 sub add_scoped_relationship {
165 my( $self, $rel ) = @_;
166 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
167 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
168 my $r = $self->scoped_relationship( $rdga, $rdgb );
170 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
171 $r->type, $rdga, $rdgb );
174 my( $first, $second ) = sort ( $rdga, $rdgb );
175 $self->scopedrels->{$first}->{$second} = $rel;
178 =head2 scoped_relationship( $reading_a, $reading_b )
180 Returns the general (document-level or global) relationship that has been defined
181 between the two reading strings. Returns undef if there is no general relationship.
185 sub scoped_relationship {
186 my( $self, $rdga, $rdgb ) = @_;
187 my( $first, $second ) = sort( $rdga, $rdgb );
188 if( exists $self->scopedrels->{$first}->{$second} ) {
189 return $self->scopedrels->{$first}->{$second};
195 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
197 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
198 for the possible options) between the readings given in $source and $target. Sets
199 up a scoped relationship between $sourcetext and $targettext if the relationship is
202 Returns a status boolean and a list of all reading pairs connected by the call to
207 sub add_relationship {
208 my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
212 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
213 $relationship = $options;
214 $thispaironly = 1; # If existing rel, set only where asked.
217 $options->{'scope'} = 'local' unless $options->{'scope'};
218 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
219 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
221 my( $is_valid, $reason ) =
222 $self->relationship_valid( $source, $target, $options->{'type'} );
223 unless( $is_valid ) {
224 throw( "Invalid relationship: $reason" );
227 # Try to create the relationship object.
228 $options->{'reading_a'} = $source_rdg->text;
229 $options->{'reading_b'} = $target_rdg->text;
230 $options->{'orig_a'} = $source;
231 $options->{'orig_b'} = $target;
232 if( $options->{'scope'} ne 'local' ) {
233 # Is there a relationship with this a & b already?
234 # Case-insensitive for non-orthographics.
235 my $rdga = $options->{'type'} eq 'orthographic'
236 ? $options->{'reading_a'} : lc( $options->{'reading_a'} );
237 my $rdgb = $options->{'type'} eq 'orthographic'
238 ? $options->{'reading_b'} : lc( $options->{'reading_b'} );
239 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
240 if( $otherrel && $otherrel->type eq $options->{type}
241 && $otherrel->scope eq $options->{scope} ) {
242 warn "Applying existing scoped relationship";
243 $relationship = $otherrel;
246 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
250 # Find all the pairs for which we need to set the relationship.
251 my @vectors = [ $source, $target ];
252 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
253 push( @vectors, $self->_find_applicable( $relationship ) );
256 # Now set the relationship(s).
258 foreach my $v ( @vectors ) {
259 my $rel = $self->get_relationship( @$v );
260 if( $rel && $rel ne $relationship ) {
261 if( $rel->nonlocal ) {
262 throw( "Found conflicting relationship at @$v" );
263 } elsif( $rel->type ne 'collated' ) {
264 # Replace a collation relationship; leave any other sort in place.
265 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
266 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
267 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
268 warn sprintf( "Not overriding local relationship %s with global %s "
269 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
270 @$v, $rel->reading_a, $rel->reading_b );
275 map { $self->_drop_collations( $_ ) } @$v;
276 $self->_set_relationship( $relationship, @$v );
277 push( @pairs_set, $v );
283 =head2 del_scoped_relationship( $reading_a, $reading_b )
285 Returns the general (document-level or global) relationship that has been defined
286 between the two reading strings. Returns undef if there is no general relationship.
290 sub del_scoped_relationship {
291 my( $self, $rdga, $rdgb ) = @_;
292 my( $first, $second ) = sort( $rdga, $rdgb );
293 return delete $self->scopedrels->{$first}->{$second};
296 sub _find_applicable {
297 my( $self, $rel ) = @_;
298 my $c = $self->collation;
299 # TODO Someday we might use a case sensitive language.
300 my $lang = $c->tradition->language;
302 my @identical_readings;
303 if( $rel->type eq 'orthographic' ) {
304 @identical_readings = grep { $_->text eq $rel->reading_a }
307 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
310 foreach my $ir ( @identical_readings ) {
312 if( $rel->type eq 'orthographic' ) {
313 @itarget = grep { $_->rank == $ir->rank
314 && $_->text eq $rel->reading_b } $c->readings;
316 @itarget = grep { $_->rank == $ir->rank
317 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
320 # Warn if there is more than one hit with no orth link between them.
321 my $itmain = shift @itarget;
324 map { $all_targets{$_} = 1 } @itarget;
325 map { delete $all_targets{$_} }
326 $self->related_readings( $itmain,
327 sub { $_[0]->type eq 'orthographic' } );
328 warn "More than one unrelated reading with text " . $itmain->text
329 . " at rank " . $ir->rank . "!" if keys %all_targets;
331 push( @vectors, [ $ir->id, $itmain->id ] );
337 =head2 del_relationship( $source, $target )
339 Removes the relationship between the given readings. If the relationship is
340 non-local, removes the relationship everywhere in the graph.
344 sub del_relationship {
345 my( $self, $source, $target ) = @_;
346 my $rel = $self->get_relationship( $source, $target );
347 return () unless $rel; # Nothing to delete; return an empty set.
348 my @vectors = ( [ $source, $target ] );
349 $self->_remove_relationship( $source, $target );
350 if( $rel->nonlocal ) {
351 # Remove the relationship wherever it occurs.
352 # Remove the relationship wherever it occurs.
353 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
354 $self->relationships;
355 foreach my $re ( @rel_edges ) {
356 $self->_remove_relationship( @$re );
357 push( @vectors, $re );
359 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
364 sub _remove_relationship {
365 my( $self, @vector ) = @_;
366 $self->graph->delete_edge( @vector );
369 =head2 relationship_valid( $source, $target, $type )
371 Checks whether a relationship of type $type may exist between the readings given
372 in $source and $target. Returns a tuple of ( status, message ) where status is
373 a yes/no boolean and, if the answer is no, message gives the reason why.
377 sub relationship_valid {
378 my( $self, $source, $target, $rel ) = @_;
379 my $c = $self->collation;
380 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
381 # Check that the two readings do (for a repetition) or do not (for
382 # a transposition) appear in the same witness.
383 # If we haven't made reading paths yet, take it on faith.
384 return( 1, "no paths yet" ) unless $c->sequence->successors( $c->start );
386 # We have some paths, so carry on.
388 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
389 foreach my $w ( $c->reading_witnesses( $target ) ) {
390 if( $seen_wits{$w} ) {
391 return ( 0, "Readings both occur in witness $w" )
392 if $rel eq 'transposition';
393 return ( 1, "ok" ) if $rel eq 'repetition';
396 # For transpositions, there should also be a path from one reading
398 if( $rel eq 'transposition' ) {
399 my( %sourceseq, %targetseq );
400 map { $sourceseq{$_} = 1 } $c->sequence->all_successors( $source );
401 map { $targetseq{$_} = 1 } $c->sequence->all_successors( $target );
402 return( 0, "Readings are parallel" )
403 unless $sourceseq{$target} || $targetseq{$source};
405 return $rel eq 'transposition' ? ( 1, "ok" )
406 : ( 0, "Readings occur only in distinct witnesses" );
408 if( $rel ne 'repetition' ) {
409 # Check that linking the source and target in a relationship won't lead
410 # to a path loop for any witness. If they have the same rank then
411 # they are parallel by definition.
412 # For transpositions, we want the opposite result: it is only valid if
413 # the readings cannot be parallel.
414 my $sourcerank = $c->reading( $source )->has_rank
415 ? $c->reading( $source )->rank : undef;
416 my $targetrank = $c->reading( $target )->has_rank
417 ? $c->reading( $target )->rank : undef;
418 if( $sourcerank && $targetrank && $sourcerank == $targetrank ) {
419 return( 0, "Cannot transpose readings of same rank" )
420 if $rel eq 'transposition';
424 # Otherwise, first make a lookup table of all the
425 # readings related to either the source or the target.
426 my @proposed_related = ( $source, $target );
427 # Drop the collation links of source and target, unless we want to
428 # add a collation relationship.
430 foreach my $r ( ( $source, $target ) ) {
431 push( @dropped, $self->_drop_collations( $r ) )
432 unless $rel eq 'collated';
433 push( @proposed_related, $self->related_readings( $r, 'colocated' ) );
435 # Also drop any collation links at intermediate ranks.
436 foreach my $rank ( $sourcerank+1 .. $targetrank-1 ) {
437 map { push( @dropped, $self->_drop_collations( $_ ) ) }
438 $c->readings_at_rank( $rank );
441 map { $pr_ids{ $_ } = 1 } @proposed_related;
443 # The cumulative predecessors and successors of the proposed-related readings
444 # should not overlap.
447 foreach my $pr ( keys %pr_ids ) {
448 map { $all_pred{$_} = 1 } $c->sequence->all_predecessors( $pr );
449 map { $all_succ{$_} = 1 } $c->sequence->all_successors( $pr );
451 foreach my $k ( keys %all_pred ) {
452 if( exists $all_succ{$k} ) {
453 $self->_restore_collations( @dropped );
454 return( 1, "ok" ) if $rel eq 'transposition';
455 return( 0, "Relationship would create witness loop" );
458 foreach my $k ( keys %pr_ids ) {
459 if( exists $all_pred{$k} || exists $all_succ{$k} ) {
460 $self->_restore_collations( @dropped );
461 return( 1, "ok" ) if $rel eq 'transposition';
462 return( 0, "Relationship would create witness loop" );
465 if( $rel eq 'transposition' ) {
466 $self->_restore_collations( @dropped );
467 return ( 0, "Cannot transpose parallel readings" );
473 sub _drop_collations {
474 my( $self, $reading ) = @_;
476 foreach my $n ( $self->graph->neighbors( $reading ) ) {
477 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
478 $self->del_relationship( $reading, $n );
479 push( @deleted, [ $reading, $n ] );
485 sub _restore_collations {
486 my( $self, @vectors ) = @_;
487 foreach my $v ( @vectors ) {
489 $self->add_relationship( @$v, { 'type' => 'collated' } );
490 } catch ( Text::Tradition::Error $e ) {
491 warn "Could not restore collation " . join( ' -> ', @$v );
496 =head2 related_readings( $reading, $filter )
498 Returns a list of readings that are connected via relationship links to $reading.
499 If $filter is set to a subroutine ref, returns only those related readings where
500 $filter( $relationship ) returns a true value.
504 sub related_readings {
505 my( $self, $reading, $filter ) = @_;
507 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
508 $reading = $reading->id;
514 if( $filter eq 'colocated' ) {
515 $filter = sub { $_[0]->colocated };
517 my %found = ( $reading => 1 );
518 my $check = [ $reading ];
522 foreach my $r ( @$check ) {
523 foreach my $nr ( $self->graph->neighbors( $r ) ) {
524 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
525 push( @$more, $nr ) unless exists $found{$nr};
532 delete $found{$reading};
533 @answer = keys %found;
535 @answer = $self->graph->all_reachable( $reading );
537 if( $return_object ) {
538 my $c = $self->collation;
539 return map { $c->reading( $_ ) } @answer;
545 =head2 merge_readings( $kept, $deleted );
547 Makes a best-effort merge of the relationship links between the given readings, and
548 stops tracking the to-be-deleted reading.
553 my( $self, $kept, $deleted, $combined ) = @_;
554 # Delete any relationship between kept and deleted
555 $self->del_relationship( $kept, $deleted );
556 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
557 # Get the pair of kept / rel
558 my @vector = ( $kept );
559 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
560 next if $vector[0] eq $vector[1]; # Don't add a self loop
562 # If kept changes its text, drop the relationship.
565 # If kept / rel already has a relationship, just keep the old
566 my $rel = $self->get_relationship( @vector );
569 # Otherwise, adopt the relationship that would be deleted.
570 $rel = $self->get_relationship( @$edge );
571 $self->_set_relationship( $rel, @vector );
573 $self->delete_reading( $deleted );
577 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
579 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
580 $rgraph->setAttribute( 'edgedefault', 'directed' );
581 $rgraph->setAttribute( 'id', 'relationships', );
582 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
583 $rgraph->setAttribute( 'parse.edges', scalar($self->graph->edges) );
584 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
585 $rgraph->setAttribute( 'parse.nodes', scalar($self->graph->vertices) );
586 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
588 # Add the vertices according to their XML IDs
589 my %rdg_lookup = ( reverse %$node_hash );
590 foreach my $n ( sort _by_xmlid keys( %rdg_lookup ) ) {
591 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
592 $n_el->setAttribute( 'id', $n );
593 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
596 # Add the relationship edges, with their object information
598 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
599 # Add an edge and fill in its relationship info.
600 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
601 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
602 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
603 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
604 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
606 my $rel_obj = $self->get_relationship( @$e );
607 foreach my $key ( keys %$edge_keys ) {
608 my $value = $rel_obj->$key;
609 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
620 return $tmp_a <=> $tmp_b;
623 sub _add_graphml_data {
624 my( $el, $key, $value ) = @_;
625 return unless defined $value;
626 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
627 $data_el->setAttribute( 'key', $key );
628 $data_el->appendText( $value );
632 Text::Tradition::Error->throw(
633 'ident' => 'Relationship error',
639 __PACKAGE__->meta->make_immutable;