1 package Text::Tradition::Collation::RelationshipStore;
5 use Text::Tradition::Error;
6 use Text::Tradition::Collation::Relationship;
13 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
14 between readings in a given collation
18 Text::Tradition is a library for representation and analysis of collated
19 texts, particularly medieval ones. The RelationshipStore is an internal object
20 of the collation, to keep track of the defined relationships (both specific and
21 general) between readings.
28 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
30 # Add some relationships, and delete them
32 my $cxfile = 't/data/Collatex-16.xml';
33 my $t = Text::Tradition->new(
35 'input' => 'CollateX',
38 my $c = $t->collation;
40 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
41 is( scalar @v1, 1, "Added a single relationship" );
42 is( $v1[0]->[0], 'n21', "Got correct node 1" );
43 is( $v1[0]->[1], 'n22', "Got correct node 2" );
44 my @v2 = $c->add_relationship( 'n24', 'n23',
45 { 'type' => 'spelling', 'scope' => 'global' } );
46 is( scalar @v2, 2, "Added a global relationship with two instances" );
47 @v1 = $c->del_relationship( 'n22', 'n21' );
48 is( scalar @v1, 1, "Deleted first relationship" );
49 @v2 = $c->del_relationship( 'n12', 'n13' );
50 is( scalar @v2, 2, "Deleted second global relationship" );
51 my @v3 = $c->del_relationship( 'n1', 'n2' );
52 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
58 =head2 new( collation => $collation );
60 Creates a new relationship store for the given collation.
66 isa => 'Text::Tradition::Collation',
73 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
74 default => sub { {} },
80 default => sub { Graph->new( undirected => 1 ) },
82 relationships => 'edges',
83 add_reading => 'add_vertex',
84 delete_reading => 'delete_vertex',
88 =head2 equivalence_graph()
90 Returns an equivalence graph of the collation, in which all readings
91 related via a 'colocated' relationship are transformed into a single
92 vertex. Can be used to determine the validity of a new relationship.
96 has 'equivalence_graph' => (
99 default => sub { Graph->new() },
100 writer => '_reset_equivalence',
103 has '_node_equivalences' => (
107 equivalence => 'get',
108 set_equivalence => 'set',
109 remove_equivalence => 'delete',
110 _clear_equivalence => 'clear',
114 has '_equivalence_readings' => (
119 set_eqreadings => 'set',
120 remove_eqreadings => 'delete',
121 _clear_eqreadings => 'clear',
125 around add_reading => sub {
129 $self->equivalence_graph->add_vertex( @_ );
130 $self->set_equivalence( $_[0], $_[0] );
131 $self->set_eqreadings( $_[0], [ $_[0] ] );
135 around delete_reading => sub {
139 $self->_remove_equivalence_node( @_ );
143 =head2 get_relationship
145 Return the relationship object, if any, that exists between two readings.
149 sub get_relationship {
152 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
153 # Dereference the edge arrayref that was passed.
160 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
161 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
163 return $relationship;
166 sub _set_relationship {
167 my( $self, $relationship, @vector ) = @_;
168 $self->graph->add_edge( @vector );
169 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
170 $self->_make_equivalence( @vector ) if $relationship->colocated;
175 Create a new relationship with the given options and return it.
176 Warn and return undef if the relationship cannot be created.
181 my( $self, $options ) = @_;
182 # Check to see if a relationship exists between the two given readings
183 my $source = delete $options->{'orig_a'};
184 my $target = delete $options->{'orig_b'};
185 my $rel = $self->get_relationship( $source, $target );
187 if( $rel->type eq 'collated' ) {
188 # Always replace a 'collated' relationship with a more descriptive
190 $self->del_relationship( $source, $target );
191 } elsif( $rel->type ne $options->{'type'} ) {
192 throw( "Another relationship of type " . $rel->type
193 . " already exists between $source and $target" );
199 $rel = Text::Tradition::Collation::Relationship->new( $options );
200 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
204 =head2 add_scoped_relationship( $rel )
206 Keep track of relationships defined between specific readings that are scoped
207 non-locally. Key on whichever reading occurs first alphabetically.
211 sub add_scoped_relationship {
212 my( $self, $rel ) = @_;
213 my $rdga = $rel->type eq 'orthographic' ? $rel->reading_a : lc( $rel->reading_a );
214 my $rdgb = $rel->type eq 'orthographic' ? $rel->reading_b : lc( $rel->reading_b );
215 my $r = $self->scoped_relationship( $rdga, $rdgb );
217 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
218 $r->type, $rdga, $rdgb );
221 my( $first, $second ) = sort ( $rdga, $rdgb );
222 $self->scopedrels->{$first}->{$second} = $rel;
225 =head2 scoped_relationship( $reading_a, $reading_b )
227 Returns the general (document-level or global) relationship that has been defined
228 between the two reading strings. Returns undef if there is no general relationship.
232 sub scoped_relationship {
233 my( $self, $rdga, $rdgb ) = @_;
234 my( $first, $second ) = sort( $rdga, $rdgb );
235 my( $lcfirst, $lcsecond ) = sort( lc( $rdga ), lc( $rdgb ) );
236 if( exists $self->scopedrels->{$first}->{$second} ) {
237 return $self->scopedrels->{$first}->{$second};
238 } elsif( exists $self->scopedrels->{$lcfirst}->{$lcsecond} ) {
239 my $rel = $self->scopedrels->{$lcfirst}->{$lcsecond};
240 return $rel->type ne 'orthographic' ? $rel : undef;
246 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
248 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
249 for the possible options) between the readings given in $source and $target. Sets
250 up a scoped relationship between $sourcetext and $targettext if the relationship is
253 Returns a status boolean and a list of all reading pairs connected by the call to
264 $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
265 } [qr/Cannot set relationship on a meta reading/],
266 "Got expected relationship drop warning on parse";
268 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
269 ok( $t1, "Parsed test fragment file" );
270 my $c1 = $t1->collation;
271 my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
272 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
273 "Troublesome relationship exists" );
274 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
276 # Try to make the link we want
278 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
279 ok( 1, "Added cross-collation relationship as expected" );
280 } catch( Text::Tradition::Error $e ) {
281 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
285 $c1->calculate_ranks();
286 ok( 1, "Successfully calculated ranks" );
287 } catch ( Text::Tradition::Error $e ) {
288 ok( 0, "Collation now has a cycle: " . $e->message );
291 # Test 1.2: attempt merge of an identical reading
293 $c1->merge_readings( 'r9.3', 'r11.5' );
294 ok( 1, "Successfully merged reading 'pontifex'" );
295 } catch ( Text::Tradition::Error $e ) {
296 ok( 0, "Merge of mergeable readings failed: $e->message" );
300 # Test 1.3: attempt relationship with a meta reading (should fail)
302 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
303 ok( 0, "Allowed a meta-reading to be used in a relationship" );
304 } catch ( Text::Tradition::Error $e ) {
305 is( $e->message, 'Cannot set relationship on a meta reading',
306 "Relationship link prevented for a meta reading" );
309 # Test 1.4: try to break a relationship near a meta reading
310 $c1->add_relationship( 'r7.6', 'r7.3', { type => 'orthographic' } );
312 $c1->del_relationship( 'r7.6', 'r7.7' );
313 $c1->del_relationship( 'r7.6', 'r7.3' );
314 ok( 1, "Relationship broken with a meta reading as neighbor" );
316 ok( 0, "Relationship deletion failed with a meta reading as neighbor" );
319 # Test 2.1: try to equate nodes that are prevented with a real intermediate
323 $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
324 } [qr/Cannot set relationship on a meta reading/],
325 "Got expected relationship drop warning on parse";
326 my $c2 = $t2->collation;
327 $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
328 my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
329 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
330 "Created blocking relationship" );
331 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
332 # This time the link ought to fail
334 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
335 ok( 0, "Added cross-equivalent bad relationship" );
336 } catch ( Text::Tradition::Error $e ) {
337 like( $e->message, qr/witness loop/,
338 "Existing equivalence blocked crossing relationship" );
342 $c2->calculate_ranks();
343 ok( 1, "Successfully calculated ranks" );
344 } catch ( Text::Tradition::Error $e ) {
345 ok( 0, "Collation now has a cycle: " . $e->message );
348 # Test 3.1: make a straightforward pair of transpositions.
349 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
350 # Test 1: try to equate nodes that are prevented with an intermediate collation
351 my $c3 = $t3->collation;
353 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
354 ok( 1, "Added straightforward transposition" );
355 } catch ( Text::Tradition::Error $e ) {
356 ok( 0, "Failed to add normal transposition: " . $e->message );
359 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
360 ok( 1, "Added straightforward transposition complement" );
361 } catch ( Text::Tradition::Error $e ) {
362 ok( 0, "Failed to add normal transposition complement: " . $e->message );
365 # Test 3.2: try to make a transposition that could be a parallel.
367 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
368 ok( 0, "Added bad colocated transposition" );
369 } catch ( Text::Tradition::Error $e ) {
370 like( $e->message, qr/Readings appear to be colocated/,
371 "Prevented bad colocated transposition" );
374 # Test 3.3: make the parallel, and then make the transposition again.
376 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
377 ok( 1, "Equated identical readings for transposition" );
378 } catch ( Text::Tradition::Error $e ) {
379 ok( 0, "Failed to equate identical readings: " . $e->message );
382 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
383 ok( 1, "Added straightforward transposition complement" );
384 } catch ( Text::Tradition::Error $e ) {
385 ok( 0, "Failed to add normal transposition complement: " . $e->message );
392 sub add_relationship {
393 my( $self, $source, $target, $options ) = @_;
394 my $c = $self->collation;
395 my $sourceobj = $c->reading( $source );
396 my $targetobj = $c->reading( $target );
397 throw( "Adding self relationship at $source" ) if $source eq $target;
398 throw( "Cannot set relationship on a meta reading" )
399 if( $sourceobj->is_meta || $targetobj->is_meta );
402 my $droppedcolls = [];
403 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
404 $relationship = $options;
405 $thispaironly = 1; # If existing rel, set only where asked.
408 $options->{'scope'} = 'local' unless $options->{'scope'};
409 $options->{'scope'} = 'local' if $options->{'type'} eq 'collated';
410 $options->{'scope'} = 'local' if $options->{'type'} eq 'transposition';
412 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
413 $options->{'type'}, $droppedcolls );
414 unless( $is_valid ) {
415 throw( "Invalid relationship: $reason" );
418 # Try to create the relationship object.
419 $options->{'reading_a'} = $sourceobj->text;
420 $options->{'reading_b'} = $targetobj->text;
421 $options->{'orig_a'} = $source;
422 $options->{'orig_b'} = $target;
423 if( $options->{'scope'} ne 'local' ) {
424 # Is there a relationship with this a & b already?
425 # Case-insensitive for non-orthographics.
426 my $rdga = $options->{'reading_a'};
427 my $rdgb = $options->{'reading_b'};
428 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
429 if( $otherrel && $otherrel->type eq $options->{type}
430 && $otherrel->scope eq $options->{scope} ) {
431 warn "Applying existing scoped relationship for $rdga / $rdgb";
432 $relationship = $otherrel;
433 } elsif( $otherrel ) {
434 throw( "Conflicting scoped relationship for $rdga / $rdgb at $source / $target" );
437 $relationship = $self->create( $options ) unless $relationship; # Will throw on error
441 # Now set the relationship(s).
443 my $rel = $self->get_relationship( $source, $target );
445 if( $rel && $rel ne $relationship ) {
446 if( $rel->nonlocal ) {
447 throw( "Found conflicting relationship at $source - $target" );
448 } elsif( $rel->type ne 'collated' ) {
449 # Replace a collation relationship; leave any other sort in place.
450 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
451 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
452 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
453 warn sprintf( "Not overriding local relationship %s with global %s "
454 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
455 $source, $target, $rel->reading_a, $rel->reading_b );
460 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
461 push( @pairs_set, [ $source, $target ] );
463 # Find all the pairs for which we need to set the relationship.
464 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
465 push( @pairs_set, $self->add_global_relationship( $relationship ) );
467 # Finally, restore whatever collations we can, and return.
468 $self->_restore_collations( @$droppedcolls );
472 =head2 add_global_relationship( $options, $skipvector )
474 Adds the relationship specified wherever the relevant readings appear together
475 in the graph. Options as in add_relationship above.
479 sub add_global_relationship {
480 my( $self, $options ) = @_;
481 # First see if we are dealing with a relationship object already
483 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
484 $relationship = $options;
486 # Then see if a scoped relationship already applies for the words.
487 my $scopedrel = $self->scoped_relationship(
488 $options->{reading_a}, $options->{reading_b} );
489 $relationship = $scopedrel ? $scopedrel
490 : $self->create( $options );
493 throw( "Relationship passed to add_global is not global" )
494 unless $relationship->nonlocal;
495 throw( "Relationship passed to add_global is not a valid global type" )
496 unless $relationship->colocated && $relationship->type ne 'collated';
498 # Apply the relationship wherever it is valid
500 foreach my $v ( $self->_find_applicable( $relationship ) ) {
501 my $exists = $self->get_relationship( @$v );
502 if( $exists && $exists->type ne 'collated' ) {
503 throw( "Found conflicting relationship at @$v" )
504 unless $exists->type eq $relationship->type
505 && $exists->scope eq $relationship->scope;
507 my @added = $self->add_relationship( @$v, $relationship );
508 push( @pairs_set, @added );
515 =head2 del_scoped_relationship( $reading_a, $reading_b )
517 Returns the general (document-level or global) relationship that has been defined
518 between the two reading strings. Returns undef if there is no general relationship.
522 sub del_scoped_relationship {
523 my( $self, $rdga, $rdgb ) = @_;
524 my( $first, $second ) = sort( $rdga, $rdgb );
525 return delete $self->scopedrels->{$first}->{$second};
528 sub _find_applicable {
529 my( $self, $rel ) = @_;
530 my $c = $self->collation;
531 # TODO Someday we might use a case sensitive language.
533 my @identical_readings;
534 if( $rel->type eq 'orthographic' ) {
535 @identical_readings = grep { $_->text eq $rel->reading_a }
538 @identical_readings = grep { lc( $_->text ) eq lc( $rel->reading_a ) }
541 foreach my $ir ( @identical_readings ) {
543 if( $rel->type eq 'orthographic' ) {
544 @itarget = grep { $_->rank == $ir->rank
545 && $_->text eq $rel->reading_b } $c->readings;
547 @itarget = grep { $_->rank == $ir->rank
548 && lc( $_->text ) eq lc( $rel->reading_b ) } $c->readings;
551 # Warn if there is more than one hit with no orth link between them.
552 my $itmain = shift @itarget;
555 map { $all_targets{$_} = 1 } @itarget;
556 map { delete $all_targets{$_} }
557 $self->related_readings( $itmain,
558 sub { $_[0]->type eq 'orthographic' } );
559 warn "More than one unrelated reading with text " . $itmain->text
560 . " at rank " . $ir->rank . "!" if keys %all_targets;
562 push( @vectors, [ $ir->id, $itmain->id ] );
568 =head2 del_relationship( $source, $target )
570 Removes the relationship between the given readings. If the relationship is
571 non-local, removes the relationship everywhere in the graph.
575 sub del_relationship {
576 my( $self, $source, $target ) = @_;
577 my $rel = $self->get_relationship( $source, $target );
578 return () unless $rel; # Nothing to delete; return an empty set.
579 my $colo = $rel->colocated;
580 my @vectors = ( [ $source, $target ] );
581 $self->_remove_relationship( $colo, $source, $target );
582 if( $rel->nonlocal ) {
583 # Remove the relationship wherever it occurs.
584 # Remove the relationship wherever it occurs.
585 my @rel_edges = grep { $self->get_relationship( @$_ ) == $rel }
586 $self->relationships;
587 foreach my $re ( @rel_edges ) {
588 $self->_remove_relationship( $colo, @$re );
589 push( @vectors, $re );
591 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
596 sub _remove_relationship {
597 my( $self, $equiv, @vector ) = @_;
598 $self->graph->delete_edge( @vector );
599 $self->_break_equivalence( @vector ) if $equiv;
602 =head2 relationship_valid( $source, $target, $type )
604 Checks whether a relationship of type $type may exist between the readings given
605 in $source and $target. Returns a tuple of ( status, message ) where status is
606 a yes/no boolean and, if the answer is no, message gives the reason why.
610 sub relationship_valid {
611 my( $self, $source, $target, $rel, $mustdrop ) = @_;
612 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
613 my $c = $self->collation;
614 ## Assume validity is okay if we are initializing from scratch.
615 return ( 1, "initializing" ) unless $c->tradition->_initialized;
616 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
617 # Check that the two readings do (for a repetition) or do not (for
618 # a transposition) appear in the same witness.
619 # TODO this might be called before witness paths are set...
621 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
622 foreach my $w ( $c->reading_witnesses( $target ) ) {
623 if( $seen_wits{$w} ) {
624 return ( 0, "Readings both occur in witness $w" )
625 if $rel eq 'transposition';
626 return ( 1, "ok" ) if $rel eq 'repetition';
629 return ( 0, "Readings occur only in distinct witnesses" )
630 if $rel eq 'repetition';
632 if ( $rel eq 'transposition' ) {
633 # We also need to check both that the readings occur in distinct
634 # witnesses, and that they are not in the same place. That is,
635 # proposing to link them should cause a witness loop.
636 if( $self->test_equivalence( $source, $target ) ) {
637 return ( 0, "Readings appear to be colocated, not transposed" );
642 } elsif( $rel ne 'repetition' ) {
643 # Check that linking the source and target in a relationship won't lead
644 # to a path loop for any witness.
645 # First, drop/stash any collations that might interfere
646 my $sourceobj = $c->reading( $source );
647 my $targetobj = $c->reading( $target );
648 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
649 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
650 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
651 push( @$mustdrop, $self->_drop_collations( $source ) );
652 push( @$mustdrop, $self->_drop_collations( $target ) );
653 if( $c->end->has_rank ) {
654 foreach my $rk ( $sourcerank .. $targetrank ) {
655 map { push( @$mustdrop, $self->_drop_collations( $_->id ) ) }
656 $c->readings_at_rank( $rk );
660 unless( $self->test_equivalence( $source, $target ) ) {
661 $self->_restore_collations( @$mustdrop );
662 return( 0, "Relationship would create witness loop" );
668 sub _drop_collations {
669 my( $self, $reading ) = @_;
671 foreach my $n ( $self->graph->neighbors( $reading ) ) {
672 if( $self->get_relationship( $reading, $n )->type eq 'collated' ) {
673 push( @dropped, [ $reading, $n ] );
674 $self->del_relationship( $reading, $n );
675 #print STDERR "Dropped collation $reading -> $n\n";
681 sub _restore_collations {
682 my( $self, @vectors ) = @_;
683 foreach my $v ( @vectors ) {
685 $self->add_relationship( @$v, { 'type' => 'collated' } );
686 #print STDERR "Restored collation @$v\n";
688 print STDERR $v->[0] . " - " . $v->[1] . " no longer collate\n";
693 =head2 filter_collations()
695 Utility function. Removes any redundant 'collated' relationships from the graph.
696 A collated relationship is redundant if the readings in question would occupy
697 the same rank regardless of the existence of the relationship.
701 sub filter_collations {
703 my $c = $self->collation;
704 foreach my $r ( 1 .. $c->end->rank - 1 ) {
707 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
708 next if $rdg->is_meta;
710 foreach my $pred ( $rdg->predecessors ) {
711 if( $pred->rank == $r - 1 ) {
713 $anchor = $rdg unless( $anchor );
717 push( @need_collations, $rdg ) unless $ip;
718 $c->relations->_drop_collations( "$rdg" );
721 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
722 unless $c->get_relationship( $anchor, $_ ) } @need_collations
723 : warn "No anchor found at $r";
727 =head2 related_readings( $reading, $filter )
729 Returns a list of readings that are connected via relationship links to $reading.
730 If $filter is set to a subroutine ref, returns only those related readings where
731 $filter( $relationship ) returns a true value.
735 sub related_readings {
736 my( $self, $reading, $filter ) = @_;
738 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
739 $reading = $reading->id;
745 if( $filter eq 'colocated' ) {
746 $filter = sub { $_[0]->colocated };
747 } elsif( !ref( $filter ) ) {
749 $filter = sub { $_[0]->type eq $type };
751 my %found = ( $reading => 1 );
752 my $check = [ $reading ];
756 foreach my $r ( @$check ) {
757 foreach my $nr ( $self->graph->neighbors( $r ) ) {
758 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
759 push( @$more, $nr ) unless exists $found{$nr};
766 delete $found{$reading};
767 @answer = keys %found;
769 @answer = $self->graph->all_reachable( $reading );
771 if( $return_object ) {
772 my $c = $self->collation;
773 return map { $c->reading( $_ ) } @answer;
779 =head2 merge_readings( $kept, $deleted );
781 Makes a best-effort merge of the relationship links between the given readings, and
782 stops tracking the to-be-deleted reading.
787 my( $self, $kept, $deleted, $combined ) = @_;
788 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
789 # Get the pair of kept / rel
790 my @vector = ( $kept );
791 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
792 next if $vector[0] eq $vector[1]; # Don't add a self loop
794 # If kept changes its text, drop the relationship.
797 # If kept / rel already has a relationship, just keep the old
798 my $rel = $self->get_relationship( @vector );
801 # Otherwise, adopt the relationship that would be deleted.
802 $rel = $self->get_relationship( @$edge );
803 $self->_set_relationship( $rel, @vector );
805 $self->_make_equivalence( $deleted, $kept );
808 ### Equivalence logic
810 sub _remove_equivalence_node {
811 my( $self, $node ) = @_;
812 my $group = $self->equivalence( $node );
813 my $nodelist = $self->eqreadings( $group );
814 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
815 $self->equivalence_graph->delete_vertex( $group );
816 $self->remove_eqreadings( $group );
817 $self->remove_equivalence( $group );
818 } elsif( @$nodelist == 1 ) {
819 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
820 " in group that should have only $node" );
822 my @newlist = grep { $_ ne $node } @$nodelist;
823 $self->set_eqreadings( $group, \@newlist );
824 $self->remove_equivalence( $node );
828 =head2 add_equivalence_edge
830 Add an edge in the equivalence graph corresponding to $source -> $target in the
831 collation. Should only be called by Collation.
835 sub add_equivalence_edge {
836 my( $self, $source, $target ) = @_;
837 my $seq = $self->equivalence( $source );
838 my $teq = $self->equivalence( $target );
839 $self->equivalence_graph->add_edge( $seq, $teq );
842 =head2 delete_equivalence_edge
844 Remove an edge in the equivalence graph corresponding to $source -> $target in the
845 collation. Should only be called by Collation.
849 sub delete_equivalence_edge {
850 my( $self, $source, $target ) = @_;
851 my $seq = $self->equivalence( $source );
852 my $teq = $self->equivalence( $target );
853 $self->equivalence_graph->delete_edge( $seq, $teq );
856 sub _is_disconnected {
858 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
859 || scalar $self->equivalence_graph->successorless_vertices > 1 );
862 # Equate two readings in the equivalence graph
863 sub _make_equivalence {
864 my( $self, $source, $target ) = @_;
865 # Get the source equivalent readings
866 my $seq = $self->equivalence( $source );
867 my $teq = $self->equivalence( $target );
868 # Nothing to do if they are already equivalent...
869 return if $seq eq $teq;
870 my $sourcepool = $self->eqreadings( $seq );
871 # and add them to the target readings.
872 push( @{$self->eqreadings( $teq )}, @$sourcepool );
873 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
874 # Then merge the nodes in the equivalence graph.
875 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
876 $self->equivalence_graph->add_edge( $pred, $teq );
878 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
879 $self->equivalence_graph->add_edge( $teq, $succ );
881 $self->equivalence_graph->delete_vertex( $seq );
882 # TODO enable this after collation parsing is done
883 throw( "Graph got disconnected making $source / $target equivalence" )
884 if $self->_is_disconnected && $self->collation->tradition->_initialized;
887 =head2 test_equivalence
889 Test whether, if two readings were equated with a 'colocated' relationship,
890 the graph would still be valid.
894 sub test_equivalence {
895 my( $self, $source, $target ) = @_;
896 # Try merging the nodes in the equivalence graph; return a true value if
897 # no cycle is introduced thereby. Restore the original graph first.
899 # Keep track of edges we add
902 # Get the reading equivalents
903 my $seq = $self->equivalence( $source );
904 my $teq = $self->equivalence( $target );
905 # Maybe this is easy?
906 return 1 if $seq eq $teq;
908 # Save the first graph
909 my $checkstr = $self->equivalence_graph->stringify();
910 # Add and save relevant edges
911 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
912 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
913 $added_pred{$pred} = 0;
915 $self->equivalence_graph->add_edge( $pred, $teq );
916 $added_pred{$pred} = 1;
919 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
920 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
921 $added_succ{$succ} = 0;
923 $self->equivalence_graph->add_edge( $teq, $succ );
924 $added_succ{$succ} = 1;
927 # Delete source equivalent and test
928 $self->equivalence_graph->delete_vertex( $seq );
929 my $ret = !$self->equivalence_graph->has_a_cycle;
931 # Restore what we changed
932 $self->equivalence_graph->add_vertex( $seq );
933 foreach my $pred ( keys %added_pred ) {
934 $self->equivalence_graph->add_edge( $pred, $seq );
935 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
937 foreach my $succ ( keys %added_succ ) {
938 $self->equivalence_graph->add_edge( $seq, $succ );
939 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
941 unless( $self->equivalence_graph->eq( $checkstr ) ) {
942 warn "GRAPH CHANGED after testing";
948 # Unmake an equivalence link between two readings. Should only be called internally.
949 sub _break_equivalence {
950 my( $self, $source, $target ) = @_;
952 # This is the hard one. Need to reconstruct the equivalence groups without
955 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
956 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
957 # If these groups intersect, they are still connected; do nothing.
958 foreach my $el ( keys %tng ) {
959 return if( exists $sng{$el} );
961 # If they don't intersect, then we split the nodes in the graph and in
962 # the hashes. First figure out which group has which name
963 my $oldgroup = $self->equivalence( $source ); # same as $target
964 my $keepsource = $sng{$oldgroup};
965 my $newgroup = $keepsource ? $target : $source;
966 my( $oldmembers, $newmembers );
968 $oldmembers = [ keys %sng ];
969 $newmembers = [ keys %tng ];
971 $oldmembers = [ keys %tng ];
972 $newmembers = [ keys %sng ];
975 # First alter the old group in the hash
976 $self->set_eqreadings( $oldgroup, $oldmembers );
977 foreach my $el ( @$oldmembers ) {
978 $self->set_equivalence( $el, $oldgroup );
981 # then add the new group back to the hash with its new key
982 $self->set_eqreadings( $newgroup, $newmembers );
983 foreach my $el ( @$newmembers ) {
984 $self->set_equivalence( $el, $newgroup );
987 # Now add the new group back to the equivalence graph
988 $self->equivalence_graph->add_vertex( $newgroup );
989 # ...add the appropriate edges to the source group vertext
990 my $c = $self->collation;
991 foreach my $rdg ( @$newmembers ) {
992 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
993 next unless $self->equivalence( $rp );
994 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
996 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
997 next unless $self->equivalence( $rs );
998 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
1002 # ...and figure out which edges on the old group vertex to delete.
1003 my( %old_pred, %old_succ );
1004 foreach my $rdg ( @$oldmembers ) {
1005 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
1006 next unless $self->equivalence( $rp );
1007 $old_pred{$self->equivalence( $rp )} = 1;
1009 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
1010 next unless $self->equivalence( $rs );
1011 $old_succ{$self->equivalence( $rs )} = 1;
1014 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
1015 unless( $old_pred{$p} ) {
1016 $self->equivalence_graph->delete_edge( $p, $oldgroup );
1019 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
1020 unless( $old_succ{$s} ) {
1021 $self->equivalence_graph->delete_edge( $oldgroup, $s );
1024 # TODO enable this after collation parsing is done
1025 throw( "Graph got disconnected breaking $source / $target equivalence" )
1026 if $self->_is_disconnected && $self->collation->tradition->_initialized;
1029 sub _find_equiv_without {
1030 my( $self, $first, $second ) = @_;
1031 my %found = ( $first => 1 );
1032 my $check = [ $first ];
1036 foreach my $r ( @$check ) {
1037 foreach my $nr ( $self->graph->neighbors( $r ) ) {
1038 next if $r eq $second;
1039 if( $self->get_relationship( $r, $nr )->colocated ) {
1040 push( @$more, $nr ) unless exists $found{$nr};
1050 =head2 rebuild_equivalence
1052 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1053 adds all readings and edges, then makes an equivalence for all relationships.
1057 sub rebuild_equivalence {
1059 my $newgraph = Graph->new();
1060 # Set this as the new equivalence graph
1061 $self->_reset_equivalence( $newgraph );
1062 # Clear out the data hashes
1063 $self->_clear_equivalence;
1064 $self->_clear_eqreadings;
1066 $self->collation->tradition->_init_done(0);
1068 foreach my $r ( $self->collation->readings ) {
1070 $newgraph->add_vertex( $rid );
1071 $self->set_equivalence( $rid, $rid );
1072 $self->set_eqreadings( $rid, [ $rid ] );
1076 foreach my $e ( $self->collation->paths ) {
1077 $self->add_equivalence_edge( @$e );
1080 # Now equate the colocated readings. This does no testing;
1081 # it assumes that all preexisting relationships are valid.
1082 foreach my $rel ( $self->relationships ) {
1083 my $relobj = $self->get_relationship( $rel );
1084 next unless $relobj && $relobj->colocated;
1085 $self->_make_equivalence( @$rel );
1087 $self->collation->tradition->_init_done(1);
1090 =head2 equivalence_ranks
1092 Rank all vertices in the equivalence graph, and return a hash reference with
1093 vertex => rank mapping.
1097 sub equivalence_ranks {
1099 my $eqstart = $self->equivalence( $self->collation->start );
1100 my $eqranks = { $eqstart => 0 };
1101 my $rankeqs = { 0 => [ $eqstart ] };
1102 my @curr_origin = ( $eqstart );
1103 # A little iterative function.
1104 while( @curr_origin ) {
1105 @curr_origin = $self->_assign_rank( $eqranks, $rankeqs, @curr_origin );
1107 return( $eqranks, $rankeqs );
1111 my( $self, $node_ranks, $rank_nodes, @current_nodes ) = @_;
1112 my $graph = $self->equivalence_graph;
1113 # Look at each of the children of @current_nodes. If all the child's
1114 # parents have a rank, assign it the highest rank + 1 and add it to
1115 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1116 # parent gets a rank.
1118 foreach my $c ( @current_nodes ) {
1119 warn "Current reading $c has no rank!"
1120 unless exists $node_ranks->{$c};
1121 foreach my $child ( $graph->successors( $c ) ) {
1122 next if exists $node_ranks->{$child};
1123 my $highest_rank = -1;
1125 foreach my $parent ( $graph->predecessors( $child ) ) {
1126 if( exists $node_ranks->{$parent} ) {
1127 $highest_rank = $node_ranks->{$parent}
1128 if $highest_rank <= $node_ranks->{$parent};
1135 my $c_rank = $highest_rank + 1;
1136 # print STDERR "Assigning rank $c_rank to node $child \n";
1137 $node_ranks->{$child} = $c_rank if $node_ranks;
1138 push( @{$rank_nodes->{$c_rank}}, $child ) if $rank_nodes;
1139 push( @next_nodes, $child );
1148 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1150 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1151 $rgraph->setAttribute( 'edgedefault', 'directed' );
1152 $rgraph->setAttribute( 'id', 'relationships', );
1153 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1154 $rgraph->setAttribute( 'parse.edges', 0 );
1155 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1156 $rgraph->setAttribute( 'parse.nodes', 0 );
1157 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1159 # Add the vertices according to their XML IDs
1160 my %rdg_lookup = ( reverse %$node_hash );
1161 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1162 my @nlist = sort keys( %rdg_lookup );
1163 foreach my $n ( @nlist ) {
1164 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1165 $n_el->setAttribute( 'id', $n );
1166 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1168 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1170 # Add the relationship edges, with their object information
1172 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1173 # Add an edge and fill in its relationship info.
1174 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1175 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1176 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1177 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1178 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1180 my $rel_obj = $self->get_relationship( @$e );
1181 foreach my $key ( keys %$edge_keys ) {
1182 my $value = $rel_obj->$key;
1183 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1187 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1195 return $tmp_a <=> $tmp_b;
1198 sub _add_graphml_data {
1199 my( $el, $key, $value ) = @_;
1200 return unless defined $value;
1201 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1202 $data_el->setAttribute( 'key', $key );
1203 $data_el->appendText( $value );
1207 Text::Tradition::Error->throw(
1208 'ident' => 'Relationship error',
1214 __PACKAGE__->meta->make_immutable;