1 package Text::Tradition::Collation::RelationshipStore;
6 use Text::Tradition::Error;
7 use Text::Tradition::Collation::Relationship;
8 use Text::Tradition::Collation::RelationshipType;
15 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
16 between readings in a given collation
20 Text::Tradition is a library for representation and analysis of collated
21 texts, particularly medieval ones. The RelationshipStore is an internal object
22 of the collation, to keep track of the defined relationships (both specific and
23 general) between readings.
30 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
32 # Add some relationships, and delete them
34 my $cxfile = 't/data/Collatex-16.xml';
35 my $t = Text::Tradition->new(
37 'input' => 'CollateX',
40 my $c = $t->collation;
42 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
43 is( scalar @v1, 1, "Added a single relationship" );
44 is( $v1[0]->[0], 'n21', "Got correct node 1" );
45 is( $v1[0]->[1], 'n22', "Got correct node 2" );
46 my @v2 = $c->add_relationship( 'n24', 'n23',
47 { 'type' => 'spelling', 'scope' => 'global' } );
48 is( scalar @v2, 2, "Added a global relationship with two instances" );
49 @v1 = $c->del_relationship( 'n22', 'n21' );
50 is( scalar @v1, 1, "Deleted first relationship" );
51 @v2 = $c->del_relationship( 'n12', 'n13' );
52 is( scalar @v2, 2, "Deleted second global relationship" );
53 my @v3 = $c->del_relationship( 'n1', 'n2' );
54 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
60 =head2 new( collation => $collation );
62 Creates a new relationship store for the given collation.
68 isa => 'Text::Tradition::Collation',
75 Registry of possible relationship types. See RelationshipType for more info.
79 has 'relationship_types' => (
92 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
93 default => sub { {} },
99 default => sub { Graph->new( undirected => 1 ) },
101 relationships => 'edges',
102 add_reading => 'add_vertex',
103 delete_reading => 'delete_vertex',
107 =head2 equivalence_graph()
109 Returns an equivalence graph of the collation, in which all readings
110 related via a 'colocated' relationship are transformed into a single
111 vertex. Can be used to determine the validity of a new relationship.
115 has 'equivalence_graph' => (
118 default => sub { Graph->new() },
119 writer => '_reset_equivalence',
122 has '_node_equivalences' => (
126 equivalence => 'get',
127 set_equivalence => 'set',
128 remove_equivalence => 'delete',
129 _clear_equivalence => 'clear',
133 has '_equivalence_readings' => (
138 set_eqreadings => 'set',
139 remove_eqreadings => 'delete',
140 _clear_eqreadings => 'clear',
144 ## Build function - here we have our default set of relationship types.
149 my @DEFAULT_TYPES = (
150 { name => 'collated', bindlevel => 50, is_weak => 1, is_transitive => 0, is_generalizable => 0 },
151 { name => 'orthographic', bindlevel => 0, use_regular => 0 },
152 { name => 'spelling', bindlevel => 1 },
153 { name => 'punctuation', bindlevel => 2 },
154 { name => 'grammatical', bindlevel => 2 },
155 { name => 'lexical', bindlevel => 2 },
156 { name => 'uncertain', bindlevel => 50, is_transitive => 0, is_generalizable => 0 },
157 { name => 'other', bindlevel => 50, is_transitive => 0, is_generalizable => 0 },
158 { name => 'transposition', bindlevel => 50, is_colocation => 0, is_transitive => 0 },
159 { name => 'repetition', bindlevel => 50, is_colocation => 0, is_transitive => 0 }
162 foreach my $type ( @DEFAULT_TYPES ) {
163 $self->add_type( $type );
167 around add_type => sub {
171 if( @_ == 1 && $_[0]->$_isa( 'Text::Tradition::Collation::RelationshipType' ) ) {
174 my %args = @_ == 1 ? %{$_[0]} : @_;
175 $new_type = Text::Tradition::Collation::RelationshipType->new( %args );
177 $self->$orig( $new_type->name => $new_type );
181 around add_reading => sub {
185 $self->equivalence_graph->add_vertex( @_ );
186 $self->set_equivalence( $_[0], $_[0] );
187 $self->set_eqreadings( $_[0], [ $_[0] ] );
191 around delete_reading => sub {
195 $self->_remove_equivalence_node( @_ );
199 =head2 get_relationship
201 Return the relationship object, if any, that exists between two readings.
205 sub get_relationship {
208 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
209 # Dereference the edge arrayref that was passed.
216 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
217 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
219 return $relationship;
222 sub _set_relationship {
223 my( $self, $relationship, @vector ) = @_;
224 $self->graph->add_edge( @vector );
225 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
226 $self->_make_equivalence( @vector ) if $relationship->colocated;
231 Create a new relationship with the given options and return it.
232 Warn and return undef if the relationship cannot be created.
237 my( $self, $options ) = @_;
238 # Check to see if a relationship exists between the two given readings
239 my $source = delete $options->{'orig_a'};
240 my $target = delete $options->{'orig_b'};
241 my $rel = $self->get_relationship( $source, $target );
243 if( $self->type( $rel->type )->is_weak ) {
244 # Always replace a weak relationship with a more descriptive
246 $self->del_relationship( $source, $target );
247 } elsif( $rel->type ne $options->{'type'} ) {
248 throw( "Another relationship of type " . $rel->type
249 . " already exists between $source and $target" );
255 $rel = Text::Tradition::Collation::Relationship->new( $options );
256 my $reltype = $self->type( $rel->type );
257 throw( "Unrecognized relationship type " . $rel->type ) unless $reltype;
258 # Validate the options given against the relationship type wanted
259 throw( "Cannot set nonlocal scope on relationship of type " . $reltype->name )
260 if $rel->nonlocal && !$reltype->is_generalizable;
262 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
266 =head2 add_scoped_relationship( $rel )
268 Keep track of relationships defined between specific readings that are scoped
269 non-locally. Key on whichever reading occurs first alphabetically.
273 sub add_scoped_relationship {
274 my( $self, $rel ) = @_;
275 my $rdga = $rel->reading_a;
276 my $rdgb = $rel->reading_b;
277 my $r = $self->scoped_relationship( $rdga, $rdgb );
279 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
280 $r->type, $rdga, $rdgb );
283 my( $first, $second ) = sort ( $rdga, $rdgb );
284 $self->scopedrels->{$first}->{$second} = $rel;
287 =head2 scoped_relationship( $reading_a, $reading_b )
289 Returns the general (document-level or global) relationship that has been defined
290 between the two reading strings. Returns undef if there is no general relationship.
294 sub scoped_relationship {
295 my( $self, $rdga, $rdgb ) = @_;
296 my( $first, $second ) = sort( $rdga, $rdgb );
297 if( exists $self->scopedrels->{$first}->{$second} ) {
298 return $self->scopedrels->{$first}->{$second};
303 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
305 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
306 for the possible options) between the readings given in $source and $target. Sets
307 up a scoped relationship between $sourcetext and $targettext if the relationship is
310 Returns a status boolean and a list of all reading pairs connected by the call to
321 $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
322 } [qr/Cannot set relationship on a meta reading/],
323 "Got expected relationship drop warning on parse";
325 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
326 ok( $t1, "Parsed test fragment file" );
327 my $c1 = $t1->collation;
328 my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
329 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
330 "Troublesome relationship exists" );
331 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
333 # Try to make the link we want
335 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
336 ok( 1, "Added cross-collation relationship as expected" );
337 } catch( Text::Tradition::Error $e ) {
338 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
342 $c1->calculate_ranks();
343 ok( 1, "Successfully calculated ranks" );
344 } catch ( Text::Tradition::Error $e ) {
345 ok( 0, "Collation now has a cycle: " . $e->message );
348 # Test 1.2: attempt merge of an identical reading
350 $c1->merge_readings( 'r9.3', 'r11.5' );
351 ok( 1, "Successfully merged reading 'pontifex'" );
352 } catch ( Text::Tradition::Error $e ) {
353 ok( 0, "Merge of mergeable readings failed: $e->message" );
357 # Test 1.3: attempt relationship with a meta reading (should fail)
359 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
360 ok( 0, "Allowed a meta-reading to be used in a relationship" );
361 } catch ( Text::Tradition::Error $e ) {
362 is( $e->message, 'Cannot set relationship on a meta reading',
363 "Relationship link prevented for a meta reading" );
366 # Test 1.4: try to break a relationship near a meta reading
367 $c1->add_relationship( 'r7.6', 'r7.3', { type => 'orthographic' } );
369 $c1->del_relationship( 'r7.6', 'r7.7' );
370 $c1->del_relationship( 'r7.6', 'r7.3' );
371 ok( 1, "Relationship broken with a meta reading as neighbor" );
373 ok( 0, "Relationship deletion failed with a meta reading as neighbor" );
376 # Test 2.1: try to equate nodes that are prevented with a real intermediate
380 $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
381 } [qr/Cannot set relationship on a meta reading/],
382 "Got expected relationship drop warning on parse";
383 my $c2 = $t2->collation;
384 $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
385 my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
386 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
387 "Created blocking relationship" );
388 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
389 # This time the link ought to fail
391 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
392 ok( 0, "Added cross-equivalent bad relationship" );
393 } catch ( Text::Tradition::Error $e ) {
394 like( $e->message, qr/witness loop/,
395 "Existing equivalence blocked crossing relationship" );
399 $c2->calculate_ranks();
400 ok( 1, "Successfully calculated ranks" );
401 } catch ( Text::Tradition::Error $e ) {
402 ok( 0, "Collation now has a cycle: " . $e->message );
405 # Test 3.1: make a straightforward pair of transpositions.
406 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
407 # Test 1: try to equate nodes that are prevented with an intermediate collation
408 my $c3 = $t3->collation;
410 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
411 ok( 1, "Added straightforward transposition" );
412 } catch ( Text::Tradition::Error $e ) {
413 ok( 0, "Failed to add normal transposition: " . $e->message );
416 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
417 ok( 1, "Added straightforward transposition complement" );
418 } catch ( Text::Tradition::Error $e ) {
419 ok( 0, "Failed to add normal transposition complement: " . $e->message );
422 # Test 3.2: try to make a transposition that could be a parallel.
424 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
425 ok( 0, "Added bad colocated transposition" );
426 } catch ( Text::Tradition::Error $e ) {
427 like( $e->message, qr/Readings appear to be colocated/,
428 "Prevented bad colocated transposition" );
431 # Test 3.3: make the parallel, and then make the transposition again.
433 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
434 ok( 1, "Equated identical readings for transposition" );
435 } catch ( Text::Tradition::Error $e ) {
436 ok( 0, "Failed to equate identical readings: " . $e->message );
439 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
440 ok( 1, "Added straightforward transposition complement" );
441 } catch ( Text::Tradition::Error $e ) {
442 ok( 0, "Failed to add normal transposition complement: " . $e->message );
445 # TODO Test 4: make a global relationship that involves re-ranking a node first, when
446 # the prior rank has a potential match too
447 my $t4 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/globalrel_test.xml' );
448 my $c4 = $t4->collation;
449 # Can we even add the relationship?
451 $c4->add_relationship( 'r463.2', 'r463.4',
452 { type => 'orthographic', scope => 'global' } );
453 ok( 1, "Added global relationship without error" );
454 } catch ( Text::Tradition::Error $e ) {
455 ok( 0, "Failed to add global relationship when same-rank alternative exists: "
458 $c4->calculate_ranks();
459 # Do our readings now share a rank?
460 is( $c4->reading('r463.2')->rank, $c4->reading('r463.4')->rank,
461 "Expected readings now at same rank" );
467 sub add_relationship {
468 my( $self, $source, $target, $options ) = @_;
469 my $c = $self->collation;
470 my $sourceobj = $c->reading( $source );
471 my $targetobj = $c->reading( $target );
472 throw( "Adding self relationship at $source" ) if $source eq $target;
473 throw( "Cannot set relationship on a meta reading" )
474 if( $sourceobj->is_meta || $targetobj->is_meta );
477 my $thispaironly = delete $options->{thispaironly};
478 my $droppedcolls = [];
479 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
480 $relationship = $options;
481 $reltype = $self->type( $relationship->type );
482 $thispaironly = 1; # If existing rel, set only where asked.
484 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
485 $relationship->type, $droppedcolls );
486 unless( $is_valid ) {
487 throw( "Invalid relationship: $reason" );
490 $reltype = $self->type( $options->{type} );
492 # Try to create the relationship object.
493 my $rdga = $reltype->regularize( $sourceobj );
494 my $rdgb = $reltype->regularize( $targetobj );
495 $options->{'orig_a'} = $sourceobj;
496 $options->{'orig_b'} = $targetobj;
497 $options->{'reading_a'} = $rdga;
498 $options->{'reading_b'} = $rdgb;
499 if( exists $options->{'scope'} && $options->{'scope'} ne 'local' ) {
500 # Is there a relationship with this a & b already?
501 if( $rdga eq $rdgb ) {
502 # If we have canonified to the same thing for the relationship
503 # type we want, something is wrong.
504 # NOTE we want to allow this at the local level, as a cheap means
505 # of merging readings in the UI, until we get a better means.
506 throw( "Canonifier returns identical form $rdga for this relationship type" );
509 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
510 if( $otherrel && $otherrel->type eq $options->{type}
511 && $otherrel->scope eq $options->{scope} ) {
512 # warn "Applying existing scoped relationship for $rdga / $rdgb";
513 $relationship = $otherrel;
514 } elsif( $otherrel ) {
515 throw( 'Conflicting scoped relationship '
516 . join( '/', $otherrel->type, $otherrel->scope ) . ' vs. '
517 . join( '/', $options->{type}, $options->{scope} )
518 . " for $rdga / $rdgb at $source / $target" );
521 $relationship = $self->create( $options ) unless $relationship;
522 # ... Will throw on error
524 # See if the relationship is actually valid here
525 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
526 $options->{'type'}, $droppedcolls );
527 unless( $is_valid ) {
528 throw( "Invalid relationship: $reason" );
533 # Now set the relationship(s).
535 my $rel = $self->get_relationship( $source, $target );
537 if( $rel && $rel ne $relationship ) {
538 if( $rel->nonlocal ) {
539 throw( "Found conflicting relationship at $source - $target" );
540 } elsif( !$reltype->is_weak ) {
541 # Replace a weak relationship; leave any other sort in place.
542 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
543 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
544 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
545 warn sprintf( "Not overriding local relationship %s with global %s "
546 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
547 $source, $target, $rel->reading_a, $rel->reading_b );
552 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
553 push( @pairs_set, [ $source, $target ] );
555 # Find all the pairs for which we need to set the relationship.
556 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
557 push( @pairs_set, $self->add_global_relationship( $relationship ) );
559 # Finally, restore whatever collations we can, and return.
560 $self->_restore_weak( @$droppedcolls );
564 =head2 add_global_relationship( $options, $skipvector )
566 Adds the relationship specified wherever the relevant readings appear together
567 in the graph. Options as in add_relationship above.
571 sub add_global_relationship {
572 my( $self, $relationship ) = @_;
574 my $reltype = $self->type( $relationship->type );
575 throw( "Relationship passed to add_global is not global" )
576 unless $relationship->nonlocal;
577 throw( "Relationship passed to add_global is not a valid global type" )
578 unless $reltype->is_generalizable;
580 # Apply the relationship wherever it is valid
582 foreach my $v ( $self->_find_applicable( $relationship ) ) {
583 my $exists = $self->get_relationship( @$v );
584 my $etype = $exists ? $self->type( $exists->type ) : '';
585 if( $exists && !$etype->is_weak ) {
586 unless( $exists->is_equivalent( $relationship ) ) {
587 throw( "Found conflicting relationship at @$v" );
592 @added = $self->add_relationship( @$v, $relationship );
594 my $reldesc = sprintf( "%s %s -> %s", $relationship->type,
595 $relationship->reading_a, $relationship->reading_b );
596 # print STDERR "Global relationship $reldesc not applicable at @$v\n";
598 push( @pairs_set, @added ) if @added;
605 =head2 del_scoped_relationship( $reading_a, $reading_b )
607 Returns the general (document-level or global) relationship that has been defined
608 between the two reading strings. Returns undef if there is no general relationship.
612 sub del_scoped_relationship {
613 my( $self, $rdga, $rdgb ) = @_;
614 my( $first, $second ) = sort( $rdga, $rdgb );
615 return delete $self->scopedrels->{$first}->{$second};
618 sub _find_applicable {
619 my( $self, $rel ) = @_;
620 my $c = $self->collation;
621 my $reltype = $self->type( $rel->type );
623 my @identical_readings;
624 @identical_readings = grep { $reltype->regularize( $_ ) eq $rel->reading_a }
626 foreach my $ir ( @identical_readings ) {
628 @itarget = grep { $reltype->regularize( $_ ) eq $rel->reading_b }
629 $c->readings_at_rank( $ir->rank );
631 # Warn if there is more than one hit with no closer link between them.
632 my $itmain = shift @itarget;
635 my $bindlevel = $reltype->bindlevel;
636 map { $all_targets{$_} = 1 } @itarget;
637 map { delete $all_targets{$_} }
638 $self->related_readings( $itmain, sub {
639 $self->type( $_[0]->type )->bindlevel < $bindlevel } );
640 warn "More than one unrelated reading with text " . $itmain->text
641 . " at rank " . $ir->rank . "!" if keys %all_targets;
643 push( @vectors, [ $ir->id, $itmain->id ] );
649 =head2 del_relationship( $source, $target )
651 Removes the relationship between the given readings. If the relationship is
652 non-local, removes the relationship everywhere in the graph.
656 sub del_relationship {
657 my( $self, $source, $target ) = @_;
658 my $rel = $self->get_relationship( $source, $target );
659 return () unless $rel; # Nothing to delete; return an empty set.
660 my $reltype = $self->type( $rel->type );
661 my $colo = $rel->colocated;
662 my @vectors = ( [ $source, $target ] );
663 $self->_remove_relationship( $colo, $source, $target );
664 if( $rel->nonlocal ) {
665 # Remove the relationship wherever it occurs.
666 my @rel_edges = grep { $self->get_relationship( @$_ ) eq $rel }
667 $self->relationships;
668 foreach my $re ( @rel_edges ) {
669 $self->_remove_relationship( $colo, @$re );
670 push( @vectors, $re );
672 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
677 sub _remove_relationship {
678 my( $self, $equiv, @vector ) = @_;
679 $self->graph->delete_edge( @vector );
680 $self->_break_equivalence( @vector ) if $equiv;
683 =head2 relationship_valid( $source, $target, $type )
685 Checks whether a relationship of type $type may exist between the readings given
686 in $source and $target. Returns a tuple of ( status, message ) where status is
687 a yes/no boolean and, if the answer is no, message gives the reason why.
691 sub relationship_valid {
692 my( $self, $source, $target, $rel, $mustdrop ) = @_;
693 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
694 my $c = $self->collation;
695 my $reltype = $self->type( $rel );
696 ## Assume validity is okay if we are initializing from scratch.
697 return ( 1, "initializing" ) unless $c->tradition->_initialized;
698 ## TODO Move this block to relationship type definition when we can save
700 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
701 # Check that the two readings do (for a repetition) or do not (for
702 # a transposition) appear in the same witness.
703 # TODO this might be called before witness paths are set...
705 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
706 foreach my $w ( $c->reading_witnesses( $target ) ) {
707 if( $seen_wits{$w} ) {
708 return ( 0, "Readings both occur in witness $w" )
709 if $rel eq 'transposition';
710 return ( 1, "ok" ) if $rel eq 'repetition';
713 return ( 0, "Readings occur only in distinct witnesses" )
714 if $rel eq 'repetition';
716 if ( $reltype->is_colocation ) {
717 # Check that linking the source and target in a relationship won't lead
718 # to a path loop for any witness.
719 # First, drop/stash any collations that might interfere
720 my $sourceobj = $c->reading( $source );
721 my $targetobj = $c->reading( $target );
722 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
723 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
724 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
725 push( @$mustdrop, $self->_drop_weak( $source ) );
726 push( @$mustdrop, $self->_drop_weak( $target ) );
727 if( $c->end->has_rank ) {
728 foreach my $rk ( $sourcerank .. $targetrank ) {
729 map { push( @$mustdrop, $self->_drop_weak( $_->id ) ) }
730 $c->readings_at_rank( $rk );
734 unless( $self->test_equivalence( $source, $target ) ) {
735 $self->_restore_weak( @$mustdrop );
736 return( 0, "Relationship would create witness loop" );
740 # We also need to check that the readings are not in the same place.
741 # That is, proposing to equate them should cause a witness loop.
742 if( $self->test_equivalence( $source, $target ) ) {
743 return ( 0, "Readings appear to be colocated" );
751 my( $self, $reading ) = @_;
753 foreach my $n ( $self->graph->neighbors( $reading ) ) {
754 my $nrel = $self->get_relationship( $reading, $n );
755 if( $self->type( $nrel->type )->is_weak ) {
756 push( @dropped, [ $reading, $n, $nrel->type ] );
757 $self->del_relationship( $reading, $n );
758 #print STDERR "Dropped weak relationship $reading -> $n\n";
765 my( $self, @vectors ) = @_;
766 foreach my $v ( @vectors ) {
769 $self->add_relationship( @$v, { 'type' => $type } );
770 #print STDERR "Restored weak relationship @$v\n";
771 }; # if it fails we don't care
775 =head2 related_readings( $reading, $filter )
777 Returns a list of readings that are connected via relationship links to $reading.
778 If $filter is set to a subroutine ref, returns only those related readings where
779 $filter( $relationship ) returns a true value.
783 sub related_readings {
784 my( $self, $reading, $filter ) = @_;
786 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
787 $reading = $reading->id;
793 if( $filter eq 'colocated' ) {
794 $filter = sub { $_[0]->colocated };
795 } elsif( !ref( $filter ) ) {
797 $filter = sub { $_[0]->type eq $type };
799 my %found = ( $reading => 1 );
800 my $check = [ $reading ];
804 foreach my $r ( @$check ) {
805 foreach my $nr ( $self->graph->neighbors( $r ) ) {
806 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
807 push( @$more, $nr ) unless exists $found{$nr};
814 delete $found{$reading};
815 @answer = keys %found;
817 @answer = $self->graph->all_reachable( $reading );
819 if( $return_object ) {
820 my $c = $self->collation;
821 return map { $c->reading( $_ ) } @answer;
827 =head2 merge_readings( $kept, $deleted );
829 Makes a best-effort merge of the relationship links between the given readings, and
830 stops tracking the to-be-deleted reading.
835 my( $self, $kept, $deleted, $combined ) = @_;
836 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
837 # Get the pair of kept / rel
838 my @vector = ( $kept );
839 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
840 next if $vector[0] eq $vector[1]; # Don't add a self loop
842 # If kept changes its text, drop the relationship.
845 # If kept / rel already has a relationship, just keep the old
846 my $rel = $self->get_relationship( @vector );
849 # Otherwise, adopt the relationship that would be deleted.
850 $rel = $self->get_relationship( @$edge );
851 $self->_set_relationship( $rel, @vector );
853 $self->_make_equivalence( $deleted, $kept );
856 ### Equivalence logic
858 sub _remove_equivalence_node {
859 my( $self, $node ) = @_;
860 my $group = $self->equivalence( $node );
861 my $nodelist = $self->eqreadings( $group );
862 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
863 $self->equivalence_graph->delete_vertex( $group );
864 $self->remove_eqreadings( $group );
865 $self->remove_equivalence( $group );
866 } elsif( @$nodelist == 1 ) {
867 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
868 " in group that should have only $node" );
870 my @newlist = grep { $_ ne $node } @$nodelist;
871 $self->set_eqreadings( $group, \@newlist );
872 $self->remove_equivalence( $node );
876 =head2 add_equivalence_edge
878 Add an edge in the equivalence graph corresponding to $source -> $target in the
879 collation. Should only be called by Collation.
883 sub add_equivalence_edge {
884 my( $self, $source, $target ) = @_;
885 my $seq = $self->equivalence( $source );
886 my $teq = $self->equivalence( $target );
887 $self->equivalence_graph->add_edge( $seq, $teq );
890 =head2 delete_equivalence_edge
892 Remove an edge in the equivalence graph corresponding to $source -> $target in the
893 collation. Should only be called by Collation.
897 sub delete_equivalence_edge {
898 my( $self, $source, $target ) = @_;
899 my $seq = $self->equivalence( $source );
900 my $teq = $self->equivalence( $target );
901 $self->equivalence_graph->delete_edge( $seq, $teq );
904 sub _is_disconnected {
906 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
907 || scalar $self->equivalence_graph->successorless_vertices > 1 );
910 # Equate two readings in the equivalence graph
911 sub _make_equivalence {
912 my( $self, $source, $target ) = @_;
913 # Get the source equivalent readings
914 my $seq = $self->equivalence( $source );
915 my $teq = $self->equivalence( $target );
916 # Nothing to do if they are already equivalent...
917 return if $seq eq $teq;
918 my $sourcepool = $self->eqreadings( $seq );
919 # and add them to the target readings.
920 push( @{$self->eqreadings( $teq )}, @$sourcepool );
921 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
922 # Then merge the nodes in the equivalence graph.
923 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
924 $self->equivalence_graph->add_edge( $pred, $teq );
926 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
927 $self->equivalence_graph->add_edge( $teq, $succ );
929 $self->equivalence_graph->delete_vertex( $seq );
930 # TODO enable this after collation parsing is done
931 throw( "Graph got disconnected making $source / $target equivalence" )
932 if $self->_is_disconnected && $self->collation->tradition->_initialized;
935 =head2 test_equivalence
937 Test whether, if two readings were equated with a 'colocated' relationship,
938 the graph would still be valid.
942 sub test_equivalence {
943 my( $self, $source, $target ) = @_;
944 # Try merging the nodes in the equivalence graph; return a true value if
945 # no cycle is introduced thereby. Restore the original graph first.
947 # Keep track of edges we add
950 # Get the reading equivalents
951 my $seq = $self->equivalence( $source );
952 my $teq = $self->equivalence( $target );
953 # Maybe this is easy?
954 return 1 if $seq eq $teq;
956 # Save the first graph
957 my $checkstr = $self->equivalence_graph->stringify();
958 # Add and save relevant edges
959 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
960 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
961 $added_pred{$pred} = 0;
963 $self->equivalence_graph->add_edge( $pred, $teq );
964 $added_pred{$pred} = 1;
967 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
968 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
969 $added_succ{$succ} = 0;
971 $self->equivalence_graph->add_edge( $teq, $succ );
972 $added_succ{$succ} = 1;
975 # Delete source equivalent and test
976 $self->equivalence_graph->delete_vertex( $seq );
977 my $ret = !$self->equivalence_graph->has_a_cycle;
979 # Restore what we changed
980 $self->equivalence_graph->add_vertex( $seq );
981 foreach my $pred ( keys %added_pred ) {
982 $self->equivalence_graph->add_edge( $pred, $seq );
983 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
985 foreach my $succ ( keys %added_succ ) {
986 $self->equivalence_graph->add_edge( $seq, $succ );
987 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
989 unless( $self->equivalence_graph->eq( $checkstr ) ) {
990 throw( "GRAPH CHANGED after testing" );
996 # Unmake an equivalence link between two readings. Should only be called internally.
997 sub _break_equivalence {
998 my( $self, $source, $target ) = @_;
1000 # This is the hard one. Need to reconstruct the equivalence groups without
1003 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
1004 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
1005 # If these groups intersect, they are still connected; do nothing.
1006 foreach my $el ( keys %tng ) {
1007 return if( exists $sng{$el} );
1009 # If they don't intersect, then we split the nodes in the graph and in
1010 # the hashes. First figure out which group has which name
1011 my $oldgroup = $self->equivalence( $source ); # same as $target
1012 my $keepsource = $sng{$oldgroup};
1013 my $newgroup = $keepsource ? $target : $source;
1014 my( $oldmembers, $newmembers );
1016 $oldmembers = [ keys %sng ];
1017 $newmembers = [ keys %tng ];
1019 $oldmembers = [ keys %tng ];
1020 $newmembers = [ keys %sng ];
1023 # First alter the old group in the hash
1024 $self->set_eqreadings( $oldgroup, $oldmembers );
1025 foreach my $el ( @$oldmembers ) {
1026 $self->set_equivalence( $el, $oldgroup );
1029 # then add the new group back to the hash with its new key
1030 $self->set_eqreadings( $newgroup, $newmembers );
1031 foreach my $el ( @$newmembers ) {
1032 $self->set_equivalence( $el, $newgroup );
1035 # Now add the new group back to the equivalence graph
1036 $self->equivalence_graph->add_vertex( $newgroup );
1037 # ...add the appropriate edges to the source group vertext
1038 my $c = $self->collation;
1039 foreach my $rdg ( @$newmembers ) {
1040 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
1041 next unless $self->equivalence( $rp );
1042 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
1044 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
1045 next unless $self->equivalence( $rs );
1046 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
1050 # ...and figure out which edges on the old group vertex to delete.
1051 my( %old_pred, %old_succ );
1052 foreach my $rdg ( @$oldmembers ) {
1053 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
1054 next unless $self->equivalence( $rp );
1055 $old_pred{$self->equivalence( $rp )} = 1;
1057 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
1058 next unless $self->equivalence( $rs );
1059 $old_succ{$self->equivalence( $rs )} = 1;
1062 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
1063 unless( $old_pred{$p} ) {
1064 $self->equivalence_graph->delete_edge( $p, $oldgroup );
1067 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
1068 unless( $old_succ{$s} ) {
1069 $self->equivalence_graph->delete_edge( $oldgroup, $s );
1072 # TODO enable this after collation parsing is done
1073 throw( "Graph got disconnected breaking $source / $target equivalence" )
1074 if $self->_is_disconnected && $self->collation->tradition->_initialized;
1077 sub _find_equiv_without {
1078 my( $self, $first, $second ) = @_;
1079 my %found = ( $first => 1 );
1080 my $check = [ $first ];
1084 foreach my $r ( @$check ) {
1085 foreach my $nr ( $self->graph->neighbors( $r ) ) {
1086 next if $r eq $second;
1087 if( $self->get_relationship( $r, $nr )->colocated ) {
1088 push( @$more, $nr ) unless exists $found{$nr};
1098 =head2 rebuild_equivalence
1100 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1101 adds all readings and edges, then makes an equivalence for all relationships.
1105 sub rebuild_equivalence {
1107 my $newgraph = Graph->new();
1108 # Set this as the new equivalence graph
1109 $self->_reset_equivalence( $newgraph );
1110 # Clear out the data hashes
1111 $self->_clear_equivalence;
1112 $self->_clear_eqreadings;
1114 $self->collation->tradition->_init_done(0);
1116 foreach my $r ( $self->collation->readings ) {
1118 $newgraph->add_vertex( $rid );
1119 $self->set_equivalence( $rid, $rid );
1120 $self->set_eqreadings( $rid, [ $rid ] );
1124 foreach my $e ( $self->collation->paths ) {
1125 $self->add_equivalence_edge( @$e );
1128 # Now equate the colocated readings. This does no testing;
1129 # it assumes that all preexisting relationships are valid.
1130 foreach my $rel ( $self->relationships ) {
1131 my $relobj = $self->get_relationship( $rel );
1132 next unless $relobj && $relobj->colocated;
1133 $self->_make_equivalence( @$rel );
1135 $self->collation->tradition->_init_done(1);
1138 =head2 equivalence_ranks
1140 Rank all vertices in the equivalence graph, and return a hash reference with
1141 vertex => rank mapping.
1145 sub equivalence_ranks {
1147 my $eqstart = $self->equivalence( $self->collation->start );
1148 my $eqranks = { $eqstart => 0 };
1149 my $rankeqs = { 0 => [ $eqstart ] };
1150 my @curr_origin = ( $eqstart );
1151 # A little iterative function.
1152 while( @curr_origin ) {
1153 @curr_origin = $self->_assign_rank( $eqranks, $rankeqs, @curr_origin );
1155 return( $eqranks, $rankeqs );
1159 my( $self, $node_ranks, $rank_nodes, @current_nodes ) = @_;
1160 my $graph = $self->equivalence_graph;
1161 # Look at each of the children of @current_nodes. If all the child's
1162 # parents have a rank, assign it the highest rank + 1 and add it to
1163 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1164 # parent gets a rank.
1166 foreach my $c ( @current_nodes ) {
1167 warn "Current reading $c has no rank!"
1168 unless exists $node_ranks->{$c};
1169 foreach my $child ( $graph->successors( $c ) ) {
1170 next if exists $node_ranks->{$child};
1171 my $highest_rank = -1;
1173 foreach my $parent ( $graph->predecessors( $child ) ) {
1174 if( exists $node_ranks->{$parent} ) {
1175 $highest_rank = $node_ranks->{$parent}
1176 if $highest_rank <= $node_ranks->{$parent};
1183 my $c_rank = $highest_rank + 1;
1184 # print STDERR "Assigning rank $c_rank to node $child \n";
1185 $node_ranks->{$child} = $c_rank if $node_ranks;
1186 push( @{$rank_nodes->{$c_rank}}, $child ) if $rank_nodes;
1187 push( @next_nodes, $child );
1196 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1198 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1199 $rgraph->setAttribute( 'edgedefault', 'directed' );
1200 $rgraph->setAttribute( 'id', 'relationships', );
1201 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1202 $rgraph->setAttribute( 'parse.edges', 0 );
1203 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1204 $rgraph->setAttribute( 'parse.nodes', 0 );
1205 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1207 # Add the vertices according to their XML IDs
1208 my %rdg_lookup = ( reverse %$node_hash );
1209 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1210 my @nlist = sort keys( %rdg_lookup );
1211 foreach my $n ( @nlist ) {
1212 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1213 $n_el->setAttribute( 'id', $n );
1214 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1216 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1218 # Add the relationship edges, with their object information
1220 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1221 # Add an edge and fill in its relationship info.
1222 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1223 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1224 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1225 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1226 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1228 my $rel_obj = $self->get_relationship( @$e );
1229 foreach my $key ( keys %$edge_keys ) {
1230 my $value = $rel_obj->$key;
1231 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1235 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1243 return $tmp_a <=> $tmp_b;
1246 sub _add_graphml_data {
1247 my( $el, $key, $value ) = @_;
1248 return unless defined $value;
1249 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1250 $data_el->setAttribute( 'key', $key );
1251 $data_el->appendText( $value );
1255 my( $self, $from, $to ) = @_;
1256 open( DUMP, ">debug.svg" ) or die "Could not open debug.svg";
1257 binmode DUMP, ':utf8';
1258 print DUMP $self->collation->as_svg({ from => $from, to => $to, nocalc => 1 });
1263 Text::Tradition::Error->throw(
1264 'ident' => 'Relationship error',
1270 __PACKAGE__->meta->make_immutable;