1 package Text::Tradition::Collation::RelationshipStore;
6 use Text::Tradition::Error;
7 use Text::Tradition::Collation::Relationship;
8 use Text::Tradition::Collation::RelationshipType;
15 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
16 between readings in a given collation
20 Text::Tradition is a library for representation and analysis of collated
21 texts, particularly medieval ones. The RelationshipStore is an internal object
22 of the collation, to keep track of the defined relationships (both specific and
23 general) between readings.
30 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
32 # Add some relationships, and delete them
34 my $cxfile = 't/data/Collatex-16.xml';
35 my $t = Text::Tradition->new(
37 'input' => 'CollateX',
40 my $c = $t->collation;
42 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
43 is( scalar @v1, 1, "Added a single relationship" );
44 is( $v1[0]->[0], 'n21', "Got correct node 1" );
45 is( $v1[0]->[1], 'n22', "Got correct node 2" );
46 my @v2 = $c->add_relationship( 'n24', 'n23',
47 { 'type' => 'spelling', 'scope' => 'global' } );
48 is( scalar @v2, 2, "Added a global relationship with two instances" );
49 @v1 = $c->del_relationship( 'n22', 'n21' );
50 is( scalar @v1, 1, "Deleted first relationship" );
51 @v2 = $c->del_relationship( 'n12', 'n13' );
52 is( scalar @v2, 2, "Deleted second global relationship" );
53 my @v3 = $c->del_relationship( 'n1', 'n2' );
54 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
60 =head2 new( collation => $collation );
62 Creates a new relationship store for the given collation.
68 isa => 'Text::Tradition::Collation',
75 Registry of possible relationship types. See RelationshipType for more info.
79 has 'relationship_types' => (
92 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
93 default => sub { {} },
99 default => sub { Graph->new( undirected => 1 ) },
101 relationships => 'edges',
102 add_reading => 'add_vertex',
103 delete_reading => 'delete_vertex',
107 =head2 equivalence_graph()
109 Returns an equivalence graph of the collation, in which all readings
110 related via a 'colocated' relationship are transformed into a single
111 vertex. Can be used to determine the validity of a new relationship.
115 has 'equivalence_graph' => (
118 default => sub { Graph->new() },
119 writer => '_reset_equivalence',
122 has '_node_equivalences' => (
126 equivalence => 'get',
127 set_equivalence => 'set',
128 remove_equivalence => 'delete',
129 _clear_equivalence => 'clear',
133 has '_equivalence_readings' => (
138 set_eqreadings => 'set',
139 remove_eqreadings => 'delete',
140 _clear_eqreadings => 'clear',
144 ## Build function - here we have our default set of relationship types.
149 my @DEFAULT_TYPES = (
150 { name => 'collated', bindlevel => 50, is_weak => 1, is_transitive => 0, is_generalizable => 0 },
151 { name => 'orthographic', bindlevel => 0, use_regular => 0 },
152 { name => 'spelling', bindlevel => 1 },
153 { name => 'punctuation', bindlevel => 2 },
154 { name => 'grammatical', bindlevel => 2 },
155 { name => 'lexical', bindlevel => 2 },
156 { name => 'uncertain', bindlevel => 50, is_transitive => 0, is_generalizable => 0 },
157 { name => 'other', bindlevel => 50, is_transitive => 0, is_generalizable => 0 },
158 { name => 'transposition', bindlevel => 50, is_colocation => 0, is_transitive => 0 },
159 { name => 'repetition', bindlevel => 50, is_colocation => 0, is_transitive => 0 }
162 foreach my $type ( @DEFAULT_TYPES ) {
163 $self->add_type( $type );
167 around add_type => sub {
171 if( @_ == 1 && $_[0]->$_isa( 'Text::Tradition::Collation::RelationshipType' ) ) {
174 my %args = @_ == 1 ? %{$_[0]} : @_;
175 $new_type = Text::Tradition::Collation::RelationshipType->new( %args );
177 $self->$orig( $new_type->name => $new_type );
181 around add_reading => sub {
185 $self->equivalence_graph->add_vertex( @_ );
186 $self->set_equivalence( $_[0], $_[0] );
187 $self->set_eqreadings( $_[0], [ $_[0] ] );
191 around delete_reading => sub {
195 $self->_remove_equivalence_node( @_ );
199 =head2 get_relationship
201 Return the relationship object, if any, that exists between two readings.
205 sub get_relationship {
208 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
209 # Dereference the edge arrayref that was passed.
216 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
217 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
219 return $relationship;
222 sub _set_relationship {
223 my( $self, $relationship, @vector ) = @_;
224 $self->graph->add_edge( @vector );
225 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
226 $self->_make_equivalence( @vector ) if $relationship->colocated;
231 Create a new relationship with the given options and return it.
232 Warn and return undef if the relationship cannot be created.
237 my( $self, $options ) = @_;
238 # Check to see if a relationship exists between the two given readings
239 my $source = delete $options->{'orig_a'};
240 my $target = delete $options->{'orig_b'};
241 my $rel = $self->get_relationship( $source, $target );
243 if( $self->type( $rel->type )->is_weak ) {
244 # Always replace a weak relationship with a more descriptive
246 $self->del_relationship( $source, $target );
247 } elsif( $rel->type ne $options->{'type'} ) {
248 throw( "Another relationship of type " . $rel->type
249 . " already exists between $source and $target" );
255 $rel = Text::Tradition::Collation::Relationship->new( $options );
256 my $reltype = $self->type( $rel->type );
257 throw( "Unrecognized relationship type " . $rel->type ) unless $reltype;
258 # Validate the options given against the relationship type wanted
259 throw( "Cannot set nonlocal scope on relationship of type " . $reltype->name )
260 if $rel->nonlocal && !$reltype->is_generalizable;
262 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
266 =head2 add_scoped_relationship( $rel )
268 Keep track of relationships defined between specific readings that are scoped
269 non-locally. Key on whichever reading occurs first alphabetically.
273 sub add_scoped_relationship {
274 my( $self, $rel ) = @_;
275 my $rdga = $rel->reading_a;
276 my $rdgb = $rel->reading_b;
277 my $r = $self->scoped_relationship( $rdga, $rdgb );
279 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
280 $r->type, $rdga, $rdgb );
283 my( $first, $second ) = sort ( $rdga, $rdgb );
284 $self->scopedrels->{$first}->{$second} = $rel;
287 =head2 scoped_relationship( $reading_a, $reading_b )
289 Returns the general (document-level or global) relationship that has been defined
290 between the two reading strings. Returns undef if there is no general relationship.
294 sub scoped_relationship {
295 my( $self, $rdga, $rdgb ) = @_;
296 my( $first, $second ) = sort( $rdga, $rdgb );
297 if( exists $self->scopedrels->{$first}->{$second} ) {
298 return $self->scopedrels->{$first}->{$second};
303 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
305 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
306 for the possible options) between the readings given in $source and $target. Sets
307 up a scoped relationship between $sourcetext and $targettext if the relationship is
310 Returns a status boolean and a list of all reading pairs connected by the call to
321 $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
322 } [qr/Cannot set relationship on a meta reading/],
323 "Got expected relationship drop warning on parse";
325 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
326 ok( $t1, "Parsed test fragment file" );
327 my $c1 = $t1->collation;
328 my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
329 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
330 "Troublesome relationship exists" );
331 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
333 # Try to make the link we want
335 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
336 ok( 1, "Added cross-collation relationship as expected" );
337 } catch( Text::Tradition::Error $e ) {
338 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
342 $c1->calculate_ranks();
343 ok( 1, "Successfully calculated ranks" );
344 } catch ( Text::Tradition::Error $e ) {
345 ok( 0, "Collation now has a cycle: " . $e->message );
348 # Test 1.2: attempt merge of an identical reading
350 $c1->merge_readings( 'r9.3', 'r11.5' );
351 ok( 1, "Successfully merged reading 'pontifex'" );
352 } catch ( Text::Tradition::Error $e ) {
353 ok( 0, "Merge of mergeable readings failed: $e->message" );
357 # Test 1.3: attempt relationship with a meta reading (should fail)
359 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
360 ok( 0, "Allowed a meta-reading to be used in a relationship" );
361 } catch ( Text::Tradition::Error $e ) {
362 is( $e->message, 'Cannot set relationship on a meta reading',
363 "Relationship link prevented for a meta reading" );
366 # Test 1.4: try to break a relationship near a meta reading
367 $c1->add_relationship( 'r7.6', 'r7.3', { type => 'orthographic' } );
369 $c1->del_relationship( 'r7.6', 'r7.7' );
370 $c1->del_relationship( 'r7.6', 'r7.3' );
371 ok( 1, "Relationship broken with a meta reading as neighbor" );
373 ok( 0, "Relationship deletion failed with a meta reading as neighbor" );
376 # Test 2.1: try to equate nodes that are prevented with a real intermediate
380 $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
381 } [qr/Cannot set relationship on a meta reading/],
382 "Got expected relationship drop warning on parse";
383 my $c2 = $t2->collation;
384 $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
385 my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
386 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
387 "Created blocking relationship" );
388 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
389 # This time the link ought to fail
391 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
392 ok( 0, "Added cross-equivalent bad relationship" );
393 } catch ( Text::Tradition::Error $e ) {
394 like( $e->message, qr/witness loop/,
395 "Existing equivalence blocked crossing relationship" );
399 $c2->calculate_ranks();
400 ok( 1, "Successfully calculated ranks" );
401 } catch ( Text::Tradition::Error $e ) {
402 ok( 0, "Collation now has a cycle: " . $e->message );
405 # Test 3.1: make a straightforward pair of transpositions.
406 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
407 # Test 1: try to equate nodes that are prevented with an intermediate collation
408 my $c3 = $t3->collation;
410 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
411 ok( 1, "Added straightforward transposition" );
412 } catch ( Text::Tradition::Error $e ) {
413 ok( 0, "Failed to add normal transposition: " . $e->message );
416 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
417 ok( 1, "Added straightforward transposition complement" );
418 } catch ( Text::Tradition::Error $e ) {
419 ok( 0, "Failed to add normal transposition complement: " . $e->message );
422 # Test 3.2: try to make a transposition that could be a parallel.
424 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
425 ok( 0, "Added bad colocated transposition" );
426 } catch ( Text::Tradition::Error $e ) {
427 like( $e->message, qr/Readings appear to be colocated/,
428 "Prevented bad colocated transposition" );
431 # Test 3.3: make the parallel, and then make the transposition again.
433 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
434 ok( 1, "Equated identical readings for transposition" );
435 } catch ( Text::Tradition::Error $e ) {
436 ok( 0, "Failed to equate identical readings: " . $e->message );
439 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
440 ok( 1, "Added straightforward transposition complement" );
441 } catch ( Text::Tradition::Error $e ) {
442 ok( 0, "Failed to add normal transposition complement: " . $e->message );
445 # TODO Test 4: make a global relationship that involves re-ranking a node first, when
446 # the prior rank has a potential match too
447 my $t4 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/globalrel_test.xml' );
454 sub add_relationship {
455 my( $self, $source, $target, $options ) = @_;
456 my $c = $self->collation;
457 my $sourceobj = $c->reading( $source );
458 my $targetobj = $c->reading( $target );
459 throw( "Adding self relationship at $source" ) if $source eq $target;
460 throw( "Cannot set relationship on a meta reading" )
461 if( $sourceobj->is_meta || $targetobj->is_meta );
464 my $thispaironly = delete $options->{thispaironly};
465 my $droppedcolls = [];
466 $DB::single = 1 if $source eq 'r796.3' && $target eq 'r796.4';
467 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
468 $relationship = $options;
469 $reltype = $self->type( $relationship->type );
470 $thispaironly = 1; # If existing rel, set only where asked.
472 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
473 $relationship->type, $droppedcolls );
474 unless( $is_valid ) {
475 throw( "Invalid relationship: $reason" );
478 $reltype = $self->type( $options->{type} );
480 # Try to create the relationship object.
481 my $rdga = $reltype->regularize( $sourceobj );
482 my $rdgb = $reltype->regularize( $targetobj );
483 $options->{'orig_a'} = $sourceobj;
484 $options->{'orig_b'} = $targetobj;
485 $options->{'reading_a'} = $rdga;
486 $options->{'reading_b'} = $rdgb;
487 if( exists $options->{'scope'} && $options->{'scope'} ne 'local' ) {
488 # Is there a relationship with this a & b already?
489 if( $rdga eq $rdgb ) {
490 # If we have canonified to the same thing for the relationship
491 # type we want, something is wrong.
492 # NOTE we want to allow this at the local level, as a cheap means
493 # of merging readings in the UI, until we get a better means.
494 throw( "Canonifier returns identical form $rdga for this relationship type" );
497 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
498 if( $otherrel && $otherrel->type eq $options->{type}
499 && $otherrel->scope eq $options->{scope} ) {
500 # warn "Applying existing scoped relationship for $rdga / $rdgb";
501 $relationship = $otherrel;
502 } elsif( $otherrel ) {
503 throw( 'Conflicting scoped relationship '
504 . join( '/', $otherrel->type, $otherrel->scope ) . ' vs. '
505 . join( '/', $options->{type}, $options->{scope} )
506 . " for $rdga / $rdgb at $source / $target" );
509 $relationship = $self->create( $options ) unless $relationship;
510 # ... Will throw on error
512 # See if the relationship is actually valid here
513 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
514 $options->{'type'}, $droppedcolls );
515 unless( $is_valid ) {
516 throw( "Invalid relationship: $reason" );
521 # Now set the relationship(s).
523 my $rel = $self->get_relationship( $source, $target );
525 if( $rel && $rel ne $relationship ) {
526 if( $rel->nonlocal ) {
527 throw( "Found conflicting relationship at $source - $target" );
528 } elsif( !$reltype->is_weak ) {
529 # Replace a weak relationship; leave any other sort in place.
530 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
531 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
532 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
533 warn sprintf( "Not overriding local relationship %s with global %s "
534 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
535 $source, $target, $rel->reading_a, $rel->reading_b );
540 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
541 push( @pairs_set, [ $source, $target ] );
543 # Find all the pairs for which we need to set the relationship.
544 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
545 push( @pairs_set, $self->add_global_relationship( $relationship ) );
547 # Finally, restore whatever collations we can, and return.
548 $self->_restore_weak( @$droppedcolls );
552 =head2 add_global_relationship( $options, $skipvector )
554 Adds the relationship specified wherever the relevant readings appear together
555 in the graph. Options as in add_relationship above.
559 sub add_global_relationship {
560 my( $self, $relationship ) = @_;
562 my $reltype = $self->type( $relationship->type );
563 throw( "Relationship passed to add_global is not global" )
564 unless $relationship->nonlocal;
565 throw( "Relationship passed to add_global is not a valid global type" )
566 unless $reltype->is_generalizable;
568 # Apply the relationship wherever it is valid
570 foreach my $v ( $self->_find_applicable( $relationship ) ) {
571 my $exists = $self->get_relationship( @$v );
572 my $etype = $exists ? $self->type( $exists->type ) : '';
573 if( $exists && !$etype->is_weak ) {
574 unless( $exists->is_equivalent( $relationship ) ) {
575 throw( "Found conflicting relationship at @$v" );
580 @added = $self->add_relationship( @$v, $relationship );
582 my $reldesc = sprintf( "%s %s -> %s", $relationship->type,
583 $relationship->reading_a, $relationship->reading_b );
584 print STDERR "Global relationship $reldesc not applicable at @$v\n";
586 push( @pairs_set, @added ) if @added;
593 =head2 del_scoped_relationship( $reading_a, $reading_b )
595 Returns the general (document-level or global) relationship that has been defined
596 between the two reading strings. Returns undef if there is no general relationship.
600 sub del_scoped_relationship {
601 my( $self, $rdga, $rdgb ) = @_;
602 my( $first, $second ) = sort( $rdga, $rdgb );
603 return delete $self->scopedrels->{$first}->{$second};
606 sub _find_applicable {
607 my( $self, $rel ) = @_;
608 my $c = $self->collation;
609 my $reltype = $self->type( $rel->type );
611 my @identical_readings;
612 @identical_readings = grep { $reltype->regularize( $_ ) eq $rel->reading_a }
614 foreach my $ir ( @identical_readings ) {
616 @itarget = grep { $reltype->regularize( $_ ) eq $rel->reading_b }
617 $c->readings_at_rank( $ir->rank );
619 # Warn if there is more than one hit with no closer link between them.
620 my $itmain = shift @itarget;
623 my $bindlevel = $reltype->bindlevel;
624 map { $all_targets{$_} = 1 } @itarget;
625 map { delete $all_targets{$_} }
626 $self->related_readings( $itmain, sub {
627 $self->type( $_[0]->type )->bindlevel < $bindlevel } );
628 warn "More than one unrelated reading with text " . $itmain->text
629 . " at rank " . $ir->rank . "!" if keys %all_targets;
631 push( @vectors, [ $ir->id, $itmain->id ] );
637 =head2 del_relationship( $source, $target )
639 Removes the relationship between the given readings. If the relationship is
640 non-local, removes the relationship everywhere in the graph.
644 sub del_relationship {
645 my( $self, $source, $target ) = @_;
646 my $rel = $self->get_relationship( $source, $target );
647 return () unless $rel; # Nothing to delete; return an empty set.
648 my $reltype = $self->type( $rel->type );
649 my $colo = $rel->colocated;
650 my @vectors = ( [ $source, $target ] );
651 $self->_remove_relationship( $colo, $source, $target );
652 if( $rel->nonlocal ) {
653 # Remove the relationship wherever it occurs.
654 my @rel_edges = grep { $self->get_relationship( @$_ ) eq $rel }
655 $self->relationships;
656 foreach my $re ( @rel_edges ) {
657 $self->_remove_relationship( $colo, @$re );
658 push( @vectors, $re );
660 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
665 sub _remove_relationship {
666 my( $self, $equiv, @vector ) = @_;
667 $self->graph->delete_edge( @vector );
668 $self->_break_equivalence( @vector ) if $equiv;
671 =head2 relationship_valid( $source, $target, $type )
673 Checks whether a relationship of type $type may exist between the readings given
674 in $source and $target. Returns a tuple of ( status, message ) where status is
675 a yes/no boolean and, if the answer is no, message gives the reason why.
679 sub relationship_valid {
680 my( $self, $source, $target, $rel, $mustdrop ) = @_;
681 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
682 my $c = $self->collation;
683 my $reltype = $self->type( $rel );
684 ## Assume validity is okay if we are initializing from scratch.
685 return ( 1, "initializing" ) unless $c->tradition->_initialized;
686 ## TODO Move this block to relationship type definition when we can save
688 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
689 # Check that the two readings do (for a repetition) or do not (for
690 # a transposition) appear in the same witness.
691 # TODO this might be called before witness paths are set...
693 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
694 foreach my $w ( $c->reading_witnesses( $target ) ) {
695 if( $seen_wits{$w} ) {
696 return ( 0, "Readings both occur in witness $w" )
697 if $rel eq 'transposition';
698 return ( 1, "ok" ) if $rel eq 'repetition';
701 return ( 0, "Readings occur only in distinct witnesses" )
702 if $rel eq 'repetition';
704 if ( $reltype->is_colocation ) {
705 # Check that linking the source and target in a relationship won't lead
706 # to a path loop for any witness.
707 # First, drop/stash any collations that might interfere
708 my $sourceobj = $c->reading( $source );
709 my $targetobj = $c->reading( $target );
710 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
711 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
712 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
713 push( @$mustdrop, $self->_drop_weak( $source ) );
714 push( @$mustdrop, $self->_drop_weak( $target ) );
715 if( $c->end->has_rank ) {
716 foreach my $rk ( $sourcerank .. $targetrank ) {
717 map { push( @$mustdrop, $self->_drop_weak( $_->id ) ) }
718 $c->readings_at_rank( $rk );
722 unless( $self->test_equivalence( $source, $target ) ) {
723 $self->_restore_weak( @$mustdrop );
724 return( 0, "Relationship would create witness loop" );
728 # We also need to check that the readings are not in the same place.
729 # That is, proposing to equate them should cause a witness loop.
730 if( $self->test_equivalence( $source, $target ) ) {
731 return ( 0, "Readings appear to be colocated" );
739 my( $self, $reading ) = @_;
741 foreach my $n ( $self->graph->neighbors( $reading ) ) {
742 my $nrel = $self->get_relationship( $reading, $n );
743 if( $self->type( $nrel->type )->is_weak ) {
744 push( @dropped, [ $reading, $n, $nrel->type ] );
745 $self->del_relationship( $reading, $n );
746 #print STDERR "Dropped weak relationship $reading -> $n\n";
753 my( $self, @vectors ) = @_;
754 foreach my $v ( @vectors ) {
757 $self->add_relationship( @$v, { 'type' => $type } );
758 #print STDERR "Restored weak relationship @$v\n";
759 }; # if it fails we don't care
763 =head2 filter_collations()
765 Utility function. Removes any redundant weak relationships from the graph.
766 A weak relationship is redundant if the readings in question would occupy
767 the same rank regardless of the existence of the relationship.
772 sub filter_collations {
774 my $c = $self->collation;
775 foreach my $r ( 1 .. $c->end->rank - 1 ) {
778 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
779 next if $rdg->is_meta;
781 foreach my $pred ( $rdg->predecessors ) {
782 if( $pred->rank == $r - 1 ) {
784 $anchor = $rdg unless( $anchor );
788 push( @need_weak, $rdg ) unless $ip;
789 $self->_drop_weak( $rdg->id );
792 # TODO FIX HACK of adding explicit collation type
793 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
794 unless $c->get_relationship( $anchor, $_ ) } @need_weak
795 : print STDERR "No anchor found at $r\n";
799 =head2 related_readings( $reading, $filter )
801 Returns a list of readings that are connected via relationship links to $reading.
802 If $filter is set to a subroutine ref, returns only those related readings where
803 $filter( $relationship ) returns a true value.
807 sub related_readings {
808 my( $self, $reading, $filter ) = @_;
810 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
811 $reading = $reading->id;
817 if( $filter eq 'colocated' ) {
818 $filter = sub { $_[0]->colocated };
819 } elsif( !ref( $filter ) ) {
821 $filter = sub { $_[0]->type eq $type };
823 my %found = ( $reading => 1 );
824 my $check = [ $reading ];
828 foreach my $r ( @$check ) {
829 foreach my $nr ( $self->graph->neighbors( $r ) ) {
830 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
831 push( @$more, $nr ) unless exists $found{$nr};
838 delete $found{$reading};
839 @answer = keys %found;
841 @answer = $self->graph->all_reachable( $reading );
843 if( $return_object ) {
844 my $c = $self->collation;
845 return map { $c->reading( $_ ) } @answer;
851 =head2 merge_readings( $kept, $deleted );
853 Makes a best-effort merge of the relationship links between the given readings, and
854 stops tracking the to-be-deleted reading.
859 my( $self, $kept, $deleted, $combined ) = @_;
860 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
861 # Get the pair of kept / rel
862 my @vector = ( $kept );
863 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
864 next if $vector[0] eq $vector[1]; # Don't add a self loop
866 # If kept changes its text, drop the relationship.
869 # If kept / rel already has a relationship, just keep the old
870 my $rel = $self->get_relationship( @vector );
873 # Otherwise, adopt the relationship that would be deleted.
874 $rel = $self->get_relationship( @$edge );
875 $self->_set_relationship( $rel, @vector );
877 $self->_make_equivalence( $deleted, $kept );
880 ### Equivalence logic
882 sub _remove_equivalence_node {
883 my( $self, $node ) = @_;
884 my $group = $self->equivalence( $node );
885 my $nodelist = $self->eqreadings( $group );
886 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
887 $self->equivalence_graph->delete_vertex( $group );
888 $self->remove_eqreadings( $group );
889 $self->remove_equivalence( $group );
890 } elsif( @$nodelist == 1 ) {
891 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
892 " in group that should have only $node" );
894 my @newlist = grep { $_ ne $node } @$nodelist;
895 $self->set_eqreadings( $group, \@newlist );
896 $self->remove_equivalence( $node );
900 =head2 add_equivalence_edge
902 Add an edge in the equivalence graph corresponding to $source -> $target in the
903 collation. Should only be called by Collation.
907 sub add_equivalence_edge {
908 my( $self, $source, $target ) = @_;
909 my $seq = $self->equivalence( $source );
910 my $teq = $self->equivalence( $target );
911 $self->equivalence_graph->add_edge( $seq, $teq );
914 =head2 delete_equivalence_edge
916 Remove an edge in the equivalence graph corresponding to $source -> $target in the
917 collation. Should only be called by Collation.
921 sub delete_equivalence_edge {
922 my( $self, $source, $target ) = @_;
923 my $seq = $self->equivalence( $source );
924 my $teq = $self->equivalence( $target );
925 $self->equivalence_graph->delete_edge( $seq, $teq );
928 sub _is_disconnected {
930 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
931 || scalar $self->equivalence_graph->successorless_vertices > 1 );
934 # Equate two readings in the equivalence graph
935 sub _make_equivalence {
936 my( $self, $source, $target ) = @_;
937 # Get the source equivalent readings
938 my $seq = $self->equivalence( $source );
939 my $teq = $self->equivalence( $target );
940 # Nothing to do if they are already equivalent...
941 return if $seq eq $teq;
942 my $sourcepool = $self->eqreadings( $seq );
943 # and add them to the target readings.
944 push( @{$self->eqreadings( $teq )}, @$sourcepool );
945 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
946 # Then merge the nodes in the equivalence graph.
947 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
948 $self->equivalence_graph->add_edge( $pred, $teq );
950 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
951 $self->equivalence_graph->add_edge( $teq, $succ );
953 $self->equivalence_graph->delete_vertex( $seq );
954 # TODO enable this after collation parsing is done
955 throw( "Graph got disconnected making $source / $target equivalence" )
956 if $self->_is_disconnected && $self->collation->tradition->_initialized;
959 =head2 test_equivalence
961 Test whether, if two readings were equated with a 'colocated' relationship,
962 the graph would still be valid.
966 sub test_equivalence {
967 my( $self, $source, $target ) = @_;
968 # Try merging the nodes in the equivalence graph; return a true value if
969 # no cycle is introduced thereby. Restore the original graph first.
971 # Keep track of edges we add
974 # Get the reading equivalents
975 my $seq = $self->equivalence( $source );
976 my $teq = $self->equivalence( $target );
977 # Maybe this is easy?
978 return 1 if $seq eq $teq;
980 # Save the first graph
981 my $checkstr = $self->equivalence_graph->stringify();
982 # Add and save relevant edges
983 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
984 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
985 $added_pred{$pred} = 0;
987 $self->equivalence_graph->add_edge( $pred, $teq );
988 $added_pred{$pred} = 1;
991 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
992 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
993 $added_succ{$succ} = 0;
995 $self->equivalence_graph->add_edge( $teq, $succ );
996 $added_succ{$succ} = 1;
999 # Delete source equivalent and test
1000 $self->equivalence_graph->delete_vertex( $seq );
1001 my $ret = !$self->equivalence_graph->has_a_cycle;
1003 # Restore what we changed
1004 $self->equivalence_graph->add_vertex( $seq );
1005 foreach my $pred ( keys %added_pred ) {
1006 $self->equivalence_graph->add_edge( $pred, $seq );
1007 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
1009 foreach my $succ ( keys %added_succ ) {
1010 $self->equivalence_graph->add_edge( $seq, $succ );
1011 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
1013 unless( $self->equivalence_graph->eq( $checkstr ) ) {
1014 throw( "GRAPH CHANGED after testing" );
1020 # Unmake an equivalence link between two readings. Should only be called internally.
1021 sub _break_equivalence {
1022 my( $self, $source, $target ) = @_;
1024 # This is the hard one. Need to reconstruct the equivalence groups without
1027 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
1028 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
1029 # If these groups intersect, they are still connected; do nothing.
1030 foreach my $el ( keys %tng ) {
1031 return if( exists $sng{$el} );
1033 # If they don't intersect, then we split the nodes in the graph and in
1034 # the hashes. First figure out which group has which name
1035 my $oldgroup = $self->equivalence( $source ); # same as $target
1036 my $keepsource = $sng{$oldgroup};
1037 my $newgroup = $keepsource ? $target : $source;
1038 my( $oldmembers, $newmembers );
1040 $oldmembers = [ keys %sng ];
1041 $newmembers = [ keys %tng ];
1043 $oldmembers = [ keys %tng ];
1044 $newmembers = [ keys %sng ];
1047 # First alter the old group in the hash
1048 $self->set_eqreadings( $oldgroup, $oldmembers );
1049 foreach my $el ( @$oldmembers ) {
1050 $self->set_equivalence( $el, $oldgroup );
1053 # then add the new group back to the hash with its new key
1054 $self->set_eqreadings( $newgroup, $newmembers );
1055 foreach my $el ( @$newmembers ) {
1056 $self->set_equivalence( $el, $newgroup );
1059 # Now add the new group back to the equivalence graph
1060 $self->equivalence_graph->add_vertex( $newgroup );
1061 # ...add the appropriate edges to the source group vertext
1062 my $c = $self->collation;
1063 foreach my $rdg ( @$newmembers ) {
1064 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
1065 next unless $self->equivalence( $rp );
1066 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
1068 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
1069 next unless $self->equivalence( $rs );
1070 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
1074 # ...and figure out which edges on the old group vertex to delete.
1075 my( %old_pred, %old_succ );
1076 foreach my $rdg ( @$oldmembers ) {
1077 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
1078 next unless $self->equivalence( $rp );
1079 $old_pred{$self->equivalence( $rp )} = 1;
1081 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
1082 next unless $self->equivalence( $rs );
1083 $old_succ{$self->equivalence( $rs )} = 1;
1086 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
1087 unless( $old_pred{$p} ) {
1088 $self->equivalence_graph->delete_edge( $p, $oldgroup );
1091 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
1092 unless( $old_succ{$s} ) {
1093 $self->equivalence_graph->delete_edge( $oldgroup, $s );
1096 # TODO enable this after collation parsing is done
1097 throw( "Graph got disconnected breaking $source / $target equivalence" )
1098 if $self->_is_disconnected && $self->collation->tradition->_initialized;
1101 sub _find_equiv_without {
1102 my( $self, $first, $second ) = @_;
1103 my %found = ( $first => 1 );
1104 my $check = [ $first ];
1108 foreach my $r ( @$check ) {
1109 foreach my $nr ( $self->graph->neighbors( $r ) ) {
1110 next if $r eq $second;
1111 if( $self->get_relationship( $r, $nr )->colocated ) {
1112 push( @$more, $nr ) unless exists $found{$nr};
1122 =head2 rebuild_equivalence
1124 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1125 adds all readings and edges, then makes an equivalence for all relationships.
1129 sub rebuild_equivalence {
1131 my $newgraph = Graph->new();
1132 # Set this as the new equivalence graph
1133 $self->_reset_equivalence( $newgraph );
1134 # Clear out the data hashes
1135 $self->_clear_equivalence;
1136 $self->_clear_eqreadings;
1138 $self->collation->tradition->_init_done(0);
1140 foreach my $r ( $self->collation->readings ) {
1142 $newgraph->add_vertex( $rid );
1143 $self->set_equivalence( $rid, $rid );
1144 $self->set_eqreadings( $rid, [ $rid ] );
1148 foreach my $e ( $self->collation->paths ) {
1149 $self->add_equivalence_edge( @$e );
1152 # Now equate the colocated readings. This does no testing;
1153 # it assumes that all preexisting relationships are valid.
1154 foreach my $rel ( $self->relationships ) {
1155 my $relobj = $self->get_relationship( $rel );
1156 next unless $relobj && $relobj->colocated;
1157 $self->_make_equivalence( @$rel );
1159 $self->collation->tradition->_init_done(1);
1162 =head2 equivalence_ranks
1164 Rank all vertices in the equivalence graph, and return a hash reference with
1165 vertex => rank mapping.
1169 sub equivalence_ranks {
1171 my $eqstart = $self->equivalence( $self->collation->start );
1172 my $eqranks = { $eqstart => 0 };
1173 my $rankeqs = { 0 => [ $eqstart ] };
1174 my @curr_origin = ( $eqstart );
1175 # A little iterative function.
1176 while( @curr_origin ) {
1177 @curr_origin = $self->_assign_rank( $eqranks, $rankeqs, @curr_origin );
1179 return( $eqranks, $rankeqs );
1183 my( $self, $node_ranks, $rank_nodes, @current_nodes ) = @_;
1184 my $graph = $self->equivalence_graph;
1185 # Look at each of the children of @current_nodes. If all the child's
1186 # parents have a rank, assign it the highest rank + 1 and add it to
1187 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1188 # parent gets a rank.
1190 foreach my $c ( @current_nodes ) {
1191 warn "Current reading $c has no rank!"
1192 unless exists $node_ranks->{$c};
1193 foreach my $child ( $graph->successors( $c ) ) {
1194 next if exists $node_ranks->{$child};
1195 my $highest_rank = -1;
1197 foreach my $parent ( $graph->predecessors( $child ) ) {
1198 if( exists $node_ranks->{$parent} ) {
1199 $highest_rank = $node_ranks->{$parent}
1200 if $highest_rank <= $node_ranks->{$parent};
1207 my $c_rank = $highest_rank + 1;
1208 # print STDERR "Assigning rank $c_rank to node $child \n";
1209 $node_ranks->{$child} = $c_rank if $node_ranks;
1210 push( @{$rank_nodes->{$c_rank}}, $child ) if $rank_nodes;
1211 push( @next_nodes, $child );
1220 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1222 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1223 $rgraph->setAttribute( 'edgedefault', 'directed' );
1224 $rgraph->setAttribute( 'id', 'relationships', );
1225 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1226 $rgraph->setAttribute( 'parse.edges', 0 );
1227 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1228 $rgraph->setAttribute( 'parse.nodes', 0 );
1229 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1231 # Add the vertices according to their XML IDs
1232 my %rdg_lookup = ( reverse %$node_hash );
1233 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1234 my @nlist = sort keys( %rdg_lookup );
1235 foreach my $n ( @nlist ) {
1236 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1237 $n_el->setAttribute( 'id', $n );
1238 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1240 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1242 # Add the relationship edges, with their object information
1244 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1245 # Add an edge and fill in its relationship info.
1246 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1247 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1248 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1249 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1250 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1252 my $rel_obj = $self->get_relationship( @$e );
1253 foreach my $key ( keys %$edge_keys ) {
1254 my $value = $rel_obj->$key;
1255 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1259 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1267 return $tmp_a <=> $tmp_b;
1270 sub _add_graphml_data {
1271 my( $el, $key, $value ) = @_;
1272 return unless defined $value;
1273 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1274 $data_el->setAttribute( 'key', $key );
1275 $data_el->appendText( $value );
1279 my( $self, $from, $to ) = @_;
1280 open( DUMP, ">debug.svg" ) or die "Could not open debug.svg";
1281 binmode DUMP, ':utf8';
1282 print DUMP $self->collation->as_svg({ from => $from, to => $to, nocalc => 1 });
1287 Text::Tradition::Error->throw(
1288 'ident' => 'Relationship error',
1294 __PACKAGE__->meta->make_immutable;