1 package Text::Tradition::Collation::RelationshipStore;
6 use Text::Tradition::Error;
7 use Text::Tradition::Collation::Relationship;
8 use Text::Tradition::Collation::RelationshipType;
15 Text::Tradition::Collation::RelationshipStore - Keeps track of the relationships
16 between readings in a given collation
20 Text::Tradition is a library for representation and analysis of collated
21 texts, particularly medieval ones. The RelationshipStore is an internal object
22 of the collation, to keep track of the defined relationships (both specific and
23 general) between readings.
30 use_ok( 'Text::Tradition::Collation::RelationshipStore' );
32 # Add some relationships, and delete them
34 my $cxfile = 't/data/Collatex-16.xml';
35 my $t = Text::Tradition->new(
37 'input' => 'CollateX',
40 my $c = $t->collation;
42 my @v1 = $c->add_relationship( 'n21', 'n22', { 'type' => 'lexical' } );
43 is( scalar @v1, 1, "Added a single relationship" );
44 is( $v1[0]->[0], 'n21', "Got correct node 1" );
45 is( $v1[0]->[1], 'n22', "Got correct node 2" );
46 my @v2 = $c->add_relationship( 'n24', 'n23',
47 { 'type' => 'spelling', 'scope' => 'global' } );
48 is( scalar @v2, 2, "Added a global relationship with two instances" );
49 @v1 = $c->del_relationship( 'n22', 'n21' );
50 is( scalar @v1, 1, "Deleted first relationship" );
51 @v2 = $c->del_relationship( 'n12', 'n13' );
52 is( scalar @v2, 2, "Deleted second global relationship" );
53 my @v3 = $c->del_relationship( 'n1', 'n2' );
54 is( scalar @v3, 0, "Nothing deleted on non-existent relationship" );
60 =head2 new( collation => $collation );
62 Creates a new relationship store for the given collation.
68 isa => 'Text::Tradition::Collation',
75 Registry of possible relationship types. See RelationshipType for more info.
79 has 'relationship_types' => (
92 isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
93 default => sub { {} },
99 default => sub { Graph->new( undirected => 1 ) },
101 relationships => 'edges',
102 add_reading => 'add_vertex',
103 delete_reading => 'delete_vertex',
107 =head2 equivalence_graph()
109 Returns an equivalence graph of the collation, in which all readings
110 related via a 'colocated' relationship are transformed into a single
111 vertex. Can be used to determine the validity of a new relationship.
115 has 'equivalence_graph' => (
118 default => sub { Graph->new() },
119 writer => '_reset_equivalence',
122 has '_node_equivalences' => (
126 equivalence => 'get',
127 set_equivalence => 'set',
128 remove_equivalence => 'delete',
129 _clear_equivalence => 'clear',
133 has '_equivalence_readings' => (
138 set_eqreadings => 'set',
139 remove_eqreadings => 'delete',
140 _clear_eqreadings => 'clear',
144 ## Build function - here we have our default set of relationship types.
149 my $regularize = sub {
150 return $_[0]->can('regularize') ? $_[0]->regularize : $_[0]->text; };
152 my @DEFAULT_TYPES = (
153 { name => 'collated', bindlevel => 50, is_weak => 1, is_transitive => 0, is_generalizable => 0 },
154 { name => 'orthographic', bindlevel => 0 },
155 { name => 'spelling', bindlevel => 1, record_sub => $regularize },
156 { name => 'punctuation', bindlevel => 2, record_sub => $regularize },
157 { name => 'grammatical', bindlevel => 2, record_sub => $regularize },
158 { name => 'lexical', bindlevel => 2, record_sub => $regularize },
159 { name => 'transposition', bindlevel => 50, is_colocation => 0, is_transitive => 0 },
160 { name => 'repetition', bindlevel => 50, is_colocation => 0, is_transitive => 0 }
163 foreach my $type ( @DEFAULT_TYPES ) {
164 $self->add_type( $type );
171 around add_type => sub {
175 if( @_ == 1 && $_[0]->$_isa( 'Text::Tradition::Collation::RelationshipType' ) ) {
178 my %args = @_ == 1 ? %{$_[0]} : @_;
179 $new_type = Text::Tradition::Collation::RelationshipType->new( %args );
181 $self->$orig( $new_type->name => $new_type );
185 around add_reading => sub {
189 $self->equivalence_graph->add_vertex( @_ );
190 $self->set_equivalence( $_[0], $_[0] );
191 $self->set_eqreadings( $_[0], [ $_[0] ] );
195 around delete_reading => sub {
199 $self->_remove_equivalence_node( @_ );
203 =head2 get_relationship
205 Return the relationship object, if any, that exists between two readings.
209 sub get_relationship {
212 if( @_ == 1 && ref( $_[0] ) eq 'ARRAY' ) {
213 # Dereference the edge arrayref that was passed.
220 if( $self->graph->has_edge_attribute( @vector, 'object' ) ) {
221 $relationship = $self->graph->get_edge_attribute( @vector, 'object' );
223 return $relationship;
226 sub _set_relationship {
227 my( $self, $relationship, @vector ) = @_;
228 $self->graph->add_edge( @vector );
229 $self->graph->set_edge_attribute( @vector, 'object', $relationship );
230 $self->_make_equivalence( @vector ) if $relationship->colocated;
235 Create a new relationship with the given options and return it.
236 Warn and return undef if the relationship cannot be created.
241 my( $self, $options ) = @_;
242 # Check to see if a relationship exists between the two given readings
243 my $source = delete $options->{'orig_a'};
244 my $target = delete $options->{'orig_b'};
245 my $rel = $self->get_relationship( $source, $target );
247 if( $self->type( $rel->type )->is_weak ) {
248 # Always replace a weak relationship with a more descriptive
250 $self->del_relationship( $source, $target );
251 } elsif( $rel->type ne $options->{'type'} ) {
252 throw( "Another relationship of type " . $rel->type
253 . " already exists between $source and $target" );
259 $rel = Text::Tradition::Collation::Relationship->new( $options );
260 my $reltype = $self->type( $rel->type );
261 # Validate the options given against the relationship type wanted
262 throw( "Cannot set nonlocal scope on relationship of type " . $reltype->name )
263 if $rel->nonlocal && !$reltype->is_generalizable;
265 $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
269 =head2 add_scoped_relationship( $rel )
271 Keep track of relationships defined between specific readings that are scoped
272 non-locally. Key on whichever reading occurs first alphabetically.
276 sub add_scoped_relationship {
277 my( $self, $rel ) = @_;
278 my $rdga = $rel->reading_a;
279 my $rdgb = $rel->reading_b;
280 my $r = $self->scoped_relationship( $rdga, $rdgb );
282 warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
283 $r->type, $rdga, $rdgb );
286 my( $first, $second ) = sort ( $rdga, $rdgb );
287 $self->scopedrels->{$first}->{$second} = $rel;
290 =head2 scoped_relationship( $reading_a, $reading_b )
292 Returns the general (document-level or global) relationship that has been defined
293 between the two reading strings. Returns undef if there is no general relationship.
297 sub scoped_relationship {
298 my( $self, $rdga, $rdgb ) = @_;
299 my( $first, $second ) = sort( $rdga, $rdgb );
300 if( exists $self->scopedrels->{$first}->{$second} ) {
301 return $self->scopedrels->{$first}->{$second};
306 =head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
308 Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
309 for the possible options) between the readings given in $source and $target. Sets
310 up a scoped relationship between $sourcetext and $targettext if the relationship is
313 Returns a status boolean and a list of all reading pairs connected by the call to
324 $t1 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
325 } [qr/Cannot set relationship on a meta reading/],
326 "Got expected relationship drop warning on parse";
328 # Test 1.1: try to equate nodes that are prevented with an intermediate collation
329 ok( $t1, "Parsed test fragment file" );
330 my $c1 = $t1->collation;
331 my $trel = $c1->get_relationship( 'r9.2', 'r9.3' );
332 is( ref( $trel ), 'Text::Tradition::Collation::Relationship',
333 "Troublesome relationship exists" );
334 is( $trel->type, 'collated', "Troublesome relationship is a collation" );
336 # Try to make the link we want
338 $c1->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
339 ok( 1, "Added cross-collation relationship as expected" );
340 } catch( Text::Tradition::Error $e ) {
341 ok( 0, "Existing collation blocked equivalence relationship: " . $e->message );
345 $c1->calculate_ranks();
346 ok( 1, "Successfully calculated ranks" );
347 } catch ( Text::Tradition::Error $e ) {
348 ok( 0, "Collation now has a cycle: " . $e->message );
351 # Test 1.2: attempt merge of an identical reading
353 $c1->merge_readings( 'r9.3', 'r11.5' );
354 ok( 1, "Successfully merged reading 'pontifex'" );
355 } catch ( Text::Tradition::Error $e ) {
356 ok( 0, "Merge of mergeable readings failed: $e->message" );
360 # Test 1.3: attempt relationship with a meta reading (should fail)
362 $c1->add_relationship( 'r8.1', 'r9.2', { 'type' => 'collated' } );
363 ok( 0, "Allowed a meta-reading to be used in a relationship" );
364 } catch ( Text::Tradition::Error $e ) {
365 is( $e->message, 'Cannot set relationship on a meta reading',
366 "Relationship link prevented for a meta reading" );
369 # Test 1.4: try to break a relationship near a meta reading
370 $c1->add_relationship( 'r7.6', 'r7.3', { type => 'orthographic' } );
372 $c1->del_relationship( 'r7.6', 'r7.7' );
373 $c1->del_relationship( 'r7.6', 'r7.3' );
374 ok( 1, "Relationship broken with a meta reading as neighbor" );
376 ok( 0, "Relationship deletion failed with a meta reading as neighbor" );
379 # Test 2.1: try to equate nodes that are prevented with a real intermediate
383 $t2 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/legendfrag.xml' );
384 } [qr/Cannot set relationship on a meta reading/],
385 "Got expected relationship drop warning on parse";
386 my $c2 = $t2->collation;
387 $c2->add_relationship( 'r9.2', 'r9.3', { 'type' => 'lexical' } );
388 my $trel2 = $c2->get_relationship( 'r9.2', 'r9.3' );
389 is( ref( $trel2 ), 'Text::Tradition::Collation::Relationship',
390 "Created blocking relationship" );
391 is( $trel2->type, 'lexical', "Blocking relationship is not a collation" );
392 # This time the link ought to fail
394 $c2->add_relationship( 'r8.6', 'r10.3', { 'type' => 'orthographic' } );
395 ok( 0, "Added cross-equivalent bad relationship" );
396 } catch ( Text::Tradition::Error $e ) {
397 like( $e->message, qr/witness loop/,
398 "Existing equivalence blocked crossing relationship" );
402 $c2->calculate_ranks();
403 ok( 1, "Successfully calculated ranks" );
404 } catch ( Text::Tradition::Error $e ) {
405 ok( 0, "Collation now has a cycle: " . $e->message );
408 # Test 3.1: make a straightforward pair of transpositions.
409 my $t3 = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/lf2.xml' );
410 # Test 1: try to equate nodes that are prevented with an intermediate collation
411 my $c3 = $t3->collation;
413 $c3->add_relationship( 'r36.4', 'r38.3', { 'type' => 'transposition' } );
414 ok( 1, "Added straightforward transposition" );
415 } catch ( Text::Tradition::Error $e ) {
416 ok( 0, "Failed to add normal transposition: " . $e->message );
419 $c3->add_relationship( 'r36.3', 'r38.2', { 'type' => 'transposition' } );
420 ok( 1, "Added straightforward transposition complement" );
421 } catch ( Text::Tradition::Error $e ) {
422 ok( 0, "Failed to add normal transposition complement: " . $e->message );
425 # Test 3.2: try to make a transposition that could be a parallel.
427 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
428 ok( 0, "Added bad colocated transposition" );
429 } catch ( Text::Tradition::Error $e ) {
430 like( $e->message, qr/Readings appear to be colocated/,
431 "Prevented bad colocated transposition" );
434 # Test 3.3: make the parallel, and then make the transposition again.
436 $c3->add_relationship( 'r28.3', 'r29.3', { 'type' => 'orthographic' } );
437 ok( 1, "Equated identical readings for transposition" );
438 } catch ( Text::Tradition::Error $e ) {
439 ok( 0, "Failed to equate identical readings: " . $e->message );
442 $c3->add_relationship( 'r28.2', 'r29.2', { 'type' => 'transposition' } );
443 ok( 1, "Added straightforward transposition complement" );
444 } catch ( Text::Tradition::Error $e ) {
445 ok( 0, "Failed to add normal transposition complement: " . $e->message );
452 sub add_relationship {
453 my( $self, $source, $target, $options ) = @_;
454 my $c = $self->collation;
455 my $sourceobj = $c->reading( $source );
456 my $targetobj = $c->reading( $target );
457 throw( "Adding self relationship at $source" ) if $source eq $target;
458 throw( "Cannot set relationship on a meta reading" )
459 if( $sourceobj->is_meta || $targetobj->is_meta );
463 my $droppedcolls = [];
464 if( ref( $options ) eq 'Text::Tradition::Collation::Relationship' ) {
465 $relationship = $options;
466 $reltype = $self->type( $relationship->type );
467 $thispaironly = 1; # If existing rel, set only where asked.
469 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
470 $relationship->type, $droppedcolls );
471 unless( $is_valid ) {
472 throw( "Invalid relationship: $reason" );
475 $reltype = $self->type( $options->{type} );
477 # Try to create the relationship object.
478 my $rdga = $reltype->record_sub->( $sourceobj );
479 my $rdgb = $reltype->record_sub->( $targetobj );
480 $options->{'orig_a'} = $sourceobj;
481 $options->{'orig_b'} = $targetobj;
482 $options->{'reading_a'} = $rdga;
483 $options->{'reading_b'} = $rdgb;
484 if( exists $options->{'scope'} && $options->{'scope'} ne 'local' ) {
485 # Is there a relationship with this a & b already?
486 if( $rdga eq $rdgb ) {
487 # If we have canonified to the same thing for the relationship
488 # type we want, something is wrong.
489 # NOTE we want to allow this at the local level, as a cheap means
490 # of merging readings in the UI, until we get a better means.
491 throw( "Canonifier returns identical form $rdga for this relationship type" );
494 my $otherrel = $self->scoped_relationship( $rdga, $rdgb );
495 if( $otherrel && $otherrel->type eq $options->{type}
496 && $otherrel->scope eq $options->{scope} ) {
497 # warn "Applying existing scoped relationship for $rdga / $rdgb";
498 $relationship = $otherrel;
499 } elsif( $otherrel ) {
500 throw( 'Conflicting scoped relationship '
501 . join( '/', $otherrel->type, $otherrel->scope ) . ' vs. '
502 . join( '/', $options->{type}, $options->{scope} )
503 . " for $rdga / $rdgb at $source / $target" );
506 $relationship = $self->create( $options ) unless $relationship;
507 # ... Will throw on error
509 # See if the relationship is actually valid here
510 my( $is_valid, $reason ) = $self->relationship_valid( $source, $target,
511 $options->{'type'}, $droppedcolls );
512 unless( $is_valid ) {
513 throw( "Invalid relationship: $reason" );
518 # Now set the relationship(s).
520 my $rel = $self->get_relationship( $source, $target );
522 if( $rel && $rel ne $relationship ) {
523 if( $rel->nonlocal ) {
524 throw( "Found conflicting relationship at $source - $target" );
525 } elsif( !$reltype->is_weak ) {
526 # Replace a weak relationship; leave any other sort in place.
527 my $r1ann = $rel->has_annotation ? $rel->annotation : '';
528 my $r2ann = $relationship->has_annotation ? $relationship->annotation : '';
529 unless( $rel->type eq $relationship->type && $r1ann eq $r2ann ) {
530 warn sprintf( "Not overriding local relationship %s with global %s "
531 . "set at %s -> %s (%s -> %s)", $rel->type, $relationship->type,
532 $source, $target, $rel->reading_a, $rel->reading_b );
537 $self->_set_relationship( $relationship, $source, $target ) unless $skip;
538 push( @pairs_set, [ $source, $target ] );
540 # Find all the pairs for which we need to set the relationship.
541 if( $relationship->colocated && $relationship->nonlocal && !$thispaironly ) {
542 push( @pairs_set, $self->add_global_relationship( $relationship ) );
544 # Finally, restore whatever collations we can, and return.
545 $self->_restore_weak( @$droppedcolls );
549 =head2 add_global_relationship( $options, $skipvector )
551 Adds the relationship specified wherever the relevant readings appear together
552 in the graph. Options as in add_relationship above.
556 sub add_global_relationship {
557 my( $self, $relationship ) = @_;
559 my $reltype = $self->type( $relationship->type );
560 throw( "Relationship passed to add_global is not global" )
561 unless $relationship->nonlocal;
562 throw( "Relationship passed to add_global is not a valid global type" )
563 unless $reltype->is_generalizable;
565 # Apply the relationship wherever it is valid
567 foreach my $v ( $self->_find_applicable( $relationship ) ) {
568 my $exists = $self->get_relationship( @$v );
569 my $etype = $exists ? $self->type( $exists->type ) : '';
570 if( $exists && !$etype->is_weak ) {
571 unless( $exists->is_equivalent( $relationship ) ) {
572 throw( "Found conflicting relationship at @$v" );
577 @added = $self->add_relationship( @$v, $relationship );
579 my $reldesc = sprintf( "%s %s -> %s", $relationship->type,
580 $relationship->reading_a, $relationship->reading_b );
581 print STDERR "Global relationship $reldesc not applicable at @$v\n";
583 push( @pairs_set, @added ) if @added;
590 =head2 del_scoped_relationship( $reading_a, $reading_b )
592 Returns the general (document-level or global) relationship that has been defined
593 between the two reading strings. Returns undef if there is no general relationship.
597 sub del_scoped_relationship {
598 my( $self, $rdga, $rdgb ) = @_;
599 my( $first, $second ) = sort( $rdga, $rdgb );
600 return delete $self->scopedrels->{$first}->{$second};
603 sub _find_applicable {
604 my( $self, $rel ) = @_;
605 my $c = $self->collation;
606 my $reltype = $self->type( $rel->type );
608 my @identical_readings;
609 @identical_readings = grep { $reltype->record_sub->( $_ ) eq $rel->reading_a }
611 foreach my $ir ( @identical_readings ) {
613 @itarget = grep { $reltype->record_sub->( $_ ) eq $rel->reading_b }
614 $c->readings_at_rank( $ir->rank );
616 # Warn if there is more than one hit with no closer link between them.
617 my $itmain = shift @itarget;
620 my $bindlevel = $reltype->bindlevel;
621 map { $all_targets{$_} = 1 } @itarget;
622 map { delete $all_targets{$_} }
623 $self->related_readings( $itmain, sub {
624 $self->type( $_[0]->type )->bindlevel < $bindlevel } );
625 warn "More than one unrelated reading with text " . $itmain->text
626 . " at rank " . $ir->rank . "!" if keys %all_targets;
628 push( @vectors, [ $ir->id, $itmain->id ] );
634 =head2 del_relationship( $source, $target )
636 Removes the relationship between the given readings. If the relationship is
637 non-local, removes the relationship everywhere in the graph.
641 sub del_relationship {
642 my( $self, $source, $target ) = @_;
643 my $rel = $self->get_relationship( $source, $target );
644 return () unless $rel; # Nothing to delete; return an empty set.
645 my $reltype = $self->type( $rel->type );
646 my $colo = $rel->colocated;
647 my @vectors = ( [ $source, $target ] );
648 $self->_remove_relationship( $colo, $source, $target );
649 if( $rel->nonlocal ) {
650 # Remove the relationship wherever it occurs.
651 my @rel_edges = grep { $self->get_relationship( @$_ ) eq $rel }
652 $self->relationships;
653 foreach my $re ( @rel_edges ) {
654 $self->_remove_relationship( $colo, @$re );
655 push( @vectors, $re );
657 $self->del_scoped_relationship( $rel->reading_a, $rel->reading_b );
662 sub _remove_relationship {
663 my( $self, $equiv, @vector ) = @_;
664 $self->graph->delete_edge( @vector );
665 $self->_break_equivalence( @vector ) if $equiv;
668 =head2 relationship_valid( $source, $target, $type )
670 Checks whether a relationship of type $type may exist between the readings given
671 in $source and $target. Returns a tuple of ( status, message ) where status is
672 a yes/no boolean and, if the answer is no, message gives the reason why.
676 sub relationship_valid {
677 my( $self, $source, $target, $rel, $mustdrop ) = @_;
678 $mustdrop = [] unless $mustdrop; # in case we were passed nothing
679 my $c = $self->collation;
680 my $reltype = $self->type( $rel );
681 ## Assume validity is okay if we are initializing from scratch.
682 return ( 1, "initializing" ) unless $c->tradition->_initialized;
683 ## TODO Move this block to relationship type definition
684 if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
685 # Check that the two readings do (for a repetition) or do not (for
686 # a transposition) appear in the same witness.
687 # TODO this might be called before witness paths are set...
689 map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
690 foreach my $w ( $c->reading_witnesses( $target ) ) {
691 if( $seen_wits{$w} ) {
692 return ( 0, "Readings both occur in witness $w" )
693 if $rel eq 'transposition';
694 return ( 1, "ok" ) if $rel eq 'repetition';
697 return ( 0, "Readings occur only in distinct witnesses" )
698 if $rel eq 'repetition';
700 if ( $reltype->is_colocation ) {
701 # Check that linking the source and target in a relationship won't lead
702 # to a path loop for any witness.
703 # First, drop/stash any collations that might interfere
704 my $sourceobj = $c->reading( $source );
705 my $targetobj = $c->reading( $target );
706 my $sourcerank = $sourceobj->has_rank ? $sourceobj->rank : -1;
707 my $targetrank = $targetobj->has_rank ? $targetobj->rank : -1;
708 unless( $rel eq 'collated' || $sourcerank == $targetrank ) {
709 push( @$mustdrop, $self->_drop_weak( $source ) );
710 push( @$mustdrop, $self->_drop_weak( $target ) );
711 if( $c->end->has_rank ) {
712 foreach my $rk ( $sourcerank .. $targetrank ) {
713 map { push( @$mustdrop, $self->_drop_weak( $_->id ) ) }
714 $c->readings_at_rank( $rk );
718 unless( $self->test_equivalence( $source, $target ) ) {
719 $self->_restore_weak( @$mustdrop );
720 return( 0, "Relationship would create witness loop" );
724 # We also need to check that the readings are not in the same place.
725 # That is, proposing to equate them should cause a witness loop.
726 if( $self->test_equivalence( $source, $target ) ) {
727 return ( 0, "Readings appear to be colocated" );
735 my( $self, $reading ) = @_;
737 foreach my $n ( $self->graph->neighbors( $reading ) ) {
738 my $nrel = $self->get_relationship( $reading, $n );
739 if( $self->type( $nrel->type )->is_weak ) {
740 push( @dropped, [ $reading, $n, $nrel->type ] );
741 $self->del_relationship( $reading, $n );
742 #print STDERR "Dropped weak relationship $reading -> $n\n";
749 my( $self, @vectors ) = @_;
750 foreach my $v ( @vectors ) {
753 $self->add_relationship( @$v, { 'type' => $type } );
754 #print STDERR "Restored weak relationship @$v\n";
755 }; # if it fails we don't care
759 =head2 filter_collations()
761 Utility function. Removes any redundant weak relationships from the graph.
762 A weak relationship is redundant if the readings in question would occupy
763 the same rank regardless of the existence of the relationship.
768 sub filter_collations {
770 my $c = $self->collation;
771 foreach my $r ( 1 .. $c->end->rank - 1 ) {
774 foreach my $rdg ( $c->readings_at_rank( $r ) ) {
775 next if $rdg->is_meta;
777 foreach my $pred ( $rdg->predecessors ) {
778 if( $pred->rank == $r - 1 ) {
780 $anchor = $rdg unless( $anchor );
784 push( @need_weak, $rdg ) unless $ip;
785 $self->_drop_weak( $rdg->id );
788 # TODO FIX HACK of adding explicit collation type
789 ? map { $c->add_relationship( $anchor, $_, { 'type' => 'collated' } )
790 unless $c->get_relationship( $anchor, $_ ) } @need_weak
791 : print STDERR "No anchor found at $r\n";
795 =head2 related_readings( $reading, $filter )
797 Returns a list of readings that are connected via relationship links to $reading.
798 If $filter is set to a subroutine ref, returns only those related readings where
799 $filter( $relationship ) returns a true value.
803 sub related_readings {
804 my( $self, $reading, $filter ) = @_;
806 if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
807 $reading = $reading->id;
813 if( $filter eq 'colocated' ) {
814 $filter = sub { $_[0]->colocated };
815 } elsif( !ref( $filter ) ) {
817 $filter = sub { $_[0]->type eq $type };
819 my %found = ( $reading => 1 );
820 my $check = [ $reading ];
824 foreach my $r ( @$check ) {
825 foreach my $nr ( $self->graph->neighbors( $r ) ) {
826 if( &$filter( $self->get_relationship( $r, $nr ) ) ) {
827 push( @$more, $nr ) unless exists $found{$nr};
834 delete $found{$reading};
835 @answer = keys %found;
837 @answer = $self->graph->all_reachable( $reading );
839 if( $return_object ) {
840 my $c = $self->collation;
841 return map { $c->reading( $_ ) } @answer;
847 =head2 merge_readings( $kept, $deleted );
849 Makes a best-effort merge of the relationship links between the given readings, and
850 stops tracking the to-be-deleted reading.
855 my( $self, $kept, $deleted, $combined ) = @_;
856 foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
857 # Get the pair of kept / rel
858 my @vector = ( $kept );
859 push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
860 next if $vector[0] eq $vector[1]; # Don't add a self loop
862 # If kept changes its text, drop the relationship.
865 # If kept / rel already has a relationship, just keep the old
866 my $rel = $self->get_relationship( @vector );
869 # Otherwise, adopt the relationship that would be deleted.
870 $rel = $self->get_relationship( @$edge );
871 $self->_set_relationship( $rel, @vector );
873 $self->_make_equivalence( $deleted, $kept );
876 ### Equivalence logic
878 sub _remove_equivalence_node {
879 my( $self, $node ) = @_;
880 my $group = $self->equivalence( $node );
881 my $nodelist = $self->eqreadings( $group );
882 if( @$nodelist == 1 && $nodelist->[0] eq $node ) {
883 $self->equivalence_graph->delete_vertex( $group );
884 $self->remove_eqreadings( $group );
885 $self->remove_equivalence( $group );
886 } elsif( @$nodelist == 1 ) {
887 throw( "DATA INCONSISTENCY in equivalence graph: " . $nodelist->[0] .
888 " in group that should have only $node" );
890 my @newlist = grep { $_ ne $node } @$nodelist;
891 $self->set_eqreadings( $group, \@newlist );
892 $self->remove_equivalence( $node );
896 =head2 add_equivalence_edge
898 Add an edge in the equivalence graph corresponding to $source -> $target in the
899 collation. Should only be called by Collation.
903 sub add_equivalence_edge {
904 my( $self, $source, $target ) = @_;
905 my $seq = $self->equivalence( $source );
906 my $teq = $self->equivalence( $target );
907 $self->equivalence_graph->add_edge( $seq, $teq );
910 =head2 delete_equivalence_edge
912 Remove an edge in the equivalence graph corresponding to $source -> $target in the
913 collation. Should only be called by Collation.
917 sub delete_equivalence_edge {
918 my( $self, $source, $target ) = @_;
919 my $seq = $self->equivalence( $source );
920 my $teq = $self->equivalence( $target );
921 $self->equivalence_graph->delete_edge( $seq, $teq );
924 sub _is_disconnected {
926 return( scalar $self->equivalence_graph->predecessorless_vertices > 1
927 || scalar $self->equivalence_graph->successorless_vertices > 1 );
930 # Equate two readings in the equivalence graph
931 sub _make_equivalence {
932 my( $self, $source, $target ) = @_;
933 # Get the source equivalent readings
934 my $seq = $self->equivalence( $source );
935 my $teq = $self->equivalence( $target );
936 # Nothing to do if they are already equivalent...
937 return if $seq eq $teq;
938 my $sourcepool = $self->eqreadings( $seq );
939 # and add them to the target readings.
940 push( @{$self->eqreadings( $teq )}, @$sourcepool );
941 map { $self->set_equivalence( $_, $teq ) } @$sourcepool;
942 # Then merge the nodes in the equivalence graph.
943 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
944 $self->equivalence_graph->add_edge( $pred, $teq );
946 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
947 $self->equivalence_graph->add_edge( $teq, $succ );
949 $self->equivalence_graph->delete_vertex( $seq );
950 # TODO enable this after collation parsing is done
951 throw( "Graph got disconnected making $source / $target equivalence" )
952 if $self->_is_disconnected && $self->collation->tradition->_initialized;
955 =head2 test_equivalence
957 Test whether, if two readings were equated with a 'colocated' relationship,
958 the graph would still be valid.
962 sub test_equivalence {
963 my( $self, $source, $target ) = @_;
964 # Try merging the nodes in the equivalence graph; return a true value if
965 # no cycle is introduced thereby. Restore the original graph first.
967 # Keep track of edges we add
970 # Get the reading equivalents
971 my $seq = $self->equivalence( $source );
972 my $teq = $self->equivalence( $target );
973 # Maybe this is easy?
974 return 1 if $seq eq $teq;
976 # Save the first graph
977 my $checkstr = $self->equivalence_graph->stringify();
978 # Add and save relevant edges
979 foreach my $pred ( $self->equivalence_graph->predecessors( $seq ) ) {
980 if( $self->equivalence_graph->has_edge( $pred, $teq ) ) {
981 $added_pred{$pred} = 0;
983 $self->equivalence_graph->add_edge( $pred, $teq );
984 $added_pred{$pred} = 1;
987 foreach my $succ ( $self->equivalence_graph->successors( $seq ) ) {
988 if( $self->equivalence_graph->has_edge( $teq, $succ ) ) {
989 $added_succ{$succ} = 0;
991 $self->equivalence_graph->add_edge( $teq, $succ );
992 $added_succ{$succ} = 1;
995 # Delete source equivalent and test
996 $self->equivalence_graph->delete_vertex( $seq );
997 my $ret = !$self->equivalence_graph->has_a_cycle;
999 # Restore what we changed
1000 $self->equivalence_graph->add_vertex( $seq );
1001 foreach my $pred ( keys %added_pred ) {
1002 $self->equivalence_graph->add_edge( $pred, $seq );
1003 $self->equivalence_graph->delete_edge( $pred, $teq ) if $added_pred{$pred};
1005 foreach my $succ ( keys %added_succ ) {
1006 $self->equivalence_graph->add_edge( $seq, $succ );
1007 $self->equivalence_graph->delete_edge( $teq, $succ ) if $added_succ{$succ};
1009 unless( $self->equivalence_graph->eq( $checkstr ) ) {
1010 warn "GRAPH CHANGED after testing";
1016 # Unmake an equivalence link between two readings. Should only be called internally.
1017 sub _break_equivalence {
1018 my( $self, $source, $target ) = @_;
1020 # This is the hard one. Need to reconstruct the equivalence groups without
1023 map { $sng{$_} = 1 } $self->_find_equiv_without( $source, $target );
1024 map { $tng{$_} = 1 } $self->_find_equiv_without( $target, $source );
1025 # If these groups intersect, they are still connected; do nothing.
1026 foreach my $el ( keys %tng ) {
1027 return if( exists $sng{$el} );
1029 # If they don't intersect, then we split the nodes in the graph and in
1030 # the hashes. First figure out which group has which name
1031 my $oldgroup = $self->equivalence( $source ); # same as $target
1032 my $keepsource = $sng{$oldgroup};
1033 my $newgroup = $keepsource ? $target : $source;
1034 my( $oldmembers, $newmembers );
1036 $oldmembers = [ keys %sng ];
1037 $newmembers = [ keys %tng ];
1039 $oldmembers = [ keys %tng ];
1040 $newmembers = [ keys %sng ];
1043 # First alter the old group in the hash
1044 $self->set_eqreadings( $oldgroup, $oldmembers );
1045 foreach my $el ( @$oldmembers ) {
1046 $self->set_equivalence( $el, $oldgroup );
1049 # then add the new group back to the hash with its new key
1050 $self->set_eqreadings( $newgroup, $newmembers );
1051 foreach my $el ( @$newmembers ) {
1052 $self->set_equivalence( $el, $newgroup );
1055 # Now add the new group back to the equivalence graph
1056 $self->equivalence_graph->add_vertex( $newgroup );
1057 # ...add the appropriate edges to the source group vertext
1058 my $c = $self->collation;
1059 foreach my $rdg ( @$newmembers ) {
1060 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
1061 next unless $self->equivalence( $rp );
1062 $self->equivalence_graph->add_edge( $self->equivalence( $rp ), $newgroup );
1064 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
1065 next unless $self->equivalence( $rs );
1066 $self->equivalence_graph->add_edge( $newgroup, $self->equivalence( $rs ) );
1070 # ...and figure out which edges on the old group vertex to delete.
1071 my( %old_pred, %old_succ );
1072 foreach my $rdg ( @$oldmembers ) {
1073 foreach my $rp ( $c->sequence->predecessors( $rdg ) ) {
1074 next unless $self->equivalence( $rp );
1075 $old_pred{$self->equivalence( $rp )} = 1;
1077 foreach my $rs ( $c->sequence->successors( $rdg ) ) {
1078 next unless $self->equivalence( $rs );
1079 $old_succ{$self->equivalence( $rs )} = 1;
1082 foreach my $p ( $self->equivalence_graph->predecessors( $oldgroup ) ) {
1083 unless( $old_pred{$p} ) {
1084 $self->equivalence_graph->delete_edge( $p, $oldgroup );
1087 foreach my $s ( $self->equivalence_graph->successors( $oldgroup ) ) {
1088 unless( $old_succ{$s} ) {
1089 $self->equivalence_graph->delete_edge( $oldgroup, $s );
1092 # TODO enable this after collation parsing is done
1093 throw( "Graph got disconnected breaking $source / $target equivalence" )
1094 if $self->_is_disconnected && $self->collation->tradition->_initialized;
1097 sub _find_equiv_without {
1098 my( $self, $first, $second ) = @_;
1099 my %found = ( $first => 1 );
1100 my $check = [ $first ];
1104 foreach my $r ( @$check ) {
1105 foreach my $nr ( $self->graph->neighbors( $r ) ) {
1106 next if $r eq $second;
1107 if( $self->get_relationship( $r, $nr )->colocated ) {
1108 push( @$more, $nr ) unless exists $found{$nr};
1118 =head2 rebuild_equivalence
1120 (Re)build the equivalence graph from scratch. Dumps the graph, makes a new one,
1121 adds all readings and edges, then makes an equivalence for all relationships.
1125 sub rebuild_equivalence {
1127 my $newgraph = Graph->new();
1128 # Set this as the new equivalence graph
1129 $self->_reset_equivalence( $newgraph );
1130 # Clear out the data hashes
1131 $self->_clear_equivalence;
1132 $self->_clear_eqreadings;
1134 $self->collation->tradition->_init_done(0);
1136 foreach my $r ( $self->collation->readings ) {
1138 $newgraph->add_vertex( $rid );
1139 $self->set_equivalence( $rid, $rid );
1140 $self->set_eqreadings( $rid, [ $rid ] );
1144 foreach my $e ( $self->collation->paths ) {
1145 $self->add_equivalence_edge( @$e );
1148 # Now equate the colocated readings. This does no testing;
1149 # it assumes that all preexisting relationships are valid.
1150 foreach my $rel ( $self->relationships ) {
1151 my $relobj = $self->get_relationship( $rel );
1152 next unless $relobj && $relobj->colocated;
1153 $self->_make_equivalence( @$rel );
1155 $self->collation->tradition->_init_done(1);
1158 =head2 equivalence_ranks
1160 Rank all vertices in the equivalence graph, and return a hash reference with
1161 vertex => rank mapping.
1165 sub equivalence_ranks {
1167 my $eqstart = $self->equivalence( $self->collation->start );
1168 my $eqranks = { $eqstart => 0 };
1169 my $rankeqs = { 0 => [ $eqstart ] };
1170 my @curr_origin = ( $eqstart );
1171 # A little iterative function.
1172 while( @curr_origin ) {
1173 @curr_origin = $self->_assign_rank( $eqranks, $rankeqs, @curr_origin );
1175 return( $eqranks, $rankeqs );
1179 my( $self, $node_ranks, $rank_nodes, @current_nodes ) = @_;
1180 my $graph = $self->equivalence_graph;
1181 # Look at each of the children of @current_nodes. If all the child's
1182 # parents have a rank, assign it the highest rank + 1 and add it to
1183 # @next_nodes. Otherwise skip it; we will return when the highest-ranked
1184 # parent gets a rank.
1186 foreach my $c ( @current_nodes ) {
1187 warn "Current reading $c has no rank!"
1188 unless exists $node_ranks->{$c};
1189 foreach my $child ( $graph->successors( $c ) ) {
1190 next if exists $node_ranks->{$child};
1191 my $highest_rank = -1;
1193 foreach my $parent ( $graph->predecessors( $child ) ) {
1194 if( exists $node_ranks->{$parent} ) {
1195 $highest_rank = $node_ranks->{$parent}
1196 if $highest_rank <= $node_ranks->{$parent};
1203 my $c_rank = $highest_rank + 1;
1204 # print STDERR "Assigning rank $c_rank to node $child \n";
1205 $node_ranks->{$child} = $c_rank if $node_ranks;
1206 push( @{$rank_nodes->{$c_rank}}, $child ) if $rank_nodes;
1207 push( @next_nodes, $child );
1216 my( $self, $graphml_ns, $xmlroot, $node_hash, $nodeid_key, $edge_keys ) = @_;
1218 my $rgraph = $xmlroot->addNewChild( $graphml_ns, 'graph' );
1219 $rgraph->setAttribute( 'edgedefault', 'directed' );
1220 $rgraph->setAttribute( 'id', 'relationships', );
1221 $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
1222 $rgraph->setAttribute( 'parse.edges', 0 );
1223 $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
1224 $rgraph->setAttribute( 'parse.nodes', 0 );
1225 $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
1227 # Add the vertices according to their XML IDs
1228 my %rdg_lookup = ( reverse %$node_hash );
1229 # my @nlist = sort _by_xmlid keys( %rdg_lookup ); ## CAUSES SEGFAULT
1230 my @nlist = sort keys( %rdg_lookup );
1231 foreach my $n ( @nlist ) {
1232 my $n_el = $rgraph->addNewChild( $graphml_ns, 'node' );
1233 $n_el->setAttribute( 'id', $n );
1234 _add_graphml_data( $n_el, $nodeid_key, $rdg_lookup{$n} );
1236 $rgraph->setAttribute( 'parse.nodes', scalar @nlist );
1238 # Add the relationship edges, with their object information
1240 foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->graph->edges ) {
1241 # Add an edge and fill in its relationship info.
1242 next unless( exists $node_hash->{$e->[0]} && exists $node_hash->{$e->[1]} );
1243 my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
1244 $edge_el->setAttribute( 'source', $node_hash->{$e->[0]} );
1245 $edge_el->setAttribute( 'target', $node_hash->{$e->[1]} );
1246 $edge_el->setAttribute( 'id', 'e'.$edge_ctr++ );
1248 my $rel_obj = $self->get_relationship( @$e );
1249 foreach my $key ( keys %$edge_keys ) {
1250 my $value = $rel_obj->$key;
1251 _add_graphml_data( $edge_el, $edge_keys->{$key}, $value )
1255 $rgraph->setAttribute( 'parse.edges', $edge_ctr );
1263 return $tmp_a <=> $tmp_b;
1266 sub _add_graphml_data {
1267 my( $el, $key, $value ) = @_;
1268 return unless defined $value;
1269 my $data_el = $el->addNewChild( $el->namespaceURI, 'data' );
1270 $data_el->setAttribute( 'key', $key );
1271 $data_el->appendText( $value );
1275 Text::Tradition::Error->throw(
1276 'ident' => 'Relationship error',
1282 __PACKAGE__->meta->make_immutable;