use IPC::Run qw( run binary );
use Text::CSV_XS;
use Text::Tradition::Collation::Reading;
+use Text::Tradition::Collation::RelationshipStore;
use XML::LibXML;
use Moose;
has 'relations' => (
is => 'ro',
- isa => 'Graph',
- default => sub { Graph->new( undirected => 1 ) },
- handles => {
- relationships => 'edges',
- },
+ isa => 'Text::Tradition::Collation::RelationshipStore',
+ handles => {
+ relationships => 'relationships',
+ related_readings => 'related_readings',
+ },
+ writer => '_set_relations',
);
has 'tradition' => (
sub BUILD {
my $self = shift;
+ $self->_set_relations( Text::Tradition::Collation::RelationshipStore->new( 'collation' => $self ) );
$self->_set_start( $self->add_reading( { 'collation' => $self, 'is_start' => 1 } ) );
$self->_set_end( $self->add_reading( { 'collation' => $self, 'is_end' => 1 } ) );
}
$self->_add_reading( $reading->id => $reading );
# Once the reading has been added, put it in both graphs.
$self->sequence->add_vertex( $reading->id );
- $self->relations->add_vertex( $reading->id );
+ $self->relations->add_reading( $reading->id );
return $reading;
};
}
# Remove the reading from the graphs.
$self->sequence->delete_vertex( $arg );
- $self->relations->delete_vertex( $arg );
+ $self->relations->delete_reading( $arg );
# Carry on.
$self->$orig( $arg );
@wits{keys %$fwits} = values %$fwits;
$self->sequence->set_edge_attributes( @vector, \%wits );
}
- foreach my $rel ( $self->relations->edges_at( $deleted ) ) {
- my @vector = ( $kept );
- push( @vector, $rel->[0] eq $deleted ? $rel->[1] : $rel->[0] );
- next if $vector[0] eq $vector[1]; # Don't add a self loop
- # Is there a relationship here already? If so, keep it.
- # TODO Warn about conflicting relationships
- next if $self->relations->has_edge( @vector );
- # If not, adopt the relationship that would be deleted.
- $self->relations->add_edge( @vector );
- my $attr = $self->relations->get_edge_attributes( @$rel );
- $self->relations->set_edge_attributes( @vector, $attr );
- }
+ $self->relations->merge_readings( $kept, $deleted, $combine_char );
# Do the deletion deed.
if( $combine_char ) {
return $self->sequence->has_edge_attribute( $source, $target, $wit );
}
-### Relationship logic
-
=head2 add_relationship( $reading1, $reading2, $definition )
Adds the specified relationship between the two readings. A relationship
-is transitive (i.e. undirected), and must have the following attributes
-specified in the hashref $definition:
-
-=over 4
-
-=item * type - Can be one of spelling, orthographic, grammatical, meaning, lexical, collated, repetition, transposition. All but the last two are only valid relationships between readings that occur at the same point in the text.
-
-=item * non_correctable - (Optional) True if the reading would not have been corrected independently.
-
-=item * non_independent - (Optional) True if the variant is unlikely to have occurred independently in unrelated witnesses.
-
-=item * global - (Optional) A meta-attribute, to set the same relationship between readings with the same text whenever they occur in the same place.
-
-=back
+is transitive (i.e. undirected); the options for its definition may be found
+in Text::Tradition::Collation::Relationship.
=cut
sub add_relationship {
my $self = shift;
- my( $source, $target, $options ) = $self->_stringify_args( @_ );
-
- # Check the options
- if( !defined $options->{'type'} ||
- $options->{'type'} !~ /^(spelling|orthographic|grammatical|meaning|lexical|collated|repetition|transposition)$/i ) {
- my $t = $options->{'type'} ? $options->{'type'} : '';
- return( undef, "Invalid or missing type " . $options->{'type'} );
- }
- unless ( $options->{'type'} =~ /^(repetition|transposition)$/ ) {
- $options->{'colocated'} = 1;
- }
-
- # Make sure there is not another relationship between these two
- # readings already
- if( $self->relations->has_edge( $source, $target ) ) {
- return ( undef, "Relationship already exists between these readings" );
- }
- if( !$self->relationship_valid( $source, $target, $options->{'type'} ) ) {
- return ( undef, 'Relationship creates witness loop' );
- }
+ my( $source, $target, $opts ) = $self->_stringify_args( @_ );
+ return $self->relations->add_relationship( $source, $self->reading( $source ),
+ $target, $self->reading( $target ), $opts );
+}
- my @vector = ( $source, $target );
- $self->relations->add_edge( @vector );
- $self->relations->set_edge_attributes( @vector, $options );
-
- # TODO Handle global relationship setting
+=head2 reading_witnesses( $reading )
- return( 1, @vector );
-}
+Return a list of sigils corresponding to the witnesses in which the reading appears.
-sub relationship_valid {
- my( $self, $source, $target, $rel ) = @_;
- if( $rel eq 'repetition' ) {
- return 1;
- } elsif ( $rel eq 'transposition' ) {
- # Check that the two readings do not appear in the same witness.
- my %seen_wits;
- map { $seen_wits{$_} = 1 } $self->reading_witnesses( $source );
- foreach my $w ( $self->reading_witnesses( $target ) ) {
- return 0 if $seen_wits{$w};
- }
- return 1;
- } else {
- # Check that linking the source and target in a relationship won't lead
- # to a path loop for any witness. First make a lookup table of all the
- # readings related to either the source or the target.
- my @proposed_related = ( $source, $target );
- push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
- push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
- my %pr_ids;
- map { $pr_ids{ $_ } = 1 } @proposed_related;
-
- # None of these proposed related readings should have a neighbor that
- # is also in proposed_related.
- foreach my $pr ( keys %pr_ids ) {
- foreach my $neighbor( $self->sequence->neighbors( $pr ) ) {
- return 0 if exists $pr_ids{$neighbor};
- }
- }
- return 1;
- }
-}
+=cut
-# Return a list of the witnesses in which the reading appears.
sub reading_witnesses {
my( $self, $reading ) = @_;
# We need only check either the incoming or the outgoing edges; I have
return keys %all_witnesses;
}
-sub related_readings {
- my( $self, $reading, $colocated ) = @_;
- my $return_object;
- if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
- $reading = $reading->id;
- $return_object = 1;
-# print STDERR "Returning related objects\n";
-# } else {
-# print STDERR "Returning related object names\n";
- }
- my @related = $self->relations->all_reachable( $reading );
- if( $colocated ) {
- my @colo = grep { $self->relations->has_edge_attribute( $reading, $_, 'colocated' ) } @related;
- @related = @colo;
- }
- return $return_object ? map { $self->reading( $_ ) } @related : @related;
-}
-
=head2 Output method(s)
=over
$key->setAttribute( 'id', $edge_data_keys{$datum} );
}
- # Add the collation graphs themselves
+ # Add the collation graph itself
my $sgraph = $root->addNewChild( $graphml_ns, 'graph' );
$sgraph->setAttribute( 'edgedefault', 'directed' );
$sgraph->setAttribute( 'id', $self->tradition->name );
$sgraph->setAttribute( 'parse.nodeids', 'canonical' );
$sgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
$sgraph->setAttribute( 'parse.order', 'nodesfirst' );
-
- my $rgraph;
- if( scalar $self->relationships ) {
- my $rgraph = $root->addNewChild( $graphml_ns, 'graph' );
- $rgraph->setAttribute( 'edgedefault', 'undirected' );
- $rgraph->setAttribute( 'id', 'relationships' );
- $rgraph->setAttribute( 'parse.edgeids', 'canonical' );
- $rgraph->setAttribute( 'parse.edges', scalar($self->relationships) );
- $rgraph->setAttribute( 'parse.nodeids', 'canonical' );
- $rgraph->setAttribute( 'parse.nodes', scalar($self->readings) );
- $rgraph->setAttribute( 'parse.order', 'nodesfirst' );
- }
-
+
# Collation attribute data
foreach my $datum ( @graph_attributes ) {
my $value = $datum eq 'version' ? '3.0' : $self->$datum;
my $node_ctr = 0;
my %node_hash;
- # Add our readings to the graphs
+ # Add our readings to the graph
foreach my $n ( sort { $a->id cmp $b->id } $self->readings ) {
# Add to the main graph
my $node_el = $sgraph->addNewChild( $graphml_ns, 'node' );
_add_graphml_data( $node_el, $node_data_keys{$d}, $nval )
if defined $nval;
}
- # Add to the relationships graph
- if( $rgraph ) {
- my $rnode_el = $rgraph->addNewChild( $graphml_ns, 'node' );
- $rnode_el->setAttribute( 'id', $node_xmlid );
- }
}
# Add the path edges to the sequence graph
}
}
- # Add the relationship edges to the relationships graph
- if( $rgraph ) {
- foreach my $e ( sort { $a->[0] cmp $b->[0] } $self->relationships ) {
- my( $id, $from, $to ) = ( 'e'.$edge_ctr++,
- $node_hash{ $e->[0] },
- $node_hash{ $e->[1] } );
- my $edge_el = $rgraph->addNewChild( $graphml_ns, 'edge' );
- $edge_el->setAttribute( 'source', $from );
- $edge_el->setAttribute( 'target', $to );
- $edge_el->setAttribute( 'id', $id );
-
- my $data = $self->relations->get_edge_attributes( @$e );
- # It's a relationship, so save the relationship data
- _add_graphml_data( $edge_el, $edge_data_keys{'relationship'}, $data->{type} );
- _add_graphml_data( $edge_el, $edge_data_keys{'colocated'}, $data->{colocated} );
- if( exists $data->{non_correctable} ) {
- _add_graphml_data( $edge_el, $edge_data_keys{'non_correctable'},
- $data->{non_correctable} );
- }
- if( exists $data->{non_independent} ) {
- _add_graphml_data( $edge_el, $edge_data_keys{'non_independent'},
- $data->{non_independent} );
- }
- _add_graphml_data( $edge_el, $edge_data_keys{'class'}, 'relationship' );
- }
- }
+ # Add the relationship graph to the XML
+ $self->relations->as_graphml( $root );
# Save and return the thing
my $result = decode_utf8( $graphml->toString(1) );
return @answer;
}
+=begin testing
+
+use Text::Tradition;
+
+my $cxfile = 't/data/Collatex-16.xml';
+my $t = Text::Tradition->new(
+ 'name' => 'inline',
+ 'input' => 'CollateX',
+ 'file' => $cxfile,
+ );
+my $c = $t->collation;
+
+is( $c->common_predecessor( $c->reading('n9'), $c->reading('n23') )->id,
+ 'n20', "Found correct common predecessor" );
+is( $c->common_successor( $c->reading('n9'), $c->reading('n23') )->id,
+ '#END#', "Found correct common successor" );
+
+is( $c->common_predecessor( $c->reading('n19'), $c->reading('n17') )->id,
+ 'n16', "Found correct common predecessor for readings on same path" );
+is( $c->common_successor( $c->reading('n21'), $c->reading('n26') )->id,
+ '#END#', "Found correct common successor for readings on same path" );
+
+=end testing
+
+=cut
+
+## Return the closest reading that is a predecessor of both the given readings.
+sub common_predecessor {
+ my $self = shift;
+ return $self->common_in_path( @_, 'predecessors' );
+}
+
+sub common_successor {
+ my $self = shift;
+ return $self->common_in_path( @_, 'successors' );
+}
+
+sub common_in_path {
+ my( $self, $r1, $r2, $dir ) = @_;
+ my $iter = $r1->rank > $r2->rank ? $r1->rank : $r2->rank;
+ $iter = $self->end->rank - $iter if $dir eq 'successors';
+ my @candidates;
+ my @last_checked = ( $r1, $r2 );
+ my %all_seen;
+ while( !@candidates ) {
+ my @new_lc;
+ foreach my $lc ( @last_checked ) {
+ foreach my $p ( $lc->$dir ) {
+ if( $all_seen{$p->id} ) {
+ push( @candidates, $p );
+ } else {
+ $all_seen{$p->id} = 1;
+ push( @new_lc, $p );
+ }
+ }
+ }
+ @last_checked = @new_lc;
+ }
+ my @answer = sort { $a->rank <=> $b->rank } @candidates;
+ return $dir eq 'predecessors' ? pop( @answer ) : shift ( @answer );
+}
+
no Moose;
__PACKAGE__->meta->make_immutable;
--- /dev/null
+package Text::Tradition::Collation::RelationshipStore;
+
+use strict;
+use warnings;
+use Text::Tradition::Collation::Relationship;
+
+use Moose;
+
+=head1 NAME
+
+Text::Tradition::Collation::Reading - represents a reading (usually a word) in a collation.
+
+=head1 DESCRIPTION
+
+Text::Tradition is a library for representation and analysis of collated
+texts, particularly medieval ones. The RelationshipStore is an internal object
+of the collation, to keep track of the defined relationships (both specific and
+general) between readings.
+
+=head1 METHODS
+
+=head2 new( collation => $collation );
+
+Creates a new relationship store for the given collation.
+
+=cut
+
+has 'collation' => (
+ is => 'ro',
+ isa => 'Text::Tradition::Collation',
+ required => 1,
+ weak_ref => 1,
+ );
+
+has 'scopedrels' => (
+ is => 'ro',
+ isa => 'HashRef[HashRef[Text::Tradition::Collation::Relationship]]',
+ default => sub { {} },
+ );
+
+has 'graph' => (
+ is => 'ro',
+ isa => 'Graph',
+ default => sub { Graph->new( undirected => 1 ) },
+ handles => {
+ relationships => 'edges',
+ add_reading => 'add_vertex',
+ delete_reading => 'delete_vertex',
+ },
+ );
+
+=head2 create
+
+Create a new relationship with the given options and return it.
+Warn and return undef if the relationship cannot be created.
+
+=cut
+
+sub create {
+ my( $self, $options ) = @_;
+ # Check to see if a relationship exists between the two given readings
+ my $source = delete $options->{'orig_a'};
+ my $target = delete $options->{'orig_b'};
+ my $rel;
+ if( $self->graph->has_edge( $source, $target ) ) {
+ $rel = $self->graph->get_edge_attribute( $source, $target, 'object' );
+ if( $rel->type ne $options->type ) {
+ warn "Relationship of type " . $rel->type
+ . "already exists between $source and $target";
+ return;
+ } else {
+ return $rel;
+ }
+ }
+
+ # Check to see if a nonlocal relationship is defined for the two readings
+ $rel = $self->scoped_relationship( $options->{'reading_a'},
+ $options->{'reading_b'} );
+ if( $rel && $rel->type eq $options->{'type'} ) {
+ return $rel;
+ } elsif( $rel ) {
+ warn sprintf( "Relationship of type %s with scope %s already defined for readings %s and %s", $rel->type, $rel->scope, $options->{'reading_a'}, $options->{'reading_b'} );
+ return;
+ } else {
+ $rel = Text::Tradition::Collation::Relationship->new( $options );
+ $self->add_scoped_relationship( $rel ) if $rel->nonlocal;
+ return $rel;
+ }
+}
+
+=head2 add_scoped_relationship( $rel )
+
+Keep track of relationships defined between specific readings that are scoped
+non-locally. Key on whichever reading occurs first alphabetically.
+
+=cut
+
+sub add_scoped_relationship {
+ my( $self, $rel ) = @_;
+ my $r = $self->scoped_relationship( $rel->reading_a, $rel->reading_b );
+ if( $r ) {
+ warn sprintf( "Scoped relationship of type %s already exists between %s and %s",
+ $r->type, $rel->reading_a, $rel->reading_b );
+ return;
+ }
+ $self->scopedrels->{$rel->reading_a}->{$rel->reading_b} = $rel;
+}
+
+=head2 scoped_relationship( $reading_a, $reading_b )
+
+Returns the general (document-level or global) relationship that has been defined
+between the two reading strings. Returns undef if there is no general relationship.
+
+=cut
+
+sub scoped_relationship {
+ my( $self, $rdga, $rdgb ) = @_;
+ my( $first, $second ) = sort( $rdga, $rdgb );
+ if( exists $self->scopedrels->{$first}->{$second} ) {
+ return $self->scopedrels->{$first}->{$second};
+ } else {
+ return undef;
+ }
+}
+
+=head2 add_relationship( $self, $source, $sourcetext, $target, $targettext, $opts )
+
+Adds the relationship specified in $opts (see Text::Tradition::Collation::Relationship
+for the possible options) between the readings given in $source and $target. Sets
+up a scoped relationship between $sourcetext and $targettext if the relationship is
+scoped non-locally.
+
+Returns a status boolean and a list of all reading pairs connected by the call to
+add_relationship.
+
+=cut
+
+sub add_relationship {
+ my( $self, $source, $source_rdg, $target, $target_rdg, $options ) = @_;
+
+ # Check the options
+ $options->{'scope'} = 'local' unless $options->{'scope'};
+
+ my( $is_valid, $reason ) =
+ $self->relationship_valid( $source, $target, $options->{'type'} );
+ unless( $is_valid ) {
+ return ( undef, $reason );
+ }
+
+ # Try to create the relationship object.
+ $options->{'reading_a'} = $source_rdg->text;
+ $options->{'reading_b'} = $target_rdg->text;
+ $options->{'orig_a'} = $source;
+ $options->{'orig_b'} = $target;
+ my $relationship = $self->create( $options );
+ return( undef, "Relationship creation failed" ) unless $relationship;
+
+ # Find all the pairs for which we need to set the relationship.
+ my @vectors = ( [ $source, $target ] );
+ if( $relationship->colocated && $relationship->nonlocal ) {
+ my $c = $self->collation;
+ # Set the same relationship everywhere we can, throughout the graph.
+ my @identical_readings = grep { $_->text eq $relationship->reading_a }
+ $c->readings;
+ foreach my $ir ( @identical_readings ) {
+ # Check to see if there is a target reading with the same text at
+ # the same rank.
+ my @itarget = grep
+ { $_->rank == $ir->rank && $_->text eq $relationship->reading_b }
+ $c->readings;
+ if( @itarget ) {
+ # We found a hit.
+ warn "More than one reading with text " . $target_rdg->text
+ . " at rank " . $ir->rank . "!" if @itarget > 1;
+ push( @vectors, [ $ir, $itarget[0] ] );
+ }
+ }
+ }
+
+ # Now set the relationship(s).
+ my @pairs_set;
+ foreach my $v ( @vectors ) {
+ if( $self->graph->has_edge( @$v ) ) {
+ # Is it locally scoped?
+ my $rel = $self->graph->get_edge_attribute( @$v );
+ if( $rel->nonlocal ) {
+ # TODO I think we should not be able to get here.
+ warn "Found conflicting relationship at @$v";
+ } else {
+ warn "Not overriding local relationship set at @$v";
+ next;
+ }
+ }
+ $self->graph->add_edge( @$v );
+ $self->graph->set_edge_attribute( @$v, 'object', $relationship );
+ push( @pairs_set, $v );
+ }
+
+ return( 1, @pairs_set );
+}
+
+=head2 relationship_valid( $source, $target, $type )
+
+Checks whether a relationship of type $type may exist between the readings given
+in $source and $target. Returns a tuple of ( status, message ) where status is
+a yes/no boolean and, if the answer is no, message gives the reason why.
+
+=cut
+
+sub relationship_valid {
+ my( $self, $source, $target, $rel ) = @_;
+ my $c = $self->collation;
+ if ( $rel eq 'transposition' || $rel eq 'repetition' ) {
+ # Check that the two readings do (for a repetition) or do not (for
+ # a transposition) appear in the same witness.
+ my %seen_wits;
+ map { $seen_wits{$_} = 1 } $c->reading_witnesses( $source );
+ foreach my $w ( $c->reading_witnesses( $target ) ) {
+ if( $seen_wits{$w} ) {
+ return ( 0, "Readings both occur in witness $w" )
+ if $rel eq 'transposition';
+ return ( 1, "ok" ) if $rel eq 'repetition';
+ }
+ return $rel eq 'transposition' ? ( 1, "ok" )
+ : ( 0, "Readings occur only in distinct witnesses" );
+ }
+ } else {
+ # Check that linking the source and target in a relationship won't lead
+ # to a path loop for any witness. First make a lookup table of all the
+ # readings related to either the source or the target.
+ my @proposed_related = ( $source, $target );
+ push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
+ push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
+ my %pr_ids;
+ map { $pr_ids{ $_ } = 1 } @proposed_related;
+
+ # None of these proposed related readings should have a neighbor that
+ # is also in proposed_related.
+ foreach my $pr ( keys %pr_ids ) {
+ foreach my $neighbor( $c->sequence->neighbors( $pr ) ) {
+ return( 0, "Would relate neighboring readings $pr and $neighbor" )
+ if exists $pr_ids{$neighbor};
+ }
+ }
+ return ( 1, "ok" );
+ }
+}
+
+=head2 related_readings( $reading, $colocated_only )
+
+Returns a list of readings that are connected via relationship links to $reading.
+If $colocated_only is true, restricts the list to those readings that are in the
+same logical location (and therefore have the same rank in the collation graph.)
+
+=cut
+
+sub related_readings {
+ my( $self, $reading, $colocated ) = @_;
+ my $return_object;
+ if( ref( $reading ) eq 'Text::Tradition::Collation::Reading' ) {
+ $reading = $reading->id;
+ $return_object = 1;
+ }
+ my @related = $self->graph->all_reachable( $reading );
+ if( $colocated ) {
+ my @colo;
+ foreach my $r ( @related ) {
+ my $obj = $self->graph->get_edge_attribute( $reading, $r, 'object' );
+ push( @colo, $r ) if $obj->colocated;
+ }
+ @related = @colo;
+ }
+ if( $return_object ) {
+ my $c = $self->collation;
+ return map { $c->reading( $_ ) } @related;
+ } else {
+ return @related;
+ }
+}
+
+=head2 merge_readings( $kept, $deleted );
+
+Makes a best-effort merge of the relationship links between the given readings, and
+stops tracking the to-be-deleted reading.
+
+=cut
+
+sub merge_readings {
+ my( $self, $kept, $deleted, $combined ) = @_;
+ foreach my $edge ( $self->graph->edges_at( $deleted ) ) {
+ # Get the pair of kept / rel
+ my @vector = ( $kept );
+ push( @vector, $edge->[0] eq $deleted ? $edge->[1] : $edge->[0] );
+ next if $vector[0] eq $vector[1]; # Don't add a self loop
+
+ # If kept changes its text, drop the relationship.
+ next if $combined;
+
+ # If kept / rel already has a relationship, warn and keep the old
+ if( $self->graph->has_edge( @vector ) ) {
+ warn sprintf( "Readings %s and %s have existing relationship; dropping link with %s", @vector, $deleted );
+ next;
+ }
+
+ # Otherwise, adopt the relationship that would be deleted.
+ my $rel = $self->graph->get_edge_attribute( @$edge, 'object' );
+ $self->graph->add_edge( @vector );
+ $self->graph->set_edge_attribute( @vector, 'object', $rel );
+ }
+ $self->delete_reading( $deleted );
+}
+
+no Moose;
+__PACKAGE__->meta->make_immutable;
+
+1;