Removes the given reading from the collation, implicitly removing its
paths and relationships.
-=head2 merge_readings( $main, $second, $concatenate, $with_str )
-
-Merges the $second reading into the $main one. If $concatenate is true, then
-the merged node will carry the text of both readings, concatenated with either
-$with_str (if specified) or a sensible default (the empty string if the
-appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
-
-The first two arguments may be either readings or reading IDs.
-
=head2 has_reading( $id )
Predicate to see whether a given reading ID is in the graph.
$self->$orig( $arg );
};
+=head2 merge_readings( $main, $second, $concatenate, $with_str )
+
+Merges the $second reading into the $main one. If $concatenate is true, then
+the merged node will carry the text of both readings, concatenated with either
+$with_str (if specified) or a sensible default (the empty string if the
+appropriate 'join_*' flag is set on either reading, or else $self->wordsep.)
+
+The first two arguments may be either readings or reading IDs.
+
=begin testing
use Text::Tradition;
my $c = $t->collation;
my $rno = scalar $c->readings;
-# Split n21 for testing purposes
+# Split n21 ('unto') for testing purposes
my $new_r = $c->add_reading( { 'id' => 'n21p0', 'text' => 'un', 'join_next' => 1 } );
my $old_r = $c->reading( 'n21' );
$old_r->alter_text( 'to' );
$c->del_path( 'n20', 'n21', 'A' );
$c->add_path( 'n20', 'n21p0', 'A' );
$c->add_path( 'n21p0', 'n21', 'A' );
+$c->add_relationship( 'n21', 'n22', { type => 'collated', scope => 'local' } );
$c->flatten_ranks();
ok( $c->reading( 'n21p0' ), "New reading exists" );
is( scalar $c->readings, $rno, "Reading add offset by flatten_ranks" );
unless ref( $second ) eq 'Text::Tradition::Collation::Reading';
return( $first, $second, $arg );
}
+
+=head2 duplicate_reading( $reading, @witlist )
+
+Split the given reading into two, so that the new reading is in the path for
+the witnesses given in @witlist. If the result is that certain non-colocated
+relationships (e.g. transpositions) are no longer valid, these will be removed.
+Returns the newly-created reading.
+
+=begin testing
+
+use Text::Tradition;
+
+my $st = Text::Tradition->new( 'input' => 'Self', 'file' => 't/data/collatecorr.xml' );
+is( ref( $st ), 'Text::Tradition', "Got a tradition from test file" );
+ok( $st->has_witness('Ba96'), "Tradition has the affected witness" );
+
+my $sc = $st->collation;
+my $numr = 17;
+ok( $sc->reading('n131'), "Tradition has the affected reading" );
+is( scalar( $sc->readings ), $numr, "There are $numr readings in the graph" );
+is( $sc->end->rank, 14, "There are fourteen ranks in the graph" );
+
+# Detach the erroneously collated reading
+my( $newr, @del_rdgs ) = $sc->duplicate_reading( 'n131', 'Ba96' );
+ok( $newr, "New reading was created" );
+ok( $sc->reading('n131_0'), "Detached the bad collation with a new reading" );
+is( scalar( $sc->readings ), $numr + 1, "A reading was added to the graph" );
+is( $sc->end->rank, 10, "There are now only ten ranks in the graph" );
+my $csucc = $sc->common_successor( 'n131', 'n131_0' );
+is( $csucc->id, 'n136', "Found correct common successor to duped reading" );
+
+# Check that the bad transposition is gone
+is( scalar @del_rdgs, 1, "Deleted reading was returned by API call" );
+is( $sc->get_relationship( 'n130', 'n135' ), undef, "Bad transposition relationship is gone" );
+
+# The collation should not be fixed
+my @pairs = $sc->identical_readings();
+is( scalar @pairs, 0, "Not re-collated yet" );
+# Fix the collation
+ok( $sc->merge_readings( 'n124', 'n131_0' ), "Collated the readings correctly" );
+@pairs = $sc->identical_readings( start => 'n124', end => $csucc->id );
+is( scalar @pairs, 3, "Found three more identical readings" );
+is( $sc->end->rank, 11, "The ranks shifted appropriately" );
+$sc->flatten_ranks();
+is( scalar( $sc->readings ), $numr - 3, "Now we are collated correctly" );
+
+=end testing
+
+=cut
+
+sub duplicate_reading {
+ my( $self, $r, @wits ) = @_;
+ # Add the new reading, duplicating $r.
+ unless( ref( $r ) eq 'Text::Tradition::Collation::Reading' ) {
+ $r = $self->reading( $r );
+ }
+ throw( "Cannot duplicate a meta-reading" )
+ if $r->is_meta;
+
+ # Get all the reading attributes and duplicate them.
+ my $rmeta = Text::Tradition::Collation::Reading->meta;
+ my %args;
+ foreach my $attr( $rmeta->get_all_attributes ) {
+ next if $attr->name =~ /^_/;
+ my $acc = $attr->get_read_method;
+ if( !$acc && $attr->has_applied_traits ) {
+ my $tr = $attr->applied_traits;
+ if( $tr->[0] =~ /::(Array|Hash)$/ ) {
+ my $which = $1;
+ my %methods = reverse %{$attr->handles};
+ $acc = $methods{elements};
+ $args{$attr->name} = $which eq 'Array'
+ ? [ $r->$acc ] : { $r->$acc };
+ }
+ } else {
+ $args{$attr->name} = $r->$acc if $acc;
+ }
+ }
+ # By definition the new reading will no longer be common.
+ $args{is_common} = 0;
+ # The new reading also needs its own ID.
+ $args{id} = $self->_generate_dup_id( $r->id );
+
+ # Try to make the new reading.
+ my $newr = $self->add_reading( \%args );
+ # The old reading is also no longer common.
+ $r->is_common( 0 );
+
+ # For each of the witnesses, dissociate from the old reading and
+ # associate with the new.
+ foreach my $wit ( @wits ) {
+ my $prior = $self->prior_reading( $r, $wit );
+ my $next = $self->next_reading( $r, $wit );
+ $self->del_path( $prior, $r, $wit );
+ $self->add_path( $prior, $newr, $wit );
+ $self->del_path( $r, $next, $wit );
+ $self->add_path( $newr, $next, $wit );
+ }
+
+ # If the graph is ranked, we need to look for relationships that are now
+ # invalid (i.e. 'non-colocation' types that might now be colocated) and
+ # remove them. If not, we can skip it.
+ my $succ;
+ my %rrk;
+ my @deleted_relations;
+ if( $self->end->has_rank ) {
+ # Find the point where we can stop checking
+ $succ = $self->common_successor( $r, $newr );
+
+ # Hash the existing ranks
+ foreach my $rdg ( $self->readings ) {
+ $rrk{$rdg->id} = $rdg->rank;
+ }
+ # Calculate the new ranks
+ $self->calculate_ranks();
+
+ # Check for invalid non-colocated relationships among changed-rank readings
+ # from where the ranks start changing up to $succ
+ my $lastrank = $succ->rank;
+ foreach my $rdg ( $self->readings ) {
+ next if $rdg->rank > $lastrank;
+ next if $rdg->rank == $rrk{$rdg->id};
+ my @noncolo = $rdg->related_readings( sub { !$_[0]->colocated } );
+ next unless @noncolo;
+ foreach my $nc ( @noncolo ) {
+ unless( $self->relations->verify_or_delete( $rdg, $nc ) ) {
+ push( @deleted_relations, [ $rdg->id, $nc->id ] );
+ }
+ }
+ }
+ }
+ return ( $newr, @deleted_relations );
+}
+
+sub _generate_dup_id {
+ my( $self, $rid ) = @_;
+ my $newid;
+ my $i = 0;
+ while( !$newid ) {
+ $newid = $rid."_$i";
+ if( $self->has_reading( $newid ) ) {
+ $newid = '';
+ $i++;
+ }
+ }
+ return $newid;
+}
+
### Path logic
sub add_path {
@args = @_;
}
- # We only need the IDs for adding paths to the graph, not the reading
+ # We only need the IDs for removing paths from the graph, not the reading
# objects themselves.
my( $source, $target, $wit ) = $self->_stringify_args( @args );
if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
$self->sequence->delete_edge_attribute( $source, $target, $wit );
}
- unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
+ unless( $self->sequence->has_edge_attributes( $source, $target ) ) {
$self->sequence->delete_edge( $source, $target );
$self->relations->delete_equivalence_edge( $source, $target );
}
my $self = shift;
my( $source, $target, $opts ) = $self->_stringify_args( @_ );
my( @vectors ) = $self->relations->add_relationship( $source, $target, $opts );
- $self->_graphcalc_done(0);
+ foreach my $v ( @vectors ) {
+ next unless $self->get_relationship( $v )->colocated;
+ if( $self->reading( $v->[0] )->has_rank && $self->reading( $v->[1] )->has_rank
+ && $self->reading( $v->[0] )->rank ne $self->reading( $v->[1] )->rank ) {
+ $self->_graphcalc_done(0);
+ $self->_clear_cache;
+ last;
+ }
+ }
return @vectors;
}
if( @args == 1 && ref( $args[0] ) eq 'ARRAY' ) {
@args = @{$_[0]};
}
- my( $source, $target ) = $self->_stringify_args( @args );
- $self->$orig( $source, $target );
+ my @stringargs = $self->_stringify_args( @args );
+ $self->$orig( @stringargs );
};
=head2 reading_witnesses( $reading )
foreach my $edge ( @edges ) {
# Do we need to output this edge?
if( $used{$edge->[0]} && $used{$edge->[1]} ) {
- my $label = $self->_path_display_label( $self->path_witnesses( $edge ) );
+ my $label = $self->_path_display_label( $opts,
+ $self->path_witnesses( $edge ) );
my $variables = { %edge_attrs, 'label' => $label };
# Account for the rank gap if necessary
# Add substitute start and end edges if necessary
foreach my $node ( keys %substart ) {
- my $witstr = $self->_path_display_label ( $self->path_witnesses( $substart{$node}, $node ) );
+ my $witstr = $self->_path_display_label( $opts,
+ $self->path_witnesses( $substart{$node}, $node ) );
my $variables = { %edge_attrs, 'label' => $witstr };
my $nrdg = $self->reading( $node );
if( $nrdg->has_rank && $nrdg->rank > $startrank ) {
$dot .= "\t\"__SUBSTART__\" -> \"$node\" $varopts;\n";
}
foreach my $node ( keys %subend ) {
- my $witstr = $self->_path_display_label ( $self->path_witnesses( $node, $subend{$node} ) );
+ my $witstr = $self->_path_display_label( $opts,
+ $self->path_witnesses( $node, $subend{$node} ) );
my $variables = { %edge_attrs, 'label' => $witstr };
my $varopts = _dot_attr_string( $variables );
$dot .= "\t\"$node\" -> \"__SUBEND__\" $varopts;\n";
# witnesses only where the main witness is not also in the list.
sub _path_display_label {
my $self = shift;
+ my $opts = shift;
my %wits;
map { $wits{$_} = 1 } @_;
}
}
- # See if we are in a majority situation.
- my $maj = scalar( $self->tradition->witnesses ) * 0.6;
- $maj = $maj > 5 ? $maj : 5;
- if( scalar keys %wits > $maj ) {
- unshift( @disp_ac, 'majority' );
- return join( ', ', @disp_ac );
- } else {
+ if( $opts->{'explicit_wits'} ) {
return join( ', ', sort keys %wits );
+ } else {
+ # See if we are in a majority situation.
+ my $maj = scalar( $self->tradition->witnesses ) * 0.6;
+ $maj = $maj > 5 ? $maj : 5;
+ if( scalar keys %wits > $maj ) {
+ unshift( @disp_ac, 'majority' );
+ return join( ', ', @disp_ac );
+ } else {
+ return join( ', ', sort keys %wits );
+ }
}
}
my %graph_attributes = ( 'version' => 'string' );
# Graph attributes include those of Tradition and those of Collation.
my %gattr_from;
+ # TODO Use meta introspection method from duplicate_reading to do this
+ # instead of naming custom keys.
my $tmeta = $self->tradition->meta;
my $cmeta = $self->meta;
map { $gattr_from{$_->name} = 'Tradition' } $tmeta->get_all_attributes;
my @result;
# Make the header row
$csv->combine( map { $_->{'witness'} } @{$table->{'alignment'}} );
- push( @result, decode_utf8( $csv->string ) );
+ push( @result, $csv->string );
# Make the rest of the rows
foreach my $idx ( 0 .. $table->{'length'} - 1 ) {
my @rowobjs = map { $_->{'tokens'}->[$idx] } @{$table->{'alignment'}};
my @row = map { $_ ? $_->{'t'}->text : $_ } @rowobjs;
$csv->combine( @row );
- push( @result, decode_utf8( $csv->string ) );
+ push( @result, $csv->string );
}
return join( "\n", @result );
}
sub alignment_table {
my( $self ) = @_;
- $self->calculate_ranks() unless $self->_graphcalc_done;
return $self->cached_table if $self->has_cached_table;
# Make sure we can do this
throw( "Need a linear graph in order to make an alignment table" )
unless $self->linear;
- $self->calculate_ranks unless $self->end->has_rank;
-
+ $self->calculate_ranks()
+ unless $self->_graphcalc_done && $self->end->has_rank;
+
my $table = { 'alignment' => [], 'length' => $self->end->rank - 1 };
my @all_pos = ( 1 .. $self->end->rank - 1 );
foreach my $wit ( sort { $a->sigil cmp $b->sigil } $self->tradition->witnesses ) {
is( $c->alignment_table, $table, "Cached table returned upon second call" );
$c->calculate_ranks;
is( $c->alignment_table, $table, "Cached table retained with no rank change" );
-$c->add_relationship( 'n24', 'n23', { 'type' => 'spelling' } );
-isnt( $c->alignment_table, $table, "Alignment table changed after relationship add" );
+$c->add_relationship( 'n13', 'n23', { type => 'repetition' } );
+is( $c->alignment_table, $table, "Alignment table unchanged after non-colo relationship add" );
+$c->add_relationship( 'n24', 'n23', { type => 'spelling' } );
+isnt( $c->alignment_table, $table, "Alignment table changed after colo relationship add" );
=end testing
=cut
sub flatten_ranks {
- my $self = shift;
+ my ( $self, %args ) = shift;
my %unique_rank_rdg;
my $changed;
+ foreach my $p ( $self->identical_readings( %args ) ) {
+ # say STDERR "Combining readings at same rank: @$p";
+ $changed = 1;
+ $self->merge_readings( @$p );
+ # TODO see if this now makes a common point.
+ }
+ # If we merged readings, the ranks are still fine but the alignment
+ # table is wrong. Wipe it.
+ $self->wipe_table() if $changed;
+}
+
+=head2 identical_readings
+=head2 identical_readings( start => $startnode, end => $endnode )
+=head2 identical_readings( startrank => $startrank, endrank => $endrank )
+
+Goes through the graph identifying all pairs of readings that appear to be
+identical, and therefore able to be merged into a single reading. Returns the
+relevant identical pairs. Can be restricted to run over only a part of the
+graph, specified either by node or by rank.
+
+=cut
+
+sub identical_readings {
+ my ( $self, %args ) = @_;
+ # Find where we should start and end.
+ my $startrank = $args{startrank} || 0;
+ if( $args{start} ) {
+ throw( "Starting reading has no rank" ) unless $self->reading( $args{start} )
+ && $self->reading( $args{start} )->has_rank;
+ $startrank = $self->reading( $args{start} )->rank;
+ }
+ my $endrank = $args{endrank} || $self->end->rank;
+ if( $args{end} ) {
+ throw( "Ending reading has no rank" ) unless $self->reading( $args{end} )
+ && $self->reading( $args{end} )->has_rank;
+ $endrank = $self->reading( $args{end} )->rank;
+ }
+
+ # Make sure the ranks are correct.
+ unless( $self->_graphcalc_done ) {
+ $self->calculate_ranks;
+ }
+ # Go through the readings looking for duplicates.
+ my %unique_rank_rdg;
+ my @pairs;
foreach my $rdg ( $self->readings ) {
next unless $rdg->has_rank;
- my $key = $rdg->rank . "||" . $rdg->text;
+ my $rk = $rdg->rank;
+ next if $rk > $endrank || $rk < $startrank;
+ my $key = $rk . "||" . $rdg->text;
if( exists $unique_rank_rdg{$key} ) {
# Make sure they don't have different grammatical forms
my $ur = $unique_rank_rdg{$key};
if( $rdg->is_identical( $ur ) ) {
- # Combine!
- #say STDERR "Combining readings at same rank: $key";
- $changed = 1;
- $self->merge_readings( $unique_rank_rdg{$key}, $rdg );
- # TODO see if this now makes a common point.
+ push( @pairs, [ $ur, $rdg ] );
}
} else {
$unique_rank_rdg{$key} = $rdg;
}
- }
- # If we merged readings, the ranks are still fine but the alignment
- # table is wrong. Wipe it.
- $self->wipe_table() if $changed;
+ }
+
+ return @pairs;
}