if( ref( $arg ) eq 'Text::Tradition::Collation::Reading' ) {
$arg = $arg->id;
}
-
# Remove the reading from the graphs.
$self->sequence->delete_vertex( $arg );
$self->relations->delete_vertex( $arg );
# We only need the IDs for adding paths to the graph, not the reading
# objects themselves.
- my( $kept, $deleted ) = $self->_stringify_args( @_ );
+ my( $kept, $deleted, $combine_char ) = $self->_stringify_args( @_ );
# The kept reading should inherit the paths and the relationships
# of the deleted reading.
my @vector = ( $kept );
push( @vector, $path->[1] ) if $path->[0] eq $deleted;
unshift( @vector, $path->[0] ) if $path->[1] eq $deleted;
+ next if $vector[0] eq $vector[1]; # Don't add a self loop
my %wits = %{$self->sequence->get_edge_attributes( @$path )};
$self->sequence->add_edge( @vector );
my $fwits = $self->sequence->get_edge_attributes( @vector );
foreach my $rel ( $self->relations->edges_at( $deleted ) ) {
my @vector = ( $kept );
push( @vector, $rel->[0] eq $deleted ? $rel->[1] : $rel->[0] );
+ next if $vector[0] eq $vector[1]; # Don't add a self loop
# Is there a relationship here already? If so, keep it.
# TODO Warn about conflicting relationships
next if $self->relations->has_edge( @vector );
}
# Do the deletion deed.
+ if( $combine_char ) {
+ my $kept_obj = $self->reading( $kept );
+ my $new_text = join( $combine_char, $kept_obj->text,
+ $self->reading( $deleted )->text );
+ $kept_obj->alter_text( $new_text );
+ }
$self->del_reading( $deleted );
}
sub del_path {
my $self = shift;
+ my @args;
+ if( ref( $_[0] ) eq 'ARRAY' ) {
+ my $e = shift @_;
+ @args = ( @$e, @_ );
+ } else {
+ @args = @_;
+ }
# We only need the IDs for adding paths to the graph, not the reading
# objects themselves.
- my( $source, $target, $wit ) = $self->_stringify_args( @_ );
+ my( $source, $target, $wit ) = $self->_stringify_args( @args );
if( $self->sequence->has_edge_attribute( $source, $target, $wit ) ) {
- $self->sequence->del_edge_attribute( $source, $target, $wit );
+ $self->sequence->delete_edge_attribute( $source, $target, $wit );
}
unless( keys %{$self->sequence->get_edge_attributes( $source, $target )} ) {
$self->sequence->delete_edge( $source, $target );
# Check the options
if( !defined $options->{'type'} ||
- $options->{'type'} !~ /^(spelling|orthographic|grammatical|meaning|repetition|transposition)$/i ) {
+ $options->{'type'} !~ /^(spelling|orthographic|grammatical|meaning|lookalike|repetition|transposition)$/i ) {
my $t = $options->{'type'} ? $options->{'type'} : '';
- return( undef, "Invalid or missing type" . $options->{'type'} );
+ return( undef, "Invalid or missing type " . $options->{'type'} );
}
- if( $options->{'type'} =~ /^(spelling|orthographic|grammatical|meaning)$/ ) {
+ unless ( $options->{'type'} =~ /^(repetition|transposition)$/ ) {
$options->{'colocated'} = 1;
}
# to a path loop for any witness. First make a lookup table of all the
# readings related to either the source or the target.
my @proposed_related = ( $source, $target );
- push( @proposed_related, $source->related_readings( 'colocated' ) );
- push( @proposed_related, $target->related_readings( 'colocated' ) );
+ push( @proposed_related, $self->related_readings( $source, 'colocated' ) );
+ push( @proposed_related, $self->related_readings( $target, 'colocated' ) );
my %pr_ids;
- map { $pr_ids{ $_->id } = 1 } @proposed_related;
+ map { $pr_ids{ $_ } = 1 } @proposed_related;
# None of these proposed related readings should have a neighbor that
# is also in proposed_related.
$seen{$n->id} = 1;
my $next = $self->next_reading( $n, $witness, $backup );
- $DB::single = 1 if $next->id eq $end->id;
unless( $next ) {
warn "Did not find any path for $witness from reading " . $n->id;
last;
foreach my $n ( $self->sequence->successors( $r->id ) ) {
my( $tfrom, $tto ) = ( $rel_containers{$r->id},
$rel_containers{$n} );
+ $DB::single = 1 unless $tfrom && $tto;
$topo_graph->add_edge( $tfrom, $tto );
}
}
my @base_line_starts = read_base( $base_file, $collation );
my %all_witnesses;
- my @unwitnessed_lemma_nodes;
foreach my $app ( @app_entries ) {
my( $line, $num ) = split( /\./, $app->{_id} );
# DEBUG with a short graph
my %seen;
while( $lemma_start ne $too_far ) {
# Loop detection
- if( $seen{ $lemma_start->name() } ) {
- warn "Detected loop at " . $lemma_start->name() .
+ if( $seen{ $lemma_start->id() } ) {
+ warn "Detected loop at " . $lemma_start->id() .
", ref $line,$num";
last;
}
- $seen{ $lemma_start->name() } = 1;
+ $seen{ $lemma_start->id() } = 1;
# Try to match the lemma.
my $unmatch = 0;
foreach my $k ( grep { /^rdg/ } keys( %$app ) ) {
my @mss = grep { $app->{$_} eq $k } keys( %$app );
- # Keep track of lemma nodes that don't actually appear in
- # any MSS; we will want to remove them from the collation.
- push( @unwitnessed_lemma_nodes, @lemma_set )
- if !@mss && $k eq 'rdg_0';
-
# Keep track of what witnesses we have seen.
@all_witnesses{ @mss } = ( 1 ) x scalar( @mss );
# Keep track of which witnesses bear corrected readings here.
my $ctr = 0;
foreach my $vw ( @variant ) {
my $vwname = "$k/$line.$num.$ctr"; $ctr++;
- my $vwreading = $collation->add_reading( $vwname );
- $vwreading->text( $vw );
+ my $vwreading = $collation->add_reading( {
+ 'id' => $vwname,
+ 'text' => $vw } );
push( @variant_readings, $vwreading );
}
foreach my $rkey ( keys %$variant_objects ) {
# Object is argument list for splice, so:
# offset, length, replacements
- my $edit_object = [ $lemma_start->name,
+ my $edit_object = [ $lemma_start->id,
scalar( @lemma_set ),
$variant_objects->{$rkey}->{reading} ];
foreach my $ms ( @{$variant_objects->{$rkey}->{mss}} ) {
# ones we have created so far. Also remove any unwitnessed
# lemma nodes (TODO unless we are treating base as witness)
foreach ( $collation->paths() ) {
- $collation->del_path( $_ );
- }
- foreach( @unwitnessed_lemma_nodes ) {
- $collation->del_reading( $_ );
- # TODO do we need to delete any relationship paths here?
+ $collation->del_path( $_, $collation->baselabel );
}
### HACKY HACKY Do some one-off path corrections here.
# Now walk paths and calculate positional rank.
$collation->make_witness_paths();
+ # Now delete any orphaned readings.
+ foreach my $r ( $collation->sequence->isolated_vertices ) {
+ print STDERR "Deleting unconnected reading $r / " .
+ $collation->reading( $r )->text . "\n";
+ $collation->del_reading( $r );
+ }
+
KUL::HACK::post_path_hack( $collation );
# Have to check relationship validity at this point, because before that
# we had no paths.
# next unless $rel->equal_rank;
# unless( Text::Tradition::Collation::relationship_valid( $rel->from, $rel->to ) ) {
# warn sprintf( "Relationship type %s between %s and %s is invalid, deleting",
-# $rel->type, $rel->from->name, $rel->to->name );
+# $rel->type, $rel->from->id, $rel->to->id );
# }
# }
$collation->calculate_ranks();
# This array gives the first reading for each line. We put the
# common starting point in line zero.
- my $last_reading = $collation->start();
- $base_text_index{$last_reading->name} = 0;
+ my $last_reading = $collation->start;
+ $base_text_index{$last_reading->id} = 0;
my $lineref_array = [ $last_reading ]; # There is no line zero.
open( BASE, $base_file ) or die "Could not open file $base_file: $!";
last if $SHORTEND && $lineref > $SHORTEND;
foreach my $w ( @words ) {
my $readingref = join( ',', $lineref, ++$wordref );
- my $reading = $collation->add_reading( $readingref );
- $reading->text( $w );
+ my $reading = $collation->add_reading( { id => $readingref, text => $w } );
unless( $started ) {
push( @$lineref_array, $reading );
$started = 1;
# Ending point for all texts
$collation->add_path( $last_reading, $collation->end, $collation->baselabel );
push( @$lineref_array, $collation->end );
- $base_text_index{$collation->end->name} = $i;
+ $base_text_index{$collation->end->id} = $i;
return( @$lineref_array );
}
$labels{cmp_str( $r )} = $r;
}
foreach my $r( @$var ) {
- if( exists $labels{$r->label} &&
- $r->name ne $labels{$r->label}->name ) {
+ if( exists $labels{$r->text} &&
+ $r->id ne $labels{$r->text}->id ) {
if( $type eq 'repetition' ) {
# Repetition
- $collation->add_relationship( $r, $labels{$r->label}, \%rel_options );
+ $collation->add_relationship( $r, $labels{$r->text}, \%rel_options );
} else {
# Transposition
- $r->set_identical( $labels{$r->label} );
+ $r->set_identical( $labels{$r->text} );
}
}
}
sub apply_edits {
my( $collation, $edit_sequence, $debug ) = @_;
- my @lemma_text = $collation->reading_sequence( $collation->start,
- $collation->reading( '#END#' ) );
+ my @lemma_text = $collation->reading_sequence(
+ $collation->start, $collation->end );
my $drift = 0;
foreach my $correction ( @$edit_sequence ) {
my( $lemma_start, $length, $items ) = @$correction;
my $offset = $base_text_index{$lemma_start};
my $realoffset = $offset + $drift;
if( $debug ||
- $lemma_text[$realoffset]->name ne $lemma_start ) {
+ $lemma_text[$realoffset]->id ne $lemma_start ) {
my @this_phrase = @lemma_text[$realoffset..$realoffset+$length-1];
my @base_phrase;
my $i = $realoffset;
print STDERR sprintf( "Trying to replace %s (%s) starting at %d " .
"with %s (%s) with drift %d\n",
- join( ' ', map {$_->label} @base_phrase ),
- join( ' ', map {$_->name} @base_phrase ),
+ join( ' ', map {$_->text} @base_phrase ),
+ join( ' ', map {$_->id} @base_phrase ),
$realoffset,
- join( ' ', map {$_->label} @$items ),
- join( ' ', map {$_->name} @$items ),
+ join( ' ', map {$_->text} @$items ),
+ join( ' ', map {$_->id} @$items ),
$drift,
) if $debug;
- if( $lemma_text[$realoffset]->name ne $lemma_start ) {
+ if( $lemma_text[$realoffset]->id ne $lemma_start ) {
warn( sprintf( "Should be replacing %s (%s) with %s (%s) " .
"but %s (%s) is there instead",
- join( ' ', map {$_->label} @base_phrase ),
- join( ' ', map {$_->name} @base_phrase ),
- join( ' ', map {$_->label} @$items ),
- join( ' ', map {$_->name} @$items ),
- join( ' ', map {$_->label} @this_phrase ),
- join( ' ', map {$_->name} @this_phrase ),
+ join( ' ', map {$_->text} @base_phrase ),
+ join( ' ', map {$_->id} @base_phrase ),
+ join( ' ', map {$_->text} @$items ),
+ join( ' ', map {$_->id} @$items ),
+ join( ' ', map {$_->text} @this_phrase ),
+ join( ' ', map {$_->id} @this_phrase ),
) );
# next;
}