sub reading_sequence {
my( $self, $start, $end, $witness, $backup ) = @_;
- $witness = 'base text' unless $witness;
+ $witness = $self->baselabel unless $witness;
my @readings = ( $start );
my %seen;
my $n = $start;
- while( $n ne $end ) {
+ while( $n && $n ne $end ) {
if( exists( $seen{$n->name()} ) ) {
warn "Detected loop at " . $n->name();
last;
my $next_reading = $graph->next_reading( $reading, $witpath );
Returns the reading that follows the given reading along the given witness
-path. TODO These are badly named.
+path.
=cut
my $prior_reading = $graph->prior_reading( $reading, $witpath );
Returns the reading that precedes the given reading along the given witness
-path. TODO These are badly named.
+path.
=cut
my $base_le;
my $alt_le;
foreach my $le ( @linked_paths ) {
- if( $le->name eq 'base text' ) {
+ if( $le->name eq $self->baselabel ) {
$base_le = $le;
} else {
my @le_wits = $self->witnesses_of_label( $le->name );
$wit->path( \@wit_path );
# Detect the common readings.
- if( @common_readings ) {
- my @cn;
- foreach my $n ( @wit_path ) {
- push( @cn, $n ) if grep { $_ eq $n } @common_readings;
- }
- @common_readings = ();
- push( @common_readings, @cn );
- } else {
- push( @common_readings, @wit_path );
- }
+ @common_readings = _find_common( \@common_readings, \@wit_path );
}
# Mark all the nodes as either common or not.
return @common_readings;
}
+sub _find_common {
+ my( $common_readings, $new_path ) = @_;
+ my @cr;
+ if( @$common_readings ) {
+ foreach my $n ( @$new_path ) {
+ push( @cr, $n ) if grep { $_ eq $n } @$common_readings;
+ }
+ } else {
+ push( @cr, @$new_path );
+ }
+ return @cr;
+}
+
+sub _remove_common {
+ my( $common_readings, $divergence ) = @_;
+ my @cr;
+ my %diverged;
+ map { $diverged{$_->name} = 1 } @$divergence;
+ foreach( @$common_readings ) {
+ push( @cr, $_ ) unless $diverged{$_->name};
+ }
+ return @cr;
+}
+
+
# An alternative to walk_witness_paths, for use when a collation is
# constructed from a base text and an apparatus. Also modifies the
# collation graph to remove all 'base text' paths and replace them
sub walk_and_expand_base {
my( $self, $end ) = @_;
+ my @common_readings;
foreach my $wit ( @{$self->tradition->witnesses} ) {
my $sig = $wit->sigil;
my $post_sig;
$post_sig = $wit->post_correctione
if $wit->has_post_correctione;
+
+ # $DB::single = 1 if $wit->sigil eq 'Vb11';
my @wit_path = $self->reading_sequence( $self->start, $end, $sig );
$wit->path( \@wit_path );
$self->connect_readings_for_witness( $wit );
+ @common_readings = _find_common( \@common_readings, \@wit_path );
# If there is a post-correctio, get its path and compare.
# Add a correction range for each divergence.
if( $post_sig ) {
- # TODO this is looping
my @corr_wit_path = $self->reading_sequence( $self->start, $end,
"$sig$post_sig", $sig );
my $diverged = 0;
my $last_common;
my @correction;
+ $DB::single = 1 if $sig eq 'Vb12';
foreach my $rdg ( @corr_wit_path ) {
if( exists( $in_orig{$rdg->name} ) && !$diverged ) {
# We are reading the same here
$last_common = $in_orig{$rdg->name};
- next;
} elsif ( exists( $in_orig{$rdg->name} ) ) {
# We have been diverging but are reading the same again.
# Add the correction to the witness.
- my $offset = $last_common;
- my $length = $in_orig{$rdg->name} - $last_common;
- $wit->add_correction( $offset, $length, \@correction );
+ my $offset = $last_common + 1;
+ my $length = $in_orig{$rdg->name} - $offset;
+ $wit->add_correction( $offset, $length, @correction );
$diverged = 0;
+ @common_readings = _remove_common( \@common_readings, \@correction );
@correction = ();
$last_common = $in_orig{$rdg->name};
} elsif( $diverged ) {
push( @correction, $rdg );
}
}
- # Add any divergence that is at the end of the line
+ # Add any divergence that is at the end of the text
if( $diverged ) {
- $wit->add_correction( $last_common, $#wit_path, \@correction );
+ $wit->add_correction( $last_common+1, $#wit_path, \@correction );
}
}
}
my $node_pos = {};
foreach my $wit ( @{$self->tradition->witnesses} ) {
- # First we walk each path, making a matrix for each witness that
- # corresponds to its eventual position identifier. Common nodes
- # always start a new row, and are thus always in the first column.
-
- my $wit_matrix = [];
- my $cn = 0; # We should hit the common readings in order.
- my $row = [];
- foreach my $wn ( @{$wit->path} ) {
- if( $wn eq $ordered_common[$cn] ) {
- # Set up to look for the next common node, and
- # start a new row of words.
- $cn++;
- push( @$wit_matrix, $row ) if scalar( @$row );
- $row = [];
- }
- push( @$row, $wn );
- }
- push( @$wit_matrix, $row ); # Push the last row onto the matrix
-
- # Now we have a matrix per witness, so that each row in the
- # matrix begins with a common node, and continues with all the
- # variant words that appear in the witness. We turn this into
- # real positions in row,cell format. But we need some
- # trickery in order to make sure that each node gets assigned
- # to only one position.
-
- foreach my $li ( 1..scalar(@$wit_matrix) ) {
- foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
- my $reading = $wit_matrix->[$li-1]->[$di-1];
- my $position = "$li,$di";
- # If we have seen this node before, we need to compare
- # its position with what went before.
- unless( $reading->has_position &&
- _cmp_position( $position, $reading->position ) < 1 ) {
- # The new position ID replaces the old one.
- $reading->position( $position );
- } # otherwise, the old position needs to stay.
- }
- }
+ print STDERR "Calculating positions in " . $wit->sigil . "\n";
+ _update_positions_from_path( $wit->path, @ordered_common );
+ _update_positions_from_path( $wit->corrected_path, @ordered_common )
+ if $wit->has_post_correctione;
+ }
+
+ # DEBUG
+ foreach my $r ( $self->readings() ) {
+ print STDERR "Reading " . $r->name . "/" . $r->label . " has no position\n"
+ unless( $r->has_position );
}
$self->init_lemmata();
}
+sub _update_positions_from_path {
+ my( $path, @ordered_common ) = @_;
+
+ # First we walk the given path, making a matrix for the witness
+ # that corresponds to its eventual position identifier. Common
+ # nodes always start a new row, and are thus always in the first
+ # column.
+
+ my $wit_matrix = [];
+ my $cn = 0; # We should hit the common readings in order.
+ my $row = [];
+ foreach my $wn ( @{$path} ) {
+ if( $wn eq $ordered_common[$cn] ) {
+ # Set up to look for the next common node, and
+ # start a new row of words.
+ $cn++;
+ push( @$wit_matrix, $row ) if scalar( @$row );
+ $row = [];
+ }
+ push( @$row, $wn );
+ }
+ push( @$wit_matrix, $row ); # Push the last row onto the matrix
+
+ # Now we have a matrix per witness, so that each row in the
+ # matrix begins with a common node, and continues with all the
+ # variant words that appear in the witness. We turn this into
+ # real positions in row,cell format. But we need some
+ # trickery in order to make sure that each node gets assigned
+ # to only one position.
+
+ foreach my $li ( 1..scalar(@$wit_matrix) ) {
+ foreach my $di ( 1..scalar(@{$wit_matrix->[$li-1]}) ) {
+ my $reading = $wit_matrix->[$li-1]->[$di-1];
+ my $position = "$li,$di";
+ # If we have seen this node before, we need to compare
+ # its position with what went before.
+ unless( $reading->has_position &&
+ _cmp_position( $position, $reading->position ) < 1 ) {
+ # The new position ID replaces the old one.
+ $reading->position( $position );
+ } # otherwise, the old position needs to stay.
+ }
+ }
+}
+
sub _cmp_position {
my( $a, $b ) = @_;
if ( $a && $b ) {
=cut
+my $SHORT = 20;
+
sub merge_base {
my( $collation, $base_file, @app_entries ) = @_;
my @base_line_starts = read_base( $base_file, $collation );
foreach my $app ( @app_entries ) {
my( $line, $num ) = split( /\./, $app->{_id} );
# DEBUG with a short graph
- # last if $line > 2;
+ last if $SHORT && $line > $SHORT;
# DEBUG for problematic entries
- my $scrutinize = "7.3";
+ my $scrutinize = "";
my $first_line_reading = $base_line_starts[ $line ];
my $too_far = $base_line_starts[ $line+1 ];
# These are no longer common readings; unmark them as such.
my @lemma_readings = $collation->reading_sequence( $lemma_start,
$lemma_end );
- map { $_->set_attribute( 'class', 'lemma' ) } @lemma_readings;
+ map { $_->make_variant } @lemma_readings;
}
# Now we have our lemma readings; we add the variant readings
# any explicit post-correctione readings and add the
# relevant path.
my @mss = grep { $app->{$_} eq $k } keys( %$app );
+ # Keep track of what witnesses we have seen.
+ @all_witnesses{ @mss } = ( 1 ) x scalar( @mss );
foreach my $m ( @mss ) {
my $base = _is_post_corr( $m );
next unless $base;
my @lem = $collation->reading_sequence( $lemma_start, $lemma_end );
+ $collation->add_path( $collation->prior_reading( $lem[0] ), $lem[0], $m );
foreach my $i ( 0 .. $#lem-1 ) {
- $collation->add_path( $lem[$i], $lem[$i++], $m );
+ $collation->add_path( $lem[$i], $lem[++$i], $m );
}
+ $collation->add_path( $lem[-1], $collation->next_reading( $lem[-1] ), $m );
}
+ next;
}
my @variant = split( /\s+/, $app->{$k} );
@variant = () if $app->{$k} eq '/'; # This is an omission.
my $started = 0;
my $wordref = 0;
my $lineref = scalar @$lineref_array;
+ last if $SHORT && $lineref > $SHORT;
foreach my $w ( @words ) {
my $readingref = join( ',', $lineref, ++$wordref );
my $reading = $collation->add_reading( $readingref );
}
if( $last_reading ) {
my $path = $collation->add_path( $last_reading, $reading,
- "base text" );
+ $collation->baselabel );
$path->set_attribute( 'class', 'basetext' );
$last_reading = $reading;
} # TODO there should be no else here...
close BASE;
# Ending point for all texts
my $endpoint = $collation->add_reading( '#END#' );
- $collation->add_path( $last_reading, $endpoint, "base text" );
+ $collation->add_path( $last_reading, $endpoint, $collation->baselabel );
push( @$lineref_array, $endpoint );
return( @$lineref_array );
# Start the list of distinct readings with those readings in the lemma.
my @distinct_readings;
while( $lemma_start ne $lemma_end ) {
- push( @distinct_readings, [ $lemma_start, 'base text' ] );
+ push( @distinct_readings, [ $lemma_start, $collation->baselabel ] );
$lemma_start = $collation->next_reading( $lemma_start );
}
- push( @distinct_readings, [ $lemma_end, 'base text' ] );
+ push( @distinct_readings, [ $lemma_end, $collation->baselabel ] );
while( scalar @readings ) {
# sigil,return the base witness. If not, return a false value.
sub _is_post_corr {
my( $sigil ) = @_;
- if( $sigil =~ /^(.*?)(\s*\(p\.\s*c\.\))$/ ) {
+ if( $sigil =~ /^(.*?)(\s*\(?p\.\s*c\.\)?)$/ ) {
return $1;
}
return undef;