From: Jarkko Hietaniemi Date: Thu, 8 May 2003 17:46:26 +0000 (+0000) Subject: Upgrade to Tie::File 0.95. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=6ae23f4134087bc885284bd6ae11bfeb719e2459;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Tie::File 0.95. p4raw-id: //depot/perl@19450 --- diff --git a/lib/Tie/File.pm b/lib/Tie/File.pm index 637d6cf..43fd1d6 100644 --- a/lib/Tie/File.pm +++ b/lib/Tie/File.pm @@ -1,18 +1,20 @@ package Tie::File; require 5.005; -use Carp; +use Carp ':DEFAULT', 'confess'; use POSIX 'SEEK_SET'; -use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'O_WRONLY', 'O_RDONLY'; +use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'LOCK_SH', 'O_WRONLY', 'O_RDONLY'; sub O_ACCMODE () { O_RDONLY | O_RDWR | O_WRONLY } -$VERSION = "0.93"; + +$VERSION = "0.95"; my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful -my %good_opt = map {$_ => 1, "-$_" => 1} - qw(memory dw_size mode recsep discipline autodefer autochomp); +my %good_opt = map {$_ => 1, "-$_" => 1} + qw(memory dw_size mode recsep discipline + autodefer autochomp autodefer_threshhold); sub TIEARRAY { if (@_ % 2 != 0) { @@ -35,7 +37,7 @@ sub TIEARRAY { # default is the larger of the default cache size and the # deferred-write buffer size (if specified) $opts{memory} = $DEFAULT_MEMORY_SIZE; - $opts{memory} = $opts{dw_size} + $opts{memory} = $opts{dw_size} if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE; # Dora Winifred Read } @@ -49,9 +51,8 @@ sub TIEARRAY { $opts{deferred_s} = 0; # count of total bytes in ->{deferred} $opts{deferred_max} = -1; # empty - # the cache is a hash instead of an array because it is likely to be - # sparsely populated - $opts{cache} = Tie::File::Cache->new($opts{memory}); + # What's a good way to arrange that this class can be overridden? + $opts{cache} = Tie::File::Cache->new($opts{memory}); # autodeferment is enabled by default $opts{autodefer} = 1 unless defined $opts{autodefer}; @@ -114,13 +115,13 @@ sub FETCH { my $rec; # check the defer buffer - if ($self->_is_deferring && exists $self->{deferred}{$n}) { - $rec = $self->{deferred}{$n}; - } else { - $rec = $self->_fetch($n); - } + $rec = $self->{deferred}{$n} if exists $self->{deferred}{$n}; + $rec = $self->_fetch($n) unless defined $rec; - $self->_chomp1($rec); + # inlined _chomp1 + substr($rec, - $self->{recseplen}) = "" + if defined $rec && $self->{autochomp}; + $rec; } # Chomp many records in-place; return nothing useful @@ -153,7 +154,7 @@ sub _fetch { } if ($#{$self->{offsets}} < $n) { - return if $self->{eof}; + return if $self->{eof}; # request for record beyond end of file my $o = $self->_fill_offsets_to($n); # If it's still undefined, there is no such record, so return 'undef' return unless defined $o; @@ -198,26 +199,18 @@ sub STORE { # 20020324 Wait, but this DOES alter the cache. TODO BUG? 
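# Descriptive note on the STORE path below: the existing record is fetched
# first because its length tells _mtwrite exactly how many bytes starting at
# $self->{offsets}[$n] must be overwritten.  If the store is past the current
# end of the file, the file is first padded with empty records
# (_extend_file_to) so that record $n exists; the offsets table (_oadjust)
# and the read cache are then brought up to date after the write.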
my $oldrec = $self->_fetch($n); - if (defined($self->{cache}->lookup($n))) { - $self->{cache}->update($n, $rec); - } - if (not defined $oldrec) { # We're storing a record beyond the end of the file $self->_extend_file_to($n+1); $oldrec = $self->{recsep}; } +# return if $oldrec eq $rec; # don't bother my $len_diff = length($rec) - length($oldrec); # length($oldrec) here is not consistent with text mode TODO XXX BUG - $self->_twrite($rec, $self->{offsets}[$n], length($oldrec)); - - # now update the offsets - # array slice goes from element $n+1 (the first one to move) - # to the end - for (@{$self->{offsets}}[$n+1 .. $#{$self->{offsets}}]) { - $_ += $len_diff; - } + $self->_mtwrite($rec, $self->{offsets}[$n], length($oldrec)); + $self->_oadjust([$n, 1, $rec]); + $self->{cache}->update($n, $rec); } sub _store_deferred { @@ -259,13 +252,8 @@ sub _delete_deferred { sub FETCHSIZE { my $self = shift; - my $n = $#{$self->{offsets}}; - # 20020317 Change this to binary search - unless ($self->{eof}) { - while (defined ($self->_fill_offsets_to($n+1))) { - ++$n; - } - } + my $n = $self->{eof} ? $#{$self->{offsets}} : $self->_fill_offsets; + my $top_deferred = $self->_defer_max; $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1; $n; @@ -310,10 +298,15 @@ sub STORESIZE { $self->{cache}->remove(grep $_ >= $len, $self->{cache}->ckeys); } +### OPTIMIZE ME +### It should not be necessary to do FETCHSIZE +### Just seek to the end of the file. sub PUSH { my $self = shift; $self->SPLICE($self->FETCHSIZE, scalar(@_), @_); -# $self->FETCHSIZE; # av.c takes care of this for me + + # No need to return: + # $self->FETCHSIZE; # because av.c takes care of this for me } sub POP { @@ -391,7 +384,6 @@ sub DELETE { sub EXISTS { my ($self, $n) = @_; return 1 if exists $self->{deferred}{$n}; - $self->_fill_offsets_to($n); # I think this is unnecessary $n < $self->FETCHSIZE; } @@ -447,6 +439,12 @@ sub _splice { $nrecs = $oldsize - $pos + $nrecs; $nrecs = 0 if $nrecs < 0; } + + # nrecs is too big---it really means "until the end" + # 20030507 + if ($nrecs + $pos > $oldsize) { + $nrecs = $oldsize - $pos; + } } $self->_fixrecs(@data); @@ -470,76 +468,52 @@ sub _splice { $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_] if defined $self->{offsets}[$_+1]; } + $self->_fill_offsets_to($pos+$nrecs); # Modify the file - $self->_twrite($data, $self->{offsets}[$pos], $oldlen); - - # update the offsets table part 1 - # compute the offsets of the new records: - my @new_offsets; - if (@data) { - push @new_offsets, $self->{offsets}[$pos]; - for (0 .. $#data-1) { - push @new_offsets, $new_offsets[-1] + length($data[$_]); + $self->_mtwrite($data, $self->{offsets}[$pos], $oldlen); + # Adjust the offsets table + $self->_oadjust([$pos, $nrecs, @data]); + + { # Take this read cache stuff out into a separate function + # You made a half-attempt to put it into _oadjust. + # Finish something like that up eventually. + # STORE also needs to do something similarish + + # update the read cache, part 1 + # modified records + for ($pos .. $pos+$nrecs-1) { + my $new = $data[$_-$pos]; + if (defined $new) { + $self->{cache}->update($_, $new); + } else { + $self->{cache}->remove($_); + } } - } - - # If we're about to splice out the end of the offsets table... - if ($pos + $nrecs >= @{$self->{offsets}}) { - $self->{eof} = 0; # ... 
the table is no longer complete - } - splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets); - - # update the offsets table part 2 - # adjust the offsets of the following old records - for ($pos+@data .. $#{$self->{offsets}}) { - $self->{offsets}[$_] += $datalen - $oldlen; - } - # If we scrubbed out all known offsets, regenerate the trivial table - # that knows that the file does indeed start at 0. - $self->{offsets}[0] = 0 unless @{$self->{offsets}}; - # If the file got longer, the offsets table is no longer complete - $self->{eof} = 0 if @data > $nrecs; - - - # Perhaps the following cache foolery could be factored out - # into a bunch of mor opaque cache functions. For example, - # it's odd to delete a record from the cache and then remove - # it from the LRU queue later on; there should be a function to - # do both at once. - - # update the read cache, part 1 - # modified records - for ($pos .. $pos+$nrecs-1) { - my $new = $data[$_-$pos]; - if (defined $new) { - $self->{cache}->update($_, $new); - } else { - $self->{cache}->remove($_); + + # update the read cache, part 2 + # moved records - records past the site of the change + # need to be renumbered + # Maybe merge this with the previous block? + { + my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys; + my @newkeys = map $_-$nrecs+@data, @oldkeys; + $self->{cache}->rekey(\@oldkeys, \@newkeys); } - } - # update the read cache, part 2 - # moved records - records past the site of the change - # need to be renumbered - # Maybe merge this with the previous block? - { - my @oldkeys = grep $_ >= $pos + $nrecs, $self->{cache}->ckeys; - my @newkeys = map $_-$nrecs+@data, @oldkeys; - $self->{cache}->rekey(\@oldkeys, \@newkeys); + # Now there might be too much data in the cache, if we spliced out + # some short records and spliced in some long ones. If so, flush + # the cache. + $self->_cache_flush; } - # Now there might be too much data in the cache, if we spliced out - # some short records and spliced in some long ones. If so, flush - # the cache. - $self->_cache_flush; - # Yes, the return value of 'splice' *is* actually this complicated wantarray ? @result : @result ? $result[-1] : undef; } + # write data into the file -# $data is the data to be written. +# $data is the data to be written. # it should be written at position $pos, and should overwrite # exactly $len of the following bytes. # Note that if length($data) > $len, the subsequent bytes will have to @@ -582,7 +556,7 @@ sub _twrite { $readpos += $br; $writepos += length $data; $data = $next_block; - } while $more_data; # BUG XXX TODO how could this have worked? + } while $more_data; $self->_seekb($writepos); $self->_write_record($next_block); @@ -590,6 +564,229 @@ sub _twrite { $self->_chop_file if $len_diff < 0; } +# _iwrite(D, S, E) +# Insert text D at position S. +# Let C = E-S-|D|. If C < 0; die. +# Data in [S,S+C) is copied to [S+D,S+D+C) = [S+D,E). +# Data in [S+C = E-D, E) is returned. Data in [E, oo) is untouched. +# +# In a later version, don't read the entire intervening area into +# memory at once; do the copying block by block. 
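# A minimal in-memory analogue of the insertion rule described above (an
# illustrative sketch only -- the real _iwrite below works on the tied file's
# filehandle, not on a Perl string; demo_iwrite and its example data are
# made-up names used purely for illustration).
sub demo_iwrite {
    my ($sref, $D, $s, $e) = @_;            # string ref, data, start, end
    my $d = length $D;
    die "not enough space" if $e - $s - $d < 0;
    my $window  = substr($$sref, $s, $e - $s);       # the only bytes we may disturb
    my $spilled = $d ? substr($window, -$d) : "";    # the last $d bytes fall off the end
    substr($$sref, $s, $e - $s) = $D . substr($window, 0, $e - $s - $d);
    return $spilled;                                 # caller must deal with these
}
# Example: inserting "XY" at position 2 with end 6
my $demo = "abcdefgh";
my $tail = demo_iwrite(\$demo, "XY", 2, 6);  # $demo is now "abXYcdgh", $tail is "ef";
                                             # bytes at position 6 and beyond are untouched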
+sub _iwrite { + my $self = shift; + my ($D, $s, $e) = @_; + my $d = length $D; + my $c = $e-$s-$d; + local *FH = $self->{fh}; + confess "Not enough space to insert $d bytes between $s and $e" + if $c < 0; + confess "[$s,$e) is an invalid insertion range" if $e < $s; + + $self->_seekb($s); + read FH, my $buf, $e-$s; + + $D .= substr($buf, 0, $c, ""); + + $self->_seekb($s); + $self->_write_record($D); + + return $buf; +} + +# Like _twrite, but the data-pos-len triple may be repeated; you may +# write several chunks. All the writing will be done in +# one pass. Chunks SHALL be in ascending order and SHALL NOT overlap. +sub _mtwrite { + my $self = shift; + my $unwritten = ""; + my $delta = 0; + + @_ % 3 == 0 + or die "Arguments to _mtwrite did not come in groups of three"; + + while (@_) { + my ($data, $pos, $len) = splice @_, 0, 3; + my $end = $pos + $len; # The OLD end of the segment to be replaced + $data = $unwritten . $data; + $delta -= length($unwritten); + $unwritten = ""; + $pos += $delta; # This is where the data goes now + my $dlen = length $data; + $self->_seekb($pos); + if ($len >= $dlen) { # the data will fit + $self->_write_record($data); + $delta += ($dlen - $len); # everything following moves down by this much + $data = ""; # All the data in the buffer has been written + } else { # won't fit + my $writable = substr($data, 0, $len - $delta, ""); + $self->_write_record($writable); + $delta += ($dlen - $len); # everything following moves down by this much + } + + # At this point we've written some but maybe not all of the data. + # There might be a gap to close up, or $data might still contain a + # bunch of unwritten data that didn't fit. + my $ndlen = length $data; + if ($delta == 0) { + $self->_write_record($data); + } elsif ($delta < 0) { + # upcopy (close up gap) + if (@_) { + $self->_upcopy($end, $end + $delta, $_[1] - $end); + } else { + $self->_upcopy($end, $end + $delta); + } + } else { + # downcopy (insert data that didn't fit; replace this data in memory + # with _later_ data that doesn't fit) + if (@_) { + $unwritten = $self->_downcopy($data, $end, $_[1] - $end); + } else { + # Make the file longer to accomodate the last segment that doesn' + $unwritten = $self->_downcopy($data, $end); + } + } + } +} + +# Copy block of data of length $len from position $spos to position $dpos +# $dpos must be <= $spos +# +# If $len is undefined, go all the way to the end of the file +# and then truncate it ($spos - $dpos bytes will be removed) +sub _upcopy { + my $blocksize = 8192; + my ($self, $spos, $dpos, $len) = @_; + if ($dpos > $spos) { + die "source ($spos) was upstream of destination ($dpos) in _upcopy"; + } elsif ($dpos == $spos) { + return; + } + + while (! defined ($len) || $len > 0) { + my $readsize = ! defined($len) ? $blocksize + : $len > $blocksize ? $blocksize + : $len; + + my $fh = $self->{fh}; + $self->_seekb($spos); + my $bytes_read = read $fh, my $data, $readsize; + $self->_seekb($dpos); + if ($data eq "") { + $self->_chop_file; + last; + } + $self->_write_record($data); + $spos += $bytes_read; + $dpos += $bytes_read; + $len -= $bytes_read if defined $len; + } +} + +# Write $data into a block of length $len at position $pos, +# moving everything in the block forwards to make room. +# Instead of writing the last length($data) bytes from the block +# (because there isn't room for them any longer) return them. +sub _downcopy { + my $blocksize = 8192; + my ($self, $data, $pos, $len) = @_; + my $fh = $self->{fh}; + + while (! 
defined $len || $len > 0) { + my $readsize = ! defined($len) ? $blocksize + : $len > $blocksize? $blocksize : $len; + $self->_seekb($pos); + read $fh, my $old, $readsize; + $data .= $old; + $self->_seekb($pos); + my $writable = substr($data, 0, $readsize, ""); + last if $writable eq ""; + $self->_write_record($writable); + $len -= $readsize if defined $len; + $pos += $readsize; + } + return $data; +} + +# Adjust the object data structures following an '_mtwrite' +# Arguments are +# [$pos, $nrecs, @length] items +# indicating that $nrecs records were removed at $recpos (a record offset) +# and replaced with records of length @length... +# Arguments guarantee that $recpos is strictly increasing. +# No return value +sub _oadjust { + my $self = shift; + my $delta = 0; + my $delta_recs = 0; + my $prev_end = -1; + my %newkeys; + + for (@_) { + my ($pos, $nrecs, @data) = @$_; + $pos += $delta_recs; + + # Adjust the offsets of the records after the previous batch up + # to the first new one of this batch + for my $i ($prev_end+2 .. $pos - 1) { + $self->{offsets}[$i] += $delta; + $newkey{$i} = $i + $delta_recs; + } + + $prev_end = $pos + @data - 1; # last record moved on this pass + + # Remove the offsets for the removed records; + # replace with the offsets for the inserted records + my @newoff = ($self->{offsets}[$pos] + $delta); + for my $i (0 .. $#data) { + my $newlen = length $data[$i]; + push @newoff, $newoff[$i] + $newlen; + $delta += $newlen; + } + + for my $i ($pos .. $pos+$nrecs-1) { + last if $i+1 > $#{$self->{offsets}}; + my $oldlen = $self->{offsets}[$i+1] - $self->{offsets}[$i]; + $delta -= $oldlen; + } + +# # also this data has changed, so update it in the cache +# for (0 .. $#data) { +# $self->{cache}->update($pos + $_, $data[$_]); +# } +# if ($delta_recs) { +# my @oldkeys = grep $_ >= $pos + @data, $self->{cache}->ckeys; +# my @newkeys = map $_ + $delta_recs, @oldkeys; +# $self->{cache}->rekey(\@oldkeys, \@newkeys); +# } + + # replace old offsets with new + splice @{$self->{offsets}}, $pos, $nrecs+1, @newoff; + # What if we just spliced out the end of the offsets table? + # shouldn't we clear $self->{eof}? Test for this XXX BUG TODO + + $delta_recs += @data - $nrecs; # net change in total number of records + } + + # The trailing records at the very end of the file + if ($delta) { + for my $i ($prev_end+2 .. $#{$self->{offsets}}) { + $self->{offsets}[$i] += $delta; + } + } + + # If we scrubbed out all known offsets, regenerate the trivial table + # that knows that the file does indeed start at 0. + $self->{offsets}[0] = 0 unless @{$self->{offsets}}; + # If the file got longer, the offsets table is no longer complete + # $self->{eof} = 0 if $delta_recs > 0; + + # Now there might be too much data in the cache, if we spliced out + # some short records and spliced in some long ones. If so, flush + # the cache. + $self->_cache_flush; +} + # If a record does not already end with the appropriate terminator # string, append one. sub _fixrecs { @@ -620,9 +817,10 @@ sub _seek { defined($o) or confess("logic error: undefined offset for record $n"); seek $self->{fh}, $o, SEEK_SET - or die "Couldn't seek filehandle: $!"; # "Should never happen." + or confess "Couldn't seek filehandle: $!"; # "Should never happen." 
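# (confess comes from the Carp import at the top of the file; unlike die it
#  reports the failure with a full stack backtrace, which is what you want
#  for a "should never happen" seek error.)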
} +# seek to byte $b in the file sub _seekb { my ($self, $b) = @_; seek $self->{fh}, $b, SEEK_SET @@ -641,11 +839,10 @@ sub _fill_offsets_to { my $rec; until ($#OFF >= $n) { - my $o = $OFF[-1]; $self->_seek(-1); # tricky -- see comment at _seek $rec = $self->_read_record; if (defined $rec) { - push @OFF, tell $fh; + push @OFF, int(tell $fh); # Tels says that int() saves memory here } else { $self->{eof} = 1; return; # It turns out there is no such record @@ -654,7 +851,26 @@ sub _fill_offsets_to { # we have now read all the records up to record n-1, # so we can return the offset of record n - return $OFF[$n]; + $OFF[$n]; +} + +sub _fill_offsets { + my ($self) = @_; + + my $fh = $self->{fh}; + local *OFF = $self->{offsets}; + + $self->_seek(-1); # tricky -- see comment at _seek + + # Tels says that inlining read_record() would make this loop + # five times faster. 20030508 + while ( defined $self->_read_record()) { + # int() saves us memory here + push @OFF, int(tell $fh); + } + + $self->{eof} = 1; + $#OFF; } # assumes that $rec is already suitably terminated @@ -735,7 +951,7 @@ sub _extend_file_to { # Todo : just use $self->{recsep} x $extras here? while ($extras-- > 0) { $self->_write_record($self->{recsep}); - push @{$self->{offsets}}, tell $self->{fh}; + push @{$self->{offsets}}, int(tell $self->{fh}); } } @@ -752,7 +968,7 @@ sub _chop_file { # The result should be at least $n. sub _bufsize { my $n = shift; - return 8192 if $n < 0; + return 8192 if $n <= 0; my $b = $n & ~8191; $b += 8192 if $n & 8191; $b; @@ -772,7 +988,18 @@ sub flock { } my $fh = $self->{fh}; $op = LOCK_EX unless defined $op; - flock $fh, $op; + my $locked = flock $fh, $op; + + if ($locked && ($op & (LOCK_EX | LOCK_SH))) { + # If you're locking the file, then presumably it's because + # there might have been a write access by another process. + # In that case, the read cache contents and the offsets table + # might be invalid, so discard them. 20030508 + $self->{offsets} = [0]; + $self->{cache}->empty; + } + + $locked; } # Get/set autochomp option @@ -787,6 +1014,25 @@ sub autochomp { } } +# Get offset table entries; returns offset of nth record +sub offset { + my ($self, $n) = @_; + + if ($#{$self->{offsets}} < $n) { + return if $self->{eof}; # request for record beyond the end of file + my $o = $self->_fill_offsets_to($n); + # If it's still undefined, there is no such record, so return 'undef' + return unless defined $o; + } + + $self->{offsets}[$n]; +} + +sub discard_offsets { + my $self = shift; + $self->{offsets} = [0]; +} + ################################################################ # # Matters related to deferred writing @@ -804,7 +1050,7 @@ sub defer { # # This could be better optimized to write the file in one pass, instead # of one pass per block of records. But that will require modifications -# to _twrite, so I should have a good _twite test suite first. +# to _twrite, so I should have a good _twrite test suite first. 
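# Typical use of the deferred-writing interface that flush() serves, per the
# "Deferred Writing" section of the POD below (illustrative sketch; the file
# name and record contents are made up):
#
#     my $o = tie my @lines, 'Tie::File', 'data.txt' or die;
#     $o->defer;                        # buffer subsequent writes in memory
#     $lines[$_] = "record $_" for 0 .. 999;
#     $o->flush;                        # write the buffered records back to the file
#     # or $o->discard;                 # throw the buffered writes away instead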
sub flush { my $self = shift; @@ -812,10 +1058,10 @@ sub flush { $self->{defer} = 0; } -sub _flush { +sub _old_flush { my $self = shift; my @writable = sort {$a<=>$b} (keys %{$self->{deferred}}); - + while (@writable) { # gather all consecutive records from the front of @writable my $first_rec = shift @writable; @@ -831,6 +1077,40 @@ sub _flush { $self->_discard; # clear out defered-write-cache } +sub _flush { + my $self = shift; + my @writable = sort {$a<=>$b} (keys %{$self->{deferred}}); + my @args; + my @adjust; + + while (@writable) { + # gather all consecutive records from the front of @writable + my $first_rec = shift @writable; + my $last_rec = $first_rec+1; + ++$last_rec, shift @writable while @writable && $last_rec == $writable[0]; + --$last_rec; + my $end = $self->_fill_offsets_to($last_rec+1); + if (not defined $end) { + $self->_extend_file_to($last_rec); + $end = $self->{offsets}[$last_rec]; + } + my ($start) = $self->{offsets}[$first_rec]; + push @args, + join("", @{$self->{deferred}}{$first_rec .. $last_rec}), # data + $start, # position + $end-$start; # length + push @adjust, [$first_rec, # starting at this position... + $last_rec-$first_rec+1, # this many records... + # are replaced with these... + @{$self->{deferred}}{$first_rec .. $last_rec}, + ]; + } + + $self->_mtwrite(@args); # write multiple record groups + $self->_discard; # clear out defered-write-cache + $self->_oadjust(@adjust); +} + # Discard deferred writes and disable future deferred writes sub discard { my $self = shift; @@ -940,7 +1220,7 @@ sub _annotate_ad_history { } } -# If autodferring was enabled, cut it out and discard the history +# If autodeferring was enabled, cut it out and discard the history sub _stop_autodeferring { my $self = shift; if ($self->{autodeferring}) { @@ -1150,6 +1430,9 @@ sub HEAP () { 0 } sub HASH () { 1 } sub MAX () { 2 } sub BYTES() { 3 } +#sub STAT () { 4 } # Array with request statistics for each record +#sub MISS () { 5 } # Total number of cache misses +#sub REQ () { 6 } # Total number of cache requests use strict 'vars'; sub new { @@ -1183,7 +1466,7 @@ sub _heap_move { if (defined $n) { $self->[HASH]{$k} = $n; } else { - delete $self->[HASH]{$k}; + delete $self->[HASH]{$k}; } } @@ -1196,6 +1479,12 @@ sub insert { } confess "undefined val" unless defined $val; return if length($val) > $self->[MAX]; + +# if ($self->[STAT]) { +# $self->[STAT][$key] = 1; +# return; +# } + my $oldnode = $self->[HASH]{$key}; if (defined $oldnode) { my $oldval = $self->[HEAP]->set_val($oldnode, $val); @@ -1204,7 +1493,7 @@ sub insert { $self->[HEAP]->insert($key, $val); } $self->[BYTES] += length($val); - $self->flush; + $self->flush if $self->[BYTES] > $self->[MAX]; } sub expire { @@ -1218,6 +1507,14 @@ sub expire { sub remove { my ($self, @keys) = @_; my @result; + +# if ($self->[STAT]) { +# for my $key (@keys) { +# $self->[STAT][$key] = 0; +# } +# return; +# } + for my $key (@keys) { next unless exists $self->[HASH]{$key}; my $old_data = $self->[HEAP]->remove($self->[HASH]{$key}); @@ -1231,6 +1528,15 @@ sub lookup { my ($self, $key) = @_; local *_; croak "missing argument to ->lookup" unless defined $key; + +# if ($self->[STAT]) { +# $self->[MISS]++ if $self->[STAT][$key]++ == 0; +# $self->[REQ]++; +# my $hit_rate = 1 - $self->[MISS] / $self->[REQ]; +# # Do some testing to determine this threshhold +# $#$self = STAT - 1 if $hit_rate > 0.20; +# } + if (exists $self->[HASH]{$key}) { $self->[HEAP]->lookup($self->[HASH]{$key}); } else { @@ -1257,6 +1563,9 @@ sub empty { %{$self->[HASH]} = (); 
$self->[BYTES] = 0; $self->[HEAP]->empty; +# @{$self->[STAT]} = (); +# $self->[MISS] = 0; +# $self->[REQ] = 0; } sub is_empty { @@ -1269,7 +1578,7 @@ sub update { local *_; croak "missing argument to ->update" unless defined $key; if (length($val) > $self->[MAX]) { - my $oldval = $self->remove($key); + my ($oldval) = $self->remove($key); $self->[BYTES] -= length($oldval) if defined $oldval; } elsif (exists $self->[HASH]{$key}) { my $oldval = $self->[HEAP]->set_val($self->[HASH]{$key}, $val); @@ -1307,23 +1616,29 @@ sub ckeys { @a; } +# Return total amount of cached data sub bytes { my $self = shift; $self->[BYTES]; } +# Expire oldest item from cache until cache size is smaller than $max sub reduce_size_to { my ($self, $max) = @_; - until ($self->is_empty || $self->[BYTES] <= $max) { - $self->expire; + until ($self->[BYTES] <= $max) { + # Note that Tie::File::Cache::expire has been inlined here + my $old_data = $self->[HEAP]->popheap; + return unless defined $old_data; + $self->[BYTES] -= length $old_data; } } +# Why not just $self->reduce_size_to($self->[MAX])? +# Try this when things stabilize TODO XXX +# If the cache is too full, expire the oldest records sub flush { my $self = shift; - until ($self->is_empty || $self->[BYTES] <= $self->[MAX]) { - $self->expire; - } + $self->reduce_size_to($self->[MAX]) if $self->[BYTES] > $self->[MAX]; } # For internal use only @@ -1502,7 +1817,7 @@ sub _insert { # Remove the item at node $i from the heap, moving child items upwards. # The item with the smallest sequence number is always at the top. # Moving items upwards maintains this condition. -# Return the removed item. +# Return the removed item. Return undef if there was no item at node $i. sub remove { my ($self, $i) = @_; $i = 1 unless defined $i; @@ -1661,13 +1976,16 @@ sub _nodes { "Cogito, ergo sum."; # don't forget to return a true value from the file +__END__ + =head1 NAME Tie::File - Access the lines of a disk file via a Perl array =head1 SYNOPSIS - # This file documents Tie::File version 0.93 + # This file documents Tie::File version 0.95 + use Tie::File; tie @array, 'Tie::File', filename or die ...; @@ -1688,7 +2006,7 @@ Tie::File - Access the lines of a disk file via a Perl array push @array, new recs...; my $r1 = pop @array; unshift @array, new recs...; - my $r1 = shift @array; + my $r2 = shift @array; @old_recs = splice @array, 3, 7, new recs...; untie @array; # all finished @@ -1712,7 +2030,7 @@ Lazy people and beginners may now stop reading the manual. What is a 'record'? By default, the meaning is the same as for the C...E> operator: It's a string terminated by C<$/>, which is -probably C<"\n">. (Minor exception: on dos and Win32 systems, a +probably C<"\n">. (Minor exception: on DOS and Win32 systems, a 'record' is a string terminated by C<"\r\n">.) You may change the definition of "record" by supplying the C option in the C call: @@ -1836,7 +2154,13 @@ fetched from disk every time you examine them. The C value is not an absolute or exact limit on the memory used. C objects contains some structures besides the read cache and the deferred write buffer, whose sizes are not charged -against C. +against C. + +The cache itself consumes about 310 bytes per cached record, so if +your file has many short records, you may want to decrease the cache +memory limit, or else the cache overhead may exceed the size of the +cached data. + =head2 C @@ -1888,6 +2212,15 @@ the C declaration.) C is optional; the default is C. 
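For example (a minimal sketch; the file name is arbitrary, and with no
argument the method requests an exclusive lock):

    use Fcntl 'LOCK_SH', 'LOCK_EX', 'LOCK_UN';

    my $o = tie @array, 'Tie::File', 'data.txt' or die;
    $o->flock(LOCK_SH);        # shared lock for reading; also discards the read cache
    my @snapshot = @array;
    $o->flock(LOCK_UN);        # release the lock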
+C maintains an internal table of the byte offset of each +record it has seen in the file. + +When you use C to lock the file, C assumes that the +read cache is no longer trustworthy, because another process might +have modified the file since the last time it was read. Therefore, a +successful call to C discards the contents of the read cache +and the internal record offset table. + C promises that the following sequence of operations will be safe: @@ -1929,6 +2262,14 @@ See L<"autochomp">, above. See L<"Deferred Writing">, below. +=head2 C + + $off = $o->offset($n); + +This method returns the byte offset of the start of the C<$n>th record +in the file. If there is no such record, it returns an undefined +value. + =head1 Tying to an already-opened filehandle If C<$fh> is a filehandle, such as is returned by C or one @@ -2052,21 +2393,24 @@ or Similarly, C<-Eautodefer(1)> re-enables autodeferment, and C<-Eautodefer()> recovers the current value of the autodefer setting. -=head1 CAVEATS -(That's Latin for 'warnings'.) +=head1 CONCURRENT ACCESS TO FILES -=over 4 +Caching and deferred writing are inappropriate if you want the same +file to be accessed simultaneously from more than one process. You +will want to disable these features. You should do that by including +the C 0> option in your C calls; this will inhibit +caching and deferred writing. -=item * +You will also want to lock the file while reading or writing it. You +can use the C<-Eflock> method for this. A future version of this +module may provide an 'autolocking' mode. + +=head1 CAVEATS -This is BETA RELEASE SOFTWARE. It may have bugs. See the discussion -below about the (lack of any) warranty. +(That's Latin for 'warnings'.) -In particular, this means that the interface may change in -incompatible ways from one version to the next, without warning. That -has happened at least once already. The interface will freeze before -Perl 5.8 is released, probably sometime in April 2002. +=over 4 =item * @@ -2100,27 +2444,12 @@ and C, but in general, the correspondence is extremely close. =item * -Not quite every effort was made to make this module as efficient as -possible. C should use binary search instead of linear -search. - -The performance of the C method could be improved. At present, -it still rewrites the tail of the file once for each block of -contiguous lines to be changed. In the typical case, this will result -in only one rewrite, but in peculiar cases it might be bad. It should -be possible to perform I deferred writing with a single rewrite. - -Profiling suggests that these defects are probably minor; in any -event, they will be fixed in a future version of the module. - -=item * - I have supposed that since this module is concerned with file I/O, almost all normal use of it will be heavily I/O bound. This means that the time to maintain complicated data structures inside the module will be dominated by the time to actually perform the I/O. When there was an opportunity to spend CPU time to avoid doing I/O, I -tried to take it. +usually tried to take it. =item * @@ -2128,6 +2457,17 @@ You might be tempted to think that deferred writing is like transactions, with C as C and C as C, but it isn't, so don't. +=item * + +There is a large memory overhead for each record offset and for each +cache entry: about 310 bytes per cached data record, and about 21 bytes per offset table entry. + +The per-record overhead will limit the maximum number of records you +can access per file. 
Note that I the length of the array +via C<$x = scalar @tied_file> accesses B records and stores their +offsets. The same for C, even if you exit the +loop early. + =back =head1 SUBCLASSING @@ -2162,7 +2502,7 @@ any news of importance, will be available at =head1 LICENSE -C version 0.93 is copyright (C) 2002 Mark Jason Dominus. +C version 0.95 is copyright (C) 2002 Mark Jason Dominus. This library is free software; you may redistribute it and/or modify it under the same terms as Perl itself. @@ -2190,7 +2530,7 @@ For licensing inquiries, contact the author at: =head1 WARRANTY -C version 0.93 comes with ABSOLUTELY NO WARRANTY. +C version 0.95 comes with ABSOLUTELY NO WARRANTY. For details, see the license. =head1 THANKS @@ -2206,28 +2546,29 @@ Autrijus Tang (for heroic eleventh-hour Win32 testing above and beyond the call of duty), Michael G Schwern (for testing advice), and the rest of the CPAN testers (for testing generally). +Special thanks to Tels for suggesting several speed and memory +optimizations. + Additional thanks to: Edward Avis / +Mattia Barbon / Gerrit Haase / +Jarkko Hietaniemi (again) / Nikola Knezevic / +John Kominetz / Nick Ing-Simmons / Tassilo von Parseval / H. Dieter Pearcey / Slaven Rezic / +Eric Roode / Peter Scott / Peter Somu / Autrijus Tang (again) / -Tels / -Juerd Wallboer +Tels (again) / +Juerd Waalboer =head1 TODO -More tests. (The cache and heap modules need more unit tests.) - -Improve SPLICE algorithm to use deferred writing machinery. - -Cleverer strategy for flushing deferred writes. - More tests. (Stuff I didn't think of yet.) Paragraph mode? @@ -2236,9 +2577,23 @@ Fixed-length mode. Leave-blanks mode. Maybe an autolocking mode? +For many common uses of the module, the read cache is a liability. +For example, a program that inserts a single record, or that scans the +file once, will have a cache hit rate of zero. This suggests a major +optimization: The cache should be initially disabled. Here's a hybrid +approach: Initially, the cache is disabled, but the cache code +maintains statistics about how high the hit rate would be *if* it were +enabled. When it sees the hit rate get high enough, it enables +itself. The STAT comments in this code are the beginning of an +implementation of this. + Record locking with fcntl()? Then the module might support an undo log and get real transactions. What a tour de force that would be. +Keeping track of the highest cached record. This would allow reads-in-a-row +to skip the cache lookup faster (if reading from 1..N with empty cache at +start, the last cached value will be always N-1). + More tests. =cut diff --git a/lib/Tie/File/t/00_version.t b/lib/Tie/File/t/00_version.t index 71c1c82..9285257 100644 --- a/lib/Tie/File/t/00_version.t +++ b/lib/Tie/File/t/00_version.t @@ -2,7 +2,7 @@ print "1..1\n"; -my $testversion = "0.93"; +my $testversion = "0.95"; use Tie::File; if ($Tie::File::VERSION != $testversion) { @@ -19,3 +19,4 @@ None of the other test results will be reliable. } print "ok 1\n"; + diff --git a/lib/Tie/File/t/01_gen.t b/lib/Tie/File/t/01_gen.t index 0fc0176..202b09c 100644 --- a/lib/Tie/File/t/01_gen.t +++ b/lib/Tie/File/t/01_gen.t @@ -1,6 +1,8 @@ #!/usr/bin/perl +$| = 1; my $file = "tf$$.txt"; +1 while unlink $file; print "1..75\n"; diff --git a/lib/Tie/File/t/04_splice.t b/lib/Tie/File/t/04_splice.t index cb08dac..b3880b7 100644 --- a/lib/Tie/File/t/04_splice.t +++ b/lib/Tie/File/t/04_splice.t @@ -12,6 +12,7 @@ # contents. 
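# Reminder of the splice return-value convention exercised below (see
# _splice in File.pm): list context yields the removed records, scalar
# context yields the last removed record, or undef if nothing was removed.
# Illustrative only, not one of the numbered tests:
#
#     tie my @a, 'Tie::File', $file or die;
#     my @gone = splice @a, 1, 2, "new1", "new2";   # @gone is ("rec1", "rec2")
#     my $last = splice @a, 0, 1;                   # "rec0" in scalar context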
+$| = 1; my $file = "tf$$.txt"; $: = Tie::File::_default_recsep(); my $data = "rec0$:rec1$:rec2$:"; diff --git a/lib/Tie/File/t/09_gen_rs.t b/lib/Tie/File/t/09_gen_rs.t index c202556..7d70e3e 100644 --- a/lib/Tie/File/t/09_gen_rs.t +++ b/lib/Tie/File/t/09_gen_rs.t @@ -104,7 +104,6 @@ if (setup_badly_terminated_file(3)) { $N++; push @a, "next"; check_contents($badrec, "next"); - undef $o; untie @a; } # (51-52) if (setup_badly_terminated_file(2)) { @@ -113,7 +112,6 @@ if (setup_badly_terminated_file(2)) { or die "Couldn't tie file: $!"; splice @a, 1, 0, "x", "y"; check_contents($badrec, "x", "y"); - undef $o; untie @a; } # (53-56) if (setup_badly_terminated_file(4)) { @@ -128,7 +126,6 @@ if (setup_badly_terminated_file(4)) { : "not ok $N \# expected <$badrec>, got <$r[0]>\n"; $N++; check_contents("x", "y"); - undef $o; untie @a; } # (57-58) 20020402 The modifiaction would have failed if $\ were set wrong. @@ -141,7 +138,6 @@ if (setup_badly_terminated_file(2)) { my $z = $a[0]; } check_contents($badrec); - undef $o; untie @a; } sub setup_badly_terminated_file { diff --git a/lib/Tie/File/t/16_handle.t b/lib/Tie/File/t/16_handle.t index f799496..72ff10b 100644 --- a/lib/Tie/File/t/16_handle.t +++ b/lib/Tie/File/t/16_handle.t @@ -79,7 +79,7 @@ undef $o; untie @a; # (39) Does it correctly detect a non-seekable handle? -{ if ($^O =~ /^(MSWin32|dos|beos)$/) { +{ if ($^O =~ /^(MSWin32|dos|BeOS)$/) { print "ok $N # skipped ($^O has broken pipe semantics)\n"; last; } diff --git a/lib/Tie/File/t/19_cache.t b/lib/Tie/File/t/19_cache.t index 74228c0..81c6932 100644 --- a/lib/Tie/File/t/19_cache.t +++ b/lib/Tie/File/t/19_cache.t @@ -3,12 +3,13 @@ # Tests for various caching errors # +$|=1; my $file = "tf$$.txt"; $: = Tie::File::_default_recsep(); my $data = join $:, "rec0" .. "rec9", ""; my $V = $ENV{INTEGRITY}; # Verbose integrity checking? -print "1..54\n"; +print "1..55\n"; my $N = 1; use Tie::File; @@ -160,6 +161,11 @@ check(); splice @a; check(); +# (55) This was broken on 20030507 when you moved the cache management +# stuff out of _oadjust back into _splice without also putting it back +# into _store. +@a = (0..11); +check(); sub init_file { my $data = shift; diff --git a/lib/Tie/File/t/21_win32.t b/lib/Tie/File/t/21_win32.t index 80d795a..d068544 100644 --- a/lib/Tie/File/t/21_win32.t +++ b/lib/Tie/File/t/21_win32.t @@ -9,7 +9,7 @@ my $file = "tf$$.txt"; -unless ($^O =~ /^(MSWin32)$/) { +unless ($^O =~ /^(MSWin32|dos)$/) { print "1..0\n"; exit; } diff --git a/lib/Tie/File/t/25_gen_nocache.t b/lib/Tie/File/t/25_gen_nocache.t index bafecf0..78e5506 100644 --- a/lib/Tie/File/t/25_gen_nocache.t +++ b/lib/Tie/File/t/25_gen_nocache.t @@ -3,7 +3,6 @@ # Regular read-write tests with caching disabled # (Same as 01_gen.t) # - my $file = "tf$$.txt"; print "1..68\n"; diff --git a/lib/Tie/File/t/26_twrite.t b/lib/Tie/File/t/26_twrite.t index 1d9073c..e2a925f 100644 --- a/lib/Tie/File/t/26_twrite.t +++ b/lib/Tie/File/t/26_twrite.t @@ -269,7 +269,7 @@ sub try { # The record has exactly 17 characters. This will help ensure that # even if _twrite screws up, the data doesn't coincidentally # look good because the remainder accidentally lines up. - my $d = length($:) == 1 ? "0123456789abcdef$:" : "0123456789abcde$:"; + my $d = substr("0123456789abcdef$:", -17); my $recs = defined($FLEN) ? 
int($FLEN/length($d))+1 : # enough to make up at least $FLEN int(8192*5/length($d))+1; # at least 5 blocks' worth diff --git a/lib/Tie/File/t/27_iwrite.t b/lib/Tie/File/t/27_iwrite.t new file mode 100644 index 0000000..db591a8 --- /dev/null +++ b/lib/Tie/File/t/27_iwrite.t @@ -0,0 +1,235 @@ +#!/usr/bin/perl +# +# Unit tests of _iwrite function +# +# _iwrite($self, $data, $start, $end) +# +# 'i' here is for 'insert'. This writes $data at absolute position $start +# in the file, copying the data at that position downwards--- +# but only down to position $end. Data at or past $end is not moved +# or even examined. Since there isn't enough room for the full copy +# (Because we inserted $data at the beginning) we copy as much as possible +# and return a string containing the remainder. + +my $file = "tf$$.txt"; +$| = 1; + +print "1..203\n"; + +my $N = 1; +my $oldfile; +use Tie::File; +print "ok $N\n"; $N++; + +$: = Tie::File::_default_recsep(); + +$FLEN = 40970; # Use files of this length +$oldfile = mkrand($FLEN); +print "# MOF tests\n"; +# (2-85) These were generated by 'gentests.pl' to cover all possible cases +# (I hope) +# Legend: +# x: data is entirely contained within one block +# x>: data runs from the middle to the end of the block +# : data occupies precisely one block +# x>: data runs from the middle of one block to the end of the next +# : data occupies two blocks exactly +# : data occupies three blocks exactly +# 0: data is null +# +# For each possible alignment of the old and new data, we investigate +# up to three situations: old data is shorter, old and new data are the +# same length, and new data is shorter. +# +# try($pos, $old, $new) means to run a test where the area being +# written into starts at position $pos, the area being written into +# has length $old, and and the new data has length $new. 
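# Worked example of one such case (the numbers here are made up for
# illustration and are not one of the generated tests): try(100, 50, 30)
# writes 30 bytes of new data at offset 100 over a 50-byte region ending at
# offset 150.  The old bytes in [100,120) slide down to [130,150), the old
# bytes in [120,150) no longer fit and are the expected return value of
# _iwrite, and everything at offset 150 and beyond is untouched.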
+try( 8605, 2394, 2394); # old=x , new=x ; old = new +try( 9768, 1361, 664); # old=x , new=x ; old > new +try( 9955, 6429, 6429); # old=x> , new=x ; old = new +try(10550, 5834, 4123); # old=x> , new=x ; old > new +try(14580, 6158, 851); # old=x> new +try(13442, 11134, 1572); # old=x> , new=x ; old > new +try( 8192, 514, 514); # old= new +try( 8192, 8192, 8192); # old= , new= , new= new +try( 8192, 10575, 6644); # old= new +try( 8192, 16384, 5616); # old= , new= new +try( 8192, 24576, 6253); # old=, new= new +try( 9965, 6419, 6419); # old=x> , new=x> ; old = new +try(16059, 6102, 325); # old=x> ; old > new +try( 9503, 15073, 6881); # old=x> , new=x> ; old > new +try(16316, 1605, 1605); # old=x> new +try(14739, 9837, 9837); # old=x> , new=x> , new=x> new +try( 8192, 8192, 8192); # old= , new= ; old = new +try( 8192, 14817, 8192); # old= ; old > new +try( 8192, 16384, 8192); # old= , new= ; old > new +try( 8192, 24576, 8192); # old=, new= ; old > new +try( 8192, 9001, 9001); # old= new +try( 8192, 16384, 10781); # old= , new= new +try( 8192, 24576, 9284); # old=, new= new +try(14761, 9815, 9815); # old=x> , new=x> ; old = new +try( 8192, 16384, 16384); # old= , new= ; old = new +try( 8192, 24576, 16384); # old=, new= ; old > new +try( 8192, 24576, 24576); # old=, new=; old = new +try( 8771, 776, 0); # old=x , new=0 ; old > new +try( 8192, 2813, 0); # old= new +try(13945, 2439, 0); # old=x> , new=0 ; old > new +try(14493, 6090, 0); # old=x> new +try( 8192, 8192, 0); # old= , new=0 ; old > new +try( 8192, 10030, 0); # old= new +try(14983, 9593, 0); # old=x> , new=0 ; old > new +try( 8192, 16384, 0); # old= , new=0 ; old > new +try( 8192, 24576, 0); # old=, new=0 ; old > new +try(10489, 0, 0); # old=0 , new=0 ; old = new + +print "# SOF tests\n"; +# (86-133) +# These tests all take place at the start of the file +try( 0, 4868, 4868); # old= new +try( 0, 8192, 8192); # old= , new= , new= new +try( 0, 11891, 1917); # old= new +try( 0, 16384, 5155); # old= , new= new +try( 0, 24576, 2953); # old=, new= new +try( 0, 8192, 8192); # old= , new= ; old = new +try( 0, 11083, 8192); # old= ; old > new +try( 0, 16384, 8192); # old= , new= ; old > new +try( 0, 24576, 8192); # old=, new= ; old > new +try( 0, 14126, 14126); # old= new +try( 0, 16384, 13258); # old= , new= new +try( 0, 24576, 14367); # old=, new= new +try( 0, 16384, 16384); # old= , new= ; old = new +try( 0, 24576, 16384); # old=, new= ; old > new +try( 0, 24576, 24576); # old=, new=; old = new +try( 0, 6530, 0); # old= new +try( 0, 8192, 0); # old= , new=0 ; old > new +try( 0, 14707, 0); # old= new +try( 0, 16384, 0); # old= , new=0 ; old > new +try( 0, 24576, 0); # old=, new=0 ; old > new +try( 0, 0, 0); # old=0 , new=0 ; old = new + +print "# EOF tests 1\n"; +# (134-169) +# These tests all take place at the end of the file +$FLEN = 40960; # Force the file to be exactly 40960 bytes long +$oldfile = mkrand($FLEN); +try(32768, 8192, 8192); # old= , new= , new= new +try(24576, 16384, 1917); # old= , new= new +try(16384, 24576, 3818); # old=, new= new +try(32768, 8192, 8192); # old= , new= ; old = new +try(24576, 16384, 8192); # old= , new= ; old > new +try(16384, 24576, 8192); # old=, new= ; old > new +try(24576, 16384, 12221); # old= , new= new +try(16384, 24576, 15030); # old=, new= new +try(24576, 16384, 16384); # old= , new= ; old = new +try(16384, 24576, 16384); # old=, new= ; old > new +try(16384, 24576, 24576); # old=, new=; old = new +try(35973, 4987, 0); # old=x> , new=0 ; old > new +try(32768, 8192, 0); # old= , new=0 ; old > new 
+try(29932, 11028, 0); # old=x> , new=0 ; old > new +try(24576, 16384, 0); # old= , new=0 ; old > new +try(16384, 24576, 0); # old=, new=0 ; old > new +try(40960, 0, 0); # old=0 , new=0 ; old = new + +print "# EOF tests 2\n"; +# (170-203) +# These tests all take place at the end of the file +$FLEN = 42000; # Force the file to be exactly 42000 bytes long +$oldfile = mkrand($FLEN); +try(41683, 317, 317); # old=x , new=x ; old = new +try(41225, 775, 405); # old=x , new=x ; old > new +try(35709, 6291, 284); # old=x> new +try(40960, 1040, 1040); # old= new +try(32768, 9232, 5604); # old= new +try(39994, 2006, 966); # old=x> ; old > new +try(36725, 5275, 5275); # old=x> new +try(32768, 9232, 8192); # old= ; old > new +try(32768, 9232, 9232); # old= new +try(41500, 500, 0); # old=x , new=0 ; old > new +try(40960, 1040, 0); # old= new +try(35272, 6728, 0); # old=x> new +try(32768, 9232, 0); # old= new +try(42000, 0, 0); # old=0 , new=0 ; old = new + +sub mkrand { + my $len = shift; + srand $len; + my @c = ('a' .. 'z', 'A' .. 'Z', 0..9, $:); + my $d = ""; + $d .= $c[rand @c] until length($d) >= $len; + substr($d, $len) = ""; # chop it off to the proper length + $d; +} + +sub try { + my ($s, $len, $newlen) = @_; + my $e = $s + $len; + + open F, "> $file" or die "Couldn't open file $file: $!"; + binmode F; + + print F $oldfile; + close F; + + die "wrong length!" unless -s $file == $FLEN; + + my $newdata = "-" x $newlen; + my $expected = $oldfile; + + my $expected_return = substr($expected, $e - $newlen, $newlen, ""); + substr($expected, $s, 0, $newdata); + + my $o = tie my @lines, 'Tie::File', $file or die $!; + my $actual_return = $o->_iwrite($newdata, $s, $e); + undef $o; untie @lines; + + open F, "< $file" or die "Couldn't open file $file: $!"; + binmode F; + my $actual; + { local $/; + $actual = ; + } + close F; + + my ($alen, $xlen) = (length $actual, length $expected); + unless ($alen == $xlen) { + print "# try(@_) expected file length $xlen, actual $alen!\n"; + } + print $actual eq $expected ? "ok $N\n" : "not ok $N\n"; + $N++; + + if (! defined $actual_return && ! defined $expected_return) { + print "ok $N\n"; + } elsif (! defined $actual_return || ! defined $expected_return) { + print "not ok $N\n"; + } else { + print $actual_return eq $expected_return ? "ok $N\n" : "not ok $N\n"; + } + $N++; +} + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/t/28_mtwrite.t b/lib/Tie/File/t/28_mtwrite.t new file mode 100644 index 0000000..d21d03a --- /dev/null +++ b/lib/Tie/File/t/28_mtwrite.t @@ -0,0 +1,351 @@ +#!/usr/bin/perl +# +# Unit tests of _mtwrite function +# +# _mtwrite($self, $d1, $s1, $l1, $d2, $s2, $l2, ...) +# +# 'm' here is for 'multiple'. This writes data $d1 at position $s1 +# over a block of space $l1, moving subsequent data up or down as necessary. 
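# In-memory picture of what one _mtwrite call does to the file (an
# illustrative sketch; as the real routine requires, the segments are
# non-overlapping and given in ascending order of position):
#
#     my $s = "AAAABBBBCCCCDDDD";
#     substr($s, 8, 4) = "yyyyyy";   # replace 4 bytes at offset 8 with 6 bytes
#     substr($s, 0, 4) = "xx";       # replace 4 bytes at offset 0 with 2 bytes
#     # $s is now "xxBBBByyyyyyDDDD"
#
# Doing the later segment first keeps the earlier offsets valid; the real
# _mtwrite instead works front to back in a single pass and carries the
# cumulative displacement in $delta.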
+ +my $file = "tf$$.txt"; +$| = 1; + +print "1..2252\n"; + +my $N = 1; +my $oldfile; +use Tie::File; +print "ok $N\n"; $N++; + +$: = Tie::File::_default_recsep(); + +# Only these are used for the triple-region tests +@BASE_TRIES = ( + [10, 20, 30], + [10, 30, 20], + [100, 30, 20], + [100, 20, 30], + [100, 40, 20], + [100, 20, 40], + [200, 20, 30], + [200, 30, 20], + [200, 20, 60], + [200, 60, 20], + ); + +@TRIES = @BASE_TRIES; + +$FLEN = 40970; # Use files of this length +$oldfile = mkrand($FLEN); +print "# MOF tests\n"; +# These were generated by 'gentests.pl' to cover all possible cases +# (I hope) +# Legend: +# x: data is entirely contained within one block +# x>: data runs from the middle to the end of the block +# : data occupies precisely one block +# x>: data runs from the middle of one block to the end of the next +# : data occupies two blocks exactly +# : data occupies three blocks exactly +# 0: data is null +# +# For each possible alignment of the old and new data, we investigate +# up to three situations: old data is shorter, old and new data are the +# same length, and new data is shorter. +# +# try($pos, $old, $new) means to run a test where the area being +# written into starts at position $pos, the area being written into +# has length $old, and and the new data has length $new. +try( 8605, 2394, 2394); # old=x , new=x ; old = new +try( 9768, 1361, 664); # old=x , new=x ; old > new +try( 9955, 6429, 6429); # old=x> , new=x ; old = new +try(10550, 5834, 4123); # old=x> , new=x ; old > new +try(14580, 6158, 851); # old=x> new +try(13442, 11134, 1572); # old=x> , new=x ; old > new +try( 8192, 514, 514); # old= new +try( 8192, 8192, 8192); # old= , new= , new= new +try( 8192, 10575, 6644); # old= new +try( 8192, 16384, 5616); # old= , new= new +try( 8192, 24576, 6253); # old=, new= new +try( 9965, 6419, 6419); # old=x> , new=x> ; old = new +try(16059, 6102, 325); # old=x> ; old > new +try( 9503, 15073, 6881); # old=x> , new=x> ; old > new +try(16316, 1605, 1605); # old=x> new +try(14739, 9837, 9837); # old=x> , new=x> , new=x> new +try( 8192, 8192, 8192); # old= , new= ; old = new +try( 8192, 14817, 8192); # old= ; old > new +try( 8192, 16384, 8192); # old= , new= ; old > new +try( 8192, 24576, 8192); # old=, new= ; old > new +try( 8192, 9001, 9001); # old= new +try( 8192, 16384, 10781); # old= , new= new +try( 8192, 24576, 9284); # old=, new= new +try(14761, 9815, 9815); # old=x> , new=x> ; old = new +try( 8192, 16384, 16384); # old= , new= ; old = new +try( 8192, 24576, 16384); # old=, new= ; old > new +try( 8192, 24576, 24576); # old=, new=; old = new +try( 8771, 776, 0); # old=x , new=0 ; old > new +try( 8192, 2813, 0); # old= new +try(13945, 2439, 0); # old=x> , new=0 ; old > new +try(14493, 6090, 0); # old=x> new +try( 8192, 8192, 0); # old= , new=0 ; old > new +try( 8192, 10030, 0); # old= new +try(14983, 9593, 0); # old=x> , new=0 ; old > new +try( 8192, 16384, 0); # old= , new=0 ; old > new +try( 8192, 24576, 0); # old=, new=0 ; old > new +try(10489, 0, 0); # old=0 , new=0 ; old = new + +print "# SOF tests\n"; +# These tests all take place at the start of the file +try( 0, 4868, 4868); # old= new +try( 0, 8192, 8192); # old= , new= , new= new +try( 0, 11891, 1917); # old= new +try( 0, 16384, 5155); # old= , new= new +try( 0, 24576, 2953); # old=, new= new +try( 0, 8192, 8192); # old= , new= ; old = new +try( 0, 11083, 8192); # old= ; old > new +try( 0, 16384, 8192); # old= , new= ; old > new +try( 0, 24576, 8192); # old=, new= ; old > new +try( 0, 14126, 14126); # old= 
new +try( 0, 16384, 13258); # old= , new= new +try( 0, 24576, 14367); # old=, new= new +try( 0, 16384, 16384); # old= , new= ; old = new +try( 0, 24576, 16384); # old=, new= ; old > new +try( 0, 24576, 24576); # old=, new=; old = new +try( 0, 6530, 0); # old= new +try( 0, 8192, 0); # old= , new=0 ; old > new +try( 0, 14707, 0); # old= new +try( 0, 16384, 0); # old= , new=0 ; old > new +try( 0, 24576, 0); # old=, new=0 ; old > new +try( 0, 0, 0); # old=0 , new=0 ; old = new + +print "# EOF tests 1\n"; +# These tests all take place at the end of the file +$FLEN = 40960; # Force the file to be exactly 40960 bytes long +$oldfile = mkrand($FLEN); +try(32768, 8192, 8192); # old= , new= , new= new +try(24576, 16384, 1917); # old= , new= new +try(16384, 24576, 3818); # old=, new= new +try(32768, 8192, 8192); # old= , new= ; old = new +try(24576, 16384, 8192); # old= , new= ; old > new +try(16384, 24576, 8192); # old=, new= ; old > new +try(24576, 16384, 12221); # old= , new= new +try(16384, 24576, 15030); # old=, new= new +try(24576, 16384, 16384); # old= , new= ; old = new +try(16384, 24576, 16384); # old=, new= ; old > new +try(16384, 24576, 24576); # old=, new=; old = new +try(35973, 4987, 0); # old=x> , new=0 ; old > new +try(32768, 8192, 0); # old= , new=0 ; old > new +try(29932, 11028, 0); # old=x> , new=0 ; old > new +try(24576, 16384, 0); # old= , new=0 ; old > new +try(16384, 24576, 0); # old=, new=0 ; old > new +try(40960, 0, 0); # old=0 , new=0 ; old = new + +print "# EOF tests 2\n"; +# These tests all take place at the end of the file +$FLEN = 42000; # Force the file to be exactly 42000 bytes long +$oldfile = mkrand($FLEN); +try(41683, 317, 317); # old=x , new=x ; old = new +try(41225, 775, 405); # old=x , new=x ; old > new +try(35709, 6291, 284); # old=x> new +try(40960, 1040, 1040); # old= new +try(32768, 9232, 5604); # old= new +try(39994, 2006, 966); # old=x> ; old > new +try(36725, 5275, 5275); # old=x> new +try(32768, 9232, 8192); # old= ; old > new +try(32768, 9232, 9232); # old= new +try(41500, 500, 0); # old=x , new=0 ; old > new +try(40960, 1040, 0); # old= new +try(35272, 6728, 0); # old=x> new +try(32768, 9232, 0); # old= new +try(42000, 0, 0); # old=0 , new=0 ; old = new + +# Now the REAL tests +# Make sure mtwrite can properly write sequences of several intervals +# The intervals tested above were accumulated into @TRIES. +# try_all_doubles() tries every possible sensible pair of those intervals. +# try_all_triples() tries every possible sensible group of +# tree intervals from the more restrictive set @BASE_TRIES. +$FLEN = 40970; +$oldfile = mkrand($FLEN); +try_all_doubles(); +try_all_triples(); + +sub mkrand { + my $len = shift; + srand $len; + my @c = ('a' .. 'z', 'A' .. 'Z', 0..9, $:); + my $d = ""; + $d .= $c[rand @c] until length($d) >= $len; + substr($d, $len) = ""; # chop it off to the proper length + $d; +} + +sub try { + push @TRIES, [@_] if @_ == 3; + + open F, "> $file" or die "Couldn't open file $file: $!"; + binmode F; + print F $oldfile; + close F; + die "wrong length!" 
unless -s $file == $FLEN; + + my @mt_args; + my $expected = $oldfile; + { my @a = @_; + my $c = "a"; + while (@a) { + my ($s, $len, $newlen) = splice @a, -3; + my $newdata = $c++ x $newlen; + substr($expected, $s, $len, $newdata); + unshift @mt_args, $newdata, $s, $len; + } + } + + my $o = tie my @lines, 'Tie::File', $file or die $!; + my $actual_return = $o->_mtwrite(@mt_args); + undef $o; untie @lines; + + open F, "< $file" or die "Couldn't open file $file: $!"; + binmode F; + my $actual; + { local $/; + $actual = ; + } + close F; + + my ($alen, $xlen) = (length $actual, length $expected); + unless ($alen == $xlen) { + print "# try(@_) expected file length $xlen, actual $alen!\n"; + } + print $actual eq $expected ? "ok $N\n" : "not ok $N\n"; + $N++; + +# if (! defined $actual_return && ! defined $expected_return) { +# print "ok $N\n"; +# } elsif (! defined $actual_return || ! defined $expected_return) { +# print "not ok $N\n"; +# } else { +# print $actual_return eq $expected_return ? "ok $N\n" : "not ok $N\n"; +# } +# $N++; +} + +sub try_all_doubles { + print "# Trying double regions.\n"; + for my $a (@TRIES) { + next if $a->[0] + $a->[1] >= $FLEN; + next if $a->[0] + $a->[2] >= $FLEN; + for my $b (@TRIES) { + next if $b->[0] + $b->[1] >= $FLEN; + next if $b->[0] + $b->[2] >= $FLEN; + + next if $b->[0] < $a->[0] + $a->[1]; # Overlapping regions + try(@$a, @$b); + } + } +} + +sub try_all_triples { + print "# Trying triple regions.\n"; + for my $a (@BASE_TRIES) { + next if $a->[0] + $a->[1] >= $FLEN; + next if $a->[0] + $a->[2] >= $FLEN; + for my $b (@BASE_TRIES) { + next if $b->[0] + $b->[1] >= $FLEN; + next if $b->[0] + $b->[2] >= $FLEN; + + next if $b->[0] < $a->[0] + $a->[1]; # Overlapping regions + + for my $c (@BASE_TRIES) { + next if $c->[0] + $c->[1] >= $FLEN; + next if $c->[0] + $c->[2] >= $FLEN; + + next if $c->[0] < $b->[0] + $b->[1]; # Overlapping regions + try(@$a, @$b, @$c); + } + } + } +} + +# Each element of @TRIES has [start, oldlen, newlen] +# Try them pairwise +sub xxtry_all_doubles { + print "# Trying double regions.\n"; + my %reg; # regions + for my $i (0 .. $#TRIES) { + $a = $TRIES[$i]; + ($reg{a}{st}, $reg{a}{ol}, $reg{a}{nl}) = @{$TRIES[$i]}; + next if $reg{a}{st} + $reg{a}{ol} >= $FLEN; + next if $reg{a}{st} + $reg{a}{nl} >= $FLEN; + for my $j (0 .. $#TRIES){ + $b = $TRIES[$j]; + ($reg{b}{st}, $reg{b}{ol}, $reg{b}{nl}) = @{$TRIES[$j]}; + next if $reg{b}{st} + $reg{b}{ol} >= $FLEN; + next if $reg{b}{st} + $reg{b}{nl} >= $FLEN; + + next if $reg{b}{st} < $reg{a}{st} + $reg{a}{ol}; # Overlapping regions +# $reg{b}{st} -= $reg{a}{ol} - $reg{a}{nl}; + + open F, "> $file" or die "Couldn't open file $file: $!"; + binmode F; + print F $oldfile; + close F; + die "wrong length!" unless -s $file == $FLEN; + + my $expected = $oldfile; + for ('b', 'a') { + $reg{$_}{nd} = $_ x $reg{$_}{nl}; + substr($expected, $reg{$_}{st}, $reg{$_}{ol}, $reg{$_}{nd}); + } + + my $o = tie my @lines, 'Tie::File', $file or die $!; + $o->_mtwrite($reg{a}{nd}, $reg{a}{st}, $reg{a}{ol}, + $reg{b}{nd}, $reg{b}{st}, $reg{b}{ol}, + ); + undef $o; untie @lines; + + open F, "< $file" or die "Couldn't open file $file: $!"; + binmode F; + my $actual; + { local $/; + $actual = ; + } + close F; + + my ($alen, $xlen) = (length $actual, length $expected); + print "# try_all_doubles(@$a, @$b)\n"; + unless ($alen == $xlen) { + print "# expected file length $xlen, actual $alen!\n"; + } + print $actual eq $expected ? 
"ok $N\n" : "not ok $N\n"; + $N++; + } + } +} + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/t/29_downcopy.t b/lib/Tie/File/t/29_downcopy.t new file mode 100644 index 0000000..24f9597 --- /dev/null +++ b/lib/Tie/File/t/29_downcopy.t @@ -0,0 +1,366 @@ +#!/usr/bin/perl +# +# Unit tests of _twrite function +# +# _downcopy($self, $data, $pos, $len) +# Write $data into a block of length $len at position $pos, +# moving everything in the block forwards to make room. +# Instead of writing the last length($data) bytes from the block +# (because there isn't room for them any longer) return them. +# +# + +my $file = "tf$$.txt"; + +print "1..718\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +$: = Tie::File::_default_recsep(); + +my @subtests = qw(x x> 0); +print "ok $N\n"; $N++; + +# (3-144) These were generated by 'gentests.pl' to cover all possible cases +# (I hope) +# Legend: +# x: data is entirely contained within one block +# x>: data runs from the middle to the end of the block +# : data occupies precisely one block +# x>: data runs from the middle of one block to the end of the next +# : data occupies two blocks exactly +# : data occupies three blocks exactly +# 0: data is null +# +# For each possible alignment of the old and new data, we investigate +# up to three situations: old data is shorter, old and new data are the +# same length, and new data is shorter. +# +# try($pos, $old, $new) means to run a test where the data starts at +# position $pos, the old data has length $old, +# and the new data has length $new. +try( 9659, 6635, 6691); # old=x , new=x ; old < new +try( 8605, 2394, 2394); # old=x , new=x ; old = new +try( 9768, 1361, 664); # old=x , new=x ; old > new +try( 9955, 6429, 6429); # old=x> , new=x ; old = new +try(10550, 5834, 4123); # old=x> , new=x ; old > new +try(14580, 6158, 851); # old=x> new +try(13442, 11134, 1572); # old=x> , new=x ; old > new +try( 8394, 0, 5742); # old=0 , new=x ; old < new +try( 8192, 2819, 6738); # old= new +try( 8192, 8192, 8192); # old= , new= , new= new +try( 8192, 10575, 6644); # old= new +try( 8192, 16384, 5616); # old= , new= new +try( 8192, 24576, 6253); # old=, new= new +try( 8192, 0, 6870); # old=0 , new= ; old < new +try( 9965, 6419, 6419); # old=x> , new=x> ; old = new +try(16059, 6102, 325); # old=x> ; old > new +try( 9503, 15073, 6881); # old=x> , new=x> ; old > new +try( 9759, 0, 6625); # old=0 , new=x> ; old < new +try( 8525, 2081, 8534); # old=x , new=x> , new=x> new +try(14739, 9837, 9837); # old=x> , new=x> , new=x> new +try(12602, 0, 8354); # old=0 , new=x> ; old < new +try( 8192, 8192, 8192); # old= , new= ; old = new +try( 8192, 14817, 8192); # old= ; old > new +try( 8192, 16384, 8192); # old= , new= ; old > new +try( 8192, 24576, 8192); # old=, new= ; old > new +try( 8192, 0, 8192); # old=0 , new= ; old < new +try( 8192, 6532, 10882); # old= , new= new +try( 8192, 16384, 10781); # old= , new= new +try( 8192, 24576, 9284); # old=, new= new +try( 8192, 0, 12488); # old=0 , new= ; old < new +try(13500, 2884, 11076); # old=x> , new=x> ; old < new +try(14069, 4334, 10507); # old=x> ; old < new +try(14761, 9815, 9815); # old=x> , new=x> ; old = new +try(10469, 0, 14107); # old=0 , new=x> ; old < new +try( 8192, 4181, 16384); # old= ; old < new +try( 8192, 8192, 16384); # old= , new= ; old < new +try( 8192, 12087, 16384); # old= ; old < new +try( 8192, 16384, 16384); # old= , new= ; old = new +try( 8192, 24576, 16384); # old=, new= ; old > 
new +try( 8192, 0, 16384); # old=0 , new= ; old < new +try( 8192, 4968, 24576); # old=; old < new +try( 8192, 8192, 24576); # old= , new=; old < new +try( 8192, 14163, 24576); # old=; old < new +try( 8192, 16384, 24576); # old= , new=; old < new +try( 8192, 24576, 24576); # old=, new=; old = new +try( 8192, 0, 24576); # old=0 , new=; old < new +try( 8771, 776, 0); # old=x , new=0 ; old > new +try( 8192, 2813, 0); # old= new +try(13945, 2439, 0); # old=x> , new=0 ; old > new +try(14493, 6090, 0); # old=x> new +try( 8192, 8192, 0); # old= , new=0 ; old > new +try( 8192, 10030, 0); # old= new +try(14983, 9593, 0); # old=x> , new=0 ; old > new +try( 8192, 16384, 0); # old= , new=0 ; old > new +try( 8192, 24576, 0); # old=, new=0 ; old > new +try(10489, 0, 0); # old=0 , new=0 ; old = new + +# (142-223) +# These tests all take place at the start of the file +try( 0, 771, 1593); # old= new +try( 0, 8192, 8192); # old= , new= , new= new +try( 0, 11891, 1917); # old= new +try( 0, 16384, 5155); # old= , new= new +try( 0, 24576, 2953); # old=, new= new +try( 0, 0, 1317); # old=0 , new= ; old < new +try( 0, 8192, 8192); # old= , new= ; old = new +try( 0, 11083, 8192); # old= ; old > new +try( 0, 16384, 8192); # old= , new= ; old > new +try( 0, 24576, 8192); # old=, new= ; old > new +try( 0, 0, 8192); # old=0 , new= ; old < new +try( 0, 6265, 9991); # old= , new= new +try( 0, 16384, 13258); # old= , new= new +try( 0, 24576, 14367); # old=, new= new +try( 0, 0, 10881); # old=0 , new= ; old < new +try( 0, 8192, 16384); # old= , new= ; old < new +try( 0, 15082, 16384); # old= ; old < new +try( 0, 16384, 16384); # old= , new= ; old = new +try( 0, 24576, 16384); # old=, new= ; old > new +try( 0, 0, 16384); # old=0 , new= ; old < new +try( 0, 2421, 24576); # old=; old < new +try( 0, 8192, 24576); # old= , new=; old < new +try( 0, 11655, 24576); # old=; old < new +try( 0, 16384, 24576); # old= , new=; old < new +try( 0, 24576, 24576); # old=, new=; old = new +try( 0, 0, 24576); # old=0 , new=; old < new +try( 0, 6530, 0); # old= new +try( 0, 8192, 0); # old= , new=0 ; old > new +try( 0, 14707, 0); # old= new +try( 0, 16384, 0); # old= , new=0 ; old > new +try( 0, 24576, 0); # old=, new=0 ; old > new +try( 0, 0, 0); # old=0 , new=0 ; old = new + +# (224-277) +# These tests all take place at the end of the file +$FLEN = 40960; # Force the file to be exactly 40960 bytes long +try(32768, 8192, 8192); # old= , new= , new= new +try(24576, 16384, 1917); # old= , new= new +try(16384, 24576, 3818); # old=, new= new +try(40960, 0, 2779); # old=0 , new= , new= ; old = new +try(24576, 16384, 8192); # old= , new= ; old > new +try(16384, 24576, 8192); # old=, new= ; old > new +try(40960, 0, 8192); # old=0 , new= ; old < new +try(32768, 8192, 10724); # old= , new= , new= new +try(16384, 24576, 15030); # old=, new= new +try(40960, 0, 11752); # old=0 , new= , new= ; old < new +try(24576, 16384, 16384); # old= , new= ; old = new +try(16384, 24576, 16384); # old=, new= ; old > new +try(40960, 0, 16384); # old=0 , new= ; old < new +try(32768, 8192, 24576); # old= , new=; old < new +try(24576, 16384, 24576); # old= , new=; old < new +try(16384, 24576, 24576); # old=, new=; old = new +try(40960, 0, 24576); # old=0 , new=; old < new +try(35973, 4987, 0); # old=x> , new=0 ; old > new +try(32768, 8192, 0); # old= , new=0 ; old > new +try(29932, 11028, 0); # old=x> , new=0 ; old > new +try(24576, 16384, 0); # old= , new=0 ; old > new +try(16384, 24576, 0); # old=, new=0 ; old > new +try(40960, 0, 0); # old=0 , new=0 ; old = new + +# 
(278-357) +# These tests all take place at the end of the file +$FLEN = 42000; # Force the file to be exactly 42000 bytes long +try(41275, 725, 4059); # old=x , new=x ; old < new +try(41683, 317, 317); # old=x , new=x ; old = new +try(41225, 775, 405); # old=x , new=x ; old > new +try(35709, 6291, 284); # old=x> new +try(42000, 0, 2434); # old=0 , new=x ; old < new +try(40960, 1040, 1608); # old= new +try(32768, 9232, 5604); # old= new +try(42000, 0, 6637); # old=0 , new= ; old < new +try(39994, 2006, 966); # old=x> ; old > new +try(42000, 0, 7152); # old=0 , new=x> ; old < new +try(41613, 387, 10601); # old=x , new=x> new +try(42000, 0, 9189); # old=0 , new=x> ; old < new +try(32768, 9232, 8192); # old= ; old > new +try(42000, 0, 8192); # old=0 , new= ; old < new +try(40960, 1040, 11778); # old= new +try(42000, 0, 8578); # old=0 , new= ; old < new +try(39618, 2382, 9534); # old=x> ; old < new +try(42000, 0, 15344); # old=0 , new=x> ; old < new +try(40960, 1040, 16384); # old= ; old < new +try(32768, 9232, 16384); # old= ; old < new +try(42000, 0, 16384); # old=0 , new= ; old < new +try(40960, 1040, 24576); # old=; old < new +try(32768, 9232, 24576); # old=; old < new +try(42000, 0, 24576); # old=0 , new=; old < new +try(41500, 500, 0); # old=x , new=0 ; old > new +try(40960, 1040, 0); # old= new +try(35272, 6728, 0); # old=x> new +try(32768, 9232, 0); # old= new +try(42000, 0, 0); # old=0 , new=0 ; old = new + + + +sub try { + my ($pos, $len, $newlen) = @_; + open F, "> $file" or die "Couldn't open file $file: $!"; + binmode F; + + # The record has exactly 17 characters. This will help ensure that + # even if _downcoopy screws up, the data doesn't coincidentally + # look good because the remainder accidentally lines up. + my $d = substr("0123456789abcdef$:", -17); + my $recs = defined($FLEN) ? + int($FLEN/length($d))+1 : # enough to make up at least $FLEN + int(8192*5/length($d))+1; # at least 5 blocks' worth + my $oldfile = $d x $recs; + my $flen = defined($FLEN) ? $FLEN : $recs * 17; + substr($oldfile, $FLEN) = "" if defined $FLEN; # truncate + print F $oldfile; + close F; + + die "wrong length!" unless -s $file == $flen; + + my $newdata = "-" x $newlen; + my $expected = $oldfile; + my $old = defined $len ? substr($expected, $pos, $len) + : substr($expected, $pos); + $old = "$newdata$old"; + my $x_retval; + if (defined $len) { + substr($expected, $pos, $len, substr($old, 0, $len, "")); + $x_retval = $old; + } else { + substr($expected, $pos) = $old; + $x_retval = ""; + } + + my $o = tie my @lines, 'Tie::File', $file or die $!; + local $SIG{ALRM} = sub { die "Alarm clock" }; + my $a_retval = eval { alarm(5) unless $^P; $o->_downcopy($newdata, $pos, $len) }; + my $err = $@; + undef $o; untie @lines; + if ($err) { + if ($err =~ /^Alarm clock/) { + print "# Timeout\n"; + print "not ok $N\n"; $N++; + print "not ok $N\n"; $N++; + return; + } else { + $@ = $err; + die; + } + } + + open F, "< $file" or die "Couldn't open file $file: $!"; + binmode F; + my $actual; + { local $/; + $actual = ; + } + close F; + + my ($alen, $xlen) = (length $actual, length $expected); + unless ($alen == $xlen) { + my @ARGS = @_; + for (@ARGS) { $_ = "UNDEF" unless defined } + print "# try(@ARGS) expected file length $xlen, actual $alen!\n"; + } + print $actual eq $expected ? "ok $N\n" : "not ok $N\n"; + $N++; + print $a_retval eq $x_retval ? 
"ok $N\n" : "not ok $N\n"; + $N++; + + if (defined $len) { + try($pos, undef, $newlen); + } +} + + + +use POSIX 'SEEK_SET'; +sub check_contents { + my @c = @_; + my $x = join $:, @c, ''; + local *FH = $o->{fh}; + seek FH, 0, SEEK_SET; +# my $open = open FH, "< $file"; + my $a; + { local $/; $a = } + $a = "" unless defined $a; + if ($a eq $x) { + print "ok $N\n"; + } else { + ctrlfix($a, $x); + print "not ok $N\n# expected <$x>, got <$a>\n"; + } + $N++; + + # now check FETCH: + my $good = 1; + my $msg; + for (0.. $#c) { + my $aa = $a[$_]; + unless ($aa eq "$c[$_]$:") { + $msg = "expected <$c[$_]$:>, got <$aa>"; + ctrlfix($msg); + $good = 0; + } + } + print $good ? "ok $N\n" : "not ok $N # $msg\n"; + $N++; + + print $o->_check_integrity($file, $ENV{INTEGRITY}) + ? "ok $N\n" : "not ok $N\n"; + $N++; +} + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +END { + undef $o; + untie @a; + 1 while unlink $file; +} + diff --git a/lib/Tie/File/t/29a_upcopy.t b/lib/Tie/File/t/29a_upcopy.t new file mode 100644 index 0000000..67e8526 --- /dev/null +++ b/lib/Tie/File/t/29a_upcopy.t @@ -0,0 +1,211 @@ +#!/usr/bin/perl +# +# Unit tests of _upcopy function +# +# _upcopy($self, $source, $dest, $len) +# +# Take a block of data of leength $len at $source and copy it +# to $dest, which must be <= $source but which need not be <= $source - $len +# (That is, this will only copy a block to a position earlier in the file, +# but the source and destination regions may overlap.) + + +my $file = "tf$$.txt"; + +print "1..55\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +$: = Tie::File::_default_recsep(); + +my @subtests = qw(x x> 0); + +$FLEN = 40970; # 2410 records of 17 chars each + +# (2-7) Trivial non-moves at start of file +try(0, 0, 0); +try(0, 0, 10); +try(0, 0, 100); +try(0, 0, 1000); +try(0, 0, 10000); +try(0, 0, 20000); + +# (8-13) Trivial non-moves in middle of file +try(100, 100, 0); +try(100, 100, 10); +try(100, 100, 100); +try(100, 100, 1000); +try(100, 100, 10000); +try(100, 100, 20000); + +# (14) Trivial non-move at end of file +try($FLEN, $FLEN, 0); + +# (15-17) Trivial non-move of tail of file +try(0, 0, undef); +try(100, 100, undef); +try($FLEN, $FLEN, undef); + +# (18-24) Moves to start of file +try(100, 0, 0); +try(100, 0, 10); +try(100, 0, 100); +try(100, 0, 1000); +try(100, 0, 10000); +try(100, 0, 20000); +try(100, 0, undef); + +# (25-31) Moves in middle of file +try(200, 100, 0); +try(200, 100, 10); +try(200, 100, 100); +try(200, 100, 1000); +try(200, 100, 10000); +try(200, 100, 20000); +try(200, 100, undef); + +# (32-43) Moves from end of file +try($FLEN, 10000, 0); +try($FLEN-10, 10000, 10); +try($FLEN-100, 10000, 100); +try($FLEN-1000, 200, 1000); +try($FLEN-10000, 200, 10000); +try($FLEN-20000, 200, 20000); +try($FLEN, 10000, undef); +try($FLEN-10, 10000, undef); +try($FLEN-100, 10000, undef); +try($FLEN-1000, 200, undef); +try($FLEN-10000, 200, undef); +try($FLEN-20000, 200, undef); + +$FLEN = 40960; + +# (44-55) Moves from end of file when file ends on a block boundary +try($FLEN, 10000, 0); +try($FLEN-10, 10000, 10); +try($FLEN-100, 10000, 100); +try($FLEN-1000, 200, 1000); +try($FLEN-10000, 200, 10000); +try($FLEN-20000, 200, 20000); +try($FLEN, 10000, undef); +try($FLEN-10, 10000, undef); +try($FLEN-100, 10000, undef); +try($FLEN-1000, 200, undef); +try($FLEN-10000, 200, undef); +try($FLEN-20000, 200, undef); + +sub try { + my ($src, $dst, $len) = @_; + open F, "> $file" or die "Couldn't open file $file: $!"; + binmode F; + + # The record has exactly 17 
characters. This will help ensure that + # even if _upcopy screws up, the data doesn't coincidentally + # look good because the remainder accidentally lines up. + my $d = substr("0123456789abcdef$:", -17); + my $recs = defined($FLEN) ? + int($FLEN/length($d))+1 : # enough to make up at least $FLEN + int(8192*5/length($d))+1; # at least 5 blocks' worth + my $oldfile = $d x $recs; + my $flen = defined($FLEN) ? $FLEN : $recs * 17; + substr($oldfile, $FLEN) = "" if defined $FLEN; # truncate + print F $oldfile; + close F; + + die "wrong length!" unless -s $file == $flen; + + # If len is specified, use that. If it's undef, + # then behave *as if* we had specified the whole rest of the file + my $expected = $oldfile; + if (defined $len) { + substr($expected, $dst, $len) = substr($expected, $src, $len); + } else { + substr($expected, $dst) = substr($expected, $src); + } + + my $o = tie my @lines, 'Tie::File', $file or die $!; + local $SIG{ALRM} = sub { die "Alarm clock" }; + my $a_retval = eval { alarm(5) unless $^P; $o->_upcopy($src, $dst, $len) }; + my $err = $@; + undef $o; untie @lines; + if ($err) { + if ($err =~ /^Alarm clock/) { + print "# Timeout\n"; + print "not ok $N\n"; $N++; + return; + } else { + $@ = $err; + die; + } + } + + open F, "< $file" or die "Couldn't open file $file: $!"; + binmode F; + my $actual; + { local $/; + $actual = ; + } + close F; + + my ($alen, $xlen) = (length $actual, length $expected); + unless ($alen == $xlen) { + print "# try(@_) expected file length $xlen, actual $alen!\n"; + } + print $actual eq $expected ? "ok $N\n" : "not ok $N\n"; + $N++; +} + + + +use POSIX 'SEEK_SET'; +sub check_contents { + my @c = @_; + my $x = join $:, @c, ''; + local *FH = $o->{fh}; + seek FH, 0, SEEK_SET; +# my $open = open FH, "< $file"; + my $a; + { local $/; $a = } + $a = "" unless defined $a; + if ($a eq $x) { + print "ok $N\n"; + } else { + ctrlfix($a, $x); + print "not ok $N\n# expected <$x>, got <$a>\n"; + } + $N++; + + # now check FETCH: + my $good = 1; + my $msg; + for (0.. $#c) { + my $aa = $a[$_]; + unless ($aa eq "$c[$_]$:") { + $msg = "expected <$c[$_]$:>, got <$aa>"; + ctrlfix($msg); + $good = 0; + } + } + print $good ? "ok $N\n" : "not ok $N # $msg\n"; + $N++; + + print $o->_check_integrity($file, $ENV{INTEGRITY}) + ? "ok $N\n" : "not ok $N\n"; + $N++; +} + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +END { + undef $o; + untie @a; + 1 while unlink $file; +} + diff --git a/lib/Tie/File/t/30_defer.t b/lib/Tie/File/t/30_defer.t index 7503829..063b3a7 100644 --- a/lib/Tie/File/t/30_defer.t +++ b/lib/Tie/File/t/30_defer.t @@ -125,17 +125,14 @@ check_caches({map(($_ => "record$_$:"), 5..7)}, check_contents($data); $a[2] = "recordC"; # That should flush the whole darn defer -# Flushing the defer requires looking up the true lengths of records -# 0..2, which flushes out the read cache, leaving only 1..2 there. -# Then the splicer updates the cached versions of 1..2 to contain the -# new data -check_caches({1 => "recordB$:", 2 => "recordC$:"}, +# This shouldn't change the cache contents +check_caches({map(($_ => "record$_$:"), 5..7)}, {}); # URRRP check_contents(join("$:", qw(recordA recordB recordC record3 record4 record5 record6 record7)) . "$:"); $a[3] = "recordD"; # even though we flushed, deferring is STILL ENABLED -check_caches({1 => "recordB$:", 2 => "recordC$:"}, +check_caches({map(($_ => "record$_$:"), 5..7)}, {3 => "recordD$:"}); check_contents(join("$:", qw(recordA recordB recordC record3 record4 record5 record6 record7)) . 
"$:"); @@ -143,86 +140,84 @@ check_contents(join("$:", qw(recordA recordB recordC # Check readcache-deferbuffer interactions # (45-47) This should remove outdated data from the read cache -$a[2] = "recordE"; -check_caches({1 => "recordB$:", }, - {3 => "recordD$:", 2 => "recordE$:"}); +$a[5] = "recordE"; +check_caches({6 => "record6$:", 7 => "record7$:"}, + {3 => "recordD$:", 5 => "recordE$:"}); check_contents(join("$:", qw(recordA recordB recordC record3 record4 record5 record6 record7)) . "$:"); # (48-51) This should read back out of the defer buffer # without adding anything to the read cache my $z; -$z = $a[2]; +$z = $a[5]; print $z eq "recordE" ? "ok $N\n" : "not ok $N\n"; $N++; -check_caches({1 => "recordB$:", }, - {3 => "recordD$:", 2 => "recordE$:"}); +check_caches({6 => "record6$:", 7 => "record7$:"}, + {3 => "recordD$:", 5 => "recordE$:"}); check_contents(join("$:", qw(recordA recordB recordC record3 record4 record5 record6 record7)) . "$:"); # (52-55) This should repopulate the read cache with a new record $z = $a[0]; print $z eq "recordA" ? "ok $N\n" : "not ok $N\n"; $N++; -check_caches({1 => "recordB$:", 0 => "recordA$:"}, - {3 => "recordD$:", 2 => "recordE$:"}); +check_caches({0 => "recordA$:", 6 => "record6$:", 7 => "record7$:"}, + {3 => "recordD$:", 5 => "recordE$:"}); check_contents(join("$:", qw(recordA recordB recordC record3 record4 record5 record6 record7)) . "$:"); # (56-59) This should flush the LRU record from the read cache -$z = $a[4]; $z = $a[5]; -print $z eq "record5" ? "ok $N\n" : "not ok $N\n"; $N++; -check_caches({5 => "record5$:", 0 => "recordA$:", 4 => "record4$:"}, - {3 => "recordD$:", 2 => "recordE$:"}); +$z = $a[4]; +print $z eq "record4" ? "ok $N\n" : "not ok $N\n"; $N++; +check_caches({7 => "record7$:", 0 => "recordA$:", 4 => "record4$:"}, + {3 => "recordD$:", 5 => "recordE$:"}); check_contents(join("$:", qw(recordA recordB recordC record3 record4 record5 record6 record7)) . "$:"); # (60-63) This should FLUSH the deferred buffer -# In doing so, it will read in records 2 and 3, flushing 0 and 4 -# from the read cache, leaving 2, 3, and 5. $z = splice @a, 3, 1, "recordZ"; print $z eq "recordD" ? "ok $N\n" : "not ok $N\n"; $N++; -check_caches({5 => "record5$:", 3 => "recordZ$:", 2 => "recordE$:"}, +check_caches({7 => "record7$:", 0 => "recordA$:", 4 => "record4$:", 3 => "recordZ$:"}, {}); -check_contents(join("$:", qw(recordA recordB recordE - recordZ record4 record5 record6 record7)) . "$:"); +check_contents(join("$:", qw(recordA recordB recordC + recordZ record4 recordE record6 record7)) . "$:"); # (64-66) We should STILL be in deferred writing mode $a[5] = "recordX"; -check_caches({3 => "recordZ$:", 2 => "recordE$:"}, +check_caches({7 => "record7$:", 0 => "recordA$:", 4 => "record4$:", 3 => "recordZ$:"}, {5 => "recordX$:"}); -check_contents(join("$:", qw(recordA recordB recordE - recordZ record4 record5 record6 record7)) . "$:"); +check_contents(join("$:", qw(recordA recordB recordC + recordZ record4 recordE record6 record7)) . "$:"); # Fill up the defer buffer again $a[4] = "recordP"; # (67-69) This should OVERWRITE the existing deferred record # and NOT flush the buffer $a[5] = "recordQ"; -check_caches({3 => "recordZ$:", 2 => "recordE$:"}, +check_caches({7 => "record7$:", 0 => "recordA$:", 3 => "recordZ$:"}, {5 => "recordQ$:", 4 => "recordP$:"}); -check_contents(join("$:", qw(recordA recordB recordE - recordZ record4 record5 record6 record7)) . "$:"); - +check_contents(join("$:", qw(recordA recordB recordC + recordZ record4 recordE record6 record7)) . 
"$:"); # (70-72) Discard should just dump the whole deferbuffer $o->discard; -check_caches({3 => "recordZ$:", 2 => "recordE$:"}, +check_caches({7 => "record7$:", 0 => "recordA$:", 3 => "recordZ$:"}, {}); -check_contents(join("$:", qw(recordA recordB recordE - recordZ record4 record5 record6 record7)) . "$:"); +check_contents(join("$:", qw(recordA recordB recordC + recordZ record4 recordE record6 record7)) . "$:"); + # (73-75) NOW we are out of deferred writing mode $a[0] = "recordF"; -check_caches({3 => "recordZ$:", 2 => "recordE$:", 0 => "recordF$:"}, +check_caches({7 => "record7$:", 0 => "recordF$:", 3 => "recordZ$:"}, {}); -check_contents(join("$:", qw(recordF recordB recordE - recordZ record4 record5 record6 record7)) . "$:"); +check_contents(join("$:", qw(recordF recordB recordC + recordZ record4 recordE record6 record7)) . "$:"); # (76-79) Last call--untying the array should flush the deferbuffer $o->defer; $a[0] = "flushed"; -check_caches({3 => "recordZ$:", 2 => "recordE$:"}, +check_caches({7 => "record7$:", 3 => "recordZ$:"}, {0 => "flushed$:" }); -check_contents(join("$:", qw(recordF recordB recordE - recordZ record4 record5 record6 record7)) . "$:"); +check_contents(join("$:", qw(recordF recordB recordC + recordZ record4 recordE record6 record7)) . "$:"); undef $o; untie @a; # (79) We can't use check_contents any more, because the object is dead @@ -230,8 +225,8 @@ open F, "< $file" or die; binmode F; { local $/ ; $z = } close F; -my $x = join("$:", qw(flushed recordB recordE - recordZ record4 record5 record6 record7)) . "$:"; +my $x = join("$:", qw(flushed recordB recordC + recordZ record4 recordE record6 record7)) . "$:"; if ($z eq $x) { print "ok $N\n"; } else { @@ -326,6 +321,8 @@ sub ctrlfix { } END { + undef $o; + untie @a if tied @a; 1 while unlink $file; } diff --git a/lib/Tie/File/t/42_offset.t b/lib/Tie/File/t/42_offset.t new file mode 100644 index 0000000..1762443 --- /dev/null +++ b/lib/Tie/File/t/42_offset.t @@ -0,0 +1,74 @@ +#!/usr/bin/perl + +# 2003-04-09 Tels: test the offset method from 0.94 + +use Test::More; +use strict; +use File::Spec; + +use POSIX 'SEEK_SET'; +my $file = "tf$$.txt"; + +BEGIN + { + $| = 1; + if ($ENV{PERL_CORE}) + { + # testing with the core distribution + @INC = ( File::Spec->catdir(File::Spec->updir, 't', 'lib') ); + } + unshift @INC, File::Spec->catdir(File::Spec->updir, 'lib'); + chdir 't' if -d 't'; + print "# INC = @INC\n"; + + plan tests => 24; + + use_ok ('Tie::File'); + } + +$/ = "#"; # avoid problems with \n\r vs. 
\n
+
+my @a;
+my $o = tie @a, 'Tie::File', $file, autodefer => 0;
+
+is (ref($o), 'Tie::File');
+
+is ($o->offset(0), 0, 'first one always there');
+is ($o->offset(1), undef, 'no offsets yet');
+
+$a[0] = 'Bourbon';
+is ($o->offset(0), 0, 'first is ok');
+is ($o->offset(1), 8, 'and second ok');
+is ($o->offset(2), undef, 'third undef');
+
+$a[1] = 'makes';
+is ($o->offset(0), 0, 'first is ok');
+is ($o->offset(1), 8, 'and second ok');
+is ($o->offset(2), 14, 'and third ok');
+is ($o->offset(3), undef, 'fourth undef');
+
+$a[2] = 'the baby';
+is ($o->offset(0), 0, 'first is ok');
+is ($o->offset(1), 8, 'and second ok');
+is ($o->offset(2), 14, 'and third ok');
+is ($o->offset(3), 23, 'and fourth ok');
+is ($o->offset(4), undef, 'fourth undef');
+
+$a[3] = 'grin';
+is ($o->offset(0), 0, 'first is ok');
+is ($o->offset(1), 8, 'and second ok');
+is ($o->offset(2), 14, 'and third ok');
+is ($o->offset(3), 23, 'and fourth ok');
+is ($o->offset(4), 28, 'and fifth ok');
+
+$a[4] = '!';
+is ($o->offset(5), 30, 'and fifth ok');
+$a[3] = 'water';
+is ($o->offset(4), 29, 'and fourth changed ok');
+is ($o->offset(5), 31, 'and fifth ok');
+
+END {
+  undef $o;
+  untie @a;
+  1 while unlink $file;
+}
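
The new t/42_offset.t above exercises the offset() method introduced in 0.94, which reports the byte position at which a given record begins. A minimal usage sketch follows; the file name and the loop bound are invented for illustration and are not taken from the patch:

    use Tie::File;

    # "example.txt" is a placeholder for an existing record-oriented file.
    tie my @lines, 'Tie::File', "example.txt" or die "tie failed: $!";
    my $obj = tied @lines;

    # offset($n) returns the byte offset where record $n starts, or undef
    # if that record's position is not yet known (record 0 is always 0).
    for my $n (0 .. 3) {
        my $off = $obj->offset($n);
        printf "record %d starts at byte %s\n",
               $n, defined $off ? $off : "unknown";
    }

    untie @lines;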
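For readers following t/29_downcopy.t and t/29a_upcopy.t, the expected values those tests compute reduce to plain string arithmetic. The sketch below is only an in-memory model of that arithmetic, written to match the header comments of the two test files; the subroutine names are made up, and the real _downcopy/_upcopy methods operate on the tied file rather than on a string:

    use strict;
    use warnings;

    # Model of _downcopy: write $data at $pos inside a block of length $len,
    # shifting the block's old contents forward; the last length($data)
    # bytes no longer fit in the block, so they are returned to the caller.
    sub model_downcopy {
        my ($buf, $data, $pos, $len) = @_;
        my $block = $data . substr($$buf, $pos, $len);
        substr($$buf, $pos, $len) = substr($block, 0, $len, "");
        return $block;    # the overflow: always length($data) bytes
    }

    # Model of _upcopy: copy $len bytes from $src to $dst, where $dst <= $src;
    # the source and destination regions may overlap.
    sub model_upcopy {
        my ($buf, $src, $dst, $len) = @_;
        substr($$buf, $dst, $len) = substr($$buf, $src, $len);
    }

    my $s = "abcdefghij";
    my $overflow = model_downcopy(\$s, "XY", 2, 5);
    print "$s $overflow\n";    # prints "abXYcdehij fg"
    model_upcopy(\$s, 4, 2, 4);
    print "$s\n";              # prints "abcdehehij"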