From: Jarkko Hietaniemi Date: Mon, 1 Apr 2002 02:55:22 +0000 (+0000) Subject: Upgrade to Tie::File 0.91, from mjd. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=27531ffbbae52c3c662973c4df1f79bc672a8573;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Tie::File 0.91, from mjd. p4raw-id: //depot/perl@15651 --- diff --git a/MANIFEST b/MANIFEST index df1cdd5..1e9e29f 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1447,6 +1447,9 @@ lib/Tie/File/t/20_cache_full.t Test for Tie::File lib/Tie/File/t/21_win32.t Test for Tie::File lib/Tie/File/t/22_autochomp.t Test for Tie::File lib/Tie/File/t/23_rv_ac_splice.t Test for Tie::File +lib/Tie/File/t/24_cache_loop.t Test for Tie::File +lib/Tie/File/t/25_gen_nocache.t Test for Tie::File +lib/Tie/File/t/26_twrite.t Test for Tie::File lib/Tie/File/t/30_defer.t Test for Tie::File lib/Tie/File/t/31_autodefer.t Test for Tie::File lib/Tie/File/t/32_defer_misc.t Test for Tie::File diff --git a/lib/Tie/File.pm b/lib/Tie/File.pm index d0888df..1186f00 100644 --- a/lib/Tie/File.pm +++ b/lib/Tie/File.pm @@ -3,9 +3,9 @@ package Tie::File; require 5.005; use Carp; use POSIX 'SEEK_SET'; -use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX'; +use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX', 'O_ACCMODE', 'O_RDONLY'; -$VERSION = "0.90"; +$VERSION = "0.91"; my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes my $DEFAULT_AUTODEFER_THRESHHOLD = 3; # 3 records my $DEFAULT_AUTODEFER_FILELEN_THRESHHOLD = 65536; # 16 disk blocksful @@ -73,7 +73,9 @@ sub TIEARRAY { $opts{autochomp} = 1 unless defined $opts{autochomp}; - my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR; + $opts{mode} = O_CREAT|O_RDWR unless defined $opts{mode}; + $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY); + my $fh; if (UNIVERSAL::isa($file, 'GLOB')) { @@ -90,7 +92,7 @@ sub TIEARRAY { croak "usage: tie \@array, $pack, filename, [option => value]..."; } else { $fh = \do { local *FH }; # only works in 5.005 and later - sysopen $fh, $file, $mode, 0666 or return; + sysopen $fh, $file, $opts{mode}, 0666 or return; binmode $fh; } { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write @@ -148,7 +150,8 @@ sub _fetch { return $cached if defined $cached; } - unless ($#{$self->{offsets}} >= $n) { + if ($#{$self->{offsets}} < $n) { + return if $self->{eof}; my $o = $self->_fill_offsets_to($n); # If it's still undefined, there is no such record, so return 'undef' return unless defined $o; @@ -256,8 +259,10 @@ sub FETCHSIZE { my $self = shift; my $n = $#{$self->{offsets}}; # 20020317 Change this to binary search - while (defined ($self->_fill_offsets_to($n+1))) { - ++$n; + unless ($self->{eof}) { + while (defined ($self->_fill_offsets_to($n+1))) { + ++$n; + } } my $top_deferred = $self->_defer_max; $n = $top_deferred+1 if defined $top_deferred && $n < $top_deferred+1; @@ -443,7 +448,7 @@ sub _splice { # compute length of data being removed for ($pos .. $pos+$nrecs-1) { - $self->_fill_offsets_to($_); + last unless defined $self->_fill_offsets_to($_); my $rec = $self->_fetch($_); last unless defined $rec; push @result, $rec; @@ -451,9 +456,9 @@ sub _splice { # Why don't we just use length($rec) here? # Because that record might have come from the cache. _splice # might have been called to flush out the deferred-write records, - # and in this case length($rec) is the length of the record to be *written*, - # not the length of the actual record in the file. But the offsets are - # still true. 20020322 + # and in this case length($rec) is the length of the record to be + # *written*, not the length of the actual record in the file. But + # the offsets are still true. 20020322 $oldlen += $self->{offsets}[$_+1] - $self->{offsets}[$_] if defined $self->{offsets}[$_+1]; } @@ -470,6 +475,11 @@ sub _splice { push @new_offsets, $new_offsets[-1] + length($data[$_]); } } + + # If we're about to splice out the end of the offsets table... + if ($pos + $nrecs >= @{$self->{offsets}}) { + $self->{eof} = 0; # ... the table is no longer complete + } splice(@{$self->{offsets}}, $pos, $nrecs, @new_offsets); # update the offsets table part 2 @@ -480,6 +490,9 @@ sub _splice { # If we scrubbed out all known offsets, regenerate the trivial table # that knows that the file does indeed start at 0. $self->{offsets}[0] = 0 unless @{$self->{offsets}}; + # If the file got longer, the offsets table is no longer complete + $self->{eof} = 0 if @data > $nrecs; + # Perhaps the following cache foolery could be factored out # into a bunch of mor opaque cache functions. For example, @@ -574,6 +587,7 @@ sub _twrite { sub _fixrecs { my $self = shift; for (@_) { + $_ = "" unless defined $_; $_ .= $self->{recsep} unless substr($_, - $self->{recseplen}) eq $self->{recsep}; } @@ -611,6 +625,9 @@ sub _seekb { # return the offset of record $n sub _fill_offsets_to { my ($self, $n) = @_; + + return $self->{offsets}[$n] if $self->{eof}; + my $fh = $self->{fh}; local *OFF = $self->{offsets}; my $rec; @@ -622,6 +639,7 @@ sub _fill_offsets_to { if (defined $rec) { push @OFF, tell $fh; } else { + $self->{eof} = 1; return; # It turns out there is no such record } } @@ -637,7 +655,7 @@ sub _write_record { my $fh = $self->{fh}; print $fh $rec or die "Couldn't write record: $!"; # "Should never happen." - $self->{_written} += length($rec); +# $self->{_written} += length($rec); } sub _read_record { @@ -647,11 +665,23 @@ sub _read_record { my $fh = $self->{fh}; $rec = <$fh>; } - $self->{_read} += length($rec) if defined $rec; + return unless defined $rec; + if (substr($rec, -$self->{recseplen}) ne $self->{recsep}) { + # improperly terminated final record --- quietly fix it. +# my $ac = substr($rec, -$self->{recseplen}); +# $ac =~ s/\n/\\n/g; + unless ($self->{rdonly}) { + my $fh = $self->{fh}; + print $fh $self->{recsep}; + } + $rec .= $self->{recsep}; + } +# $self->{_read} += length($rec) if defined $rec; $rec; } sub _rw_stats { + my $self = shift; @{$self}{'_read', '_written'}; } @@ -989,8 +1019,18 @@ sub _check_integrity { _ci_warn("rec $n: cached <$cached> actual <$_>"); } if (defined $cached && substr($cached, -$rsl) ne $rs) { + $good = 0; _ci_warn("rec $n in the cache is missing the record separator"); } + if (! defined $offset && $self->{eof}) { + $good = 0; + _ci_warn("The offset table was marked complete, but it is missing element $."); + } + } + if (@{$self->{offsets}} > $.+1) { + $good = 0; + my $n = @{$self->{offsets}}; + _ci_warn("The offset table has $n items, but the file has only $."); } my $deferring = $self->_is_deferring; @@ -1347,7 +1387,7 @@ sub empty { $self->[0][0] = 0; # might as well reset the sequence numbers } -# notify the parent cache objec tthat we moved something +# notify the parent cache object that we moved something sub _heap_move { my $self = shift; $self->_cache->_heap_move(@_); @@ -1369,7 +1409,7 @@ sub _insert_new { my $i = @$self; $i = int($i/2) until defined $self->[$i/2]; $self->[$i] = $item; - $self->_heap_move($self->[$i][KEY], $i); + $self->[0][1]->_heap_move($self->[$i][KEY], $i); $self->_nelts_inc; } @@ -1382,7 +1422,7 @@ sub _insert { until (! defined $self->[$i]) { if ($self->[$i][SEQ] > $item->[SEQ]) { # inserted item is older ($self->[$i], $item) = ($item, $self->[$i]); - $self->_heap_move($self->[$i][KEY], $i); + $self->[0][1]->_heap_move($self->[$i][KEY], $i); } # If either is undefined, go that way. Otherwise, choose at random my $dir; @@ -1392,7 +1432,7 @@ sub _insert { $i = 2*$i + $dir; } $self->[$i] = $item; - $self->_heap_move($self->[$i][KEY], $i); + $self->[0][1]->_heap_move($self->[$i][KEY], $i); $self->_nelts_inc; } @@ -1419,10 +1459,10 @@ sub remove { } $self->[$i] = $self->[$ii]; # Promote child to fill vacated spot - $self->_heap_move($self->[$i][KEY], $i); + $self->[0][1]->_heap_move($self->[$i][KEY], $i); $i = $ii; # Fill new vacated spot } - $self->_heap_move($top->[KEY], undef); + $self->[0][1]->_heap_move($top->[KEY], undef); undef $self->[$i]; $self->_nelts_dec; return $top->[DAT]; @@ -1452,7 +1492,7 @@ sub promote { } @{$self}[$i, $dir] = @{$self}[$dir, $i]; for ($i, $dir) { - $self->_heap_move($self->[$_][KEY], $_) if defined $self->[$_]; + $self->[0][1]->_heap_move($self->[$_][KEY], $_) if defined $self->[$_]; } $i = $dir; } @@ -1541,10 +1581,6 @@ sub _nodes { ($self->[$i], $self->_nodes($i*2), $self->_nodes($i*2+1)); } -1; - - - "Cogito, ergo sum."; # don't forget to return a true value from the file =head1 NAME @@ -1610,7 +1646,7 @@ contained the following data: Curse these pesky flies!\n -then the C<@array> would appear to have four elements: +then the C<@array> would appear to have four elements: "Curse th" "e p" @@ -1622,7 +1658,7 @@ special "paragraph mode" semantics (E la C<$/ = "">) are not emulated. Records read from the tied array do not have the record separator -string on the end; this is to allow +string on the end; this is to allow $array[17] .= "extra"; @@ -1639,7 +1675,7 @@ the same thing: The result is that the contents of line 17 of the file will be replaced with "Cherry pie"; a newline character will separate line 17 -from line 18. This means that in particular, this will do nothing: +from line 18. This means that this code will do nothing: chomp $array[17]; @@ -1647,9 +1683,10 @@ Because the Ced value will have the separator reattached when it is written back to the file. There is no way to create a file whose trailing record separator string is missing. -Inserting records that I the record separator string will -produce a reasonable result, but if you can't foresee what this result -will be, you'd better avoid doing this. +Inserting records that I the record separator string is not +supported by this module. It will probably produce a reasonable +result, but what this result will be may change in a future version. +Use 'splice' to insert records or to replace one record with several. =head2 C @@ -1718,10 +1755,15 @@ desired cache size, in bytes. Setting the memory limit to 0 will inhibit caching; records will be fetched from disk every time you examine them. +The C value is not an absolute or exact limit on the memory +used. C objects contains some structures besides the read +cache and the deferred write buffer, whose sizes are not charged +against C. + =head2 C (This is an advanced feature. Skip this section on first reading.) - + If you use deferred writing (See L<"Deferred Writing">, below) then data you write into the array will not be written directly to the file; instead, it will be saved in the I to be @@ -1748,7 +1790,7 @@ idea. =head1 Public Methods -The C call returns an object, say C<$o>. You may call +The C call returns an object, say C<$o>. You may call $rec = $o->FETCH($n); $o->STORE($n, $rec); @@ -1824,9 +1866,9 @@ C, you may use: Handles that were opened write-only won't work. Handles that were opened read-only will work as long as you don't try to modify the array. Handles must be attached to seekable sources of data---that -means no pipes or sockets. If you supply a non-seekable handle, the -C call will try to throw an exception. (On Unix systems, it -B throw an exception.) +means no pipes or sockets. If C can detect that you +supplied a non-seekable handle, the C call will throw an +exception. (On Unix systems, it can detect this.) =head1 Deferred Writing @@ -1889,7 +1931,7 @@ deferred writes. If the deferred-write buffer isn't yet full, but the total size of the buffer and the read cache would exceed the C limit, the oldest -records will be flushed out of the read cache until total usage is +records will be expired from the read cache until the total size is under the limit. C, C, C, C, and C cannot be @@ -1899,14 +1941,22 @@ This may change in a future version. If you resize the array with deferred writing enabled, the file will be resized immediately, but deferred records will not be written. +This has a surprising consequence: C<@a = (...)> erases the file +immediately, but the writing of the actual data is deferred. This +might be a bug. If it is a bug, it will be fixed in a future version. =head2 Autodeferring C tries to guess when deferred writing might be helpful, -and to turn it on and off automatically. In the example above, only -the first two assignments will be done immediately; after this, all -the changes to the file will be deferred up to the user-specified -memory limit. +and to turn it on and off automatically. + + for (@a) { + $_ = "> $_"; + } + +In this example, only the first two assignments will be done +immediately; after this, all the changes to the file will be deferred +up to the user-specified memory limit. You should usually be able to ignore this and just use the module without thinking about deferring. However, special applications may @@ -1921,6 +1971,9 @@ or tie @array, 'Tie::File', $file, autodefer => 0; +Similarly, C<-Eautodefer(1)> re-enables autodeferment, and +C<-Eautodefer()> recovers the current value of the autodefer setting. + =head1 CAVEATS (That's Latin for 'warnings'.) @@ -1937,7 +1990,7 @@ incompatible ways from one version to the next, without warning. That has happened at least once already. The interface will freeze before Perl 5.8 is released, probably sometime in April 2002. -=item * +=item * Reasonable effort was made to make this module efficient. Nevertheless, changing the size of a record in the middle of a large file will @@ -1958,7 +2011,7 @@ get the empty string, so the supposedly-C'ed value will be defined. Similarly, if you have C disabled, then # This DOES print "How unusual!" if 'autochomp' is disabled - undef $a[10]; + undef $a[10]; print "How unusual!\n" if $a[10]; Because when C is disabled, C<$a[10]> will read back as @@ -1971,7 +2024,7 @@ and C, but in general, the correspondence is extremely close. Not quite every effort was made to make this module as efficient as possible. C should use binary search instead of linear -search. The cache's LRU queue should be a heap instead of a list. +search. The performance of the C method could be improved. At present, it still rewrites the tail of the file once for each block of @@ -1979,18 +2032,17 @@ contiguous lines to be changed. In the typical case, this will result in only one rewrite, but in peculiar cases it might be bad. It should be possible to perform I deferred writing with a single rewrite. -These defects are probably minor; in any event, they will be fixed in -a future version of the module. +Profiling suggests that these defects are probably minor; in any +event, they will be fixed in a future version of the module. =item * -The author has supposed that since this module is concerned with file -I/O, almost all normal use of it will be heavily I/O bound, and that -the time to maintain complicated data structures inside the module -will be dominated by the time to actually perform the I/O. This -suggests, for example, that an LRU read-cache is a good tradeoff, even -if it requires substantial bookkeeping following a C -operation. +I have supposed that since this module is concerned with file I/O, +almost all normal use of it will be heavily I/O bound. This means +that the time to maintain complicated data structures inside the +module will be dominated by the time to actually perform the I/O. +When there was an opportunity to spend CPU time to avoid doing I/O, I +tried to take it. =item * @@ -2008,87 +2060,11 @@ well-defined and stable subclassing API. =head1 WHAT ABOUT C? -C's C feature does something similar to -C, but there are a number of reasons that you might prefer -C. C is a great piece of software, but the -C part is less great than the rest of it. - -=over 4 - -=item * - -C reads your entire file into memory, modifies it in memory, -and the writes out the entire file again when you untie the file. -This is completely impractical for large files. +People sometimes point out that L will do something similar, +and ask why C module is necessary. -C does not do any of those things. It doesn't try to read -the entire file into memory; instead it uses a lazy approach and -caches recently-used records. The cache size is strictly bounded by -the C option. DB_File's C<-E{cachesize}> doesn't prevent -your process from blowing up when reading a big file. - -=item * - -C has an extremely poor writing strategy. If you have a -ten-megabyte file and tie it with C, and then use - - $a[0] =~ s/PERL/Perl/; - -C will then read the entire ten-megabyte file into memory, do -the change, and write the entire file back to disk, reading ten -megabytes and writing ten megabytes. C will read and write -only the first record. - -If you have a million-record file and tie it with C, and then -use - - $a[999998] =~ s/Larry/Larry Wall/; - -C will read the entire million-record file into memory, do -the change, and write the entire file back to disk. C will -only rewrite records 999998 and 999999. During the writing process, -it will never have more than a few kilobytes of data in memory at any -time, even if the two records are very large. - -=item * - -Since changes to C files only appear when you do C, it -can be inconvenient to arrange for concurrent access to the same file -by two or more processes. Each process needs to call C<$db-Esync> -after every write. When you change a C array, the changes -are reflected in the file immediately; no explicit C<-Esync> call -is required. (Or you can enable deferred writing mode to require that -changes be explicitly sync'ed.) - -=item * - -C is only installed by default if you already have the C -library on your system; C is pure Perl and is installed by -default no matter what. Starting with Perl 5.7.3 you can be -absolutely sure it will be everywhere. You will never have that -surety with C. If you don't have C yet, it requires -a C compiler. You can install C from CPAN in five minutes -with no compiler. - -=item * - -C is written in C, so if you aren't allowed to install -modules on your system, it is useless. C is written in Perl, -so even if you aren't allowed to install modules, you can look into -the source code, see how it works, and copy the subroutines or the -ideas from the subroutines directly into your own Perl program. - -=item * - -Except in very old, unsupported versions, C's free license -requires that you distribute the source code for your entire -application. If you are not able to distribute the source code for -your application, you must negotiate an alternative license from -Sleepycat, possibly for a fee. Tie::File is under the Perl Artistic -license and can be distributed free under the same terms as Perl -itself. - -=back +There are a number of reasons that you might prefer C. +A list is available at C. =head1 AUTHOR @@ -2163,15 +2139,17 @@ Slaven Rezic / Peter Scott / Peter Somu / Autrijus Tang (again) / -Tels +Tels / +Juerd Wallboer =head1 TODO -More tests. (_twrite should be tested separately, because there are a -lot of weird special cases lurking in there.) +More tests. (The cache and heap modules need more unit tests.) Improve SPLICE algorithm to use deferred writing machinery. +Cleverer strategy for flushing deferred writes. + More tests. (Stuff I didn't think of yet.) Paragraph mode? @@ -2183,15 +2161,7 @@ Maybe an autolocking mode? Record locking with fcntl()? Then the module might support an undo log and get real transactions. What a tour de force that would be. -Cleverer strategy for flushing deferred writes. - -oMore tests. +More tests. =cut - - - - - - diff --git a/lib/Tie/File/t/00_version.t b/lib/Tie/File/t/00_version.t index cfe05bc..a4135fe 100644 --- a/lib/Tie/File/t/00_version.t +++ b/lib/Tie/File/t/00_version.t @@ -2,7 +2,7 @@ print "1..1\n"; -my $testversion = "0.90"; +my $testversion = "0.91"; use Tie::File; if ($Tie::File::VERSION != $testversion) { diff --git a/lib/Tie/File/t/01_gen.t b/lib/Tie/File/t/01_gen.t index f86fdd4..b91a074 100644 --- a/lib/Tie/File/t/01_gen.t +++ b/lib/Tie/File/t/01_gen.t @@ -2,7 +2,7 @@ my $file = "tf$$.txt"; -print "1..68\n"; +print "1..72\n"; my $N = 1; use Tie::File; @@ -81,6 +81,28 @@ check_contents("", "0", "", "rec3"); $a[1] = "whoops"; check_contents("", "whoops", "", "rec3"); +# (69-72) make sure that undefs are treated correctly---they should +# be converted to empty records, and should not raise any warnings. +# (Some of these failed in 0.90. The change to _fixrec fixed them.) +# 20020331 +{ + my $good = 1; my $warn; + # If any of these raise warnings, we have a problem. + local $SIG{__WARN__} = sub { $good = 0; $warn = shift(); ctrlfix($warn)}; + local $^W = 1; + @a = (1); + $a[0] = undef; + print $good ? "ok $N\n" : "not ok $N # $warn\n"; + $N++; $good = 1; + print defined($a[0]) ? "ok $N\n" : "not ok $N\n"; + $N++; $good = 1; + $a[3] = '3'; + print defined($a[1]) ? "ok $N\n" : "not ok $N\n"; + $N++; $good = 1; + undef $a[3]; + print $good ? "ok $N\n" : "not ok $N # $warn\n"; + $N++; $good = 1; +} use POSIX 'SEEK_SET'; sub check_contents { diff --git a/lib/Tie/File/t/04_splice.t b/lib/Tie/File/t/04_splice.t index ed0b43f..156adc3 100644 --- a/lib/Tie/File/t/04_splice.t +++ b/lib/Tie/File/t/04_splice.t @@ -11,10 +11,11 @@ # Then, it checks the actual contents of the file against the expected # contents. + my $file = "tf$$.txt"; $: = Tie::File::_default_recsep(); my $data = "rec0$:rec1$:rec2$:"; -print "1..101\n"; +print "1..106\n"; init_file($data); @@ -170,7 +171,31 @@ splice @a, 4; check_contents("0$:1$:2$:3$:"); splice @a; check_contents(""); + +# (102-103) I think there's a bug here---it will fail to clear the EOF flag +@a = (0..11); +splice @a, -1, 1000; +check_contents("0$:1$:2$:3$:4$:5$:6$:7$:8$:9$:10$:"); +# (104-106) make sure that undefs are treated correctly---they should +# be converted to empty records, and should not raise any warnings. +# (Some of these failed in 0.90. The change to _fixrec fixed them.) +# 20020331 +{ + my $good = 1; my $warn; + # If any of these raise warnings, we have a problem. + local $SIG{__WARN__} = sub { $good = 0; $warn = shift(); ctrlfix($warn)}; + local $^W = 1; + @a = (1); + splice @a, 1, 0, undef, undef, undef; + print $good ? "ok $N\n" : "not ok $N # $warn\n"; + $N++; $good = 1; + print defined($a[2]) ? "ok $N\n" : "not ok $N\n"; + $N++; $good = 1; + my @r = splice @a, 2; + print defined($r[0]) ? "ok $N\n" : "not ok $N\n"; + $N++; $good = 1; +} sub init_file { my $data = shift; diff --git a/lib/Tie/File/t/08_ro.t b/lib/Tie/File/t/08_ro.t index 8f3d998..5fd8933 100644 --- a/lib/Tie/File/t/08_ro.t +++ b/lib/Tie/File/t/08_ro.t @@ -6,7 +6,7 @@ my $file = "tf$$.txt"; $: = Tie::File::_default_recsep(); -print "1..9\n"; +print "1..13\n"; my $N = 1; use Tie::File; @@ -36,6 +36,47 @@ sub init_file { close F; } +undef $o; untie @a; +my $badrec = "Malformed"; +# (10-13) When a record lacks the record seprator, we sneakily try +# to fix it. How does that work when the file is read-only? +if (setup_badly_terminated_file(4)) { + my $good = 1; + my $warn; + local $SIG{__WARN__} = sub { $good = 0; ctrlfix($warn = shift); }; + local $^W = 1; + my $o = tie @a, 'Tie::File', $file, mode => O_RDONLY, autochomp => 0 + or die "Couldn't tie $file: $!"; + + print $a[0] eq "Malformed$:" ? "ok $N\n" : "not ok $N\n"; $N++; + print $good ? "ok $N\n" : "not ok $N # $warn\n"; $good = 1; $N++; + print $a[0] eq "Malformed$:" ? "ok $N\n" : "not ok $N\n"; $N++; + print $good ? "ok $N\n" : "not ok $N # $warn\n"; $good = 1; $N++; +} + +sub setup_badly_terminated_file { + my $NTESTS = shift; + open F, "> $file" or die "Couldn't open $file: $!"; + binmode F; + print F $badrec; + close F; + unless (-s $file == length $badrec) { + for (1 .. $NTESTS) { + print "ok $N \# skipped - can't create improperly terminated file\n"; + $N++; + } + return; + } + return 1; +} + + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} END { undef $o; diff --git a/lib/Tie/File/t/09_gen_rs.t b/lib/Tie/File/t/09_gen_rs.t index 78d1a58..37a5bc9 100644 --- a/lib/Tie/File/t/09_gen_rs.t +++ b/lib/Tie/File/t/09_gen_rs.t @@ -2,7 +2,7 @@ my $file = "tf$$.txt"; -print "1..47\n"; +print "1..56\n"; my $N = 1; use Tie::File; @@ -57,22 +57,21 @@ check_contents("short0", "sh1", "short2"); $a[0] = 'sh0'; check_contents("sh0", "sh1", "short2"); -# file with holes +# (35-38) file with holes $a[4] = 'rec4'; check_contents("sh0", "sh1", "short2", "", "rec4"); $a[3] = 'rec3'; check_contents("sh0", "sh1", "short2", "rec3", "rec4"); -# (35-37) zero out file +# (39-40) zero out file @a = (); check_contents(); -# (38-40) insert into the middle of an empty file +# (41-42) insert into the middle of an empty file $a[3] = "rec3"; check_contents("", "", "", "rec3"); - -# (41-46) 20020326 You thought there would be a bug in STORE where if +# (43-47) 20020326 You thought there would be a bug in STORE where if # a cached record was false, STORE wouldn't see it at all. Yup, there is, # and adding the appropriate defined() test fixes the problem. undef $o; untie @a; 1 while unlink $file; @@ -87,6 +86,63 @@ $a[2] = "oops"; check_contents("", "", "oops"); $a[1] = "bah"; check_contents("", "bah", "oops"); +undef $o; untie @a; + +# (48-56) 20020331 Make sure we correctly handle the case where the final +# record of the file is not properly terminated, Through version 0.90, +# we would mangle the file. +my $badrec = "Malformed"; +$: = $RECSEP = Tie::File::_default_recsep(); +# (48-50) +if (setup_badly_terminated_file(3)) { + $o = tie @a, 'Tie::File', $file, + recsep => $RECSEP, autochomp => 0, autodefer => 0 + or die "Couldn't tie file: $!"; + my $z = $a[0]; + print $z eq "$badrec$:" ? "ok $N\n" : + "not ok $N \# got $z, expected $badrec\n"; + $N++; + push @a, "next"; + check_contents($badrec, "next"); +} +# (51-52) +if (setup_badly_terminated_file(2)) { + $o = tie @a, 'Tie::File', $file, + recsep => $RECSEP, autochomp => 0, autodefer => 0 + or die "Couldn't tie file: $!"; + splice @a, 1, 0, "x", "y"; + check_contents($badrec, "x", "y"); +} +# (53-56) +if (setup_badly_terminated_file(4)) { + $o = tie @a, 'Tie::File', $file, + recsep => $RECSEP, autochomp => 0, autodefer => 0 + or die "Couldn't tie file: $!"; + my @r = splice @a, 0, 1, "x", "y"; + my $n = @r; + print $n == 1 ? "ok $N\n" : "not ok $N \# expected 1 elt, got $n\n"; + $N++; + print $r[0] eq "$badrec$:" ? "ok $N\n" + : "not ok $N \# expected <$badrec>, got <$r[0]>\n"; + $N++; + check_contents("x", "y"); +} + +sub setup_badly_terminated_file { + my $NTESTS = shift; + open F, "> $file" or die "Couldn't open $file: $!"; + binmode F; + print F $badrec; + close F; + unless (-s $file == length $badrec) { + for (1 .. $NTESTS) { + print "ok $N \# skipped - can't create improperly terminated file\n"; + $N++; + } + return; + } + return 1; +} use POSIX 'SEEK_SET'; diff --git a/lib/Tie/File/t/16_handle.t b/lib/Tie/File/t/16_handle.t index 6d212a1..ed15384 100644 --- a/lib/Tie/File/t/16_handle.t +++ b/lib/Tie/File/t/16_handle.t @@ -56,7 +56,7 @@ check_contents("long0", "longer1", "long2"); $a[0] = 'longer0'; check_contents("longer0", "longer1", "long2"); -# 25-34 shortening alterations, including truncation +# 25-38 shortening alterations, including truncation $a[0] = 'short0'; check_contents("short0", "longer1", "long2"); $a[1] = 'short1'; @@ -78,8 +78,12 @@ close F; undef $o; untie @a; -# Does it correctly detect a non-seekable handle? -{ if ($^O =~ /^(MSWin32|dos)$/) { +if ($] < 5.006) { + print "ok 39 # skipped - 5.005_03 panics after this test\n"; + exit 0; +} +# (39) Does it correctly detect a non-seekable handle? +{ if ($^O =~ /^(MSWin32|dos|BeOS)$/) { print "ok $N # skipped ($^O has broken pipe semantics)\n"; last; } diff --git a/lib/Tie/File/t/24_cache_loop.t b/lib/Tie/File/t/24_cache_loop.t new file mode 100644 index 0000000..537856d --- /dev/null +++ b/lib/Tie/File/t/24_cache_loop.t @@ -0,0 +1,54 @@ +#!/usr/bin/perl +# +# Tests for various caching errors +# + +use Config; +unless ($Config{d_alarm}) { + print "1..0\n"; exit; +} + +my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); +my $data = join $:, "record0" .. "record9", ""; +my $V = $ENV{INTEGRITY}; # Verbose integrity checking? + +print "1..3\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +open F, "> $file" or die $!; +binmode F; +print F $data; +close F; + +# Limit cache size to 30 bytes +my $MAX = 30; +# -- that's enough space for 3 records, but not 4, on both \n and \r\n systems +my $o = tie @a, 'Tie::File', $file, memory => $MAX, autodefer => 1; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +# (3) In 0.50 this goes into an infinite loop. Explanation: +# +# Suppose you overfill the defer buffer by so much that the memory +# limit is also exceeded. You'll go into _splice to prepare to +# write out the defer buffer, and _splice will call _fetch, which +# will then try to flush the read cache---but the read cache is +# already empty, so you're stuck in an infinite loop. +# +# Five seconds should be plenty of time for it to complete if it works. +alarm 5 unless $^P; +@a = "record0" .. "record9"; +print "ok 3\n"; + +END { + undef $o; + untie @a; + 1 while unlink $file; +} + + + diff --git a/lib/Tie/File/t/25_gen_nocache.t b/lib/Tie/File/t/25_gen_nocache.t new file mode 100644 index 0000000..bafecf0 --- /dev/null +++ b/lib/Tie/File/t/25_gen_nocache.t @@ -0,0 +1,138 @@ +#!/usr/bin/perl +# +# Regular read-write tests with caching disabled +# (Same as 01_gen.t) +# + +my $file = "tf$$.txt"; + +print "1..68\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +my $o = tie @a, 'Tie::File', $file, autochomp => 0, autodefer => 0, memory => 0; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +$: = $o->{recsep}; + +# 3-5 create +$a[0] = 'rec0'; +check_contents("rec0"); + +# 6-11 append +$a[1] = 'rec1'; +check_contents("rec0", "rec1"); +$a[2] = 'rec2'; +check_contents("rec0", "rec1", "rec2"); + +# 12-20 same-length alterations +$a[0] = 'new0'; +check_contents("new0", "rec1", "rec2"); +$a[1] = 'new1'; +check_contents("new0", "new1", "rec2"); +$a[2] = 'new2'; +check_contents("new0", "new1", "new2"); + +# 21-35 lengthening alterations +$a[0] = 'long0'; +check_contents("long0", "new1", "new2"); +$a[1] = 'long1'; +check_contents("long0", "long1", "new2"); +$a[2] = 'long2'; +check_contents("long0", "long1", "long2"); +$a[1] = 'longer1'; +check_contents("long0", "longer1", "long2"); +$a[0] = 'longer0'; +check_contents("longer0", "longer1", "long2"); + +# 36-50 shortening alterations, including truncation +$a[0] = 'short0'; +check_contents("short0", "longer1", "long2"); +$a[1] = 'short1'; +check_contents("short0", "short1", "long2"); +$a[2] = 'short2'; +check_contents("short0", "short1", "short2"); +$a[1] = 'sh1'; +check_contents("short0", "sh1", "short2"); +$a[0] = 'sh0'; +check_contents("sh0", "sh1", "short2"); + +# (51-56) file with holes +$a[4] = 'rec4'; +check_contents("sh0", "sh1", "short2", "", "rec4"); +$a[3] = 'rec3'; +check_contents("sh0", "sh1", "short2", "rec3", "rec4"); + +# (57-59) zero out file +@a = (); +check_contents(); + +# (60-62) insert into the middle of an empty file +$a[3] = "rec3"; +check_contents("", "", "", "rec3"); + +# (63-68) 20020326 You thought there would be a bug in STORE where if +# a cached record was false, STORE wouldn't see it at all. But you +# forgot that records always come back from the cache with the record +# separator attached, so they are unlikely to be false. The only +# really weird case is when the cached record is empty and the record +# separator is "0". Test that in 09_gen_rs.t. +$a[1] = "0"; +check_contents("", "0", "", "rec3"); +$a[1] = "whoops"; +check_contents("", "whoops", "", "rec3"); + + +use POSIX 'SEEK_SET'; +sub check_contents { + my @c = @_; + my $x = join $:, @c, ''; + local *FH = $o->{fh}; + seek FH, 0, SEEK_SET; +# my $open = open FH, "< $file"; + my $a; + { local $/; $a = } + $a = "" unless defined $a; + if ($a eq $x) { + print "ok $N\n"; + } else { + ctrlfix($a, $x); + print "not ok $N\n# expected <$x>, got <$a>\n"; + } + $N++; + + # now check FETCH: + my $good = 1; + my $msg; + for (0.. $#c) { + my $aa = $a[$_]; + unless ($aa eq "$c[$_]$:") { + $msg = "expected <$c[$_]$:>, got <$aa>"; + ctrlfix($msg); + $good = 0; + } + } + print $good ? "ok $N\n" : "not ok $N # $msg\n"; + $N++; + + print $o->_check_integrity($file, $ENV{INTEGRITY}) + ? "ok $N\n" : "not ok $N\n"; + $N++; +} + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +END { + undef $o; + untie @a; + 1 while unlink $file; +} + diff --git a/lib/Tie/File/t/26_twrite.t b/lib/Tie/File/t/26_twrite.t new file mode 100644 index 0000000..e3d03a0 --- /dev/null +++ b/lib/Tie/File/t/26_twrite.t @@ -0,0 +1,358 @@ +#!/usr/bin/perl +# +# Unit tests of _twrite function +# +# _twrite($self, $data, $pos, $len) +# +# 't' here is for 'tail'. This writes $data at absolute position $pos +# in the file, overwriting exactly $len of the bytes at that position. +# Everything else is moved down or up, dependong on whether +# length($data) > $len or length($data) < $len. +# $len == 0 is a pure insert; $len == length($data) is a simple overwrite. +# + +my $file = "tf$$.txt"; + +print "1..181\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +$: = Tie::File::_default_recsep(); + +# (2) Peter Scott sent this one. It fails in 0.51 and works in 0.90 +# <4.3.2.7.2.20020331102819.00b913d0@shell2.webquarry.com> +# +# The problem was premature termination in the inner loop +# because you had $more_data scoped *inside* the block instead of outside. +# 20020331 +open F, "> $file" or die "Couldn't open $file: $!"; +binmode F; +for (1..100) { + print F "$_ ", 'a'x150, $: ; +} +close F; +# The file is now 15292 characters long on Unix, 15392 on Win32 +die -s $file unless -s $file == 15292 + 100 * length($:); + +tie my @lines, 'Tie::File', $file or die $!; +push @lines, "1001 ".('a' x 100); +splice @lines, 0, 1; +untie @lines; + +my $s = -s $file; +my $x = 15292 - 152 + 105 + 100*length($:); +print $s == $x + ? "ok $N\n" : "not ok $N # expected $x, got $s\n"; +$N++; + +my @subtests = qw(x x> 0); + +# (3-73) These were generated by 'gentests.pl' to cover all possible cases +# (I hope) +# Legend: +# x: data is entirely contained within one block +# x>: data runs from the middle to the end of the block +# : data occupies precisely one block +# x>: data runs from the middle of one block to the end of the next +# : data occupies two blocks exactly +# : data occupies three blocks exactly +# 0: data is null +# +# For each possible alignment of the old and new data, we investigate +# up to three situations: old data is shorter, old and new data are the +# same length, and new data is shorter. +# +# try($pos, $old, $new) means to run a test where the data starts at +# position $pos, the old data has length $old, +# and the new data has length $new. +try( 9659, 6635, 6691); # old=x , new=x ; old < new +try( 8605, 2394, 2394); # old=x , new=x ; old = new +try( 9768, 1361, 664); # old=x , new=x ; old > new +try( 9955, 6429, 6429); # old=x> , new=x ; old = new +try(10550, 5834, 4123); # old=x> , new=x ; old > new +try(14580, 6158, 851); # old=x> new +try(13442, 11134, 1572); # old=x> , new=x ; old > new +try( 8394, 0, 5742); # old=0 , new=x ; old < new +try( 8192, 2819, 6738); # old= new +try( 8192, 8192, 8192); # old= , new= , new= new +try( 8192, 10575, 6644); # old= new +try( 8192, 16384, 5616); # old= , new= new +try( 8192, 24576, 6253); # old=, new= new +try( 8192, 0, 6870); # old=0 , new= ; old < new +try( 9965, 6419, 6419); # old=x> , new=x> ; old = new +try(16059, 6102, 325); # old=x> ; old > new +try( 9503, 15073, 6881); # old=x> , new=x> ; old > new +try( 9759, 0, 6625); # old=0 , new=x> ; old < new +try( 8525, 2081, 8534); # old=x , new=x> , new=x> new +try(14739, 9837, 9837); # old=x> , new=x> , new=x> new +try(12602, 0, 8354); # old=0 , new=x> ; old < new +try( 8192, 8192, 8192); # old= , new= ; old = new +try( 8192, 14817, 8192); # old= ; old > new +try( 8192, 16384, 8192); # old= , new= ; old > new +try( 8192, 24576, 8192); # old=, new= ; old > new +try( 8192, 0, 8192); # old=0 , new= ; old < new +try( 8192, 6532, 10882); # old= , new= new +try( 8192, 16384, 10781); # old= , new= new +try( 8192, 24576, 9284); # old=, new= new +try( 8192, 0, 12488); # old=0 , new= ; old < new +try(13500, 2884, 11076); # old=x> , new=x> ; old < new +try(14069, 4334, 10507); # old=x> ; old < new +try(14761, 9815, 9815); # old=x> , new=x> ; old = new +try(10469, 0, 14107); # old=0 , new=x> ; old < new +try( 8192, 4181, 16384); # old= ; old < new +try( 8192, 8192, 16384); # old= , new= ; old < new +try( 8192, 12087, 16384); # old= ; old < new +try( 8192, 16384, 16384); # old= , new= ; old = new +try( 8192, 24576, 16384); # old=, new= ; old > new +try( 8192, 0, 16384); # old=0 , new= ; old < new +try( 8192, 4968, 24576); # old=; old < new +try( 8192, 8192, 24576); # old= , new=; old < new +try( 8192, 14163, 24576); # old=; old < new +try( 8192, 16384, 24576); # old= , new=; old < new +try( 8192, 24576, 24576); # old=, new=; old = new +try( 8192, 0, 24576); # old=0 , new=; old < new +try( 8771, 776, 0); # old=x , new=0 ; old > new +try( 8192, 2813, 0); # old= new +try(13945, 2439, 0); # old=x> , new=0 ; old > new +try(14493, 6090, 0); # old=x> new +try( 8192, 8192, 0); # old= , new=0 ; old > new +try( 8192, 10030, 0); # old= new +try(14983, 9593, 0); # old=x> , new=0 ; old > new +try( 8192, 16384, 0); # old= , new=0 ; old > new +try( 8192, 24576, 0); # old=, new=0 ; old > new +try(10489, 0, 0); # old=0 , new=0 ; old = new + +# (74-114) +# These tests all take place at the start of the file +try( 0, 771, 1593); # old= new +try( 0, 8192, 8192); # old= , new= , new= new +try( 0, 11891, 1917); # old= new +try( 0, 16384, 5155); # old= , new= new +try( 0, 24576, 2953); # old=, new= new +try( 0, 0, 1317); # old=0 , new= ; old < new +try( 0, 8192, 8192); # old= , new= ; old = new +try( 0, 11083, 8192); # old= ; old > new +try( 0, 16384, 8192); # old= , new= ; old > new +try( 0, 24576, 8192); # old=, new= ; old > new +try( 0, 0, 8192); # old=0 , new= ; old < new +try( 0, 6265, 9991); # old= , new= new +try( 0, 16384, 13258); # old= , new= new +try( 0, 24576, 14367); # old=, new= new +try( 0, 0, 10881); # old=0 , new= ; old < new +try( 0, 8192, 16384); # old= , new= ; old < new +try( 0, 15082, 16384); # old= ; old < new +try( 0, 16384, 16384); # old= , new= ; old = new +try( 0, 24576, 16384); # old=, new= ; old > new +try( 0, 0, 16384); # old=0 , new= ; old < new +try( 0, 2421, 24576); # old=; old < new +try( 0, 8192, 24576); # old= , new=; old < new +try( 0, 11655, 24576); # old=; old < new +try( 0, 16384, 24576); # old= , new=; old < new +try( 0, 24576, 24576); # old=, new=; old = new +try( 0, 0, 24576); # old=0 , new=; old < new +try( 0, 6530, 0); # old= new +try( 0, 8192, 0); # old= , new=0 ; old > new +try( 0, 14707, 0); # old= new +try( 0, 16384, 0); # old= , new=0 ; old > new +try( 0, 24576, 0); # old=, new=0 ; old > new +try( 0, 0, 0); # old=0 , new=0 ; old = new + +# (115-141) +# These tests all take place at the end of the file +$FLEN = 40960; # Force the file to be exactly 40960 bytes long +try(32768, 8192, 8192); # old= , new= , new= new +try(24576, 16384, 1917); # old= , new= new +try(16384, 24576, 3818); # old=, new= new +try(40960, 0, 2779); # old=0 , new= , new= ; old = new +try(24576, 16384, 8192); # old= , new= ; old > new +try(16384, 24576, 8192); # old=, new= ; old > new +try(40960, 0, 8192); # old=0 , new= ; old < new +try(32768, 8192, 10724); # old= , new= , new= new +try(16384, 24576, 15030); # old=, new= new +try(40960, 0, 11752); # old=0 , new= , new= ; old < new +try(24576, 16384, 16384); # old= , new= ; old = new +try(16384, 24576, 16384); # old=, new= ; old > new +try(40960, 0, 16384); # old=0 , new= ; old < new +try(32768, 8192, 24576); # old= , new=; old < new +try(24576, 16384, 24576); # old= , new=; old < new +try(16384, 24576, 24576); # old=, new=; old = new +try(40960, 0, 24576); # old=0 , new=; old < new +try(35973, 4987, 0); # old=x> , new=0 ; old > new +try(32768, 8192, 0); # old= , new=0 ; old > new +try(29932, 11028, 0); # old=x> , new=0 ; old > new +try(24576, 16384, 0); # old= , new=0 ; old > new +try(16384, 24576, 0); # old=, new=0 ; old > new +try(40960, 0, 0); # old=0 , new=0 ; old = new + +# (142-181) +# These tests all take place at the end of the file +$FLEN = 42000; # Force the file to be exactly 42000 bytes long +try(41275, 725, 4059); # old=x , new=x ; old < new +try(41683, 317, 317); # old=x , new=x ; old = new +try(41225, 775, 405); # old=x , new=x ; old > new +try(35709, 6291, 284); # old=x> new +try(42000, 0, 2434); # old=0 , new=x ; old < new +try(40960, 1040, 1608); # old= new +try(32768, 9232, 5604); # old= new +try(42000, 0, 6637); # old=0 , new= ; old < new +try(39994, 2006, 966); # old=x> ; old > new +try(42000, 0, 7152); # old=0 , new=x> ; old < new +try(41613, 387, 10601); # old=x , new=x> new +try(42000, 0, 9189); # old=0 , new=x> ; old < new +try(32768, 9232, 8192); # old= ; old > new +try(42000, 0, 8192); # old=0 , new= ; old < new +try(40960, 1040, 11778); # old= new +try(42000, 0, 8578); # old=0 , new= ; old < new +try(39618, 2382, 9534); # old=x> ; old < new +try(42000, 0, 15344); # old=0 , new=x> ; old < new +try(40960, 1040, 16384); # old= ; old < new +try(32768, 9232, 16384); # old= ; old < new +try(42000, 0, 16384); # old=0 , new= ; old < new +try(40960, 1040, 24576); # old=; old < new +try(32768, 9232, 24576); # old=; old < new +try(42000, 0, 24576); # old=0 , new=; old < new +try(41500, 500, 0); # old=x , new=0 ; old > new +try(40960, 1040, 0); # old= new +try(35272, 6728, 0); # old=x> new +try(32768, 9232, 0); # old= new +try(42000, 0, 0); # old=0 , new=0 ; old = new + +sub try { + my ($pos, $len, $newlen) = @_; + open F, "> $file" or die "Couldn't open file $file: $!"; + binmode F; + + # The record has exactly 17 characters. This will help ensure that + # even if _twrite screws up, the data doesn't coincidentally + # look good because the remainder accidentally lines up. + my $d = length($:) == 1 ? "0123456789abcdef$:" : "0123456789abcde$:"; + my $recs = defined($FLEN) ? + int($FLEN/length($d))+1 : # enough to make up at least $FLEN + int(8192*5/length($d))+1; # at least 5 blocks' worth + my $oldfile = $d x $recs; + my $flen = defined($FLEN) ? $FLEN : $recs * 17; + substr($oldfile, $FLEN) = "" if defined $FLEN; # truncate + print F $oldfile; + close F; + + die "wrong length!" unless -s $file == $flen; + + my $newdata = "-" x $newlen; + my $expected = $oldfile; + substr($expected, $pos, $len) = $newdata; + + my $o = tie my @lines, 'Tie::File', $file or die $!; + $o->_twrite($newdata, $pos, $len); + undef $o; untie @lines; + + open F, "< $file" or die "Couldn't open file $file: $!"; + my $actual; + { local $/; + $actual = ; + } + close F; + + my ($alen, $xlen) = (length $actual, length $expected); + unless ($alen == $xlen) { + print "# try(@_) expected file length $xlen, actual $alen!\n"; + } + print $actual eq $expected ? "ok $N\n" : "not ok $N\n"; + $N++; +} + + + +use POSIX 'SEEK_SET'; +sub check_contents { + my @c = @_; + my $x = join $:, @c, ''; + local *FH = $o->{fh}; + seek FH, 0, SEEK_SET; +# my $open = open FH, "< $file"; + my $a; + { local $/; $a = } + $a = "" unless defined $a; + if ($a eq $x) { + print "ok $N\n"; + } else { + ctrlfix($a, $x); + print "not ok $N\n# expected <$x>, got <$a>\n"; + } + $N++; + + # now check FETCH: + my $good = 1; + my $msg; + for (0.. $#c) { + my $aa = $a[$_]; + unless ($aa eq "$c[$_]$:") { + $msg = "expected <$c[$_]$:>, got <$aa>"; + ctrlfix($msg); + $good = 0; + } + } + print $good ? "ok $N\n" : "not ok $N # $msg\n"; + $N++; + + print $o->_check_integrity($file, $ENV{INTEGRITY}) + ? "ok $N\n" : "not ok $N\n"; + $N++; +} + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +END { + undef $o; + untie @a; + 1 while unlink $file; +} +