From: Abhijit Menon-Sen Date: Fri, 15 Mar 2002 17:37:52 +0000 (+0000) Subject: Upgrade to Tie::File 0.19. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=b3fe5a4cdcf4bcad47de92d4dfaa5a484780269b;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Tie::File 0.19. p4raw-id: //depot/perl@15245 --- diff --git a/MANIFEST b/MANIFEST index 0969cbc..aa0fce6 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1422,6 +1422,7 @@ lib/Tie/Array/splice.t Test for Tie::Array::SPLICE lib/Tie/Array/std.t Test for Tie::StdArray lib/Tie/Array/stdpush.t Test for Tie::StdArray lib/Tie/File.pm Files as tied arrays. +lib/Tie/File/t/00_version.t Test for Tie::File. lib/Tie/File/t/01_gen.t Test for Tie::File. lib/Tie/File/t/02_fetchsize.t Test for Tie::File. lib/Tie/File/t/03_longfetch.t Test for Tie::File. @@ -1439,6 +1440,10 @@ lib/Tie/File/t/14_lock.t Test for Tie::File. lib/Tie/File/t/15_pushpop.t Test for Tie::File. lib/Tie/File/t/16_handle.t Test for Tie::File. lib/Tie/File/t/17_misc_meth.t Test for Tie::File. +lib/Tie/File/t/18_rs_fixrec.t Test for Tie::File. +lib/Tie/File/t/19_cache.t Test for Tie::File. +lib/Tie/File/t/20_cache_full.t Test for Tie::File. +lib/Tie/File/t/21_win32.t Test for Tie::File. lib/Tie/Handle.pm Base class for tied handles lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle lib/Tie/Hash.pm Base class for tied hashes diff --git a/lib/Tie/File.pm b/lib/Tie/File.pm index 15ccaa9..f0a864d 100644 --- a/lib/Tie/File.pm +++ b/lib/Tie/File.pm @@ -5,7 +5,7 @@ use POSIX 'SEEK_SET'; use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX'; require 5.005; -$VERSION = "0.17"; +$VERSION = "0.19"; # Idea: The object will always contain an array of byte offsets # this will be filled in as is necessary and convenient. @@ -19,7 +19,10 @@ $VERSION = "0.17"; # Record numbers start at ZERO. -my $DEFAULT_CACHE_SIZE = 1<<21; # 2 megabytes +my $DEFAULT_MEMORY_SIZE = 1<<21; # 2 megabytes + +my %good_opt = map {$_ => 1, "-$_" => 1} + qw(memory dw_size mode recsep discipline); sub TIEARRAY { if (@_ % 2 != 0) { @@ -29,13 +32,28 @@ sub TIEARRAY { # transform '-foo' keys into 'foo' keys for my $key (keys %opts) { + unless ($good_opt{$key}) { + croak("$pack: Unrecognized option '$key'\n"); + } my $okey = $key; if ($key =~ s/^-+//) { $opts{$key} = delete $opts{$okey}; } } - $opts{cachesize} ||= $DEFAULT_CACHE_SIZE; + unless (defined $opts{memory}) { + # default is the larger of the default cache size and the + # deferred-write buffer size (if specified) + $opts{memory} = $DEFAULT_MEMORY_SIZE; + $opts{memory} = $opts{dw_size} + if defined $opts{dw_size} && $opts{dw_size} > $DEFAULT_MEMORY_SIZE; + } + $opts{dw_size} = $opts{memory} unless defined $opts{dw_size}; + if ($opts{dw_size} > $opts{memory}) { + croak("$pack: dw_size may not be larger than total memory allocation\n"); + } + $opts{deferred} = {}; # no records presently deferred + $opts{deferred_s} = 0; # count of total bytes in ->{deferred} # the cache is a hash instead of an array because it is likely to be # sparsely populated @@ -45,7 +63,9 @@ sub TIEARRAY { $opts{offsets} = [0]; $opts{filename} = $file; - $opts{recsep} = $/ unless defined $opts{recsep}; + unless (defined $opts{recsep}) { + $opts{recsep} = _default_recsep(); + } $opts{recseplen} = length($opts{recsep}); if ($opts{recseplen} == 0) { croak "Empty record separator not supported by $pack"; @@ -67,6 +87,12 @@ sub TIEARRAY { binmode $fh; } { my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write + if (defined $opts{discipline} && $] >= 5.006) { + # This avoids a compile-time warning under 5.005 + eval 'binmode($fh, $opts{discipline})'; + croak $@ if $@ =~ /unknown discipline/i; + die if $@; + } $opts{fh} = $fh; bless \%opts => $pack; @@ -89,6 +115,19 @@ sub FETCH { my $fh = $self->{FH}; $self->_seek($n); # we can do this now that offsets is populated my $rec = $self->_read_record; + +# If we happen to have just read the first record, check to see if +# the length of the record matches what 'tell' says. If not, Tie::File +# won't work, and should drop dead. +# +# if ($n == 0 && defined($rec) && tell($self->{fh}) != length($rec)) { +# if (defined $self->{discipline}) { +# croak "I/O discipline $self->{discipline} not supported"; +# } else { +# croak "File encoding not supported"; +# } +# } + $self->_cache_insert($n, $rec) if defined $rec; $rec; } @@ -98,9 +137,7 @@ sub STORE { $self->_fixrecs($rec); - # TODO: what should we do about the cache? Install the new record - # in the cache only if the old version of the same record was - # already there? + return $self->_store_deferred($n, $rec) if $self->{defer}; # We need this to decide whether the new record will fit # It incidentally populates the offsets table @@ -109,8 +146,12 @@ sub STORE { # _check_cache promotes record $n to MRU. Is this correct behavior? if (my $cached = $self->_check_cache($n)) { + my $len_diff = length($rec) - length($cached); $self->{cache}{$n} = $rec; - $self->{cached} += length($rec) - length($cached); + $self->{cached} += $len_diff; + $self->_cache_flush + if $len_diff > 0 + && $self->{deferred_s} + $self->{cached} > $self->{memory}; } if (not defined $oldrec) { @@ -120,6 +161,7 @@ sub STORE { } my $len_diff = length($rec) - length($oldrec); + # length($oldrec) here is not consistent with text mode TODO XXX BUG $self->_twrite($rec, $self->{offsets}[$n], length($oldrec)); # now update the offsets @@ -130,6 +172,21 @@ sub STORE { } } +sub _store_deferred { + my ($self, $n, $rec) = @_; + $self->_uncache($n); + my $old_deferred = $self->{deferred}{$n}; + $self->{deferred}{$n} = $rec; + $self->{deferred_s} += length($rec); + $self->{deferred_s} -= length($old_deferred) if defined $old_deferred; + if ($self->{deferred_s} > $self->{dw_size}) { + $self->flush; + $self->defer; # flush clears the 'defer' flag + } elsif ($self->{deferred_s} + $self->{cached} > $self->{memory}) { + $self->_cache_flush; + } +} + sub FETCHSIZE { my $self = shift; my $n = $#{$self->{offsets}}; @@ -154,6 +211,7 @@ sub STORESIZE { $self->_seek($len); $self->_chop_file; $#{$self->{offsets}} = $len; +# $self->{offsets}[0] = 0; # in case we just chopped this my @cached = grep $_ >= $len, keys %{$self->{cache}}; $self->_uncache(@cached); } @@ -222,6 +280,16 @@ sub EXISTS { } sub SPLICE { + my $self = shift; + $self->_flush if $self->{defer}; + $self->_splice(@_); +} + +sub DESTROY { + $self->flush if $self->{defer}; +} + +sub _splice { my ($self, $pos, $nrecs, @data) = @_; my @result; @@ -308,11 +376,19 @@ sub SPLICE { # moved records - records past the site of the change # need to be renumbered # Maybe merge this with the previous block? - for (keys %{$self->{cache}}) { - next unless $_ >= $pos + $nrecs; - $self->{cache}{$_-$nrecs+@data} = delete $self->{cache}{$_}; + { + my %adjusted; + for (keys %{$self->{cache}}) { + next unless $_ >= $pos + $nrecs; + $adjusted{$_-$nrecs+@data} = delete $self->{cache}{$_}; + } + @{$self->{cache}}{keys %adjusted} = values %adjusted; +# for (keys %{$self->{cache}}) { +# next unless $_ >= $pos + $nrecs; +# $self->{cache}{$_-$nrecs+@data} = delete $self->{cache}{$_}; +# } } - + # fix the LRU queue my(@new, @changed); for (@{$self->{lru}}) { @@ -326,6 +402,11 @@ sub SPLICE { } @{$self->{lru}} = (@new, @changed); + # Now there might be too much data in the cache, if we spliced out + # some short records and spliced in some long ones. If so, flush + # the cache. + $self->_cache_flush; + # Yes, the return value of 'splice' *is* actually this complicated wantarray ? @result : @result ? $result[-1] : undef; } @@ -460,13 +541,13 @@ sub _cache_insert { my ($self, $n, $rec) = @_; # Do not cache records that are too big to fit in the cache. - return unless length $rec <= $self->{cachesize}; + return unless length $rec <= $self->{memory}; $self->{cache}{$n} = $rec; $self->{cached} += length $rec; push @{$self->{lru}}, $n; # most-recently-used is at the END - $self->_cache_flush if $self->{cached} > $self->{cachesize}; + $self->_cache_flush if $self->{cached} > $self->{memory}; } sub _uncache { @@ -492,10 +573,10 @@ sub _check_cache { sub _cache_flush { my ($self) = @_; - while ($self->{cached} > $self->{cachesize}) { + while ($self->{cached} + $self->{deferred_s} > $self->{memory}) { my $lru = shift @{$self->{lru}}; - $self->{cached} -= length $lru; - delete $self->{cache}{$lru}; + my $rec = delete $self->{cache}{$lru}; + $self->{cached} -= length $rec; } } @@ -551,18 +632,76 @@ sub flock { flock $fh, $op; } +# Defer writes +sub defer { + my $self = shift; + $self->{defer} = 1; +} + +# Flush deferred writes +# +# This could be better optimized to write the file in one pass, instead +# of one pass per block of records. But that will require modifications +# to _twrite, so I should have a good _twite test suite first. +sub flush { + my $self = shift; + + $self->_flush; + $self->{defer} = 0; +} + +sub _flush { + my $self = shift; + my @writable = sort {$a<=>$b} (keys %{$self->{deferred}}); + + while (@writable) { + # gather all consecutive records from the front of @writable + my $first_rec = shift @writable; + my $last_rec = $first_rec+1; + ++$last_rec, shift @writable while @writable && $last_rec == $writable[0]; + --$last_rec; + $self->_fill_offsets_to($last_rec); + $self->_extend_file_to($last_rec); + $self->_splice($first_rec, $last_rec-$first_rec+1, + @{$self->{deferred}}{$first_rec .. $last_rec}); + } + + $self->discard; # clear out defered-write-cache +} + +# Discard deferred writes +sub discard { + my $self = shift; + undef $self->{deferred}; + $self->{deferred_s} = 0; + $self->{defer} = 0; +} + +# Not yet implemented +sub autodefer { } + +sub _default_recsep { + my $recsep = $/; + if ($^O eq 'MSWin32') { + # Windows users expect files to be terminated with \r\n + # But $/ is set to \n instead + # Note that this also transforms \n\n into \r\n\r\n. + # That is a feature. + $recsep =~ s/\n/\r\n/g; + } + $recsep; +} + # Given a file, make sure the cache is consistent with the # file contents sub _check_integrity { my ($self, $file, $warn) = @_; my $good = 1; - if (not defined $self->{offsets}[0]) { $warn && print STDERR "# offset 0 is missing!\n"; $good = 0; } elsif ($self->{offsets}[0] != 0) { - $warn && print STDERR "# offset 0 is missing!\n"; $warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n"; $good = 0; } @@ -589,15 +728,15 @@ sub _check_integrity { } } - my $cachesize = 0; + my $memory = 0; while (my ($n, $r) = each %{$self->{cache}}) { - $cachesize += length($r); + $memory += length($r); next if $n+1 <= $.; # checked this already $warn && print STDERR "# spurious caching of record $n\n"; $good = 0; } - if ($cachesize != $self->{cached}) { - $warn && print STDERR "# cache size is $self->{cached}, should be $cachesize\n"; + if ($memory != $self->{cached}) { + $warn && print STDERR "# cache size is $self->{cached}, should be $memory\n"; $good = 0; } @@ -605,7 +744,7 @@ sub _check_integrity { for (@{$self->{lru}}) { $seen{$_}++; if (not exists $self->{cache}{$_}) { - print "# $_ is mentioned in the LRU queue, but not in the cache\n"; + $warn && print "# $_ is mentioned in the LRU queue, but not in the cache\n"; $good = 0; } } @@ -613,7 +752,7 @@ sub _check_integrity { if (@duplicate) { my $records = @duplicate == 1 ? 'Record' : 'Records'; my $appear = @duplicate == 1 ? 'appears' : 'appear'; - print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n"; + $warn && print "# $records @duplicate $appear multiple times in LRU queue: @{$self->{lru}}\n"; $good = 0; } for (keys %{$self->{cache}}) { @@ -634,7 +773,7 @@ Tie::File - Access the lines of a disk file via a Perl array =head1 SYNOPSIS - # This file documents Tie::File version 0.17 + # This file documents Tie::File version 0.19 tie @array, 'Tie::File', filename or die ...; @@ -666,17 +805,21 @@ gigantic files. Changes to the array are reflected in the file immediately. +Lazy people may now stop reading the manual. + =head2 C What is a 'record'? By default, the meaning is the same as for the C...E> operator: It's a string terminated by C<$/>, which is -probably C<"\n">. You may change the definition of "record" by -supplying the C option in the C call: - +probably C<"\n">. (Minor exception: on dos and Win32 systems, a +'record' is a string terminated by C<"\r\n">.) You may change the +definition of "record" by supplying the C option in the C +call: tie @array, 'Tie::File', $file, recsep => 'es'; -This says that records are delimited by the string C. If the file contained the following data: +This says that records are delimited by the string C. If the file +contained the following data: Curse these pesky flies!\n @@ -687,9 +830,6 @@ then the C<@array> would appear to have four elements: "ky flies" "!\n" -Windows users will probably want to use C "\r\n"> to get -files terminated with the usual CRLF sequence. - An undefined value is not permitted as a record separator. Perl's special "paragraph mode" semantics (E la C<$/ = "">) are not emulated. @@ -741,28 +881,34 @@ For example: Opening the data file in write-only or append mode is not supported. -=head2 C +=head2 C + +This is an (inexact) upper limit on the amount of memory that +C will consume at any time while managing the file. +At present, this is used as a bound on the size of the read cache. Records read in from the file are cached, to avoid having to re-read them repeatedly. If you read the same record twice, the first time it will be stored in memory, and the second time it will be fetched from -memory. +the I. The amount of data in the read cache will not +exceed the value you specified for C. If C wants +to cache a new record, but the read cache is full, it will make room +by expiring the least-recently visited records from the read cache. -The cache has a bounded size; when it exceeds this size, the -least-recently visited records will be purged from the cache. The -default size is 2Mib. You can adjust the amount of space used for the -cache by supplying the C option. The argument is the desired cache size, in bytes. +The default memory limit is 2Mib. You can adjust the maximum read +cache size by supplying the C option. The argument is the +desired cache size, in bytes. # I have a lot of memory, so use a large cache to speed up access - tie @array, 'Tie::File', $file, cachesize => 20_000_000; + tie @array, 'Tie::File', $file, memory => 20_000_000; -Setting the cache size to 0 will inhibit caching; records will be +Setting the memory limit to 0 will inhibit caching; records will be fetched from disk every time you examine them. =head2 Option Format C<-mode> is a synonym for C. C<-recsep> is a synonym for -C. C<-cachesize> is a synonym for C. You get the +C. C<-memory> is a synonym for C. You get the idea. =head1 Public Methods @@ -772,7 +918,9 @@ The C call returns an object, say C<$o>. You may call $rec = $o->FETCH($n); $o->STORE($n, $rec); -to fetch or store the record at line C<$n>, respectively. The only other public method in this package is: +to fetch or store the record at line C<$n>, respectively; similarly +the other tied array methods. (See L for details.) You may +also call the following methods on this object: =head2 C @@ -817,45 +965,73 @@ C, you may use: Handles that were opened write-only won't work. Handles that were opened read-only will work as long as you don't try to write to them. Handles must be attached to seekable sources of data---that means no -pipes or sockets. If you try to supply a non-seekable handle, the -C call will try to abort your program. This feature is not yet -supported under VMS. +pipes or sockets. If you supply a non-seekable handle, the C +call will try to abort your program. =head1 CAVEATS (That's Latin for 'warnings'.) -=head2 Efficiency Note +=over 4 + +=item * + +This is BETA RELEASE SOFTWARE. It may have bugs. See the discussion +below about the (lack of any) warranty. + +=item * Every effort was made to make this module efficient. Nevertheless, changing the size of a record in the middle of a large file will -always be slow, because everything after the new record must be moved. +always be fairly slow, because everything after the new record must be +moved. -In particular, note that: +In particular, note that the following innocent-looking loop has very +bad behavior: - # million-line file - for (@file_array) { - $_ .= 'x'; - } + # million-line file + for (@file_array) { + $_ .= 'x'; + } -is likely to be very slow, because the first iteration must relocate -lines 1 through 999,999; the second iteration must relocate lines 2 -through 999,999, and so on. The relocation is done using block -writes, however, so it's not as slow as it might be. +This is likely to be very slow, because the first iteration must +relocate lines 1 through 999,999; the second iteration must relocate +lines 2 through 999,999, and so on. The relocation is done using +block writes, however, so it's not as slow as it might be. A soon-to-be-released version of this module will provide a mechanism for getting better performance in such cases, by deferring the writing -until it can be done all at once. +until it can be done all at once. This deferred writing feature might +be enabled automagically if C guesses that you are about to write many consecutive records. To disable this feature, use + + (tied @o)->autodefer(0); + +(At present, this call does nothing.) + +=item * + +The behavior of tied arrays is not precisely the same as for regular +arrays. For example: -=head2 Efficiency Note 2 + undef $a[10]; print "How unusual!\n" if $a[10]; + +C-ing a C array element just blanks out the +corresponding record in the file. When you read it back again, you'll +see the record separator (typically, $a[10] will appear to contain +"\n") so the supposedly-C'ed value will be true. -Not every effort was made to make this module as efficient as +There are other minor differences, but in general, the correspondence +is extremely close. + +=item * + +Not quite every effort was made to make this module as efficient as possible. C should use binary search instead of linear search. The cache's LRU queue should be a heap instead of a list. These defects are probably minor; in any event, they will be fixed in a later version of the module. -=head2 Efficiency Note 3 +=item * The author has supposed that since this module is concerned with file I/O, almost all normal use of it will be heavily I/O bound, and that @@ -865,22 +1041,91 @@ suggests, for example, that an LRU read-cache is a good tradeoff, even if it requires substantial adjustment following a C operation. -=head1 CAVEATS +=back -(That's Latin for 'warnings'.) +=head1 WHAT ABOUT C? -The behavior of tied arrays is not precisely the same as for regular -arrays. For example: +C's C feature does something similar to +C, but there are a number of reasons that you might prefer +C. C is a great piece of software, but the +C part is less great than the rest of it. - undef $a[10]; print "How unusual!\n" if $a[10]; +=over 4 -C-ing a C array element just blanks out the -corresponding record in the file. When you read it back again, you'll -see the record separator (typically, $a[10] will appear to contain -"\n") so the supposedly-C'ed value will be true. +=item * -There are other minor differences, but in general, the correspondence -is extremely close. +C reads your entire file into memory, modifies it in memory, +and the writes out the entire file again when you untie the file. +This is completely impractical for large files. + +C does not do any of those things. It doesn't try to read +the entire file into memory; instead it uses a lazy approach and +caches recently-used records. The cache size is strictly bounded by +the C option. DB_File's C<-E{cachesize}> doesn't prevent +your process from blowing up when reading a big file. + +=item * + +C has an extremely poor writing strategy. If you have a +ten-megabyte file and tie it with C, and then use + + $a[0] =~ s/PERL/Perl/; + +C will then read the entire ten-megabyte file into memory, do +the change, and write the entire file back to disk, reading ten +megabytes and writing ten megabytes. C will read and write +only the first record. + +If you have a million-record file and tie it with C, and then +use + + $a[999998] =~ s/Larry/Larry Wall/; + +C will read the entire million-record file into memory, do +the change, and write the entire file back to disk. C will +only rewrite records 999998 and 999999. During the writing process, +it will never have more than a few kilobytes of data in memory at any +time, even if the two records are very large. + +=item * + +Since changes to C files only appear when you do C, it +can be inconvenient to arrange for concurrent access to the same file +by two or more processes. Each process needs to call C<$db-Esync> +after every write. When you change a C array, the changes +are reflected in the file immediately; no explicit C<-Esync> call +is required. (The forthcoming "deferred writing" mode will allow you +to request that writes be held in memory until explicitly C'ed.) + +=item * + +C is only installed by default if you already have the C +library on your system; C is pure Perl and is installed by +default no matter what. Starting with Perl 5.7.3 you can be +absolutely sure it will be everywhere. You will never have that +surety with C. If you don't have C yet, it requires +a C compiler. You can install C from CPAN in five minutes +with no compiler. + +=item * + +C is written in C, so if you aren't allowed to install +modules on your system, it is useless. C is written in Perl, +so even if you aren't allowed to install modules, you can look into +the source code, see how it works, and copy the subroutines or the +ideas from the subroutines directly into your own Perl program. + +=item * + +Except in very old, unsupported versions, C's free license +requires that you distribute the source code for your entire +application. If you are not able to distribute the source code for +your application, you must negotiate an alternative license from +Sleepycat, possibly for a fee. Tie::File is under the Perl Artistic +license and can be distributed free under the same terms as Perl +itself. + +=back =head1 AUTHOR @@ -894,7 +1139,7 @@ C. =head1 LICENSE -C version 0.17 is copyright (C) 2002 Mark Jason Dominus. +C version 0.19 is copyright (C) 2002 Mark Jason Dominus. This library is free software; you may redistribute it and/or modify it under the same terms as Perl itself. @@ -922,7 +1167,7 @@ For licensing inquiries, contact the author at: =head1 WARRANTY -C version 0.17 comes with ABSOLUTELY NO WARRANTY. +C version 0.19 comes with ABSOLUTELY NO WARRANTY. For details, see the license. =head1 THANKS @@ -933,14 +1178,18 @@ supportive, and competent. (Usually the rule is "choose any one.") Also big thanks to Abhijit Menon-Sen for all of the same things. Special thanks to Craig Berry (for VMS portability help), Randy Kobes -(for Win32 portability help), the rest of the CPAN testers (for -testing). +(for Win32 portability help), Clinton Pierce and Autrijus Tang (for +heroic eleventh-hour Win32 testing above and beyond the call of duty), +and the rest of the CPAN testers (for testing generally). More thanks to: +Edward Avis / Gerrit Haase / +Nikola Knezevic / Nick Ing-Simmons / Tassilo von Parseval / H. Dieter Pearcey / +Slaven Rezic / Peter Somu / Tels @@ -948,14 +1197,11 @@ Tels Test DELETE machinery more carefully. -More tests. (Configuration options, cache flushery. _twrite should -be tested separately, because there are a lot of weird special cases -lurking in there.) +More tests. (C option. _twrite should be tested separately, +because there are a lot of weird special cases lurking in there.) More tests. (Stuff I didn't think of yet.) -Deferred writing. (!!!) - Paragraph mode? More tests. @@ -964,5 +1210,14 @@ Fixed-length mode. Maybe an autolocking mode? +Finish deferred writing. + +Autodeferment. + +Record locking with fcntl()? Then you might support an undo log and +get real transactions. What a coup that would be. + +Leave-blanks mode + =cut diff --git a/lib/Tie/File/t/00_version.t b/lib/Tie/File/t/00_version.t new file mode 100644 index 0000000..565651a --- /dev/null +++ b/lib/Tie/File/t/00_version.t @@ -0,0 +1,19 @@ +#!/usr/bin/perl + +print "1..1\n"; + +use Tie::File; + +if ($Tie::File::VERSION != 0.19) { + print STDERR " +WHOA THERE!! + +You seem to be running version $Tie::File::VERSION of the module against +version 0.19 of the test suite! + +None of the other test results will be reliable. +"; + exit 1; +} + +print "ok 1\n"; diff --git a/lib/Tie/File/t/01_gen.t b/lib/Tie/File/t/01_gen.t index e383b7f..5be638b 100644 --- a/lib/Tie/File/t/01_gen.t +++ b/lib/Tie/File/t/01_gen.t @@ -2,7 +2,7 @@ my $file = "tf$$.txt"; -print "1..56\n"; +print "1..62\n"; my $N = 1; use Tie::File; @@ -12,6 +12,8 @@ my $o = tie @a, 'Tie::File', $file; print $o ? "ok $N\n" : "not ok $N\n"; $N++; +$: = $o->{recsep}; + # 3-5 create $a[0] = 'rec0'; check_contents("rec0"); @@ -60,13 +62,18 @@ check_contents("sh0", "sh1", "short2", "", "rec4"); $a[3] = 'rec3'; check_contents("sh0", "sh1", "short2", "rec3", "rec4"); +# (57-59) zero out file +@a = (); +check_contents(); -# try inserting a record into the middle of an empty file +# (60-62) insert into the middle of an empty file +$a[3] = "rec3"; +check_contents("", "", "", "rec3"); use POSIX 'SEEK_SET'; sub check_contents { my @c = @_; - my $x = join $/, @c, ''; + my $x = join $:, @c, ''; local *FH = $o->{fh}; seek FH, 0, SEEK_SET; # my $open = open FH, "< $file"; @@ -76,7 +83,7 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; + ctrlfix($a, $x); print "not ok $N\n# expected <$x>, got <$a>\n"; } $N++; @@ -85,9 +92,9 @@ sub check_contents { my $good = 1; my $msg; for (0.. $#c) { - unless ($a[$_] eq "$c[$_]$/") { - $msg = "expected $c[$_]$/, got $a[$_]"; - $msg =~ s{$/}{\\n}g; + unless ($a[$_] eq "$c[$_]$:") { + $msg = "expected $c[$_]$:, got $a[$_]"; + ctrlfix($msg); $good = 0; } } @@ -99,6 +106,13 @@ sub check_contents { $N++; } +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + END { undef $o; untie @a; diff --git a/lib/Tie/File/t/02_fetchsize.t b/lib/Tie/File/t/02_fetchsize.t index 78fcea8..08ac9cb 100644 --- a/lib/Tie/File/t/02_fetchsize.t +++ b/lib/Tie/File/t/02_fetchsize.t @@ -1,7 +1,8 @@ #!/usr/bin/perl my $file = "tf$$.txt"; -my $data = "rec1$/rec2$/rec3$/"; +$: = Tie::File::_default_recsep(); +my $data = "rec1$:rec2$:rec3$:"; print "1..6\n"; @@ -19,6 +20,8 @@ my $o = tie @a, 'Tie::File', $file; print $o ? "ok $N\n" : "not ok $N\n"; $N++; +$: = $o->{recsep}; + my $n; # 3 test array element count diff --git a/lib/Tie/File/t/03_longfetch.t b/lib/Tie/File/t/03_longfetch.t index a84890a..265de93 100644 --- a/lib/Tie/File/t/03_longfetch.t +++ b/lib/Tie/File/t/03_longfetch.t @@ -7,7 +7,8 @@ # my $file = "tf$$.txt"; -my $data = "rec0$/rec1$/rec2$/"; +$: = Tie::File::_default_recsep(); +my $data = "rec0$:rec1$:rec2$:"; print "1..5\n"; @@ -25,11 +26,13 @@ my $o = tie @a, 'Tie::File', $file; print $o ? "ok $N\n" : "not ok $N\n"; $N++; +$: = $o->{recsep}; + my $n; # 3-5 for (2, 1, 0) { - print $a[$_] eq "rec$_$/" ? "ok $N\n" : "not ok $N # rec=$a[$_] ?\n"; + print $a[$_] eq "rec$_$:" ? "ok $N\n" : "not ok $N # rec=$a[$_] ?\n"; $N++; } diff --git a/lib/Tie/File/t/04_splice.t b/lib/Tie/File/t/04_splice.t index 08e001b..f6effa4 100644 --- a/lib/Tie/File/t/04_splice.t +++ b/lib/Tie/File/t/04_splice.t @@ -1,4 +1,5 @@ #!/usr/bin/perl + # # Check SPLICE function's effect on the file # (07_rv_splice.t checks its return value) @@ -11,7 +12,8 @@ # contents. my $file = "tf$$.txt"; -my $data = "rec0$/rec1$/rec2$/"; +$: = Tie::File::_default_recsep(); +my $data = "rec0$:rec1$:rec2$:"; print "1..101\n"; @@ -25,104 +27,105 @@ my $o = tie @a, 'Tie::File', $file; print $o ? "ok $N\n" : "not ok $N\n"; $N++; +$: = $o->{recsep}; my $n; # (3-22) splicing at the beginning splice(@a, 0, 0, "rec4"); -check_contents("rec4$/$data"); +check_contents("rec4$:$data"); splice(@a, 0, 1, "rec5"); # same length -check_contents("rec5$/$data"); +check_contents("rec5$:$data"); splice(@a, 0, 1, "record5"); # longer -check_contents("record5$/$data"); +check_contents("record5$:$data"); splice(@a, 0, 1, "r5"); # shorter -check_contents("r5$/$data"); +check_contents("r5$:$data"); splice(@a, 0, 1); # removal check_contents("$data"); splice(@a, 0, 0); # no-op check_contents("$data"); splice(@a, 0, 0, 'r7', 'rec8'); # insert more than one -check_contents("r7$/rec8$/$data"); +check_contents("r7$:rec8$:$data"); splice(@a, 0, 2, 'rec7', 'record8', 'rec9'); # insert more than delete -check_contents("rec7$/record8$/rec9$/$data"); +check_contents("rec7$:record8$:rec9$:$data"); splice(@a, 0, 3, 'record9', 'rec10'); # delete more than insert -check_contents("record9$/rec10$/$data"); +check_contents("record9$:rec10$:$data"); splice(@a, 0, 2); # delete more than one check_contents("$data"); # (23-42) splicing in the middle splice(@a, 1, 0, "rec4"); -check_contents("rec0$/rec4$/rec1$/rec2$/"); +check_contents("rec0$:rec4$:rec1$:rec2$:"); splice(@a, 1, 1, "rec5"); # same length -check_contents("rec0$/rec5$/rec1$/rec2$/"); +check_contents("rec0$:rec5$:rec1$:rec2$:"); splice(@a, 1, 1, "record5"); # longer -check_contents("rec0$/record5$/rec1$/rec2$/"); +check_contents("rec0$:record5$:rec1$:rec2$:"); splice(@a, 1, 1, "r5"); # shorter -check_contents("rec0$/r5$/rec1$/rec2$/"); +check_contents("rec0$:r5$:rec1$:rec2$:"); splice(@a, 1, 1); # removal check_contents("$data"); splice(@a, 1, 0); # no-op check_contents("$data"); splice(@a, 1, 0, 'r7', 'rec8'); # insert more than one -check_contents("rec0$/r7$/rec8$/rec1$/rec2$/"); +check_contents("rec0$:r7$:rec8$:rec1$:rec2$:"); splice(@a, 1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete -check_contents("rec0$/rec7$/record8$/rec9$/rec1$/rec2$/"); +check_contents("rec0$:rec7$:record8$:rec9$:rec1$:rec2$:"); splice(@a, 1, 3, 'record9', 'rec10'); # delete more than insert -check_contents("rec0$/record9$/rec10$/rec1$/rec2$/"); +check_contents("rec0$:record9$:rec10$:rec1$:rec2$:"); splice(@a, 1, 2); # delete more than one check_contents("$data"); # (43-62) splicing at the end splice(@a, 3, 0, "rec4"); -check_contents("$ {data}rec4$/"); +check_contents("$ {data}rec4$:"); splice(@a, 3, 1, "rec5"); # same length -check_contents("$ {data}rec5$/"); +check_contents("$ {data}rec5$:"); splice(@a, 3, 1, "record5"); # longer -check_contents("$ {data}record5$/"); +check_contents("$ {data}record5$:"); splice(@a, 3, 1, "r5"); # shorter -check_contents("$ {data}r5$/"); +check_contents("$ {data}r5$:"); splice(@a, 3, 1); # removal check_contents("$data"); splice(@a, 3, 0); # no-op check_contents("$data"); splice(@a, 3, 0, 'r7', 'rec8'); # insert more than one -check_contents("$ {data}r7$/rec8$/"); +check_contents("$ {data}r7$:rec8$:"); splice(@a, 3, 2, 'rec7', 'record8', 'rec9'); # insert more than delete -check_contents("$ {data}rec7$/record8$/rec9$/"); +check_contents("$ {data}rec7$:record8$:rec9$:"); splice(@a, 3, 3, 'record9', 'rec10'); # delete more than insert -check_contents("$ {data}record9$/rec10$/"); +check_contents("$ {data}record9$:rec10$:"); splice(@a, 3, 2); # delete more than one check_contents("$data"); # (63-82) splicing with negative subscript splice(@a, -1, 0, "rec4"); -check_contents("rec0$/rec1$/rec4$/rec2$/"); +check_contents("rec0$:rec1$:rec4$:rec2$:"); splice(@a, -1, 1, "rec5"); # same length -check_contents("rec0$/rec1$/rec4$/rec5$/"); +check_contents("rec0$:rec1$:rec4$:rec5$:"); splice(@a, -1, 1, "record5"); # longer -check_contents("rec0$/rec1$/rec4$/record5$/"); +check_contents("rec0$:rec1$:rec4$:record5$:"); splice(@a, -1, 1, "r5"); # shorter -check_contents("rec0$/rec1$/rec4$/r5$/"); +check_contents("rec0$:rec1$:rec4$:r5$:"); splice(@a, -1, 1); # removal -check_contents("rec0$/rec1$/rec4$/"); +check_contents("rec0$:rec1$:rec4$:"); splice(@a, -1, 0); # no-op -check_contents("rec0$/rec1$/rec4$/"); +check_contents("rec0$:rec1$:rec4$:"); splice(@a, -1, 0, 'r7', 'rec8'); # insert more than one -check_contents("rec0$/rec1$/r7$/rec8$/rec4$/"); +check_contents("rec0$:rec1$:r7$:rec8$:rec4$:"); splice(@a, -1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete -check_contents("rec0$/rec1$/r7$/rec8$/rec7$/record8$/rec9$/"); +check_contents("rec0$:rec1$:r7$:rec8$:rec7$:record8$:rec9$:"); splice(@a, -3, 3, 'record9', 'rec10'); # delete more than insert -check_contents("rec0$/rec1$/r7$/rec8$/record9$/rec10$/"); +check_contents("rec0$:rec1$:r7$:rec8$:record9$:rec10$:"); splice(@a, -4, 3); # delete more than one -check_contents("rec0$/rec1$/rec10$/"); +check_contents("rec0$:rec1$:rec10$:"); # (83-84) scrub it all out splice(@a, 0, 3); @@ -130,7 +133,7 @@ check_contents(""); # (85-86) put some back in splice(@a, 0, 0, "rec0", "rec1"); -check_contents("rec0$/rec1$/"); +check_contents("rec0$:rec1$:"); # (87-88) what if we remove too many records? splice(@a, 0, 17); @@ -146,9 +149,9 @@ check_contents(""); # (93-96) Also we did not emulate splice's freaky behavior when inserting # past the end of the array (1.14) splice(@a, 89, 0, "I", "like", "pie"); -check_contents("I$/like$/pie$/"); +check_contents("I$:like$:pie$:"); splice(@a, 89, 0, "pie pie pie"); -check_contents("I$/like$/pie$/pie pie pie$/"); +check_contents("I$:like$:pie$:pie pie pie$:"); # (97) Splicing with too large a negative number should be fatal # This test ignored because it causes 5.6.1 and 5.7.2 to dump core @@ -165,7 +168,7 @@ $N++; # (98-101) Test default arguments splice @a, 0, 0, (0..11); splice @a, 4; -check_contents("0$/1$/2$/3$/"); +check_contents("0$:1$:2$:3$:"); splice @a; check_contents(""); @@ -192,12 +195,20 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; + ctrlfix($a, $x); print "not ok $N\n# expected <$x>, got <$a>\n"; } $N++; } + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + END { undef $o; untie @a; diff --git a/lib/Tie/File/t/05_size.t b/lib/Tie/File/t/05_size.t index 6cdd4e5..8f62c2a 100644 --- a/lib/Tie/File/t/05_size.t +++ b/lib/Tie/File/t/05_size.t @@ -7,7 +7,6 @@ use POSIX 'SEEK_SET'; my $file = "tf$$.txt"; -my $data = "rec0$/rec1$/rec2$/"; my ($o, $n); print "1..15\n"; @@ -23,6 +22,9 @@ close F; $o = tie @a, 'Tie::File', $file; print $o ? "ok $N\n" : "not ok $N\n"; $N++; + +$: = $o->{recsep}; + $n = @a; print $n == 0 ? "ok $N\n" : "not ok $N # $n, s/b 0\n"; $N++; @@ -31,14 +33,17 @@ $N++; undef $o; untie @a; -# 4-5 FETCHSIZE positive-length file +my $data = "rec0$:rec1$:rec2$:"; open F, "> $file" or die $!; binmode F; print F $data; close F; + $o = tie @a, 'Tie::File', $file; print $o ? "ok $N\n" : "not ok $N\n"; $N++; + +# 4-5 FETCHSIZE positive-length file $n = @a; print $n == 3 ? "ok $N\n" : "not ok $N # $n, s/b 0\n"; $N++; @@ -47,17 +52,17 @@ $N++; # (6-7) Make it longer: populate(); $#a = 4; -check_contents("$data$/$/"); +check_contents("$data$:$:"); # (8-9) Make it longer again: populate(); $#a = 6; -check_contents("$data$/$/$/$/"); +check_contents("$data$:$:$:$:"); # (10-11) Make it shorter: populate(); $#a = 4; -check_contents("$data$/$/"); +check_contents("$data$:$:"); # (12-13) Make it shorter again: populate(); @@ -88,7 +93,7 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; + ctrlfix($a, $x); print "not ok $N\n# expected <$x>, got <$a>\n"; } $N++; @@ -98,6 +103,13 @@ sub check_contents { } +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + END { undef $o; untie @a; diff --git a/lib/Tie/File/t/06_fixrec.t b/lib/Tie/File/t/06_fixrec.t index 62e5579..b03af09 100644 --- a/lib/Tie/File/t/06_fixrec.t +++ b/lib/Tie/File/t/06_fixrec.t @@ -2,6 +2,7 @@ use POSIX 'SEEK_SET'; my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); print "1..5\n"; @@ -14,11 +15,11 @@ print $o ? "ok $N\n" : "not ok $N\n"; $N++; $a[0] = 'rec0'; -check_contents("rec0$/"); -$a[1] = "rec1$/"; -check_contents("rec0$/rec1$/"); -$a[2] = "rec2$/$/"; # should we detect this? -check_contents("rec0$/rec1$/rec2$/$/"); +check_contents("rec0$:"); +$a[1] = "rec1$:"; +check_contents("rec0$:rec1$:"); +$a[2] = "rec2$:$:"; # should we detect this? +check_contents("rec0$:rec1$:rec2$:$:"); sub check_contents { my $x = shift; @@ -30,12 +31,19 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; - print "not ok $N\n# expected <$x>, got <$a>\n"; + my $msg = "not ok $N # expected <$x>, got <$a>"; + ctrlfix($msg); + print "$msg\n"; } $N++; } +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} END { undef $o; diff --git a/lib/Tie/File/t/07_rv_splice.t b/lib/Tie/File/t/07_rv_splice.t index f5da174..69858b2 100644 --- a/lib/Tie/File/t/07_rv_splice.t +++ b/lib/Tie/File/t/07_rv_splice.t @@ -5,7 +5,8 @@ # my $file = "tf$$.txt"; -my $data = "rec0$/rec1$/rec2$/"; +$: = Tie::File::_default_recsep(); +my $data = "rec0$:rec1$:rec2$:"; print "1..50\n"; @@ -138,11 +139,11 @@ print !defined($r) ? "ok $N\n" : "not ok $N \# return should have been undef\n"; $N++; $r = splice(@a, 2, 1); -print $r eq "pie$/" ? "ok $N\n" : "not ok $N \# return should have been 'pie'\n"; +print $r eq "pie$:" ? "ok $N\n" : "not ok $N \# return should have been 'pie'\n"; $N++; $r = splice(@a, 0, 2); -print $r eq "like$/" ? "ok $N\n" : "not ok $N \# return should have been 'like'\n"; +print $r eq "like$:" ? "ok $N\n" : "not ok $N \# return should have been 'like'\n"; $N++; # (49-50) Test default arguments @@ -164,7 +165,7 @@ sub init_file { # expected results are in @_ sub check_result { my @x = @_; - chomp @r; + s/$:$// for @r; my $good = 1; $good = 0 unless @r == @x; for my $i (0 .. $#r) { diff --git a/lib/Tie/File/t/08_ro.t b/lib/Tie/File/t/08_ro.t index 245b16f..218a4e4 100644 --- a/lib/Tie/File/t/08_ro.t +++ b/lib/Tie/File/t/08_ro.t @@ -4,6 +4,7 @@ # my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); print "1..9\n"; @@ -13,7 +14,7 @@ use Fcntl 'O_RDONLY'; print "ok $N\n"; $N++; my @items = qw(Gold Frankincense Myrrh Ivory Apes Peacocks); -init_file(join $/, @items, ''); +init_file(join $:, @items, ''); my $o = tie @a, 'Tie::File', $file, mode => O_RDONLY; print $o ? "ok $N\n" : "not ok $N\n"; @@ -23,7 +24,7 @@ $#a == $#items ? print "ok $N\n" : print "not ok $N\n"; $N++; for my $i (0..$#items) { - ("$items[$i]$/" eq $a[$i]) ? print "ok $N\n" : print "not ok $N\n"; + ("$items[$i]$:" eq $a[$i]) ? print "ok $N\n" : print "not ok $N\n"; $N++; } diff --git a/lib/Tie/File/t/09_gen_rs.t b/lib/Tie/File/t/09_gen_rs.t index bb2fb26..120080b 100644 --- a/lib/Tie/File/t/09_gen_rs.t +++ b/lib/Tie/File/t/09_gen_rs.t @@ -77,8 +77,9 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; - print "not ok $N\n# expected <$x>, got <$a>\n"; + my $msg = "# expected <$x>, got <$a>"; + ctrlfix($msg); + print "not ok $N $msg\n"; } $N++; @@ -87,7 +88,7 @@ sub check_contents { for (0.. $#c) { unless ($a[$_] eq "$c[$_]blah") { $msg = "expected $c[$_]blah, got $a[$_]"; - $msg =~ s{$/}{\\n}g; + ctrlfix($msg); $good = 0; } } @@ -95,6 +96,15 @@ sub check_contents { $N++; } + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + + END { undef $o; untie @a; diff --git a/lib/Tie/File/t/10_splice_rs.t b/lib/Tie/File/t/10_splice_rs.t index aa33bcf..4db1443 100644 --- a/lib/Tie/File/t/10_splice_rs.t +++ b/lib/Tie/File/t/10_splice_rs.t @@ -193,12 +193,19 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; - print "not ok $N\n# expected <$x>, got <$a>\n"; + ctrlfix(my $msg = "# expected <$x>, got <$a>"); + print "not ok $N\n$msg\n"; } $N++; } +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + END { undef $o; untie @a; diff --git a/lib/Tie/File/t/13_size_rs.t b/lib/Tie/File/t/13_size_rs.t index 284d2d3..a2a8d53 100644 --- a/lib/Tie/File/t/13_size_rs.t +++ b/lib/Tie/File/t/13_size_rs.t @@ -73,13 +73,20 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; - print "not ok $N\n# expected <$x>, got <$a>\n"; + ctrlfix(my $msg = "# expected <$x>, got <$a>"); + print "not ok $N\n$msg\n"; } $N++; } +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + END { undef $o; untie @a; diff --git a/lib/Tie/File/t/15_pushpop.t b/lib/Tie/File/t/15_pushpop.t index e57764b..d6c379b 100644 --- a/lib/Tie/File/t/15_pushpop.t +++ b/lib/Tie/File/t/15_pushpop.t @@ -13,7 +13,8 @@ use POSIX 'SEEK_SET'; my $file = "tf$$.txt"; 1 while unlink $file; -my $data = "rec0$/rec1$/rec2$/"; +$: = Tie::File::_default_recsep(); +my $data = "rec0$:rec1$:rec2$:"; print "1..38\n"; @@ -34,28 +35,28 @@ check_contents($data); print $n == 3 ? "ok $N\n" : "not ok $N # size is $n, should be 3\n"; $N++; -$n = push @a, "rec3", "rec4\n"; -check_contents("$ {data}rec3$/rec4$/"); +$n = push @a, "rec3", "rec4$:"; +check_contents("$ {data}rec3$:rec4$:"); print $n == 5 ? "ok $N\n" : "not ok $N # size is $n, should be 5\n"; $N++; # Trivial push -$n = push(@a, ()); -check_contents("$ {data}rec3$/rec4$/"); +$n = push @a, (); +check_contents("$ {data}rec3$:rec4$:"); print $n == 5 ? "ok $N\n" : "not ok $N # size is $n, should be 5\n"; $N++; # (12-20) POP tests $n = pop @a; -check_contents("$ {data}rec3$/"); -print $n eq "rec4$/" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec4\n"; +check_contents("$ {data}rec3$:"); +print $n eq "rec4$:" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec4\n"; $N++; # Presumably we have already tested this to death splice(@a, 1, 3); $n = pop @a; check_contents(""); -print $n eq "rec0$/" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec0\n"; +print $n eq "rec0$:" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec0\n"; $N++; $n = pop @a; @@ -70,28 +71,28 @@ check_contents($data); print $n == 3 ? "ok $N\n" : "not ok $N # size is $n, should be 3\n"; $N++; -$n = unshift @a, "rec3", "rec4\n"; -check_contents("rec3$/rec4$/$data"); +$n = unshift @a, "rec3", "rec4$:"; +check_contents("rec3$:rec4$:$data"); print $n == 5 ? "ok $N\n" : "not ok $N # size is $n, should be 5\n"; $N++; # Trivial unshift -$n = unshift(@a, ()); -check_contents("rec3$/rec4$/$data"); +$n = unshift @a, (); +check_contents("rec3$:rec4$:$data"); print $n == 5 ? "ok $N\n" : "not ok $N # size is $n, should be 5\n"; $N++; # (30-38) SHIFT tests $n = shift @a; -check_contents("rec4$/$data"); -print $n eq "rec3$/" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec3\n"; +check_contents("rec4$:$data"); +print $n eq "rec3$:" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec3\n"; $N++; # Presumably we have already tested this to death splice(@a, 1, 3); $n = shift @a; check_contents(""); -print $n eq "rec4$/" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec4\n"; +print $n eq "rec4$:" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec4\n"; $N++; $n = shift @a; @@ -114,12 +115,19 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; - print "not ok $N\n# expected <$x>, got <$a>\n"; + ctrlfix(my $msg = "# expected <$x>, got <$a>"); + print "not ok $N\n$msg\n"; } $N++; } +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + END { undef $o; untie @a; diff --git a/lib/Tie/File/t/16_handle.t b/lib/Tie/File/t/16_handle.t index cb9aa60..3c9b327 100644 --- a/lib/Tie/File/t/16_handle.t +++ b/lib/Tie/File/t/16_handle.t @@ -4,6 +4,7 @@ # instead of from a filename my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); if ($^O =~ /vms/i) { print "1..0\n"; @@ -19,7 +20,7 @@ print "ok $N\n"; $N++; use Fcntl 'O_CREAT', 'O_RDWR'; sysopen F, $file, O_CREAT | O_RDWR or die "Couldn't create temp file $file: $!; aborting"; -binmode(F); +binmode F; my $o = tie @a, 'Tie::File', \*F; print $o ? "ok $N\n" : "not ok $N\n"; @@ -78,42 +79,38 @@ undef $o; untie @a; # Does it correctly detect a non-seekable handle? - -{ - if ($^O =~ /^(MSWin32|dos)$/) { - print "ok $N \# skipped ($^O has broken pipe semantics)\n"; - last; - } - my $pipe_succeeded = eval {pipe *R, *W}; - if ($@) { - chomp $@; - print "ok $N \# skipped (no pipes: $@)\n"; - last; - } elsif (! $pipe_succeeded) { - print "ok $N \# skipped (pipe call failed: $!)\n"; - last; - } - close R; - $o = eval {tie @a, 'Tie::File', \*W}; - if ($@) { - if ($@ =~ /filehandle does not appear to be seekable/) { - print "ok $N\n"; - } else { - chomp $@; - print "not ok $N \# \$\@ is $@\n"; - } - } else { - print "not ok $N \# passing pipe to TIEARRAY didn't abort program\n"; - } - $N++; +{ if ($^O =~ /^(MSWin32|dos)$/) { + print "ok $N # skipped ($^O has broken pipe semantics)\n"; + last; + } + my $pipe_succeeded = eval {pipe *R, *W}; + if ($@) { + chomp $@; + print "ok $N # skipped (no pipes: $@)\n"; + last; + } elsif (! $pipe_succeeded) { + print "ok $N # skipped (pipe call failed: $!)\n"; + last; + } + close R; + $o = eval {tie @a, 'Tie::File', \*W}; + if ($@) { + if ($@ =~ /filehandle does not appear to be seekable/) { + print "ok $N\n"; + } else { + chomp $@; + print "not ok $N \# \$\@ is $@\n"; + } + } else { + print "not ok $N \# passing pipe to TIEARRAY didn't abort program\n"; + } + $N++; } -# try inserting a record into the middle of an empty file - use POSIX 'SEEK_SET'; sub check_contents { my @c = @_; - my $x = join $/, @c, ''; + my $x = join $:, @c, ''; local *FH = $o->{fh}; seek FH, 0, SEEK_SET; # my $open = open FH, "< $file"; @@ -123,8 +120,8 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; - print "not ok $N\n# expected <$x>, got <$a>\n"; + ctrlfix(my $msg = "# expected <$x>, got <$a>"); + print "not ok $N\n$msg\n"; } $N++; @@ -132,9 +129,9 @@ sub check_contents { my $good = 1; my $msg; for (0.. $#c) { - unless ($a[$_] eq "$c[$_]$/") { - $msg = "expected $c[$_]$/, got $a[$_]"; - $msg =~ s{$/}{\\n}g; + unless ($a[$_] eq "$c[$_]$:") { + $msg = "expected $c[$_]$:, got $a[$_]"; + ctrlfix($msg); $good = 0; } } @@ -142,6 +139,14 @@ sub check_contents { $N++; } + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + END { undef $o; untie @a; diff --git a/lib/Tie/File/t/17_misc_meth.t b/lib/Tie/File/t/17_misc_meth.t index 55b694b..8774961 100644 --- a/lib/Tie/File/t/17_misc_meth.t +++ b/lib/Tie/File/t/17_misc_meth.t @@ -5,6 +5,7 @@ # my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); 1 while unlink $file; print "1..24\n"; @@ -19,17 +20,19 @@ $N++; # (3-8) EXTEND $o->EXTEND(3); -check_contents("$/$/$/"); +check_contents("$:$:$:"); $o->EXTEND(4); -check_contents("$/$/$/$/"); +check_contents("$:$:$:$:"); $o->EXTEND(3); -check_contents("$/$/$/$/"); +check_contents("$:$:$:$:"); # (9-10) CLEAR @a = (); check_contents(""); # (11-16) EXISTS +if ($] >= 5.006) { + eval << 'TESTS'; print !exists $a[0] ? "ok $N\n" : "not ok $N\n"; $N++; $a[0] = "I like pie."; @@ -45,17 +48,32 @@ print exists $a[1] ? "ok $N\n" : "ok $N\n"; $N++; print exists $a[2] ? "ok $N\n" : "not ok $N\n"; $N++; +TESTS + } else { # perl 5.005 doesn't have exists $array[1] + for (11..16) { + print "ok $_ \# skipped (no exists for arrays)\n"; + $N++; + } + } # (17-24) DELETE +if ($] >= 5.006) { + eval << 'TESTS'; delete $a[0]; -check_contents("$/$/GIVE ME PIE$/"); +check_contents("$:$:GIVE ME PIE$:"); delete $a[2]; -check_contents("$/$/"); +check_contents("$:$:"); delete $a[0]; -check_contents("$/$/"); +check_contents("$:$:"); delete $a[1]; -check_contents("$/"); - +check_contents("$:"); +TESTS + } else { # perl 5.005 doesn't have delete $array[1] + for (17..24) { + print "ok $_ \# skipped (no delete for arrays)\n"; + $N++; + } + } use POSIX 'SEEK_SET'; sub check_contents { @@ -68,14 +86,21 @@ sub check_contents { if ($a eq $x) { print "ok $N\n"; } else { - s{$/}{\\n}g for $a, $x; - print "not ok $N\n# expected <$x>, got <$a>\n"; + ctrlfix(my $msg = "# expected <$x>, got <$a>"); + print "not ok $N\n$msg\n"; } $N++; print $o->_check_integrity($file, $ENV{INTEGRITY}) ? "ok $N\n" : "not ok $N\n"; $N++; } +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + END { undef $o; untie @a; diff --git a/lib/Tie/File/t/18_rs_fixrec.t b/lib/Tie/File/t/18_rs_fixrec.t new file mode 100644 index 0000000..ec0dec6 --- /dev/null +++ b/lib/Tie/File/t/18_rs_fixrec.t @@ -0,0 +1,53 @@ +#!/usr/bin/perl + +use POSIX 'SEEK_SET'; +my $file = "tf$$.txt"; +$/ = "blah"; + +print "1..5\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +$a[0] = 'rec0'; +check_contents("rec0blah"); +$a[1] = "rec1blah"; +check_contents("rec0blahrec1blah"); +$a[2] = "rec2blahblah"; # should we detect this? +check_contents("rec0blahrec1blahrec2blahblah"); + +sub check_contents { + my $x = shift; + local *FH = $o->{fh}; + seek FH, 0, SEEK_SET; + my $a; + { local $/; $a = } + $a = "" unless defined $a; + if ($a eq $x) { + print "ok $N\n"; + } else { + my $msg = "not ok $N # expected <$x>, got <$a>"; + ctrlfix($msg); + print "$msg\n"; + } + $N++; +} + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +END { + undef $o; + untie @a; + 1 while unlink $file; +} + diff --git a/lib/Tie/File/t/19_cache.t b/lib/Tie/File/t/19_cache.t new file mode 100644 index 0000000..518a01b --- /dev/null +++ b/lib/Tie/File/t/19_cache.t @@ -0,0 +1,202 @@ +#!/usr/bin/perl +# +# Tests for various caching errors +# + +my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); +my $data = join $:, "rec0" .. "rec9", ""; +my $V = $ENV{INTEGRITY}; # Verbose integrity checking? + +print "1..54\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +open F, "> $file" or die $!; +binmode F; +print F $data; +close F; + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +# (3) Through 0.18, this 'splice' call would corrupt the cache. +my @z = @a; # force cache to contain all ten records +splice @a, 0, 0, "x"; +print $o->_check_integrity($file, $V) ? "ok $N\n" : "not ok $N\n"; +$N++; + +# Here we redo *all* the splice tests, with populate() +# calls before each one, to make sure that splice() does not botch the cache. + +# (4-14) splicing at the beginning +check(); +splice(@a, 0, 0, "rec4"); +check(); +splice(@a, 0, 1, "rec5"); # same length +check(); +splice(@a, 0, 1, "record5"); # longer +check(); +splice(@a, 0, 1, "r5"); # shorter +check(); +splice(@a, 0, 1); # removal +check(); +splice(@a, 0, 0); # no-op +check(); + +splice(@a, 0, 0, 'r7', 'rec8'); # insert more than one +check(); +splice(@a, 0, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check(); +splice(@a, 0, 3, 'record9', 'rec10'); # delete more than insert +check(); +splice(@a, 0, 2); # delete more than one +check(); + + +# (15-24) splicing in the middle +splice(@a, 1, 0, "rec4"); +check(); +splice(@a, 1, 1, "rec5"); # same length +check(); +splice(@a, 1, 1, "record5"); # longer +check(); +splice(@a, 1, 1, "r5"); # shorter +check(); +splice(@a, 1, 1); # removal +check(); +splice(@a, 1, 0); # no-op +check(); + +splice(@a, 1, 0, 'r7', 'rec8'); # insert more than one +check(); +splice(@a, 1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check(); +splice(@a, 1, 3, 'record9', 'rec10'); # delete more than insert +check(); +splice(@a, 1, 2); # delete more than one +check(); + +# (25-34) splicing at the end +splice(@a, 3, 0, "rec4"); +check(); +splice(@a, 3, 1, "rec5"); # same length +check(); +splice(@a, 3, 1, "record5"); # longer +check(); +splice(@a, 3, 1, "r5"); # shorter +check(); +splice(@a, 3, 1); # removal +check(); +splice(@a, 3, 0); # no-op +check(); + +splice(@a, 3, 0, 'r7', 'rec8'); # insert more than one +check(); +splice(@a, 3, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check(); +splice(@a, 3, 3, 'record9', 'rec10'); # delete more than insert +check(); +splice(@a, 3, 2); # delete more than one +check(); + +# (35-44) splicing with negative subscript +splice(@a, -1, 0, "rec4"); +check(); +splice(@a, -1, 1, "rec5"); # same length +check(); +splice(@a, -1, 1, "record5"); # longer +check(); +splice(@a, -1, 1, "r5"); # shorter +check(); +splice(@a, -1, 1); # removal +check(); +splice(@a, -1, 0); # no-op +check(); + +splice(@a, -1, 0, 'r7', 'rec8'); # insert more than one +check(); +splice(@a, -1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check(); +splice(@a, -3, 3, 'record9', 'rec10'); # delete more than insert +check(); +splice(@a, -4, 3); # delete more than one +check(); + +# (45) scrub it all out +splice(@a, 0, 3); +check(); + +# (46) put some back in +splice(@a, 0, 0, "rec0", "rec1"); +check(); + +# (47) what if we remove too many records? +splice(@a, 0, 17); +check(); + +# (48-49) In the past, splicing past the end was not correctly detected +# (1.14) +splice(@a, 89, 3); +check(); +splice(@a, @a, 3); +check(); + +# (50-51) Also we did not emulate splice's freaky behavior when inserting +# past the end of the array (1.14) +splice(@a, 89, 0, "I", "like", "pie"); +check(); +splice(@a, 89, 0, "pie pie pie"); +check(); + +# (52-54) Test default arguments +splice @a, 0, 0, (0..11); +check(); +splice @a, 4; +check(); +splice @a; +check(); + + +sub init_file { + my $data = shift; + open F, "> $file" or die $!; + binmode F; + print F $data; + close F; +} + +use POSIX 'SEEK_SET'; +sub check { + my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); + print $integrity ? "ok $N\n" : "not ok $N\n"; + $N++; + repopulate(); +} + + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +sub repopulate { + %{$o->{cache}} = (); # scrub out the cache + @{$o->{lru}} = (); # and the LRU queue + $o->{cached} = 0; # and the cache size + my @z = @a; # refill the cache with correct data +} + +END { + undef $o; + untie @a; + 1 while unlink $file; +} + + + diff --git a/lib/Tie/File/t/20_cache_full.t b/lib/Tie/File/t/20_cache_full.t new file mode 100644 index 0000000..8d8a5cd --- /dev/null +++ b/lib/Tie/File/t/20_cache_full.t @@ -0,0 +1,227 @@ +#!/usr/bin/perl +# +# Tests for various caching errors +# + +my $file = "tf$$.txt"; +$: = Tie::File::_default_recsep(); +my $data = join $:, "record0" .. "record9", ""; +my $V = $ENV{INTEGRITY}; # Verbose integrity checking? + +print "1..111\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +open F, "> $file" or die $!; +binmode F; +print F $data; +close F; + +# Limit cache size to 30 bytes +my $MAX = 30; +# -- that's enough space for 3 records, but not 4, on both \n and \r\n systems +my $o = tie @a, 'Tie::File', $file, memory => $MAX; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +# (3-5) Let's see if data was properly expired from the cache +my @z = @a; # force cache to contain all ten records +# It should now contain only the *last* three records, 7, 8, and 9 +{ + my $x = "7 8 9"; + my $a = join " ", sort keys %{$o->{cache}}; + if ($a eq $x) { print "ok $N\n" } + else { print "not ok $N # cache keys were <$a>; expected <$x>\n" } + $N++; +} +check(); + +# Here we redo *all* the splice tests, with populate() +# calls before each one, to make sure that splice() does not botch the cache. + +# (6-25) splicing at the beginning +splice(@a, 0, 0, "rec4"); +check(); +splice(@a, 0, 1, "rec5"); # same length +check(); +splice(@a, 0, 1, "record5"); # longer +check(); +splice(@a, 0, 1, "r5"); # shorter +check(); +splice(@a, 0, 1); # removal +check(); +splice(@a, 0, 0); # no-op +check(); + +splice(@a, 0, 0, 'r7', 'rec8'); # insert more than one +check(); +splice(@a, 0, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check(); +splice(@a, 0, 3, 'record9', 'rec10'); # delete more than insert +check(); +splice(@a, 0, 2); # delete more than one +check(); + + +# (26-45) splicing in the middle +splice(@a, 1, 0, "rec4"); +check(); +splice(@a, 1, 1, "rec5"); # same length +check(); +splice(@a, 1, 1, "record5"); # longer +check(); +splice(@a, 1, 1, "r5"); # shorter +check(); +splice(@a, 1, 1); # removal +check(); +splice(@a, 1, 0); # no-op +check(); + +splice(@a, 1, 0, 'r7', 'rec8'); # insert more than one +check(); +splice(@a, 1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check(); +splice(@a, 1, 3, 'record9', 'rec10'); # delete more than insert +check(); +splice(@a, 1, 2); # delete more than one +check(); + +# (46-65) splicing at the end +splice(@a, 3, 0, "rec4"); +check(); +splice(@a, 3, 1, "rec5"); # same length +check(); +splice(@a, 3, 1, "record5"); # longer +check(); +splice(@a, 3, 1, "r5"); # shorter +check(); +splice(@a, 3, 1); # removal +check(); +splice(@a, 3, 0); # no-op +check(); + +splice(@a, 3, 0, 'r7', 'rec8'); # insert more than one +check(); +splice(@a, 3, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check(); +splice(@a, 3, 3, 'record9', 'rec10'); # delete more than insert +check(); +splice(@a, 3, 2); # delete more than one +check(); + +# (66-85) splicing with negative subscript +splice(@a, -1, 0, "rec4"); +check(); +splice(@a, -1, 1, "rec5"); # same length +check(); +splice(@a, -1, 1, "record5"); # longer +check(); +splice(@a, -1, 1, "r5"); # shorter +check(); +splice(@a, -1, 1); # removal +check(); +splice(@a, -1, 0); # no-op +check(); + +splice(@a, -1, 0, 'r7', 'rec8'); # insert more than one +check(); +splice(@a, -1, 2, 'rec7', 'record8', 'rec9'); # insert more than delete +check(); +splice(@a, -3, 3, 'record9', 'rec10'); # delete more than insert +check(); +splice(@a, -4, 3); # delete more than one +check(); + +# (86-87) scrub it all out +splice(@a, 0, 3); +check(); + +# (88-89) put some back in +splice(@a, 0, 0, "rec0", "rec1"); +check(); + +# (90-91) what if we remove too many records? +splice(@a, 0, 17); +check(); + +# (92-95) In the past, splicing past the end was not correctly detected +# (1.14) +splice(@a, 89, 3); +check(); +splice(@a, @a, 3); +check(); + +# (96-99) Also we did not emulate splice's freaky behavior when inserting +# past the end of the array (1.14) +splice(@a, 89, 0, "I", "like", "pie"); +check(); +splice(@a, 89, 0, "pie pie pie"); +check(); + +# (100-105) Test default arguments +splice @a, 0, 0, (0..11); +check(); +splice @a, 4; +check(); +splice @a; +check(); + +# (106-111) One last set of tests. I don't know what state the cache +# is in now. But if I read any three records, those three records are +# what should be in the cache, and nothing else. +@a = "record0" .. "record9"; +check(); # In 0.18 #107 fails here--STORE was not flushing the cache when + # replacing an old cached record with a longer one +for (5, 6, 1) { my $z = $a[$_] } +{ + my $x = "5 6 1"; + my $a = join " ", @{$o->{lru}}; + if ($a eq $x) { print "ok $N\n" } + else { print "not ok $N # LRU was <$a>; expected <$x>\n" } + $N++; + $x = "1 5 6"; + $a = join " ", sort keys %{$o->{cache}}; + if ($a eq $x) { print "ok $N\n" } + else { print "not ok $N # cache keys were <$a>; expected <$x>\n" } + $N++; +} +check(); + + +sub init_file { + my $data = shift; + open F, "> $file" or die $!; + binmode F; + print F $data; + close F; +} + +sub check { + my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); + print $integrity ? "ok $N\n" : "not ok $N\n"; + $N++; + + print $o->{cached} <= $MAX + ? "ok $N\n" + : "not ok $N # $o->{cached} bytes cached, should be <= $MAX\n"; + $N++; +} + + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + +END { + undef $o; + untie @a; + 1 while unlink $file; +} + + + diff --git a/lib/Tie/File/t/21_win32.t b/lib/Tie/File/t/21_win32.t new file mode 100644 index 0000000..85a5733 --- /dev/null +++ b/lib/Tie/File/t/21_win32.t @@ -0,0 +1,61 @@ +#!/usr/bin/perl +# +# Formerly, on a Win32 system, Tie::File would create files with +# \n-terminated records instead of \r\n-terminated. The tests never +# picked this up because they were using $/ everywhere, and $/ is \n +# on windows systems. +# +# These tests (Win32 only) make sure that the file had \r\n as it should. + +my $file = "tf$$.txt"; + +unless ($^O =~ /^(MSWin32|dos)$/) { + print "1..0\n"; + exit; +} + + +print "1..3\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +my $n; + +# (3) Make sure that on Win32 systems, the file is written with \r\n by default +@a = qw(fish dog carrot); +undef $o; +untie @a; +open F, "< $file" or die "Couldn't open file $file: $!"; +binmode F; +my $a = do {local $/ ; }; +my $x = "fish\r\ndog\r\ncarrot\r\n" ; +if ($a eq $x) { + print "ok $N\n"; +} else { + ctrlfix(my $msg = "expected <$x>, got <$a>"); + print "not ok $N # $msg\n"; +} + +close F; + +sub ctrlfix { + for (@_) { + s/\n/\\n/g; + s/\r/\\r/g; + } +} + + + +END { + undef $o; + untie @a; + 1 while unlink $file; +} +