From: Abhijit Menon-Sen Date: Sat, 2 Mar 2002 13:43:06 +0000 (+0000) Subject: Upgrade to Tie::File 0.14. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=51efdd021730c26e86025564e60d0f686c2ddb4c;p=p5sagit%2Fp5-mst-13.2.git Upgrade to Tie::File 0.14. p4raw-id: //depot/perl@14943 --- diff --git a/MANIFEST b/MANIFEST index edb69f8..c064b84 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1433,6 +1433,8 @@ lib/Tie/File/10_splice_rs.t Test for Tie::File. lib/Tie/File/11_rv_splice_rs.t Test for Tie::File. lib/Tie/File/12_longfetch_rs.t Test for Tie::File. lib/Tie/File/13_size_rs.t Test for Tie::File. +lib/Tie/File/14_lock.t Test for Tie::File. +lib/Tie/File/15_pushpop.t Test for Tie::File. lib/Tie/Handle.pm Base class for tied handles lib/Tie/Handle/stdhandle.t Test for Tie::StdHandle lib/Tie/Hash.pm Base class for tied hashes diff --git a/lib/Tie/File.pm b/lib/Tie/File.pm index 2b6c9a5..9fc7eab 100644 --- a/lib/Tie/File.pm +++ b/lib/Tie/File.pm @@ -2,10 +2,10 @@ package Tie::File; use Carp; use POSIX 'SEEK_SET'; -use Fcntl 'O_CREAT', 'O_RDWR'; +use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX'; require 5.005; -$VERSION = "0.13"; +$VERSION = "0.14"; # Idea: The object will always contain an array of byte offsets # this will be filled in as is necessary and convenient. @@ -102,7 +102,7 @@ sub STORE { if (not defined $oldrec) { # We're storing a record beyond the end of the file - $self->_extend_file_to($n); + $self->_extend_file_to($n+1); $oldrec = $self->{recsep}; } my $len_diff = length($rec) - length($oldrec); @@ -133,7 +133,7 @@ sub STORESIZE { # file gets longer if ($len > $olen) { - $self->_extend_file_to($len-1); # record numbers from 0 .. $len-1 + $self->_extend_file_to($len); return; } @@ -145,11 +145,84 @@ sub STORESIZE { delete @{$self->{cache}}{@cached} if @cached; } +sub PUSH { + my $self = shift; + $self->SPLICE($self->FETCHSIZE, scalar(@_), @_); + $self->FETCHSIZE; +} + +sub POP { + my $self = shift; + scalar $self->SPLICE(-1, 1); +} + +sub SHIFT { + my $self = shift; + scalar $self->SPLICE(0, 1); +} + +sub UNSHIFT { + my $self = shift; + $self->SPLICE(0, 0, @_); + $self->FETCHSIZE; +} + +sub CLEAR { + # And enable auto-defer mode, since it's likely that they just + # did @a = (...); + my $self = shift; + $self->_seekb(0); + $self->_chop_file; + %{$self->{cache}} = (); + $self->{cached} = 0; + @{$self->{lru}} = (); + @{$self->{offsets}} = (0); +} + +sub EXTEND { + my ($self, $n) = @_; + $self->_fill_offsets_to($n); + $self->_extend_file_to($n); +} + +sub DELETE { + my ($self, $n) = @_; + my $lastrec = $self->FETCHSIZE-1; + if ($n == $lastrec) { + $self->_seek($n); + $self->_chop_file; + # perhaps in this case I should also remove trailing null records? + } else { + $self->STORE($n, ""); + } +} + +sub EXISTS { + my ($self, $n) = @_; + $self->_fill_offsets_to($n); + 0 <= $n && $n < $self->FETCHSIZE; +} + sub SPLICE { my ($self, $pos, $nrecs, @data) = @_; my @result; - $pos += $self->FETCHSIZE if $pos < 0; + { + my $oldsize = $self->FETCHSIZE; + my $oldpos = $pos; + + if ($pos < 0) { + $pos += $oldsize; + if ($pos < 0) { + croak "Modification of non-creatable array value attempted, subscript $oldpos"; + } + } + + if ($pos > $oldsize) { + return unless @data; + $pos = $oldsize; # This is what perl does for normal arrays + } + } $self->_fixrecs(@data); my $data = join '', @data; @@ -157,6 +230,7 @@ sub SPLICE { my $oldlen = 0; # compute length of data being removed + # Incidentally fills offsets table for ($pos .. $pos+$nrecs-1) { my $rec = $self->FETCH($_); last unless defined $rec; @@ -164,7 +238,7 @@ sub SPLICE { $oldlen += length($rec); } - $self->_fill_offsets_to($pos); + # Modify the file $self->_twrite($data, $self->{offsets}[$pos], $oldlen); # update the offsets table part 1 @@ -187,6 +261,12 @@ sub SPLICE { # that knows that the file does indeed start at 0. $self->{offsets}[0] = 0 unless @{$self->{offsets}}; + # Perhaps the following cache foolery could be factored out + # into a bunch of mor opaque cache functions. For example, + # it's odd to delete a record from the cache and then remove + # it from the LRU queue later on; there should be a function to + # do both at once. + # update the read cache, part 1 # modified records # Consider this carefully for correctness @@ -224,7 +304,8 @@ sub SPLICE { } @{$self->{lru}} = (@new, @changed); - @result; + # Yes, the return value of 'splice' *is* actually this complicated + wantarray ? @result : @result ? $result[-1] : undef; } # write data into the file @@ -256,24 +337,23 @@ sub _twrite { # $bufsize is required to be at least as large as the data we're overwriting my $bufsize = _bufsize($len_diff); my ($writepos, $readpos) = ($pos, $pos+$len); + my $next_block; # Seems like there ought to be a way to avoid the repeated code # and the special case here. The read(1) is also a little weird. # Think about this. do { $self->_seekb($readpos); - my $br = read $self->{fh}, my($next_block), $bufsize; + my $br = read $self->{fh}, $next_block, $bufsize; my $more_data = read $self->{fh}, my($dummy), 1; $self->_seekb($writepos); $self->_write_record($data); $readpos += $br; $writepos += length $data; $data = $next_block; - unless ($more_data) { - $self->_seekb($writepos); - $self->_write_record($next_block); - } } while $more_data; + $self->_seekb($writepos); + $self->_write_record($next_block); # There might be leftover data at the end of the file $self->_chop_file if $len_diff < 0; @@ -324,7 +404,7 @@ sub _fill_offsets_to { $self->_seek(-1); # tricky -- see comment at _seek $rec = $self->_read_record; if (defined $rec) { - push @OFF, $o+length($rec); + push @OFF, tell $fh; } else { return; # It turns out there is no such record } @@ -391,14 +471,16 @@ sub _cache_flush { # entirely populated. Now we need to write a new record beyond # the end of the file. We prepare for this by writing # empty records into the file up to the position we want -# $n here is the record number of the last record we're going to write +# +# assumes that the offsets table already contains the offset of record $n, +# if it exists, and extends to the end of the file if not. sub _extend_file_to { my ($self, $n) = @_; $self->_seek(-1); # position after the end of the last record my $pos = $self->{offsets}[-1]; # the offsets table has one entry more than the total number of records - $extras = $n - ($#{$self->{offsets}} - 1); + $extras = $n - $#{$self->{offsets}}; # Todo : just use $self->{recsep} x $extras here? while ($extras-- > 0) { @@ -426,6 +508,17 @@ sub _bufsize { $b; } +# Lock the file +sub flock { + my ($self, $op) = @_; + unless (@_ <= 3) { + my $pack = ref $self; + croak "Usage: $pack\->flock([OPERATION])"; + } + my $fh = $self->{fh}; + $op = LOCK_EX unless defined $op; + flock $fh, $op; +} # Given a file, make sure the cache is consistent with the # file contents @@ -499,7 +592,7 @@ Tie::File - Access the lines of a disk file via a Perl array =head1 SYNOPSIS - # This file documents Tie::File version 0.13 + # This file documents Tie::File version 0.14 tie @array, 'Tie::File', filename or die ...; @@ -509,7 +602,12 @@ Tie::File - Access the lines of a disk file via a Perl array $n_recs = @array; # how many records are in the file? $#array = $n_recs - 2; # chop records off the end - # As you would expect + # As you would expect: + + push @array, new recs...; + my $r1 = pop @array; + unshift @array, new recs...; + my $r1 = shift @array; @old_recs = splice @array, 3, 7, new recs...; untie @array; # all finished @@ -628,8 +726,35 @@ The C call returns an object, say C<$o>. You may call $rec = $o->FETCH($n); $o->STORE($n, $rec); -to fetch or store the record at line C<$n>, respectively. There are -no other public methods in this package. +to fetch or store the record at line C<$n>, respectively. The only other public method in this package is: + +=head2 C + + $o->flock(MODE) + +will lock the tied file. C has the same meaning as the second +argument to the Perl built-in C function; for example +C or C. (These constants are provided by +the C declaration.) + +C is optional; C<< $o->flock >> simply locks the file with +C. + +The best way to unlock a file is to discard the object and untie the +array. It is probably unsafe to unlock the file without also untying +it, because if you do, changes may remain unwritten inside the object. +That is why there is no shortcut for unlocking. If you really want to +unlock the file prematurely, you know what to do; if you don't know +what to do, then don't do it. + +All the usual warnings about file locking apply here. In particular, +note that file locking in Perl is B, which means that +holding a lock will not prevent anyone else from reading, writing, or +erasing the file; it only prevents them from getting another lock at +the same time. Locks are analogous to green traffic lights: If you +have a green light, that does not prevent the idiot coming the other +way from plowing into you sideways; it merely guarantees to you that +the idiot does not also have a green light at the same time. =head1 CAVEATS @@ -675,11 +800,22 @@ suggests, for example, that and LRU read-cache is a good tradeoff, even if it requires substantial adjustment following a C operation. -=head2 Missing Methods +=head1 CAVEATS + +(That's Latin for 'warnings'.) + +The behavior of tied arrays is not precisely the same as for regular +arrays. For example: -The tied array does not yet support C, C, C, -C, C, or size-setting via C<$#array = $n>. I will -put these in soon. + undef $a[10]; print "How unusual!\n" if $a[10]; + +C-ing a C array element just blanks out the +corresponding record in the file. When you read it back again, you'll +see the record separator (typically, $a[10] will appear to contain +"\n") so the supposedly-C'ed value will be true. + +There are other minor differences, but in general, the correspondence +is extremely close. =head1 AUTHOR @@ -693,7 +829,7 @@ C. =head1 LICENSE -C version 0.13 is copyright (C) 2002 Mark Jason Dominus. +C version 0.14 is copyright (C) 2002 Mark Jason Dominus. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -718,21 +854,20 @@ For licensing inquiries, contact the author at: =head1 WARRANTY -C version 0.13 comes with ABSOLUTELY NO WARRANTY. +C version 0.14 comes with ABSOLUTELY NO WARRANTY. For details, see the license. =head1 TODO -C, C, C, C. +Tests for default arguments to SPLICE. Tests for CLEAR/EXTEND. +Tests for DELETE/EXISTS. -More tests. (Configuration options, cache flushery. _twrite shoule -be tested separately, because there are a lot of weird special cases -lurking in there.) +More tests. (Configuration options, cache flushery, locking. _twrite +should be tested separately, because there are a lot of weird special +cases lurking in there.) More tests. (Stuff I didn't think of yet.) -File locking. - Deferred writing. (!!!) Paragraph mode? diff --git a/lib/Tie/File/01_gen.t b/lib/Tie/File/01_gen.t index 58c7a97..d69d232 100644 --- a/lib/Tie/File/01_gen.t +++ b/lib/Tie/File/01_gen.t @@ -77,7 +77,7 @@ sub check_contents { # now check FETCH: my $good = 1; for (0.. $#c) { - $good = 0 unless $a[$_] eq "$c[$_]\n"; + $good = 0 unless $a[$_] eq "$c[$_]$/"; } print (($open && $good) ? "ok $N\n" : "not ok $N # fetch @c\n"); $N++; diff --git a/lib/Tie/File/04_splice.t b/lib/Tie/File/04_splice.t index aae678f..f8628a2 100644 --- a/lib/Tie/File/04_splice.t +++ b/lib/Tie/File/04_splice.t @@ -10,11 +10,10 @@ # Then, it checks the actual contents of the file against the expected # contents. -use lib '/home/mjd/src/perl/Tie-File2/lib'; my $file = "tf$$.txt"; my $data = "rec0$/rec1$/rec2$/"; -print "1..88\n"; +print "1..97\n"; my $N = 1; use Tie::File; @@ -137,6 +136,34 @@ check_contents("rec0$/rec1$/"); splice(@a, 0, 17); check_contents(""); +# (89-92) In the past, splicing past the end was not correctly detected +# (1.14) +splice(@a, 89, 3); +check_contents(""); +splice(@a, @a, 3); +check_contents(""); + +# (93-96) Also we did not emulate splice's freaky behavior when inserting +# past the end of the array (1.14) +splice(@a, 89, 0, "I", "like", "pie"); +check_contents("I$/like$/pie$/"); +splice(@a, 89, 0, "pie pie pie"); +check_contents("I$/like$/pie$/pie pie pie$/"); + +# (97) Splicing with too large a negative number should be fatal +# This test ignored because it causes 5.6.1 and 5.7.2 to dump core +# NOT MY FAULT +if ($] < 5.006 || $] > 5.007002) { + eval { splice(@a, -7, 0) }; + print $@ =~ /^Modification of non-creatable array value attempted, subscript -7/ + ? "ok $N\n" : "not ok $N \# \$\@ was '$@'\n"; +} else { + print "ok $N \# skipped (5.6.0 through 5.7.2 dump core here.)\n"; +} +$N++; + + + sub init_file { my $data = shift; open F, "> $file" or die $!; diff --git a/lib/Tie/File/07_rv_splice.t b/lib/Tie/File/07_rv_splice.t index aaab1f7..75c8a3a 100644 --- a/lib/Tie/File/07_rv_splice.t +++ b/lib/Tie/File/07_rv_splice.t @@ -7,7 +7,7 @@ my $file = "tf$$.txt"; my $data = "rec0$/rec1$/rec2$/"; -print "1..45\n"; +print "1..48\n"; my $N = 1; use Tie::File; @@ -130,6 +130,22 @@ check_result(); @r = splice(@a, 0, 17); check_result('rec0', 'rec1'); +# (46-48) Now check the scalar context return +splice(@a, 0, 0, qw(I like pie)); +my $r; +$r = splice(@a, 0, 0); +print !defined($r) ? "ok $N\n" : "not ok $N \# return should have been undef\n"; +$N++; + +$r = splice(@a, 2, 1); +print $r eq "pie$/" ? "ok $N\n" : "not ok $N \# return should have been 'pie'\n"; +$N++; + +$r = splice(@a, 0, 2); +print $r eq "like$/" ? "ok $N\n" : "not ok $N \# return should have been 'like'\n"; +$N++; + + sub init_file { my $data = shift; open F, "> $file" or die $!; diff --git a/lib/Tie/File/14_lock.t b/lib/Tie/File/14_lock.t new file mode 100644 index 0000000..a771d8d --- /dev/null +++ b/lib/Tie/File/14_lock.t @@ -0,0 +1,40 @@ +#!/usr/bin/perl +# +# Check flock() feature +# +# This isn't a real test; it just checks to make sure we can call the method. +# It doesn't even check to make sure that the default behavior +# (LOCK_EX) is occurring. This is because I don't know how to write a good +# portable test for flocking. I checked the Perl core distribution, +# and found that Perl doesn't test flock either! + +use Fcntl ':flock'; # This works at least back to 5.004_04 + +my $file = "tf$$.txt"; +my ($o, $n); +my @a; + +print "1..4\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; + +# 2-4 Who the heck knows? +open F, "> $file" or die $!; +close F; +$o = tie @a, 'Tie::File', $file, recsep => 'blah'; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; + +print $o->flock() ? "ok $N\n" : "not ok $N\n"; +$N++; + +print $o->flock(LOCK_UN) ? "ok $N\n" : "not ok $N\n"; +$N++; + + +END { + 1 while unlink $file; +} + diff --git a/lib/Tie/File/15_pushpop.t b/lib/Tie/File/15_pushpop.t new file mode 100644 index 0000000..76fe4c1 --- /dev/null +++ b/lib/Tie/File/15_pushpop.t @@ -0,0 +1,127 @@ +#!/usr/bin/perl +# +# Check PUSH, POP, SHIF, and UNSHIFT +# +# Each call to 'check_contents' actually performs two tests. +# First, it calls the tied object's own 'check_integrity' method, +# which makes sure that the contents of the read cache and offset tables +# accurately reflect the contents of the file. +# Then, it checks the actual contents of the file against the expected +# contents. + +use lib '/home/mjd/src/perl/Tie-File2/lib'; +my $file = "tf$$.txt"; +1 while unlink $file; +my $data = "rec0$/rec1$/rec2$/"; + +print "1..38\n"; + +my $N = 1; +use Tie::File; +print "ok $N\n"; $N++; # partial credit just for showing up + +my $o = tie @a, 'Tie::File', $file; +print $o ? "ok $N\n" : "not ok $N\n"; +$N++; +my ($n, @r); + + + +# (3-11) PUSH tests +$n = push @a, "rec0", "rec1", "rec2"; +check_contents($data); +print $n == 3 ? "ok $N\n" : "not ok $N # size is $n, should be 3\n"; +$N++; + +$n = push @a, "rec3", "rec4\n"; +check_contents("$ {data}rec3$/rec4$/"); +print $n == 5 ? "ok $N\n" : "not ok $N # size is $n, should be 5\n"; +$N++; + +# Trivial push +$n = push @a; +check_contents("$ {data}rec3$/rec4$/"); +print $n == 5 ? "ok $N\n" : "not ok $N # size is $n, should be 5\n"; +$N++; + +# (12-20) POP tests +$n = pop @a; +check_contents("$ {data}rec3$/"); +print $n eq "rec4$/" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec4\n"; +$N++; + +# Presumably we have already tested this to death +splice(@a, 1, 3); +$n = pop @a; +check_contents(""); +print $n eq "rec0$/" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec0\n"; +$N++; + +$n = pop @a; +check_contents(""); +print ! defined $n ? "ok $N\n" : "not ok $N # last rec should be undef, is $n\n"; +$N++; + + +# (21-29) UNSHIFT tests +$n = unshift @a, "rec0", "rec1", "rec2"; +check_contents($data); +print $n == 3 ? "ok $N\n" : "not ok $N # size is $n, should be 3\n"; +$N++; + +$n = unshift @a, "rec3", "rec4\n"; +check_contents("rec3$/rec4$/$data"); +print $n == 5 ? "ok $N\n" : "not ok $N # size is $n, should be 5\n"; +$N++; + +# Trivial unshift +$n = unshift @a; +check_contents("rec3$/rec4$/$data"); +print $n == 5 ? "ok $N\n" : "not ok $N # size is $n, should be 5\n"; +$N++; + +# (30-38) SHIFT tests +$n = shift @a; +check_contents("rec4$/$data"); +print $n eq "rec3$/" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec3\n"; +$N++; + +# Presumably we have already tested this to death +splice(@a, 1, 3); +$n = shift @a; +check_contents(""); +print $n eq "rec4$/" ? "ok $N\n" : "not ok $N # last rec is $n, should be rec4\n"; +$N++; + +$n = shift @a; +check_contents(""); +print ! defined $n ? "ok $N\n" : "not ok $N # last rec should be undef, is $n\n"; +$N++; + + +sub init_file { + my $data = shift; + open F, "> $file" or die $!; + binmode F; + print F $data; + close F; +} + +sub check_contents { + my $x = shift; + local *FH; + my $integrity = $o->_check_integrity($file, $ENV{INTEGRITY}); + print $integrity ? "ok $N\n" : "not ok $N\n"; + $N++; + my $open = open FH, "< $file"; + binmode FH; + my $a; + { local $/; $a = } + print (($open && $a eq $x) ? "ok $N\n" : "not ok $N\n"); + $N++; +} + +END { + 1 while unlink $file; +} +