use Fcntl 'O_CREAT', 'O_RDWR', 'LOCK_EX';
require 5.005;
-$VERSION = "0.15";
+$VERSION = "0.16";
# Idea: The object will always contain an array of byte offsets
# this will be filled in as is necessary and convenient.
}
my $mode = defined($opts{mode}) ? $opts{mode} : O_CREAT|O_RDWR;
+ my $fh;
- my $fh = \do { local *FH }; # only works in 5.005 and later
- sysopen $fh, $file, $mode, 0666 or return;
- binmode $fh;
+ if (UNIVERSAL::isa($file, 'GLOB')) {
+ unless (seek $file, 0, SEEK_SET) {
+ croak "$pack: your filehandle does not appear to be seekable";
+ }
+ $fh = $file;
+ } elsif (ref $file) {
+ croak "usage: tie \@array, $pack, filename, [option => value]...";
+ } else {
+ $fh = \do { local *FH }; # only works in 5.005 and later
+ sysopen $fh, $file, $mode, 0666 or return;
+ binmode $fh;
+ }
{ my $ofh = select $fh; $| = 1; select $ofh } # autoflush on write
$opts{fh} = $fh;
my $oldrec = $self->FETCH($n);
# _check_cache promotes record $n to MRU. Is this correct behavior?
- $self->{cache}{$n} = $rec if $self->_check_cache($n);
+ if (my $cached = $self->_check_cache($n)) {
+ $self->{cache}{$n} = $rec;
+ $self->{cached} += length($rec) - length($cached);
+ }
if (not defined $oldrec) {
# We're storing a record beyond the end of the file
if ($n == $lastrec) {
$self->_seek($n);
$self->_chop_file;
+ $#{$self->{offsets}}--;
+ delete $self->{cached}{$n};
+ @{$self->{lru}} = grep $_ != $n, @{$self->{lru}};
# perhaps in this case I should also remove trailing null records?
} else {
$self->STORE($n, "");
# Todo : just use $self->{recsep} x $extras here?
while ($extras-- > 0) {
$self->_write_record($self->{recsep});
- $pos += $self->{recseplen};
- push @{$self->{offsets}}, $pos;
+ push @{$self->{offsets}}, tell $self->{fh};
}
}
sub _check_integrity {
my ($self, $file, $warn) = @_;
my $good = 1;
- local *F = $self->{fh};
- seek F, 0, SEEK_SET;
-# open F, $file or die "Couldn't open file $file: $!";
-# binmode F;
- local $/ = $self->{recsep};
+
unless ($self->{offsets}[0] == 0) {
$warn && print STDERR "# rec 0: offset <$self->{offsets}[0]> s/b 0!\n";
$good = 0;
}
+
+ local *F = $self->{fh};
+ seek F, 0, SEEK_SET;
+ local $/ = $self->{recsep};
+ $. = 0;
+
while (<F>) {
my $n = $. - 1;
my $cached = $self->{cache}{$n};
my $ao = tell F;
if (defined $offset && $offset != $ao) {
$warn && print STDERR "# rec $n: offset <$offset> actual <$ao>\n";
+ $good = 0;
}
if (defined $cached && $_ ne $cached) {
$good = 0;
$good;
}
+"Cogito, ergo sum."; # don't forget to return a true value from the file
+
=head1 NAME
Tie::File - Access the lines of a disk file via a Perl array
=head1 SYNOPSIS
- # This file documents Tie::File version 0.15
+ # This file documents Tie::File version 0.16
tie @array, 'Tie::File', filename or die ...;
C<LOCK_SH> or C<LOCK_EX | LOCK_NB>. (These constants are provided by
the C<use Fcntl ':flock'> declaration.)
-C<MODE> is optional; C<< $o->flock >> simply locks the file with
+C<MODE> is optional; C<$o-E<gt>flock> simply locks the file with
C<LOCK_EX>.
The best way to unlock a file is to discard the object and untie the
way from plowing into you sideways; it merely guarantees to you that
the idiot does not also have a green light at the same time.
+=head2 Tying to an already-opened filehandle
+
+If C<$fh> is a filehandle, such as is returned by C<IO::File> or one
+of the other C<IO> modules, you may use:
+
+ tie @array, 'Tie::File', $fh, ...;
+
+Similarly if you opened that handle C<FH> with regular C<open> or
+C<sysopen>, you may use:
+
+ tie @array, 'Tie::File', \*FH, ...;
+
+Handles that were opened write-only won't work. Handles that were
+opened read-only will work as long as you don't try to write to them.
+Handles must be attached to seekable sources of data---that means no
+pipes or sockets. If you try to supply a non-seekable handle, the
+C<tie> call will abort your program.
+
=head1 CAVEATS
(That's Latin for 'warnings'.)
Every effort was made to make this module efficient. Nevertheless,
changing the size of a record in the middle of a large file will
-always be slow, because everything after the new record must be move.
+always be slow, because everything after the new record must be moved.
In particular, note that:
I/O, almost all normal use of it will be heavily I/O bound, and that
the time to maintain complicated data structures inside the module
will be dominated by the time to actually perform the I/O. This
-suggests, for example, that and LRU read-cache is a good tradeoff,
+suggests, for example, that an LRU read-cache is a good tradeoff,
even if it requires substantial adjustment following a C<splice>
operation.
=head1 LICENSE
-C<Tie::File> version 0.15 is copyright (C) 2002 Mark Jason Dominus.
+C<Tie::File> version 0.16 is copyright (C) 2002 Mark Jason Dominus.
This library is free software; you may redistribute it and/or modify
it under the same terms as Perl itself.
=head1 WARRANTY
-C<Tie::File> version 0.15 comes with ABSOLUTELY NO WARRANTY.
+C<Tie::File> version 0.16 comes with ABSOLUTELY NO WARRANTY.
For details, see the license.
-=head1 TODO
+=head1 THANKS
+
+Gigantic thanks to Jarkko Hietaniemi, for agreeing to put this in the
+core when I hadn't written it yet, and for generally being helpful,
+supportive, and competent. (Usually the rule is "choose any one.")
+Also big thanks to Abhijit Menon-Sen for all of the same things.
+
+Special thanks to Craig Berry (for VMS portability help), Randy Kobes
+(for Win32 portability help), the rest of the CPAN testers (for
+testing).
-Allow tie to seekable filehandle rather than named file.
+More thanks to:
+Gerrit Haase /
+Tassilo von Parseval /
+H. Dieter Pearcey /
+Peter Somu /
+Tels
-Tests for default arguments to SPLICE. Tests for CLEAR/EXTEND.
-Tests for DELETE/EXISTS.
+=head1 TODO
+
+Test DELETE machinery more carefully.
-More tests. (Configuration options, cache flushery, locking. _twrite
-should be tested separately, because there are a lot of weird special
-cases lurking in there.)
+More tests. (Configuration options, cache flushery. _twrite should
+be tested separately, because there are a lot of weird special cases
+lurking in there.)
More tests. (Stuff I didn't think of yet.)
Fixed-length mode.
+Maybe an autolocking mode?
+
=cut
my $file = "tf$$.txt";
-print "1..38\n";
+print "1..56\n";
my $N = 1;
use Tie::File;
print $o ? "ok $N\n" : "not ok $N\n";
$N++;
-# 3-4 create
+# 3-5 create
$a[0] = 'rec0';
check_contents("rec0");
-# 5-8 append
+# 6-11 append
$a[1] = 'rec1';
check_contents("rec0", "rec1");
$a[2] = 'rec2';
check_contents("rec0", "rec1", "rec2");
-# 9-14 same-length alterations
+# 12-20 same-length alterations
$a[0] = 'new0';
check_contents("new0", "rec1", "rec2");
$a[1] = 'new1';
$a[2] = 'new2';
check_contents("new0", "new1", "new2");
-# 15-24 lengthening alterations
+# 21-35 lengthening alterations
$a[0] = 'long0';
check_contents("long0", "new1", "new2");
$a[1] = 'long1';
$a[0] = 'longer0';
check_contents("longer0", "longer1", "long2");
-# 25-34 shortening alterations, including truncation
+# 36-50 shortening alterations, including truncation
$a[0] = 'short0';
check_contents("short0", "longer1", "long2");
$a[1] = 'short1';
$a[0] = 'sh0';
check_contents("sh0", "sh1", "short2");
-# file with holes
+# (51-56) file with holes
$a[4] = 'rec4';
check_contents("sh0", "sh1", "short2", "", "rec4");
$a[3] = 'rec3';
}
print $good ? "ok $N\n" : "not ok $N # $msg\n";
$N++;
+
+ print $o->_check_integrity($file, $ENV{INTEGRITY})
+ ? "ok $N\n" : "not ok $N\n";
+ $N++;
}
END {
# (97) Splicing with too large a negative number should be fatal
# This test ignored because it causes 5.6.1 and 5.7.2 to dump core
# NOT MY FAULT
-if ($] < 5.006 || $] > 5.007002) {
+if ($] < 5.006 || $] > 5.007003) {
eval { splice(@a, -7, 0) };
print $@ =~ /^Modification of non-creatable array value attempted, subscript -7/
? "ok $N\n" : "not ok $N \# \$\@ was '$@'\n";
} else {
- print "ok $N \# skipped (5.6.0 through 5.7.2 dump core here.)\n";
+ print "ok $N \# skipped (5.6.0 through 5.7.3 dump core here.)\n";
}
$N++;
check_contents("Iblahlikeblahpieblahpie pie pieblah");
# (97) Splicing with too large a negative number should be fatal
-# This test ignored because it causes 5.6.1 and 5.7.2 to dump core
+# This test ignored because it causes 5.6.1 and 5.7.3 to dump core
# NOT MY FAULT
-if ($] < 5.006 || $] > 5.007002) {
+if ($] < 5.006 || $] > 5.007003) {
eval { splice(@a, -7, 0) };
print $@ =~ /^Modification of non-creatable array value attempted, subscript -7/
? "ok $N\n" : "not ok $N \# \$\@ was '$@'\n";
} else {
- print "ok $N \# skipped (5.6.0 through 5.7.2 dump core here.)\n";
+ print "ok $N \# skipped (5.6.0 through 5.7.3 dump core here.)\n";
}
$N++;
--- /dev/null
+#!/usr/bin/perl
+#
+# Basic operation, initializing the object from an already-open handle
+# instead of from a filename
+
+my $file = "tf$$.txt";
+
+print "1..39\n";
+
+my $N = 1;
+use Tie::File;
+print "ok $N\n"; $N++;
+
+use Fcntl 'O_CREAT', 'O_RDWR';
+sysopen F, $file, O_CREAT | O_RDWR
+ or die "Couldn't create temp file $file: $!; aborting";
+
+my $o = tie @a, 'Tie::File', \*F;
+print $o ? "ok $N\n" : "not ok $N\n";
+$N++;
+
+# 3-4 create
+$a[0] = 'rec0';
+check_contents("rec0");
+
+# 5-8 append
+$a[1] = 'rec1';
+check_contents("rec0", "rec1");
+$a[2] = 'rec2';
+check_contents("rec0", "rec1", "rec2");
+
+# 9-14 same-length alterations
+$a[0] = 'new0';
+check_contents("new0", "rec1", "rec2");
+$a[1] = 'new1';
+check_contents("new0", "new1", "rec2");
+$a[2] = 'new2';
+check_contents("new0", "new1", "new2");
+
+# 15-24 lengthening alterations
+$a[0] = 'long0';
+check_contents("long0", "new1", "new2");
+$a[1] = 'long1';
+check_contents("long0", "long1", "new2");
+$a[2] = 'long2';
+check_contents("long0", "long1", "long2");
+$a[1] = 'longer1';
+check_contents("long0", "longer1", "long2");
+$a[0] = 'longer0';
+check_contents("longer0", "longer1", "long2");
+
+# 25-34 shortening alterations, including truncation
+$a[0] = 'short0';
+check_contents("short0", "longer1", "long2");
+$a[1] = 'short1';
+check_contents("short0", "short1", "long2");
+$a[2] = 'short2';
+check_contents("short0", "short1", "short2");
+$a[1] = 'sh1';
+check_contents("short0", "sh1", "short2");
+$a[0] = 'sh0';
+check_contents("sh0", "sh1", "short2");
+
+# file with holes
+$a[4] = 'rec4';
+check_contents("sh0", "sh1", "short2", "", "rec4");
+$a[3] = 'rec3';
+check_contents("sh0", "sh1", "short2", "rec3", "rec4");
+
+close F;
+undef $o;
+untie @a;
+
+# Does it correctly detect a non-seekable handle?
+{ eval {pipe *R, *W};
+ close R;
+ if ($@) {
+ print "ok $N # skipped\n";
+ last;
+ }
+ $o = eval {tie @a, 'Tie::File', \*W};
+ if ($@ && $@ =~ /filehandle does not appear to be seekable/) {
+ print "ok $N\n";
+ } else {
+ print "not ok $N\n";
+ }
+ $N++;
+}
+
+# try inserting a record into the middle of an empty file
+
+use POSIX 'SEEK_SET';
+sub check_contents {
+ my @c = @_;
+ my $x = join $/, @c, '';
+ local *FH = $o->{fh};
+ seek FH, 0, SEEK_SET;
+# my $open = open FH, "< $file";
+ my $a;
+ { local $/; $a = <FH> }
+ $a = "" unless defined $a;
+ if ($a eq $x) {
+ print "ok $N\n";
+ } else {
+ s{$/}{\\n}g for $a, $x;
+ print "not ok $N\n# expected <$x>, got <$a>\n";
+ }
+ $N++;
+
+ # now check FETCH:
+ my $good = 1;
+ my $msg;
+ for (0.. $#c) {
+ unless ($a[$_] eq "$c[$_]$/") {
+ $msg = "expected $c[$_]$/, got $a[$_]";
+ $msg =~ s{$/}{\\n}g;
+ $good = 0;
+ }
+ }
+ print $good ? "ok $N\n" : "not ok $N # $msg\n";
+ $N++;
+}
+
+END {
+ undef $o;
+ untie @a;
+ 1 while unlink $file;
+}
+
+
--- /dev/null
+#!/usr/bin/perl
+#
+# Check miscellaneous tied-array interface methods
+# EXTEND, CLEAR, DELETE, EXISTS
+#
+
+use lib '/home/mjd/src/perl/Tie-File2/lib';
+my $file = "tf$$.txt";
+1 while unlink $file;
+
+print "1..24\n";
+
+my $N = 1;
+use Tie::File;
+print "ok $N\n"; $N++;
+
+my $o = tie @a, 'Tie::File', $file;
+print $o ? "ok $N\n" : "not ok $N\n";
+$N++;
+
+# (3-8) EXTEND
+$o->EXTEND(3);
+check_contents("$/$/$/");
+$o->EXTEND(4);
+check_contents("$/$/$/$/");
+$o->EXTEND(3);
+check_contents("$/$/$/$/");
+
+# (9-10) CLEAR
+@a = ();
+check_contents("");
+
+# (11-16) EXISTS
+print !exists $a[0] ? "ok $N\n" : "not ok $N\n";
+$N++;
+$a[0] = "I like pie.";
+print exists $a[0] ? "ok $N\n" : "not ok $N\n";
+$N++;
+print !exists $a[1] ? "ok $N\n" : "not ok $N\n";
+$N++;
+$a[2] = "GIVE ME PIE";
+print exists $a[0] ? "ok $N\n" : "not ok $N\n";
+$N++;
+# exists $a[1] is not defined by this module under these circumstances
+print exists $a[1] ? "ok $N\n" : "ok $N\n";
+$N++;
+print exists $a[2] ? "ok $N\n" : "not ok $N\n";
+$N++;
+
+# (17-24) DELETE
+delete $a[0];
+check_contents("$/$/GIVE ME PIE$/");
+delete $a[2];
+check_contents("$/$/");
+delete $a[0];
+check_contents("$/$/");
+delete $a[1];
+check_contents("$/");
+
+
+use POSIX 'SEEK_SET';
+sub check_contents {
+ my $x = shift;
+ local *FH = $o->{fh};
+ seek FH, 0, SEEK_SET;
+ my $a;
+ { local $/; $a = <FH> }
+ $a = "" unless defined $a;
+ if ($a eq $x) {
+ print "ok $N\n";
+ } else {
+ s{$/}{\\n}g for $a, $x;
+ print "not ok $N\n# expected <$x>, got <$a>\n";
+ }
+ $N++;
+ print $o->_check_integrity($file, $ENV{INTEGRITY}) ? "ok $N\n" : "not ok $N\n";
+ $N++;
+}
+
+END {
+ undef $o;
+ untie @a;
+ 1 while unlink $file;
+}
+
+