-Revision history for Perl extension File::FastSlurp.
+Revision history File::Slurp
+
+9999.14
+ - Added special faster code to slurp in small text files which
+ is a common case
+ - Rewrote the extras/slurp_bench.pl script. It has a full
+ legend, better CLI options, size is selectable, benchmark
+ entries have more consistant names and it compares the new
+ fast slurp for small files to the general slurp code.
+ Thanks to Mark Friendlich
+ - Added t/error.t to actually test error conditions. Previous
+ error.t was renamed to error_mode.t which better reflects its
+ tests.
+ - t/error.t uses a new test driver module. this may get used by
+ other tests in the future.
+ - Fixed check for SEEK_SET and other constant subs being defined
9999.13 Tue Oct 10 02:04:51 EDT 2006
- Refactored the extras/slurp_bench.pl script. It has options,
- Added error check on atomic rename and test for it
Thanks to Daniel Scott Sterling
-
9999.12 Thu Feb 2 02:26:31 EST 2006
- Fixed bug on windows with classic slurping and File::Slurp not
agreeing on newline conversion.
end if in append mode.n
Thanks to Chris Dolan <cdolan@cpan.org>
-
9999.08 Sat Apr 16 01:01:27 EDT 2005
- read_dir returns an array ref in scalar context
- read_dir keeps . and .. if keep_dot_dot option is set.
- slurp() is an optional exported alias to read_file
Thanks to Damian Conway <damian@conway.org>
-
-
9999.07 Tue Jan 25 01:33:11 EST 2005
- Slurping in pseudo files (as in /proc) which show a size of 0
but actually have data works. This seems to be the case on
linux but on Solaris those files show their proper size.
Thanks to Juerd Waalboer <juerd@cpan.org>
-
9999.06 Mon Sep 20 01:57:00 EDT 2004
- Slurping the DATA handle now works without the workaround.
tests are in t/data_scalar.t and t/data_list.t
split on multiple newlines (/\n\n+/).
Thanks to Geoffrey Leach <geoff@direcway.com>
-
9999.05 Tue Feb 24 21:14:55 EST 2004
- skip handle tests where socketpair is not supported (pre 5.8
on windows)
Thanks to Mike Arms <marms@sandia.gov>
-
9999.04 Mon Feb 23 14:20:52 EST 2004
- fixed DATA handle bug in t/handle.t (not seen on most OS's)
Thanks to James Willmore <jwillmore@adelphia.net>
-
9999.03 Mon Dec 22 01:44:43 EST 2003
- fixed DATA handle bugs in t/handle.t on osx (should be fixed
on BSD as well)
DFSEP = $(DIRFILESEP)
NAME = File::Slurp
NAME_SYM = File_Slurp
-VERSION = 9999.12
+VERSION = 9999.13
VERSION_MACRO = VERSION
-VERSION_SYM = 9999_12
+VERSION_SYM = 9999_13
DEFINE_VERSION = -D$(VERSION_MACRO)=\"$(VERSION)\"
-XS_VERSION = 9999.12
+XS_VERSION = 9999.13
XS_VERSION_MACRO = XS_VERSION
XS_DEFINE_VERSION = -D$(XS_VERSION_MACRO)=\"$(XS_VERSION)\"
INST_ARCHLIB = blib/arch
O_FILES =
H_FILES =
MAN1PODS =
-MAN3PODS = Slurp.pm \
- lib/File/Slurp.pm
+MAN3PODS = lib/File/Slurp.pm
# Where is the Config information that we are using/depend on
CONFIGDEP = $(PERL_ARCHLIB)$(DFSEP)Config.pm $(PERL_INC)$(DFSEP)config.h
PERL_ARCHIVE_AFTER =
-TO_INST_PM = Slurp.pm \
- lib/File/Slurp.pm
+TO_INST_PM = lib/File/Slurp.pm
-PM_TO_BLIB = Slurp.pm \
- $(INST_LIB)/File/Slurp.pm \
- lib/File/Slurp.pm \
+PM_TO_BLIB = lib/File/Slurp.pm \
blib/lib/File/Slurp.pm
DIST_CP = best
DIST_DEFAULT = tardist
DISTNAME = File-Slurp
-DISTVNAME = File-Slurp-9999.12
+DISTVNAME = File-Slurp-9999.13
# --- MakeMaker macro section:
manifypods : pure_all \
- Slurp.pm \
lib/File/Slurp.pm
$(NOECHO) $(POD2MAN) --section=$(MAN3EXT) --perm_rw=$(PERM_RW) \
- Slurp.pm $(INST_MAN3DIR)/File::Slurp.$(MAN3EXT) \
lib/File/Slurp.pm $(INST_MAN3DIR)/File::Slurp.$(MAN3EXT)
$(NOECHO) $(ECHO) Generating META.yml
$(NOECHO) $(ECHO) '--- #YAML:1.0' > META_new.yml
$(NOECHO) $(ECHO) 'name: File-Slurp' >> META_new.yml
- $(NOECHO) $(ECHO) 'version: 9999.12' >> META_new.yml
+ $(NOECHO) $(ECHO) 'version: 9999.13' >> META_new.yml
$(NOECHO) $(ECHO) 'abstract: Efficient Reading/Writing of Complete Files' >> META_new.yml
$(NOECHO) $(ECHO) 'license: ~' >> META_new.yml
$(NOECHO) $(ECHO) 'author: ' >> META_new.yml
# --- MakeMaker ppd section:
# Creates a PPD (Perl Package Description) for a binary distribution.
ppd :
- $(NOECHO) $(ECHO) '<SOFTPKG NAME="$(DISTNAME)" VERSION="9999,12,0,0">' > $(DISTNAME).ppd
+ $(NOECHO) $(ECHO) '<SOFTPKG NAME="$(DISTNAME)" VERSION="9999,13,0,0">' > $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' <TITLE>$(DISTNAME)</TITLE>' >> $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' <ABSTRACT>Efficient Reading/Writing of Complete Files</ABSTRACT>' >> $(DISTNAME).ppd
$(NOECHO) $(ECHO) ' <AUTHOR>Uri Guttman <uri@stemsystems.com></AUTHOR>' >> $(DISTNAME).ppd
pm_to_blib : $(TO_INST_PM)
$(NOECHO) $(ABSPERLRUN) -MExtUtils::Install -e 'pm_to_blib({@ARGV}, '\''$(INST_LIB)/auto'\'', '\''$(PM_FILTER)'\'')' -- \
- Slurp.pm $(INST_LIB)/File/Slurp.pm \
lib/File/Slurp.pm blib/lib/File/Slurp.pm
$(NOECHO) $(TOUCH) pm_to_blib
--- /dev/null
+..
\ No newline at end of file
--- /dev/null
+package FileSlurp_12;
+
+use strict;
+
+use Carp ;
+use Fcntl qw( :DEFAULT ) ;
+use POSIX qw( :fcntl_h ) ;
+use Symbol ;
+
+use base 'Exporter' ;
+use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT ) ;
+
+%EXPORT_TAGS = ( 'all' => [
+ qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;
+
+@EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
+@EXPORT_OK = qw( slurp ) ;
+
+$VERSION = '9999.13';
+
+my $is_win32 = $^O =~ /win32/i ;
+
+# Install subs for various constants that aren't set in older perls
+# (< 5.005). Fcntl on old perls uses Exporter to define subs without a
+# () prototype These can't be overridden with the constant pragma or
+# we get a prototype mismatch. Hence this less than aesthetically
+# appealing BEGIN block:
+
+BEGIN {
+ unless( eval { defined SEEK_SET() } ) {
+ *SEEK_SET = sub { 0 };
+ *SEEK_CUR = sub { 1 };
+ *SEEK_END = sub { 2 };
+ }
+
+ unless( eval { defined O_BINARY() } ) {
+ *O_BINARY = sub { 0 };
+ *O_RDONLY = sub { 0 };
+ *O_WRONLY = sub { 1 };
+ }
+
+ unless ( eval { defined O_APPEND() } ) {
+
+ if ( $^O =~ /olaris/ ) {
+ *O_APPEND = sub { 8 };
+ *O_CREAT = sub { 256 };
+ *O_EXCL = sub { 1024 };
+ }
+ elsif ( $^O =~ /inux/ ) {
+ *O_APPEND = sub { 1024 };
+ *O_CREAT = sub { 64 };
+ *O_EXCL = sub { 128 };
+ }
+ elsif ( $^O =~ /BSD/i ) {
+ *O_APPEND = sub { 8 };
+ *O_CREAT = sub { 512 };
+ *O_EXCL = sub { 2048 };
+ }
+ }
+}
+
+# print "OS [$^O]\n" ;
+
+# print "O_BINARY = ", O_BINARY(), "\n" ;
+# print "O_RDONLY = ", O_RDONLY(), "\n" ;
+# print "O_WRONLY = ", O_WRONLY(), "\n" ;
+# print "O_APPEND = ", O_APPEND(), "\n" ;
+# print "O_CREAT ", O_CREAT(), "\n" ;
+# print "O_EXCL ", O_EXCL(), "\n" ;
+
+
+*slurp = \&read_file ;
+
+sub read_file {
+
+ my( $file_name, %args ) = @_ ;
+
+# set the buffer to either the passed in one or ours and init it to the null
+# string
+
+ my $buf ;
+ my $buf_ref = $args{'buf_ref'} || \$buf ;
+ ${$buf_ref} = '' ;
+
+ my( $read_fh, $size_left, $blk_size ) ;
+
+# check if we are reading from a handle (glob ref or IO:: object)
+
+ if ( ref $file_name ) {
+
+# slurping a handle so use it and don't open anything.
+# set the block size so we know it is a handle and read that amount
+
+ $read_fh = $file_name ;
+ $blk_size = $args{'blk_size'} || 1024 * 1024 ;
+ $size_left = $blk_size ;
+
+# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
+# glob/handle. only the DATA handle is untainted (since it is from
+# trusted data in the source file). this allows us to test if this is
+# the DATA handle and then to do a sysseek to make sure it gets
+# slurped correctly. on some systems, the buffered i/o pointer is not
+# left at the same place as the fd pointer. this sysseek makes them
+# the same so slurping with sysread will work.
+
+ eval{ require B } ;
+
+ if ( $@ ) {
+
+ @_ = ( \%args, <<ERR ) ;
+Can't find B.pm with this Perl: $!.
+That module is needed to slurp the DATA handle.
+ERR
+ goto &_error ;
+ }
+
+ if ( B::svref_2object( $read_fh )->IO->IoFLAGS & 16 ) {
+
+# set the seek position to the current tell.
+
+ sysseek( $read_fh, tell( $read_fh ), SEEK_SET ) ||
+ croak "sysseek $!" ;
+ }
+ }
+ else {
+
+# a regular file. set the sysopen mode
+
+ my $mode = O_RDONLY ;
+ $mode |= O_BINARY if $args{'binmode'} ;
+
+#printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
+
+# open the file and handle any error
+
+ $read_fh = gensym ;
+ unless ( sysopen( $read_fh, $file_name, $mode ) ) {
+ @_ = ( \%args, "read_file '$file_name' - sysopen: $!");
+ goto &_error ;
+ }
+
+# get the size of the file for use in the read loop
+
+ $size_left = -s $read_fh ;
+
+ unless( $size_left ) {
+
+ $blk_size = $args{'blk_size'} || 1024 * 1024 ;
+ $size_left = $blk_size ;
+ }
+ }
+
+# infinite read loop. we exit when we are done slurping
+
+ while( 1 ) {
+
+# do the read and see how much we got
+
+ my $read_cnt = sysread( $read_fh, ${$buf_ref},
+ $size_left, length ${$buf_ref} ) ;
+
+ if ( defined $read_cnt ) {
+
+# good read. see if we hit EOF (nothing left to read)
+
+ last if $read_cnt == 0 ;
+
+# loop if we are slurping a handle. we don't track $size_left then.
+
+ next if $blk_size ;
+
+# count down how much we read and loop if we have more to read.
+ $size_left -= $read_cnt ;
+ last if $size_left <= 0 ;
+ next ;
+ }
+
+# handle the read error
+
+ @_ = ( \%args, "read_file '$file_name' - sysread: $!");
+ goto &_error ;
+ }
+
+# fix up cr/lf to be a newline if this is a windows text file
+
+ ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$args{'binmode'} ;
+
+# this is the 5 returns in a row. each handles one possible
+# combination of caller context and requested return type
+
+ my $sep = $/ ;
+ $sep = '\n\n+' if defined $sep && $sep eq '' ;
+
+# caller wants to get an array ref of lines
+
+# this split doesn't work since it tries to use variable length lookbehind
+# the m// line works.
+# return [ split( m|(?<=$sep)|, ${$buf_ref} ) ] if $args{'array_ref'} ;
+ return [ length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ]
+ if $args{'array_ref'} ;
+
+# caller wants a list of lines (normal list context)
+
+# same problem with this split as before.
+# return split( m|(?<=$sep)|, ${$buf_ref} ) if wantarray ;
+ return length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : ()
+ if wantarray ;
+
+# caller wants a scalar ref to the slurped text
+
+ return $buf_ref if $args{'scalar_ref'} ;
+
+# caller wants a scalar with the slurped text (normal scalar context)
+
+ return ${$buf_ref} if defined wantarray ;
+
+# caller passed in an i/o buffer by reference (normal void context)
+
+ return ;
+}
+
+
+# error handling section
+#
+# all the error handling uses magic goto so the caller will get the
+# error message as if from their code and not this module. if we just
+# did a call on the error code, the carp/croak would report it from
+# this module since the error sub is one level down on the call stack
+# from read_file/write_file/read_dir.
+
+
+my %err_func = (
+ 'carp' => \&carp,
+ 'croak' => \&croak,
+) ;
+
+sub _error {
+
+ my( $args, $err_msg ) = @_ ;
+
+# get the error function to use
+
+ my $func = $err_func{ $args->{'err_mode'} || 'croak' } ;
+
+# if we didn't find it in our error function hash, they must have set
+# it to quiet and we don't do anything.
+
+ return unless $func ;
+
+# call the carp/croak function
+
+ $func->($err_msg) ;
+
+# return a hard undef (in list context this will be a single value of
+# undef which is not a legal in-band value)
+
+ return undef ;
+}
+
+1;
--- /dev/null
+1000
+
+FS::read_file 1829/s
+FS12::read_file 1879/s
+
+100
+
+FS12::read_file 1943/s
+FS::read_file 2047/s
+
+0
+
+FS12::read_file 2036/s
+FS::read_file 2074/s
+
+after error handling moved up
+
+1000
+
+FS::read_file 1845/s
+FS12::read_file 1896/s
\ No newline at end of file
@EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
@EXPORT_OK = qw( slurp ) ;
-$VERSION = '9999.13';
+$VERSION = '9999.14';
my $is_win32 = $^O =~ /win32/i ;
# appealing BEGIN block:
BEGIN {
- unless( eval { defined SEEK_SET() } ) {
+ unless( defined &SEEK_SET ) {
*SEEK_SET = sub { 0 };
*SEEK_CUR = sub { 1 };
*SEEK_END = sub { 2 };
}
- unless( eval { defined O_BINARY() } ) {
+ unless( defined &O_BINARY ) {
*O_BINARY = sub { 0 };
*O_RDONLY = sub { 0 };
*O_WRONLY = sub { 1 };
}
- unless ( eval { defined O_APPEND() } ) {
+ unless ( defined O_APPEND ) {
if ( $^O =~ /olaris/ ) {
*O_APPEND = sub { 8 };
my( $file_name, %args ) = @_ ;
-# my $file_size = -s $file_name ;
-
- if ( !ref $file_name && -s $file_name < 10000 && ! %args && !wantarray ) {
+ if ( !ref $file_name && 0 &&
+ -e $file_name && -s _ < 10000 && ! %args && !wantarray ) {
local( *FH ) ;
-# open( FH, $file_name ) ;
-
unless( open( FH, $file_name ) ) {
@_ = ( \%args, "read_file '$file_name' - sysopen: $!");
goto &_error ;
}
-#print "OPT\n" and $printed++ unless $printed ;
-
-# sysread( FH, my $buf, -s _ ) ;
-# return $buf ;
-
-# }
my $read_cnt = sysread( FH, my $buf, -s _ ) ;
unless ( defined $read_cnt ) {
# handle the read error
- @_ = ( \%args, "read_file '$file_name' - sysread: $!");
+ @_ = ( \%args,
+ "read_file '$file_name' - small sysread: $!");
goto &_error ;
}
$size_left = -s $read_fh ;
+print "SIZE $size_left\n" ;
+
### TEST
# blk_size is not needed if we have a real file size > 0. for 0 size who cares?
# so test this deletion
}
- if ( $size_left < 10000 && keys %args == 0 && !wantarray ) {
+# if ( $size_left < 10000 && keys %args == 0 && !wantarray ) {
-#print "OPT\n" and $printed++ unless $printed ;
+# #print "OPT\n" and $printed++ unless $printed ;
- my $read_cnt = sysread( $read_fh, my $buf, $size_left ) ;
+# my $read_cnt = sysread( $read_fh, my $buf, $size_left ) ;
- unless ( defined $read_cnt ) {
+# unless ( defined $read_cnt ) {
-# handle the read error
+# # handle the read error
- @_ = ( \%args, "read_file '$file_name' - sysread: $!");
- goto &_error ;
- }
+# @_ = ( \%args, "read_file '$file_name' - small2 sysread: $!");
+# goto &_error ;
+# }
- return $buf ;
- }
+# return $buf ;
+# }
# infinite read loop. we exit when we are done slurping
# handle the read error
- @_ = ( \%args, "read_file '$file_name' - sysread: $!");
+ @_ = ( \%args, "read_file '$file_name' - loop sysread: $!");
goto &_error ;
}
--- /dev/null
+# common.pm - common test driver code
+
+use Test::More ;
+
+sub tester {
+
+ my( $tests ) = @_ ;
+
+use Data::Dumper ;
+
+# plan for one expected ok() call per test
+
+ plan( tests => scalar @{$tests} ) ;
+
+# loop over all the tests
+
+ foreach my $test ( @{$tests} ) {
+
+#print Dumper $test ;
+
+
+ if ( $test->{skip} ) {
+ ok( 1, "SKIPPING $test->{name}" ) ;
+ next ;
+ }
+
+# run any setup sub before this test. this can is used to modify the
+# object for this test (e.g. delete templates from the cache).
+
+ if( my $pretest = $test->{pretest} ) {
+
+ $pretest->($test) ;
+ }
+
+ my $sub = $test->{sub} ;
+ my $args = $test->{args} ;
+
+ my $result = eval {
+ $sub->( @{$args} ) ;
+ } ;
+
+# if we had an error and expected it, we pass this test
+
+ if ( $@ ) {
+
+ if ( $test->{error} && $@ =~ /$test->{error}/ ) {
+
+ ok( 1, $test->{name} ) ;
+ }
+ else {
+
+ print "unexpected error: $@\n" ;
+ ok( 0, $test->{name} ) ;
+ }
+ }
+
+ if( my $posttest = $test->{posttest} ) {
+
+ $posttest->($test) ;
+ }
+ }
+}
+
+1 ;
--- /dev/null
+# driver.pm - common test driver code
+
+use Test::More ;
+
+BEGIN {
+ *CORE::GLOBAL::syswrite =
+ sub(*\$$;$) { my( $h, $b, $s ) = @_; CORE::syswrite $h, $b, $s } ;
+
+ *CORE::GLOBAL::sysread =
+ sub(*\$$;$) { my( $h, $b, $s ) = @_; CORE::sysread $h, $b, $s } ;
+}
+
+sub test_driver {
+
+ my( $tests ) = @_ ;
+
+use Data::Dumper ;
+
+# plan for one expected ok() call per test
+
+ plan( tests => scalar @{$tests} ) ;
+
+# loop over all the tests
+
+ foreach my $test ( @{$tests} ) {
+
+#print Dumper $test ;
+
+ if ( $test->{skip} ) {
+ ok( 1, "SKIPPING $test->{name}" ) ;
+ next ;
+ }
+
+ my $override = $test->{override} ;
+
+# run any setup sub before this test. this can is used to modify the
+# object for this test (e.g. delete templates from the cache).
+
+ if( my $pretest = $test->{pretest} ) {
+
+ $pretest->($test) ;
+ }
+
+ my $sub = $test->{sub} ;
+ my $args = $test->{args} ;
+
+local( $^W) ;
+ local *{"CORE::GLOBAL::$override"} = sub {} if $override ;
+
+ my $result = eval {
+ $sub->( @{$args} ) ;
+ } ;
+
+# if we had an error and expected it, we pass this test
+
+ if ( $@ ) {
+
+ if ( $test->{error} && $@ =~ /$test->{error}/ ) {
+
+ ok( 1, $test->{name} ) ;
+#print "ERR [$@]\n" ;
+ }
+ else {
+
+ print "unexpected error: $@\n" ;
+ ok( 0, $test->{name} ) ;
+ }
+ }
+
+ if( my $posttest = $test->{posttest} ) {
+
+ $posttest->($test) ;
+ }
+ }
+}
+
+1 ;
##!/usr/local/bin/perl -w
use lib qw(t) ;
-
use strict ;
-use File::Slurp qw( :all ) ;
+use driver ;
-use common ;
+use File::Slurp qw( :all ) ;
my $file_name = 'test_file' ;
my $dir_name = 'test_dir' ;
args => [ $file_name ],
error => qr/open/,
+
+ skip => 1,
},
{
},
error => qr/open/,
+ skip => 1,
+ },
+
+ {
+ name => 'write_file syswrite error',
+ sub => \&write_file,
+ args => [ $file_name, '' ],
+ override => 'syswrite',
+
+ posttest => sub {
+ unlink( $file_name ) ;
+ },
+
+
+ error => qr/write/,
+ skip => 1,
+ },
+
+ {
+ name => 'read_file small sysread error',
+ sub => \&read_file,
+ args => [ $file_name ],
+ override => 'sysread',
+
+ pretest => sub {
+ write_file( $file_name, '' ) ;
+ },
+
+ posttest => sub {
+ unlink( $file_name ) ;
+ },
+
+
+ error => qr/read/,
+ },
+
+ {
+ name => 'read_file loop sysread error',
+ sub => \&read_file,
+ args => [ $file_name ],
+ override => 'sysread',
+
+ pretest => sub {
+ write_file( $file_name, 'x' x 100_000 ) ;
+ },
+
+ posttest => sub {
+ unlink( $file_name ) ;
+ },
+
+
+ error => qr/read/,
},
{
},
error => qr/rename/,
+ skip => 1,
},
{
- name => 'read_dir open error',
+ name => 'read_dir opendir error',
sub => \&read_dir,
args => [ $dir_name ],
error => qr/open/,
+ skip => 1,
},
-
] ;
-tester( $tests ) ;
+test_driver( $tests ) ;
exit ;
use Test::More tests => 9 ;
my $file = 'missing/file' ;
-unlink $file ;
+#unlink $file ;
my %modes = (
--- /dev/null
+
+print prototype( 'CORE::sysread' ), "\n" ;
+
+BEGIN {
+
+ *CORE::GLOBAL::time = sub { CORE::time };
+}
+
+print time(), "\n" ;
+
+BEGIN{
+local *CORE::GLOBAL::time = sub { 123 };
+
+print time(), "\n" ;
+}
+
+print time(), "\n" ;
use Carp ;
use Test::More ;
-plan( tests => 1 ) ;
+plan( tests => 1 ) ;
my $proc_file = "/proc/$$/auxv" ;
my $data_do = do{ local( @ARGV, $/ ) = $proc_file; <> } ;
+print "LEN: ", length $data_do, "\n" ;
+
my $data_slurp = read_file( $proc_file ) ;
+print "LEN2: ", length $data_slurp, "\n" ;
+print "LEN3: ", -s $proc_file, "\n" ;
is( $data_do, $data_slurp, 'pseudo' ) ;
}