8 use Fcntl qw( :DEFAULT ) ;
9 use POSIX qw( :fcntl_h ) ;
13 use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT ) ;
15 %EXPORT_TAGS = ( 'all' => [
16 qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;
18 @EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
19 @EXPORT_OK = qw( slurp ) ;
23 my $is_win32 = $^O =~ /win32/i ;
25 # Install subs for various constants that aren't set in older perls
26 # (< 5.005). Fcntl on old perls uses Exporter to define subs without a
27 # () prototype These can't be overridden with the constant pragma or
28 # we get a prototype mismatch. Hence this less than aesthetically
29 # appealing BEGIN block:
32 unless( eval { defined SEEK_SET() } ) {
33 *SEEK_SET = sub { 0 };
34 *SEEK_CUR = sub { 1 };
35 *SEEK_END = sub { 2 };
38 unless( eval { defined O_BINARY() } ) {
39 *O_BINARY = sub { 0 };
40 *O_RDONLY = sub { 0 };
41 *O_WRONLY = sub { 1 };
44 unless ( eval { defined O_APPEND() } ) {
46 if ( $^O =~ /olaris/ ) {
47 *O_APPEND = sub { 8 };
48 *O_CREAT = sub { 256 };
49 *O_EXCL = sub { 1024 };
51 elsif ( $^O =~ /inux/ ) {
52 *O_APPEND = sub { 1024 };
53 *O_CREAT = sub { 64 };
54 *O_EXCL = sub { 128 };
56 elsif ( $^O =~ /BSD/i ) {
57 *O_APPEND = sub { 8 };
58 *O_CREAT = sub { 512 };
59 *O_EXCL = sub { 2048 };
64 # print "OS [$^O]\n" ;
66 # print "O_BINARY = ", O_BINARY(), "\n" ;
67 # print "O_RDONLY = ", O_RDONLY(), "\n" ;
68 # print "O_WRONLY = ", O_WRONLY(), "\n" ;
69 # print "O_APPEND = ", O_APPEND(), "\n" ;
70 # print "O_CREAT ", O_CREAT(), "\n" ;
71 # print "O_EXCL ", O_EXCL(), "\n" ;
74 *slurp = \&read_file ;
78 my( $file_name, %args ) = @_ ;
80 # my $file_size = -s $file_name ;
82 if ( !ref $file_name && -s $file_name < 10000 && ! %args && !wantarray ) {
86 # open( FH, $file_name ) ;
88 unless( open( FH, $file_name ) ) {
90 @_ = ( \%args, "read_file '$file_name' - sysopen: $!");
94 #print "OPT\n" and $printed++ unless $printed ;
96 # sysread( FH, my $buf, -s _ ) ;
100 my $read_cnt = sysread( FH, my $buf, -s _ ) ;
102 unless ( defined $read_cnt ) {
104 # handle the read error
106 @_ = ( \%args, "read_file '$file_name' - sysread: $!");
113 # set the buffer to either the passed in one or ours and init it to the null
117 my $buf_ref = $args{'buf_ref'} || \$buf ;
120 my( $read_fh, $size_left, $blk_size ) ;
122 # check if we are reading from a handle (glob ref or IO:: object)
124 if ( ref $file_name ) {
126 # slurping a handle so use it and don't open anything.
127 # set the block size so we know it is a handle and read that amount
129 $read_fh = $file_name ;
130 $blk_size = $args{'blk_size'} || 1024 * 1024 ;
131 $size_left = $blk_size ;
133 # DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
134 # glob/handle. only the DATA handle is untainted (since it is from
135 # trusted data in the source file). this allows us to test if this is
136 # the DATA handle and then to do a sysseek to make sure it gets
137 # slurped correctly. on some systems, the buffered i/o pointer is not
138 # left at the same place as the fd pointer. this sysseek makes them
139 # the same so slurping with sysread will work.
145 @_ = ( \%args, <<ERR ) ;
146 Can't find B.pm with this Perl: $!.
147 That module is needed to slurp the DATA handle.
152 if ( B::svref_2object( $read_fh )->IO->IoFLAGS & 16 ) {
154 # set the seek position to the current tell.
156 sysseek( $read_fh, tell( $read_fh ), SEEK_SET ) ||
162 # a regular file. set the sysopen mode
164 my $mode = O_RDONLY ;
165 $mode |= O_BINARY if $args{'binmode'} ;
167 #printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
169 # open the file and handle any error
172 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
173 @_ = ( \%args, "read_file '$file_name' - sysopen: $!");
177 # get the size of the file for use in the read loop
179 $size_left = -s $read_fh ;
182 # blk_size is not needed if we have a real file size > 0. for 0 size who cares?
183 # so test this deletion
185 # unless( $size_left ) {
187 # $blk_size = $args{'blk_size'} || 1024 * 1024 ;
188 # $size_left = $blk_size ;
193 if ( $size_left < 10000 && keys %args == 0 && !wantarray ) {
195 #print "OPT\n" and $printed++ unless $printed ;
197 my $read_cnt = sysread( $read_fh, my $buf, $size_left ) ;
199 unless ( defined $read_cnt ) {
201 # handle the read error
203 @_ = ( \%args, "read_file '$file_name' - sysread: $!");
210 # infinite read loop. we exit when we are done slurping
214 # do the read and see how much we got
216 my $read_cnt = sysread( $read_fh, ${$buf_ref},
217 $size_left, length ${$buf_ref} ) ;
219 unless ( defined $read_cnt ) {
221 # handle the read error
223 @_ = ( \%args, "read_file '$file_name' - sysread: $!");
227 # good read. see if we hit EOF (nothing left to read)
229 last if $read_cnt == 0 ;
231 # loop if we are slurping a handle. we don't track $size_left then.
235 # count down how much we read and loop if we have more to read.
237 $size_left -= $read_cnt ;
238 last if $size_left <= 0 ;
241 # fix up cr/lf to be a newline if this is a windows text file
243 ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$args{'binmode'} ;
245 # this is the 5 returns in a row. each handles one possible
246 # combination of caller context and requested return type
249 $sep = '\n\n+' if defined $sep && $sep eq '' ;
251 # caller wants to get an array ref of lines
253 # this split doesn't work since it tries to use variable length lookbehind
254 # the m// line works.
255 # return [ split( m|(?<=$sep)|, ${$buf_ref} ) ] if $args{'array_ref'} ;
256 return [ length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ]
257 if $args{'array_ref'} ;
259 # caller wants a list of lines (normal list context)
261 # same problem with this split as before.
262 # return split( m|(?<=$sep)|, ${$buf_ref} ) if wantarray ;
263 return length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : ()
266 # caller wants a scalar ref to the slurped text
268 return $buf_ref if $args{'scalar_ref'} ;
270 # caller wants a scalar with the slurped text (normal scalar context)
272 return ${$buf_ref} if defined wantarray ;
274 # caller passed in an i/o buffer by reference (normal void context)
281 my $file_name = shift ;
283 # get the optional argument hash ref from @_ or an empty hash ref.
285 my $args = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
287 my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
289 # get the buffer ref - it depends on how the data is passed into write_file
290 # after this if/else $buf_ref will have a scalar ref to the data.
292 if ( ref $args->{'buf_ref'} eq 'SCALAR' ) {
294 # a scalar ref passed in %args has the data
295 # note that the data was passed by ref
297 $buf_ref = $args->{'buf_ref'} ;
300 elsif ( ref $_[0] eq 'SCALAR' ) {
302 # the first value in @_ is the scalar ref to the data
303 # note that the data was passed by ref
308 elsif ( ref $_[0] eq 'ARRAY' ) {
310 # the first value in @_ is the array ref to the data so join it.
312 ${$buf_ref} = join '', @{$_[0]} ;
316 # good old @_ has all the data so join it.
318 ${$buf_ref} = join '', @_ ;
321 # see if we were passed a open handle to spew to.
323 if ( ref $file_name ) {
325 # we have a handle. make sure we don't call truncate on it.
327 $write_fh = $file_name ;
332 # spew to regular file.
334 if ( $args->{'atomic'} ) {
336 # in atomic mode, we spew to a temp file so make one and save the original
338 $orig_file_name = $file_name ;
339 $file_name .= ".$$" ;
342 # set the mode for the sysopen
344 my $mode = O_WRONLY | O_CREAT ;
345 $mode |= O_BINARY if $args->{'binmode'} ;
346 $mode |= O_APPEND if $args->{'append'} ;
347 $mode |= O_EXCL if $args->{'no_clobber'} ;
349 #printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
351 # open the file and handle any error.
354 unless ( sysopen( $write_fh, $file_name, $mode ) ) {
355 @_ = ( $args, "write_file '$file_name' - sysopen: $!");
360 sysseek( $write_fh, 0, SEEK_END ) if $args->{'append'} ;
363 #print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
365 # fix up newline to write cr/lf if this is a windows text file
367 if ( $is_win32 && !$args->{'binmode'} ) {
369 # copy the write data if it was passed by ref so we don't clobber the
371 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
372 ${$buf_ref} =~ s/\n/\015\012/g ;
375 #print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
377 # get the size of how much we are writing and init the offset into that buffer
379 my $size_left = length( ${$buf_ref} ) ;
382 # loop until we have no more data left to write
386 # do the write and track how much we just wrote
388 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
389 $size_left, $offset ) ;
391 unless ( defined $write_cnt ) {
394 @_ = ( $args, "write_file '$file_name' - syswrite: $!");
398 # track much left to write and where to write from in the buffer
400 $size_left -= $write_cnt ;
401 $offset += $write_cnt ;
403 } while( $size_left > 0 ) ;
405 # we truncate regular files in case we overwrite a long file with a shorter file
406 # so seek to the current position to get it (same as tell()).
409 sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
413 # handle the atomic mode - move the temp file to the original filename.
415 if ( $args->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
418 @_ = ( $args, "write_file '$file_name' - rename: $!" ) ;
425 # this is for backwards compatibility with the previous File::Slurp module.
426 # write_file always overwrites an existing file
428 *overwrite_file = \&write_file ;
430 # the current write_file has an append mode so we use that. this
431 # supports the same API with an optional second argument which is a
432 # hash ref of options.
436 # get the optional args hash ref
438 if ( ref $args eq 'HASH' ) {
440 # we were passed an args ref so just mark the append mode
442 $args->{append} = 1 ;
446 # no args hash so insert one with the append mode
448 splice( @_, 1, 0, { append => 1 } ) ;
451 # magic goto the main write_file sub. this overlays the sub without touching
457 # basic wrapper around opendir/readdir
461 my ($dir, %args ) = @_;
463 # this handle will be destroyed upon return
467 # open the dir and handle any errors
469 unless ( opendir( DIRH, $dir ) ) {
471 @_ = ( \%args, "read_dir '$dir' - opendir: $!" ) ;
475 my @dir_entries = readdir(DIRH) ;
477 @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
478 unless $args{'keep_dot_dot'} ;
480 return @dir_entries if wantarray ;
481 return \@dir_entries ;
484 # error handling section
486 # all the error handling uses magic goto so the caller will get the
487 # error message as if from their code and not this module. if we just
488 # did a call on the error code, the carp/croak would report it from
489 # this module since the error sub is one level down on the call stack
490 # from read_file/write_file/read_dir.
500 my( $args, $err_msg ) = @_ ;
502 # get the error function to use
504 my $func = $err_func{ $args->{'err_mode'} || 'croak' } ;
506 # if we didn't find it in our error function hash, they must have set
507 # it to quiet and we don't do anything.
509 return unless $func ;
511 # call the carp/croak function
515 # return a hard undef (in list context this will be a single value of
516 # undef which is not a legal in-band value)
526 File::Slurp - Efficient Reading/Writing of Complete Files
532 my $text = read_file( 'filename' ) ;
533 my @lines = read_file( 'filename' ) ;
535 write_file( 'filename', @lines ) ;
537 use File::Slurp qw( slurp ) ;
539 my $text = slurp( 'filename' ) ;
544 This module provides subs that allow you to read or write entire files
545 with one simple call. They are designed to be simple to use, have
546 flexible ways to pass in or get the file contents and to be very
547 efficient. There is also a sub to read in all the files in a
548 directory other than C<.> and C<..>
550 These slurp/spew subs work for files, pipes and
551 sockets, and stdio, pseudo-files, and DATA.
555 This sub reads in an entire file and returns its contents to the
556 caller. In list context it will return a list of lines (using the
557 current value of $/ as the separator including support for paragraph
558 mode when it is set to ''). In scalar context it returns the entire
559 file as a single scalar.
561 my $text = read_file( 'filename' ) ;
562 my @lines = read_file( 'filename' ) ;
564 The first argument to C<read_file> is the filename and the rest of the
565 arguments are key/value pairs which are optional and which modify the
566 behavior of the call. Other than binmode the options all control how
567 the slurped file is returned to the caller.
569 If the first argument is a file handle reference or I/O object (if ref
570 is true), then that handle is slurped in. This mode is supported so
571 you slurp handles such as C<DATA>, C<STDIN>. See the test handle.t
572 for an example that does C<open( '-|' )> and child process spews data
573 to the parant which slurps it in. All of the options that control how
574 the data is returned to the caller still work in this case.
576 NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
577 handle. It used to need a sysseek workaround but that is now handled
578 when needed by the module itself.
580 You can optionally request that C<slurp()> is exported to your code. This
581 is an alias for read_file and is meant to be forward compatible with
582 Perl 6 (which will have slurp() built-in).
588 If you set the binmode option, then the file will be slurped in binary
591 my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
593 NOTE: this actually sets the O_BINARY mode flag for sysopen. It
594 probably should call binmode and pass its argument to support other
599 If this boolean option is set, the return value (only in scalar
600 context) will be an array reference which contains the lines of the
601 slurped file. The following two calls are equivalent:
603 my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
604 my $lines_ref = [ read_file( $bin_file ) ] ;
608 If this boolean option is set, the return value (only in scalar
609 context) will be an scalar reference to a string which is the contents
610 of the slurped file. This will usually be faster than returning the
613 my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
617 You can use this option to pass in a scalar reference and the slurped
618 file contents will be stored in the scalar. This can be used in
619 conjunction with any of the other options.
621 my $text_ref = read_file( $bin_file, buf_ref => \$buffer,
623 my @lines = read_file( $bin_file, buf_ref => \$buffer ) ;
627 You can use this option to set the block size used when slurping from an already open handle (like \*STDIN). It defaults to 1MB.
629 my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
634 You can use this option to control how read_file behaves when an error
635 occurs. This option defaults to 'croak'. You can set it to 'carp' or
636 to 'quiet to have no error handling. This code wants to carp and then
637 read abother file if it fails.
639 my $text_ref = read_file( $file, err_mode => 'carp' ) ;
640 unless ( $text_ref ) {
642 # read a different file but croak if not found
643 $text_ref = read_file( $another_file ) ;
646 # process ${$text_ref}
650 This sub writes out an entire file in one call.
652 write_file( 'filename', @data ) ;
654 The first argument to C<write_file> is the filename. The next argument
655 is an optional hash reference and it contains key/values that can
656 modify the behavior of C<write_file>. The rest of the argument list is
657 the data to be written to the file.
659 write_file( 'filename', {append => 1 }, @data ) ;
660 write_file( 'filename', {binmode => ':raw' }, $buffer ) ;
662 As a shortcut if the first data argument is a scalar or array
663 reference, it is used as the only data to be written to the file. Any
664 following arguments in @_ are ignored. This is a faster way to pass in
665 the output to be written to the file and is equivilent to the
666 C<buf_ref> option. These following pairs are equivilent but the pass
667 by reference call will be faster in most cases (especially with larger
670 write_file( 'filename', \$buffer ) ;
671 write_file( 'filename', $buffer ) ;
673 write_file( 'filename', \@lines ) ;
674 write_file( 'filename', @lines ) ;
676 If the first argument is a file handle reference or I/O object (if ref
677 is true), then that handle is slurped in. This mode is supported so
678 you spew to handles such as \*STDOUT. See the test handle.t for an
679 example that does C<open( '-|' )> and child process spews data to the
680 parant which slurps it in. All of the options that control how the
681 data is passes into C<write_file> still work in this case.
683 C<write_file> returns 1 upon successfully writing the file or undef if
684 it encountered an error.
690 If you set the binmode option, then the file will be written in binary
693 write_file( $bin_file, {binmode => ':raw'}, @data ) ;
695 NOTE: this actually sets the O_BINARY mode flag for sysopen. It
696 probably should call binmode and pass its argument to support other
701 You can use this option to pass in a scalar reference which has the
702 data to be written. If this is set then any data arguments (including
703 the scalar reference shortcut) in @_ will be ignored. These are
706 write_file( $bin_file, { buf_ref => \$buffer } ) ;
707 write_file( $bin_file, \$buffer ) ;
708 write_file( $bin_file, $buffer ) ;
712 If you set this boolean option, the file will be written to in an
713 atomic fashion. A temporary file name is created by appending the pid
714 ($$) to the file name argument and that file is spewed to. After the
715 file is closed it is renamed to the original file name (and rename is
716 an atomic operation on most OS's). If the program using this were to
717 crash in the middle of this, then the file with the pid suffix could
722 If you set this boolean option, the data will be written at the end of
725 write_file( $file, {append => 1}, @data ) ;
727 C<write_file> croaks if it cannot open the file. It returns true if it
728 succeeded in writing out the file and undef if there was an
729 error. (Yes, I know if it croaks it can't return anything but that is
730 for when I add the options to select the error handling mode).
734 If you set this boolean option, an existing file will not be overwritten.
736 write_file( $file, {no_clobber => 1}, @data ) ;
740 You can use this option to control how C<write_file> behaves when an
741 error occurs. This option defaults to 'croak'. You can set it to
742 'carp' or to 'quiet' to have no error handling other than the return
743 value. If the first call to C<write_file> fails it will carp and then
744 write to another file. If the second call to C<write_file> fails, it
747 unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
749 # write a different file but croak if not found
750 write_file( $other_file, \$data ) ;
753 =head2 overwrite_file
755 This sub is just a typeglob alias to write_file since write_file
756 always overwrites an existing file. This sub is supported for
757 backwards compatibility with the original version of this module. See
758 write_file for its API and behavior.
762 This sub will write its data to the end of the file. It is a wrapper
763 around write_file and it has the same API so see that for the full
764 documentation. These calls are equivilent:
766 append_file( $file, @data ) ;
767 write_file( $file, {append => 1}, @data ) ;
771 This sub reads all the file names from directory and returns them to
772 the caller but C<.> and C<..> are removed by default.
774 my @files = read_dir( '/path/to/dir' ) ;
776 It croaks if it cannot open the directory.
778 In a list context C<read_dir> returns a list of the entries in the
779 directory. In a scalar context it returns an array reference which has
784 If this boolean option is set, C<.> and C<..> are not removed from the
787 my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
791 read_file write_file overwrite_file append_file read_dir
795 An article on file slurping in extras/slurp_article.pod. There is
796 also a benchmarking script in extras/slurp_bench.pl.
800 If run under Perl 5.004, slurping from the DATA handle will fail as
801 that requires B.pm which didn't get into core until 5.005.
805 Uri Guttman, E<lt>uri@stemsystems.comE<gt>