changed version number
[urisagit/Perl-Docs.git] / lib / File / Slurp.pm
CommitLineData
635c7876 1package File::Slurp;
2
e2c51d31 3my $printed ;
4
635c7876 5use strict;
6
7use Carp ;
b3b7ff4e 8use Exporter ;
635c7876 9use Fcntl qw( :DEFAULT ) ;
e2c51d31 10use POSIX qw( :fcntl_h ) ;
635c7876 11use Symbol ;
b3b7ff4e 12use UNIVERSAL ;
635c7876 13
b3b7ff4e 14use vars qw( @ISA %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT ) ;
15@ISA = qw( Exporter ) ;
e2c51d31 16
17%EXPORT_TAGS = ( 'all' => [
18 qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;
19
20@EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
b3b7ff4e 21@EXPORT_OK = qw( slurp prepend_file ) ;
e2c51d31 22
5aaacad5 23$VERSION = '9999.16';
e2c51d31 24
b3b7ff4e 25my $max_fast_slurp_size = 1024 * 100 ;
9aab46ab 26
635c7876 27my $is_win32 = $^O =~ /win32/i ;
28
29# Install subs for various constants that aren't set in older perls
30# (< 5.005). Fcntl on old perls uses Exporter to define subs without a
31# () prototype These can't be overridden with the constant pragma or
32# we get a prototype mismatch. Hence this less than aesthetically
33# appealing BEGIN block:
34
35BEGIN {
8ed110f9 36 unless( defined &SEEK_SET ) {
635c7876 37 *SEEK_SET = sub { 0 };
38 *SEEK_CUR = sub { 1 };
39 *SEEK_END = sub { 2 };
40 }
41
8ed110f9 42 unless( defined &O_BINARY ) {
635c7876 43 *O_BINARY = sub { 0 };
44 *O_RDONLY = sub { 0 };
45 *O_WRONLY = sub { 1 };
46 }
47
f02156f2 48 unless ( defined &O_APPEND ) {
635c7876 49
50 if ( $^O =~ /olaris/ ) {
51 *O_APPEND = sub { 8 };
52 *O_CREAT = sub { 256 };
53 *O_EXCL = sub { 1024 };
54 }
55 elsif ( $^O =~ /inux/ ) {
56 *O_APPEND = sub { 1024 };
57 *O_CREAT = sub { 64 };
58 *O_EXCL = sub { 128 };
59 }
60 elsif ( $^O =~ /BSD/i ) {
61 *O_APPEND = sub { 8 };
62 *O_CREAT = sub { 512 };
63 *O_EXCL = sub { 2048 };
64 }
65 }
66}
67
68# print "OS [$^O]\n" ;
69
70# print "O_BINARY = ", O_BINARY(), "\n" ;
71# print "O_RDONLY = ", O_RDONLY(), "\n" ;
72# print "O_WRONLY = ", O_WRONLY(), "\n" ;
73# print "O_APPEND = ", O_APPEND(), "\n" ;
74# print "O_CREAT ", O_CREAT(), "\n" ;
75# print "O_EXCL ", O_EXCL(), "\n" ;
76
635c7876 77
78*slurp = \&read_file ;
79
80sub read_file {
81
b3b7ff4e 82 my $file_name = shift ;
83 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
635c7876 84
8ed110f9 85 if ( !ref $file_name && 0 &&
b3b7ff4e 86 -e $file_name && -s _ < $max_fast_slurp_size && ! %{$opts} && !wantarray ) {
e2c51d31 87
88 local( *FH ) ;
89
e2c51d31 90 unless( open( FH, $file_name ) ) {
91
b3b7ff4e 92 @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
e2c51d31 93 goto &_error ;
94 }
95
e2c51d31 96 my $read_cnt = sysread( FH, my $buf, -s _ ) ;
97
98 unless ( defined $read_cnt ) {
99
100# handle the read error
101
b3b7ff4e 102 @_ = ( $opts,
8ed110f9 103 "read_file '$file_name' - small sysread: $!");
e2c51d31 104 goto &_error ;
105 }
106
107 return $buf ;
108 }
109
635c7876 110# set the buffer to either the passed in one or ours and init it to the null
111# string
112
113 my $buf ;
b3b7ff4e 114 my $buf_ref = $opts->{'buf_ref'} || \$buf ;
635c7876 115 ${$buf_ref} = '' ;
116
117 my( $read_fh, $size_left, $blk_size ) ;
118
b3b7ff4e 119# deal with ref for a file name
120# it could be an open handle or an overloaded object
635c7876 121
122 if ( ref $file_name ) {
123
b3b7ff4e 124 my $ref_result = _check_ref( $file_name ) ;
635c7876 125
b3b7ff4e 126 if ( ref $ref_result ) {
635c7876 127
b3b7ff4e 128# we got an error, deal with it
635c7876 129
b3b7ff4e 130 @_ = ( $opts, $ref_result ) ;
635c7876 131 goto &_error ;
132 }
133
b3b7ff4e 134 if ( $ref_result ) {
635c7876 135
b3b7ff4e 136# we got an overloaded object and the result is the stringified value
137# use it as the file name
138
139 $file_name = $ref_result ;
140 }
141 else {
142
143# here we have just an open handle. set $read_fh so we don't do a sysopen
635c7876 144
b3b7ff4e 145 $read_fh = $file_name ;
146 $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
147 $size_left = $blk_size ;
635c7876 148 }
149 }
b3b7ff4e 150
151# see if we have a path we need to open
152
153 unless ( $read_fh ) {
635c7876 154
155# a regular file. set the sysopen mode
156
157 my $mode = O_RDONLY ;
635c7876 158
159#printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
160
161# open the file and handle any error
162
163 $read_fh = gensym ;
164 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
b3b7ff4e 165 @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
635c7876 166 goto &_error ;
167 }
168
b3b7ff4e 169 if ( my $binmode = $opts->{'binmode'} ) {
cee624ab 170 binmode( $read_fh, $binmode ) ;
171 }
172
635c7876 173# get the size of the file for use in the read loop
174
175 $size_left = -s $read_fh ;
176
f9940db7 177#print "SIZE $size_left\n" ;
8ed110f9 178
635c7876 179
f9940db7 180# we need a blk_size if the size is 0 so we can handle pseudofiles like in
181# /proc. these show as 0 size but have data to be slurped.
182
183 unless( $size_left ) {
184
b3b7ff4e 185 $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
f9940db7 186 $size_left = $blk_size ;
187 }
e2c51d31 188 }
189
190
b3b7ff4e 191# if ( $size_left < 10000 && keys %{$opts} == 0 && !wantarray ) {
e2c51d31 192
8ed110f9 193# #print "OPT\n" and $printed++ unless $printed ;
e2c51d31 194
8ed110f9 195# my $read_cnt = sysread( $read_fh, my $buf, $size_left ) ;
e2c51d31 196
8ed110f9 197# unless ( defined $read_cnt ) {
e2c51d31 198
8ed110f9 199# # handle the read error
e2c51d31 200
b3b7ff4e 201# @_ = ( $opts, "read_file '$file_name' - small2 sysread: $!");
8ed110f9 202# goto &_error ;
203# }
e2c51d31 204
8ed110f9 205# return $buf ;
206# }
635c7876 207
208# infinite read loop. we exit when we are done slurping
209
210 while( 1 ) {
211
212# do the read and see how much we got
213
214 my $read_cnt = sysread( $read_fh, ${$buf_ref},
215 $size_left, length ${$buf_ref} ) ;
216
e2c51d31 217 unless ( defined $read_cnt ) {
218
219# handle the read error
220
b3b7ff4e 221 @_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
e2c51d31 222 goto &_error ;
223 }
635c7876 224
225# good read. see if we hit EOF (nothing left to read)
226
e2c51d31 227 last if $read_cnt == 0 ;
635c7876 228
229# loop if we are slurping a handle. we don't track $size_left then.
230
e2c51d31 231 next if $blk_size ;
635c7876 232
233# count down how much we read and loop if we have more to read.
635c7876 234
e2c51d31 235 $size_left -= $read_cnt ;
236 last if $size_left <= 0 ;
635c7876 237 }
238
239# fix up cr/lf to be a newline if this is a windows text file
240
b3b7ff4e 241 ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
635c7876 242
243# this is the 5 returns in a row. each handles one possible
244# combination of caller context and requested return type
245
246 my $sep = $/ ;
247 $sep = '\n\n+' if defined $sep && $sep eq '' ;
248
b3b7ff4e 249# see if caller wants lines
250
251 if( wantarray || $opts->{'array_ref'} ) {
635c7876 252
b3b7ff4e 253 my @parts = split m/($sep)/, ${$buf_ref}, -1;
635c7876 254
b3b7ff4e 255 my @lines ;
635c7876 256
b3b7ff4e 257 while( @parts > 2 ) {
258
259 my( $line, $sep ) = splice( @parts, 0, 2 ) ;
260 push @lines, "$line$sep" ;
261 }
262
263 push @lines, shift @parts if @parts && length $parts[0] ;
264
265 return \@lines if $opts->{'array_ref'} ;
266 return @lines ;
267 }
635c7876 268
269# caller wants a scalar ref to the slurped text
270
b3b7ff4e 271 return $buf_ref if $opts->{'scalar_ref'} ;
635c7876 272
273# caller wants a scalar with the slurped text (normal scalar context)
274
275 return ${$buf_ref} if defined wantarray ;
276
277# caller passed in an i/o buffer by reference (normal void context)
278
279 return ;
b3b7ff4e 280
281
282# # caller wants to get an array ref of lines
283
284# # this split doesn't work since it tries to use variable length lookbehind
285# # the m// line works.
286# # return [ split( m|(?<=$sep)|, ${$buf_ref} ) ] if $opts->{'array_ref'} ;
287# return [ length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ]
288# if $opts->{'array_ref'} ;
289
290# # caller wants a list of lines (normal list context)
291
292# # same problem with this split as before.
293# # return split( m|(?<=$sep)|, ${$buf_ref} ) if wantarray ;
294# return length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : ()
295# if wantarray ;
296
297# # caller wants a scalar ref to the slurped text
298
299# return $buf_ref if $opts->{'scalar_ref'} ;
300
301# # caller wants a scalar with the slurped text (normal scalar context)
302
303# return ${$buf_ref} if defined wantarray ;
304
305# # caller passed in an i/o buffer by reference (normal void context)
306
307# return ;
635c7876 308}
309
b3b7ff4e 310
311# errors in this sub are returned as scalar refs
312# a normal IO/GLOB handle is an empty return
313# an overloaded object returns its stringified as a scalarfilename
314
315sub _check_ref {
316
317 my( $handle ) = @_ ;
318
319# check if we are reading from a handle (GLOB or IO object)
320
321 if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) {
322
323# we have a handle. deal with seeking to it if it is DATA
324
325 my $err = _seek_data_handle( $handle ) ;
326
327# return the error string if any
328
329 return \$err if $err ;
330
331# we have good handle
332 return ;
333 }
334
335 eval { require overload } ;
336
337# return an error if we can't load the overload pragma
338# or if the object isn't overloaded
339
340 return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
341 if $@ || !overload::Overloaded( $handle ) ;
342
343# must be overloaded so return its stringified value
344
345 return "$handle" ;
346}
347
348sub _seek_data_handle {
349
350 my( $handle ) = @_ ;
351
352# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
353# glob/handle. only the DATA handle is untainted (since it is from
354# trusted data in the source file). this allows us to test if this is
355# the DATA handle and then to do a sysseek to make sure it gets
356# slurped correctly. on some systems, the buffered i/o pointer is not
357# left at the same place as the fd pointer. this sysseek makes them
358# the same so slurping with sysread will work.
359
360 eval{ require B } ;
361
362 if ( $@ ) {
363
364 return <<ERR ;
365Can't find B.pm with this Perl: $!.
366That module is needed to properly slurp the DATA handle.
367ERR
368 }
369
370 if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
371
372# set the seek position to the current tell.
373
374 unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
375 return "read_file '$handle' - sysseek: $!" ;
376 }
377 }
378
379# seek was successful, return no error string
380
381 return ;
382}
383
384
635c7876 385sub write_file {
386
387 my $file_name = shift ;
388
389# get the optional argument hash ref from @_ or an empty hash ref.
390
b3b7ff4e 391 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
635c7876 392
393 my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
394
395# get the buffer ref - it depends on how the data is passed into write_file
396# after this if/else $buf_ref will have a scalar ref to the data.
397
b3b7ff4e 398 if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
635c7876 399
b3b7ff4e 400# a scalar ref passed in %opts has the data
635c7876 401# note that the data was passed by ref
402
b3b7ff4e 403 $buf_ref = $opts->{'buf_ref'} ;
635c7876 404 $data_is_ref = 1 ;
405 }
406 elsif ( ref $_[0] eq 'SCALAR' ) {
407
408# the first value in @_ is the scalar ref to the data
409# note that the data was passed by ref
410
411 $buf_ref = shift ;
412 $data_is_ref = 1 ;
413 }
414 elsif ( ref $_[0] eq 'ARRAY' ) {
415
416# the first value in @_ is the array ref to the data so join it.
417
418 ${$buf_ref} = join '', @{$_[0]} ;
419 }
420 else {
421
422# good old @_ has all the data so join it.
423
424 ${$buf_ref} = join '', @_ ;
425 }
426
b3b7ff4e 427# deal with ref for a file name
635c7876 428
429 if ( ref $file_name ) {
430
b3b7ff4e 431 my $ref_result = _check_ref( $file_name ) ;
432
433 if ( ref $ref_result ) {
635c7876 434
b3b7ff4e 435# we got an error, deal with it
436
437 @_ = ( $opts, $ref_result ) ;
438 goto &_error ;
439 }
440
441 if ( $ref_result ) {
442
443# we got an overloaded object and the result is the stringified value
444# use it as the file name
445
446 $file_name = $ref_result ;
447 }
448 else {
449
450# we now have a proper handle ref.
451# make sure we don't call truncate on it.
452
453 $write_fh = $file_name ;
454 $no_truncate = 1 ;
455 }
635c7876 456 }
b3b7ff4e 457
458# see if we have a path we need to open
459
460 unless( $write_fh ) {
635c7876 461
462# spew to regular file.
463
b3b7ff4e 464 if ( $opts->{'atomic'} ) {
635c7876 465
466# in atomic mode, we spew to a temp file so make one and save the original
467# file name.
468 $orig_file_name = $file_name ;
469 $file_name .= ".$$" ;
470 }
471
472# set the mode for the sysopen
473
474 my $mode = O_WRONLY | O_CREAT ;
b3b7ff4e 475 $mode |= O_APPEND if $opts->{'append'} ;
476 $mode |= O_EXCL if $opts->{'no_clobber'} ;
635c7876 477
b3b7ff4e 478 my $perms = $opts->{perms} ;
f02156f2 479 $perms = 0666 unless defined $perms ;
480
635c7876 481#printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
482
483# open the file and handle any error.
484
485 $write_fh = gensym ;
f02156f2 486 unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
b3b7ff4e 487
488 @_ = ( $opts, "write_file '$file_name' - sysopen: $!");
635c7876 489 goto &_error ;
490 }
491 }
492
b3b7ff4e 493 if ( my $binmode = $opts->{'binmode'} ) {
cee624ab 494 binmode( $write_fh, $binmode ) ;
495 }
496
b3b7ff4e 497 sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
635c7876 498
499
500#print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
501
502# fix up newline to write cr/lf if this is a windows text file
503
b3b7ff4e 504 if ( $is_win32 && !$opts->{'binmode'} ) {
635c7876 505
506# copy the write data if it was passed by ref so we don't clobber the
507# caller's data
508 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
509 ${$buf_ref} =~ s/\n/\015\012/g ;
510 }
511
512#print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
513
514# get the size of how much we are writing and init the offset into that buffer
515
516 my $size_left = length( ${$buf_ref} ) ;
517 my $offset = 0 ;
518
519# loop until we have no more data left to write
520
521 do {
522
523# do the write and track how much we just wrote
524
525 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
526 $size_left, $offset ) ;
527
528 unless ( defined $write_cnt ) {
529
530# the write failed
b3b7ff4e 531 @_ = ( $opts, "write_file '$file_name' - syswrite: $!");
635c7876 532 goto &_error ;
533 }
534
535# track much left to write and where to write from in the buffer
536
537 $size_left -= $write_cnt ;
538 $offset += $write_cnt ;
539
540 } while( $size_left > 0 ) ;
541
542# we truncate regular files in case we overwrite a long file with a shorter file
543# so seek to the current position to get it (same as tell()).
544
545 truncate( $write_fh,
546 sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
547
548 close( $write_fh ) ;
549
550# handle the atomic mode - move the temp file to the original filename.
551
b3b7ff4e 552 if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
e2c51d31 553
b3b7ff4e 554 @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
e2c51d31 555 goto &_error ;
556 }
635c7876 557
558 return 1 ;
559}
560
561# this is for backwards compatibility with the previous File::Slurp module.
562# write_file always overwrites an existing file
563
564*overwrite_file = \&write_file ;
565
566# the current write_file has an append mode so we use that. this
567# supports the same API with an optional second argument which is a
568# hash ref of options.
569
570sub append_file {
571
b3b7ff4e 572# get the optional opts hash ref
573 my $opts = $_[1] ;
574 if ( ref $opts eq 'HASH' ) {
635c7876 575
b3b7ff4e 576# we were passed an opts ref so just mark the append mode
635c7876 577
b3b7ff4e 578 $opts->{append} = 1 ;
635c7876 579 }
580 else {
581
b3b7ff4e 582# no opts hash so insert one with the append mode
635c7876 583
584 splice( @_, 1, 0, { append => 1 } ) ;
585 }
586
587# magic goto the main write_file sub. this overlays the sub without touching
588# the stack or @_
589
590 goto &write_file
591}
592
593# basic wrapper around opendir/readdir
594
b3b7ff4e 595# prepend data to the beginning of a file
596
597sub prepend_file {
598
599 my $file_name = shift ;
600
601#print "FILE $file_name\n" ;
602
603 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
604
605# delete unsupported options
606
607 my @bad_opts =
608 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
609
610 delete @{$opts}{@bad_opts} ;
611
612 my $prepend_data = shift ;
613 $prepend_data = '' unless defined $prepend_data ;
614 $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
615
616#print "PRE [$prepend_data]\n" ;
617
618
619###### set croak as error_mode
620###### wrap in eval
621
622 my $err_mode = delete $opts->{err_mode} ;
623 $opts->{ err_mode } = 'croak' ;
624 $opts->{ scalar_ref } = 1 ;
625
626 my $existing_data ;
627 eval { $existing_data = read_file( $file_name, $opts ) } ;
628
629 if ( $@ ) {
630
631 @_ = ( { err_mode => $err_mode },
632 "prepend_file '$file_name' - read_file: $!" ) ;
633 goto &_error ;
634 }
635
636#print "EXIST [$$existing_data]\n" ;
637
638 $opts->{ atomic } = 1 ;
639
640 my $write_result = eval {
641 write_file( $file_name, $opts,
642 $prepend_data, $$existing_data ) ;
643 } ;
644
645 if ( $@ ) {
646
647 @_ = ( { err_mode => $err_mode },
648 "prepend_file '$file_name' - write_file: $!" ) ;
649 goto &_error ;
650 }
651
652 return $write_result ;
653}
654
635c7876 655sub read_dir {
656
b3b7ff4e 657 my $dir = shift ;
658 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
635c7876 659
660# this handle will be destroyed upon return
661
662 local(*DIRH);
663
664# open the dir and handle any errors
665
666 unless ( opendir( DIRH, $dir ) ) {
667
b3b7ff4e 668 @_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
635c7876 669 goto &_error ;
670 }
671
672 my @dir_entries = readdir(DIRH) ;
673
674 @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
b3b7ff4e 675 unless $opts->{'keep_dot_dot'} ;
635c7876 676
677 return @dir_entries if wantarray ;
678 return \@dir_entries ;
679}
680
681# error handling section
682#
683# all the error handling uses magic goto so the caller will get the
684# error message as if from their code and not this module. if we just
685# did a call on the error code, the carp/croak would report it from
686# this module since the error sub is one level down on the call stack
687# from read_file/write_file/read_dir.
688
689
690my %err_func = (
691 'carp' => \&carp,
692 'croak' => \&croak,
693) ;
694
695sub _error {
696
b3b7ff4e 697 my( $opts, $err_msg ) = @_ ;
635c7876 698
699# get the error function to use
700
b3b7ff4e 701 my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
635c7876 702
703# if we didn't find it in our error function hash, they must have set
704# it to quiet and we don't do anything.
705
706 return unless $func ;
707
708# call the carp/croak function
709
f02156f2 710 $func->($err_msg) if $func ;
635c7876 711
712# return a hard undef (in list context this will be a single value of
713# undef which is not a legal in-band value)
714
715 return undef ;
716}
717
7181;
719__END__
720
721=head1 NAME
722
b3b7ff4e 723File::Slurp - Simple and Efficient Reading/Writing of Complete Files
635c7876 724
725=head1 SYNOPSIS
726
727 use File::Slurp;
728
b3b7ff4e 729# read in a whole file into a scalar
730
635c7876 731 my $text = read_file( 'filename' ) ;
b3b7ff4e 732
733# read in a whole file into an array of lines
734
635c7876 735 my @lines = read_file( 'filename' ) ;
736
b3b7ff4e 737# write out a whole file from a scalar
738
739 write_file( 'filename', $text ) ;
740
741# write out a whole file from an array of lines
742
635c7876 743 write_file( 'filename', @lines ) ;
744
b3b7ff4e 745# Here is a simple and fast way to load and save a simple config file
746# made of key=value lines.
747
748 my %conf = read_file( $file_name ) =~ /^(\w+)=(\.*)$/mg ;
749 write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ;
635c7876 750
b3b7ff4e 751# read in a whole directory of file names (skipping . and ..)
635c7876 752
b3b7ff4e 753 my @files = read_dir( '/path/to/dir' ) ;
635c7876 754
755=head1 DESCRIPTION
756
757This module provides subs that allow you to read or write entire files
758with one simple call. They are designed to be simple to use, have
759flexible ways to pass in or get the file contents and to be very
760efficient. There is also a sub to read in all the files in a
761directory other than C<.> and C<..>
762
b3b7ff4e 763These slurp/spew subs work for files, pipes and sockets, stdio,
764pseudo-files, and the DATA handle. Read more about why slurping files is
765a good thing in the file 'slurp_article.pod' in the extras/ directory.
766
767If you are interested in how fast these calls work, check out the
768slurp_bench.pl program in the extras/ directory. It compares many
769different forms of slurping. You can select the I/O direction, context
770and file sizes. Use the --help option to see how to run it.
635c7876 771
772=head2 B<read_file>
773
774This sub reads in an entire file and returns its contents to the
b3b7ff4e 775caller. In scalar context it returns the entire file as a single
776scalar. In list context it will return a list of lines (using the
635c7876 777current value of $/ as the separator including support for paragraph
b3b7ff4e 778mode when it is set to '').
635c7876 779
780 my $text = read_file( 'filename' ) ;
b3b7ff4e 781 my $bin = read_file( 'filename' { binmode => ':raw' } ) ;
635c7876 782 my @lines = read_file( 'filename' ) ;
b3b7ff4e 783 my $lines = read_file( 'filename', array_ref => 1 ) ;
635c7876 784
b3b7ff4e 785The first argument is the file to slurp in. If the next argument is a
786hash reference, then it is used as the options. Otherwise the rest of
787the argument list are is used as key/value options.
635c7876 788
b3b7ff4e 789If the file argument is a handle (if it is a ref and is an IO or GLOB
790object), then that handle is slurped in. This mode is supported so you
791slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
792an example that does C<open( '-|' )> and the child process spews data
635c7876 793to the parant which slurps it in. All of the options that control how
794the data is returned to the caller still work in this case.
795
b3b7ff4e 796If the first argument is an overloaded object then its stringified value
797is used for the filename and that file is opened. This is a new feature
798in 9999.14. See the stringify.t test for an example.
799
800By default C<read_file> returns an undef in scalar contex or a single
801undef in list context if it encounters an error. Those are both
802impossible to get with a clean read_file call which means you can check
803the return value and always know if you had an error. You can change how
804errors are handled with the C<err_mode> option.
805
635c7876 806NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
807handle. It used to need a sysseek workaround but that is now handled
808when needed by the module itself.
809
810You can optionally request that C<slurp()> is exported to your code. This
811is an alias for read_file and is meant to be forward compatible with
812Perl 6 (which will have slurp() built-in).
813
b3b7ff4e 814The options for C<read_file> are:
635c7876 815
816=head3 binmode
817
b3b7ff4e 818If you set the binmode option, then its value is passed to a call to
819binmode on the opened handle. You can use this to set the file to be
820read in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 821
822 my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
9aab46ab 823 my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
635c7876 824
825=head3 array_ref
826
827If this boolean option is set, the return value (only in scalar
828context) will be an array reference which contains the lines of the
829slurped file. The following two calls are equivalent:
830
831 my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
832 my $lines_ref = [ read_file( $bin_file ) ] ;
833
834=head3 scalar_ref
835
f02156f2 836If this boolean option is set, the return value (only in scalar context)
837will be an scalar reference to a string which is the contents of the
838slurped file. This will usually be faster than returning the plain
839scalar. It will also save memory as it will not make a copy of the file
840to return.
635c7876 841
842 my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
843
844=head3 buf_ref
845
846You can use this option to pass in a scalar reference and the slurped
847file contents will be stored in the scalar. This can be used in
f02156f2 848conjunction with any of the other options. This saves an extra copy of
849the slurped file and can lower ram usage vs returning the file.
635c7876 850
b3b7ff4e 851 read_file( $bin_file, buf_ref => \$buffer ) ;
635c7876 852
853=head3 blk_size
854
b3b7ff4e 855You can use this option to set the block size used when slurping from
856an already open handle (like \*STDIN). It defaults to 1MB.
635c7876 857
858 my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
859 array_ref => 1 ) ;
860
861=head3 err_mode
862
863You can use this option to control how read_file behaves when an error
b3b7ff4e 864occurs. This option defaults to 'croak'. You can set it to 'carp' or to
865'quiet to have no special error handling. This code wants to carp and
866then read another file if it fails.
635c7876 867
868 my $text_ref = read_file( $file, err_mode => 'carp' ) ;
869 unless ( $text_ref ) {
870
871 # read a different file but croak if not found
872 $text_ref = read_file( $another_file ) ;
873 }
874
875 # process ${$text_ref}
876
877=head2 B<write_file>
878
879This sub writes out an entire file in one call.
880
881 write_file( 'filename', @data ) ;
882
883The first argument to C<write_file> is the filename. The next argument
884is an optional hash reference and it contains key/values that can
885modify the behavior of C<write_file>. The rest of the argument list is
886the data to be written to the file.
887
888 write_file( 'filename', {append => 1 }, @data ) ;
b3b7ff4e 889 write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
890
891As a shortcut if the first data argument is a scalar or array reference,
892it is used as the only data to be written to the file. Any following
893arguments in @_ are ignored. This is a faster way to pass in the output
894to be written to the file and is equivalent to the C<buf_ref> option of
895C<read_file>. These following pairs are equivalent but the pass by
896reference call will be faster in most cases (especially with larger
635c7876 897files).
898
899 write_file( 'filename', \$buffer ) ;
900 write_file( 'filename', $buffer ) ;
901
902 write_file( 'filename', \@lines ) ;
903 write_file( 'filename', @lines ) ;
904
b3b7ff4e 905If the first argument is a handle (if it is a ref and is an IO or GLOB
906object), then that handle is written to. This mode is supported so you
907spew to handles such as \*STDOUT. See the test handle.t for an example
908that does C<open( '-|' )> and child process spews data to the parent
909which slurps it in. All of the options that control how the data are
910passed into C<write_file> still work in this case.
911
912If the first argument is an overloaded object then its stringified value
913is used for the filename and that file is opened. This is new feature
914in 9999.14. See the stringify.t test for an example.
635c7876 915
b3b7ff4e 916By default C<write_file> returns 1 upon successfully writing the file or
917undef if it encountered an error. You can change how errors are handled
918with the C<err_mode> option.
635c7876 919
920The options are:
921
922=head3 binmode
923
b3b7ff4e 924If you set the binmode option, then its value is passed to a call to
925binmode on the opened handle. You can use this to set the file to be
926read in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 927
928 write_file( $bin_file, {binmode => ':raw'}, @data ) ;
b3b7ff4e 929 write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
930
931=head3 perms
635c7876 932
b3b7ff4e 933The perms option sets the permissions of newly-created files. This value
934is modified by your process's umask and defaults to 0666 (same as
935sysopen).
936
937NOTE: this option is new as of File::Slurp version 9999.14;
635c7876 938
939=head3 buf_ref
940
941You can use this option to pass in a scalar reference which has the
942data to be written. If this is set then any data arguments (including
943the scalar reference shortcut) in @_ will be ignored. These are
b3b7ff4e 944equivalent:
635c7876 945
946 write_file( $bin_file, { buf_ref => \$buffer } ) ;
947 write_file( $bin_file, \$buffer ) ;
948 write_file( $bin_file, $buffer ) ;
949
950=head3 atomic
951
952If you set this boolean option, the file will be written to in an
953atomic fashion. A temporary file name is created by appending the pid
954($$) to the file name argument and that file is spewed to. After the
955file is closed it is renamed to the original file name (and rename is
956an atomic operation on most OS's). If the program using this were to
957crash in the middle of this, then the file with the pid suffix could
958be left behind.
959
960=head3 append
961
962If you set this boolean option, the data will be written at the end of
f02156f2 963the current file. Internally this sets the sysopen mode flag O_APPEND.
635c7876 964
965 write_file( $file, {append => 1}, @data ) ;
966
b3b7ff4e 967 You
968can import append_file and it does the same thing.
635c7876 969
970=head3 no_clobber
971
972If you set this boolean option, an existing file will not be overwritten.
973
974 write_file( $file, {no_clobber => 1}, @data ) ;
975
976=head3 err_mode
977
978You can use this option to control how C<write_file> behaves when an
979error occurs. This option defaults to 'croak'. You can set it to
980'carp' or to 'quiet' to have no error handling other than the return
981value. If the first call to C<write_file> fails it will carp and then
982write to another file. If the second call to C<write_file> fails, it
983will croak.
984
985 unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
986
987 # write a different file but croak if not found
988 write_file( $other_file, \$data ) ;
989 }
990
991=head2 overwrite_file
992
993This sub is just a typeglob alias to write_file since write_file
994always overwrites an existing file. This sub is supported for
995backwards compatibility with the original version of this module. See
996write_file for its API and behavior.
997
998=head2 append_file
999
1000This sub will write its data to the end of the file. It is a wrapper
1001around write_file and it has the same API so see that for the full
b3b7ff4e 1002documentation. These calls are equivalent:
635c7876 1003
1004 append_file( $file, @data ) ;
1005 write_file( $file, {append => 1}, @data ) ;
1006
b3b7ff4e 1007
1008=head2 prepend_file
1009
1010This sub writes data to the beginning of a file. The previously existing
1011data is written after that so the effect is prepending data in front of
1012a file. It is a counterpart to the append_file sub in this module. It
1013works by first using C<read_file> to slurp in the file and then calling
1014C<write_file> with the new data and the existing file data.
1015
1016The first argument to C<prepend_file> is the filename. The next argument
1017is an optional hash reference and it contains key/values that can modify
1018the behavior of C<prepend_file>. The rest of the argument list is the
1019data to be written to the file and that is passed to C<write_file> as is
1020(see that for allowed data).
1021
1022Only the C<binmode> and C<err_mode> options are supported. The
1023C<write_file> call has the C<atomic> option set so you will always have
1024a consistant file. See above for more about those options.
1025
1026C<prepend_file> is not exported by default, you need to import it
1027explicitly.
1028
1029 use File::Slurp qw( prepend_file ) ;
1030 prepend_file( $file, $header ) ;
1031 prepend_file( $file, \@lines ) ;
1032 prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
1033
635c7876 1034=head2 read_dir
1035
1036This sub reads all the file names from directory and returns them to
1037the caller but C<.> and C<..> are removed by default.
1038
1039 my @files = read_dir( '/path/to/dir' ) ;
1040
b3b7ff4e 1041The first argument is the path to the directory to read. If the next
1042argument is a hash reference, then it is used as the options.
1043Otherwise the rest of the argument list are is used as key/value
1044options.
635c7876 1045
b3b7ff4e 1046In list context C<read_dir> returns a list of the entries in the
635c7876 1047directory. In a scalar context it returns an array reference which has
1048the entries.
1049
b3b7ff4e 1050=head3 err_mode
1051
1052If the C<err_mode> option is set, it selects how errors are handled (see
1053C<err_mode> in C<read_file> or C<write_file>).
1054
635c7876 1055=head3 keep_dot_dot
1056
1057If this boolean option is set, C<.> and C<..> are not removed from the
1058list of files.
1059
1060 my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1061
1062=head2 EXPORT
1063
1064 read_file write_file overwrite_file append_file read_dir
1065
f02156f2 1066=head2 LICENSE
1067
1068 Same as Perl.
1069
635c7876 1070=head2 SEE ALSO
1071
1072An article on file slurping in extras/slurp_article.pod. There is
1073also a benchmarking script in extras/slurp_bench.pl.
1074
1075=head2 BUGS
1076
1077If run under Perl 5.004, slurping from the DATA handle will fail as
1078that requires B.pm which didn't get into core until 5.005.
1079
1080=head1 AUTHOR
1081
b3b7ff4e 1082Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
635c7876 1083
1084=cut