added export_ok for edit_file and edit_file_lines
[urisagit/Perl-Docs.git] / lib / File / Slurp.pm
CommitLineData
635c7876 1package File::Slurp;
2
6f9e0c69 3use 5.6.2 ;
e2c51d31 4
635c7876 5use strict;
6
7use Carp ;
b3b7ff4e 8use Exporter ;
635c7876 9use Fcntl qw( :DEFAULT ) ;
e2c51d31 10use POSIX qw( :fcntl_h ) ;
635c7876 11use Symbol ;
12
6f9e0c69 13use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ;
b3b7ff4e 14@ISA = qw( Exporter ) ;
e2c51d31 15
6f9e0c69 16$VERSION = '9999.17';
17
8a1e3264 18@EXPORT_OK = qw(
19 slurp
20 prepend_file
21 edit_file
22 edit_file_lines
23) ;
24
25%EXPORT_TAGS = ( 'all' => [ qw(
26 read_file
27 write_file
28 overwrite_file
29 append_file
30 read_dir ),
31 @EXPORT_OK
32] ) ;
e2c51d31 33@EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
e2c51d31 34
b3b7ff4e 35my $max_fast_slurp_size = 1024 * 100 ;
9aab46ab 36
635c7876 37my $is_win32 = $^O =~ /win32/i ;
38
39# Install subs for various constants that aren't set in older perls
40# (< 5.005). Fcntl on old perls uses Exporter to define subs without a
41# () prototype These can't be overridden with the constant pragma or
42# we get a prototype mismatch. Hence this less than aesthetically
43# appealing BEGIN block:
44
45BEGIN {
8ed110f9 46 unless( defined &SEEK_SET ) {
635c7876 47 *SEEK_SET = sub { 0 };
48 *SEEK_CUR = sub { 1 };
49 *SEEK_END = sub { 2 };
50 }
51
8ed110f9 52 unless( defined &O_BINARY ) {
635c7876 53 *O_BINARY = sub { 0 };
54 *O_RDONLY = sub { 0 };
55 *O_WRONLY = sub { 1 };
56 }
57
f02156f2 58 unless ( defined &O_APPEND ) {
635c7876 59
60 if ( $^O =~ /olaris/ ) {
61 *O_APPEND = sub { 8 };
62 *O_CREAT = sub { 256 };
63 *O_EXCL = sub { 1024 };
64 }
65 elsif ( $^O =~ /inux/ ) {
66 *O_APPEND = sub { 1024 };
67 *O_CREAT = sub { 64 };
68 *O_EXCL = sub { 128 };
69 }
70 elsif ( $^O =~ /BSD/i ) {
71 *O_APPEND = sub { 8 };
72 *O_CREAT = sub { 512 };
73 *O_EXCL = sub { 2048 };
74 }
75 }
76}
77
78# print "OS [$^O]\n" ;
79
80# print "O_BINARY = ", O_BINARY(), "\n" ;
81# print "O_RDONLY = ", O_RDONLY(), "\n" ;
82# print "O_WRONLY = ", O_WRONLY(), "\n" ;
83# print "O_APPEND = ", O_APPEND(), "\n" ;
84# print "O_CREAT ", O_CREAT(), "\n" ;
85# print "O_EXCL ", O_EXCL(), "\n" ;
86
635c7876 87
88*slurp = \&read_file ;
89
90sub read_file {
91
b3b7ff4e 92 my $file_name = shift ;
93 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
635c7876 94
6f9e0c69 95# this is the optimized read_file for shorter files.
96# the test for -s > 0 is to allow pseudo files to be read with the
97# regular loop since they return a size of 0.
98
99 if ( !ref $file_name && -e $file_name && -s _ > 0 &&
100 -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) {
e2c51d31 101
e2c51d31 102
6f9e0c69 103 my $fh ;
104 unless( sysopen( $fh, $file_name, O_RDONLY ) ) {
e2c51d31 105
b3b7ff4e 106 @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
e2c51d31 107 goto &_error ;
108 }
109
6f9e0c69 110 my $read_cnt = sysread( $fh, my $buf, -s _ ) ;
e2c51d31 111
112 unless ( defined $read_cnt ) {
113
b3b7ff4e 114 @_ = ( $opts,
8ed110f9 115 "read_file '$file_name' - small sysread: $!");
e2c51d31 116 goto &_error ;
117 }
118
6f9e0c69 119 $buf =~ s/\015\012/\n/g if $is_win32 ;
e2c51d31 120 return $buf ;
121 }
122
635c7876 123# set the buffer to either the passed in one or ours and init it to the null
124# string
125
126 my $buf ;
b3b7ff4e 127 my $buf_ref = $opts->{'buf_ref'} || \$buf ;
635c7876 128 ${$buf_ref} = '' ;
129
130 my( $read_fh, $size_left, $blk_size ) ;
131
b3b7ff4e 132# deal with ref for a file name
133# it could be an open handle or an overloaded object
635c7876 134
135 if ( ref $file_name ) {
136
b3b7ff4e 137 my $ref_result = _check_ref( $file_name ) ;
635c7876 138
b3b7ff4e 139 if ( ref $ref_result ) {
635c7876 140
b3b7ff4e 141# we got an error, deal with it
635c7876 142
b3b7ff4e 143 @_ = ( $opts, $ref_result ) ;
635c7876 144 goto &_error ;
145 }
146
b3b7ff4e 147 if ( $ref_result ) {
635c7876 148
b3b7ff4e 149# we got an overloaded object and the result is the stringified value
150# use it as the file name
151
152 $file_name = $ref_result ;
153 }
154 else {
155
156# here we have just an open handle. set $read_fh so we don't do a sysopen
635c7876 157
b3b7ff4e 158 $read_fh = $file_name ;
159 $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
160 $size_left = $blk_size ;
635c7876 161 }
162 }
b3b7ff4e 163
164# see if we have a path we need to open
165
166 unless ( $read_fh ) {
635c7876 167
168# a regular file. set the sysopen mode
169
170 my $mode = O_RDONLY ;
635c7876 171
172#printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
173
635c7876 174 $read_fh = gensym ;
175 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
b3b7ff4e 176 @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
635c7876 177 goto &_error ;
178 }
179
b3b7ff4e 180 if ( my $binmode = $opts->{'binmode'} ) {
cee624ab 181 binmode( $read_fh, $binmode ) ;
182 }
183
635c7876 184# get the size of the file for use in the read loop
185
186 $size_left = -s $read_fh ;
187
f9940db7 188#print "SIZE $size_left\n" ;
8ed110f9 189
f9940db7 190# we need a blk_size if the size is 0 so we can handle pseudofiles like in
191# /proc. these show as 0 size but have data to be slurped.
192
193 unless( $size_left ) {
194
b3b7ff4e 195 $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
f9940db7 196 $size_left = $blk_size ;
197 }
e2c51d31 198 }
199
200
b3b7ff4e 201# if ( $size_left < 10000 && keys %{$opts} == 0 && !wantarray ) {
e2c51d31 202
8ed110f9 203# my $read_cnt = sysread( $read_fh, my $buf, $size_left ) ;
e2c51d31 204
8ed110f9 205# unless ( defined $read_cnt ) {
e2c51d31 206
b3b7ff4e 207# @_ = ( $opts, "read_file '$file_name' - small2 sysread: $!");
8ed110f9 208# goto &_error ;
209# }
e2c51d31 210
8ed110f9 211# return $buf ;
212# }
635c7876 213
214# infinite read loop. we exit when we are done slurping
215
216 while( 1 ) {
217
218# do the read and see how much we got
219
220 my $read_cnt = sysread( $read_fh, ${$buf_ref},
221 $size_left, length ${$buf_ref} ) ;
222
e2c51d31 223 unless ( defined $read_cnt ) {
224
b3b7ff4e 225 @_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
e2c51d31 226 goto &_error ;
227 }
635c7876 228
229# good read. see if we hit EOF (nothing left to read)
230
e2c51d31 231 last if $read_cnt == 0 ;
635c7876 232
233# loop if we are slurping a handle. we don't track $size_left then.
234
e2c51d31 235 next if $blk_size ;
635c7876 236
237# count down how much we read and loop if we have more to read.
635c7876 238
e2c51d31 239 $size_left -= $read_cnt ;
240 last if $size_left <= 0 ;
635c7876 241 }
242
243# fix up cr/lf to be a newline if this is a windows text file
244
b3b7ff4e 245 ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
635c7876 246
635c7876 247 my $sep = $/ ;
248 $sep = '\n\n+' if defined $sep && $sep eq '' ;
249
b3b7ff4e 250# see if caller wants lines
251
252 if( wantarray || $opts->{'array_ref'} ) {
635c7876 253
6f9e0c69 254 use re 'taint' ;
635c7876 255
6f9e0c69 256 my @lines = length(${$buf_ref}) ?
257 ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ;
635c7876 258
6f9e0c69 259# caller wants an array ref
b3b7ff4e 260
6f9e0c69 261 return \@lines if $opts->{'array_ref'} ;
b3b7ff4e 262
6f9e0c69 263# caller wants list of lines
b3b7ff4e 264
b3b7ff4e 265 return @lines ;
266 }
635c7876 267
268# caller wants a scalar ref to the slurped text
269
b3b7ff4e 270 return $buf_ref if $opts->{'scalar_ref'} ;
635c7876 271
272# caller wants a scalar with the slurped text (normal scalar context)
273
274 return ${$buf_ref} if defined wantarray ;
275
276# caller passed in an i/o buffer by reference (normal void context)
277
278 return ;
279}
280
b3b7ff4e 281# errors in this sub are returned as scalar refs
282# a normal IO/GLOB handle is an empty return
283# an overloaded object returns its stringified as a scalarfilename
284
285sub _check_ref {
286
287 my( $handle ) = @_ ;
288
289# check if we are reading from a handle (GLOB or IO object)
290
291 if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) {
292
293# we have a handle. deal with seeking to it if it is DATA
294
295 my $err = _seek_data_handle( $handle ) ;
296
297# return the error string if any
298
299 return \$err if $err ;
300
301# we have good handle
302 return ;
303 }
304
305 eval { require overload } ;
306
307# return an error if we can't load the overload pragma
308# or if the object isn't overloaded
309
310 return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
311 if $@ || !overload::Overloaded( $handle ) ;
312
313# must be overloaded so return its stringified value
314
315 return "$handle" ;
316}
317
318sub _seek_data_handle {
319
320 my( $handle ) = @_ ;
321
322# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
323# glob/handle. only the DATA handle is untainted (since it is from
324# trusted data in the source file). this allows us to test if this is
325# the DATA handle and then to do a sysseek to make sure it gets
326# slurped correctly. on some systems, the buffered i/o pointer is not
327# left at the same place as the fd pointer. this sysseek makes them
328# the same so slurping with sysread will work.
329
330 eval{ require B } ;
331
332 if ( $@ ) {
333
334 return <<ERR ;
335Can't find B.pm with this Perl: $!.
336That module is needed to properly slurp the DATA handle.
337ERR
338 }
339
340 if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
341
342# set the seek position to the current tell.
343
344 unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
345 return "read_file '$handle' - sysseek: $!" ;
346 }
347 }
348
349# seek was successful, return no error string
350
351 return ;
352}
353
354
635c7876 355sub write_file {
356
357 my $file_name = shift ;
358
359# get the optional argument hash ref from @_ or an empty hash ref.
360
b3b7ff4e 361 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
635c7876 362
363 my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
364
365# get the buffer ref - it depends on how the data is passed into write_file
366# after this if/else $buf_ref will have a scalar ref to the data.
367
b3b7ff4e 368 if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
635c7876 369
b3b7ff4e 370# a scalar ref passed in %opts has the data
635c7876 371# note that the data was passed by ref
372
b3b7ff4e 373 $buf_ref = $opts->{'buf_ref'} ;
635c7876 374 $data_is_ref = 1 ;
375 }
376 elsif ( ref $_[0] eq 'SCALAR' ) {
377
378# the first value in @_ is the scalar ref to the data
379# note that the data was passed by ref
380
381 $buf_ref = shift ;
382 $data_is_ref = 1 ;
383 }
384 elsif ( ref $_[0] eq 'ARRAY' ) {
385
386# the first value in @_ is the array ref to the data so join it.
387
388 ${$buf_ref} = join '', @{$_[0]} ;
389 }
390 else {
391
392# good old @_ has all the data so join it.
393
394 ${$buf_ref} = join '', @_ ;
395 }
396
b3b7ff4e 397# deal with ref for a file name
635c7876 398
399 if ( ref $file_name ) {
400
b3b7ff4e 401 my $ref_result = _check_ref( $file_name ) ;
402
403 if ( ref $ref_result ) {
635c7876 404
b3b7ff4e 405# we got an error, deal with it
406
407 @_ = ( $opts, $ref_result ) ;
408 goto &_error ;
409 }
410
411 if ( $ref_result ) {
412
413# we got an overloaded object and the result is the stringified value
414# use it as the file name
415
416 $file_name = $ref_result ;
417 }
418 else {
419
420# we now have a proper handle ref.
421# make sure we don't call truncate on it.
422
423 $write_fh = $file_name ;
424 $no_truncate = 1 ;
425 }
635c7876 426 }
b3b7ff4e 427
428# see if we have a path we need to open
429
430 unless( $write_fh ) {
635c7876 431
432# spew to regular file.
433
b3b7ff4e 434 if ( $opts->{'atomic'} ) {
635c7876 435
436# in atomic mode, we spew to a temp file so make one and save the original
437# file name.
438 $orig_file_name = $file_name ;
439 $file_name .= ".$$" ;
440 }
441
442# set the mode for the sysopen
443
444 my $mode = O_WRONLY | O_CREAT ;
b3b7ff4e 445 $mode |= O_APPEND if $opts->{'append'} ;
446 $mode |= O_EXCL if $opts->{'no_clobber'} ;
635c7876 447
b3b7ff4e 448 my $perms = $opts->{perms} ;
f02156f2 449 $perms = 0666 unless defined $perms ;
450
635c7876 451#printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
452
453# open the file and handle any error.
454
455 $write_fh = gensym ;
f02156f2 456 unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
b3b7ff4e 457
458 @_ = ( $opts, "write_file '$file_name' - sysopen: $!");
635c7876 459 goto &_error ;
460 }
461 }
462
b3b7ff4e 463 if ( my $binmode = $opts->{'binmode'} ) {
cee624ab 464 binmode( $write_fh, $binmode ) ;
465 }
466
b3b7ff4e 467 sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
635c7876 468
635c7876 469#print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
470
471# fix up newline to write cr/lf if this is a windows text file
472
b3b7ff4e 473 if ( $is_win32 && !$opts->{'binmode'} ) {
635c7876 474
475# copy the write data if it was passed by ref so we don't clobber the
476# caller's data
477 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
478 ${$buf_ref} =~ s/\n/\015\012/g ;
479 }
480
481#print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
482
483# get the size of how much we are writing and init the offset into that buffer
484
485 my $size_left = length( ${$buf_ref} ) ;
486 my $offset = 0 ;
487
488# loop until we have no more data left to write
489
490 do {
491
492# do the write and track how much we just wrote
493
494 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
495 $size_left, $offset ) ;
496
497 unless ( defined $write_cnt ) {
498
b3b7ff4e 499 @_ = ( $opts, "write_file '$file_name' - syswrite: $!");
635c7876 500 goto &_error ;
501 }
502
6f9e0c69 503# track how much left to write and where to write from in the buffer
635c7876 504
505 $size_left -= $write_cnt ;
506 $offset += $write_cnt ;
507
508 } while( $size_left > 0 ) ;
509
510# we truncate regular files in case we overwrite a long file with a shorter file
511# so seek to the current position to get it (same as tell()).
512
513 truncate( $write_fh,
514 sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
515
516 close( $write_fh ) ;
517
518# handle the atomic mode - move the temp file to the original filename.
519
b3b7ff4e 520 if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
e2c51d31 521
b3b7ff4e 522 @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
e2c51d31 523 goto &_error ;
524 }
635c7876 525
526 return 1 ;
527}
528
529# this is for backwards compatibility with the previous File::Slurp module.
530# write_file always overwrites an existing file
531
532*overwrite_file = \&write_file ;
533
534# the current write_file has an append mode so we use that. this
535# supports the same API with an optional second argument which is a
536# hash ref of options.
537
538sub append_file {
539
b3b7ff4e 540# get the optional opts hash ref
541 my $opts = $_[1] ;
542 if ( ref $opts eq 'HASH' ) {
635c7876 543
b3b7ff4e 544# we were passed an opts ref so just mark the append mode
635c7876 545
b3b7ff4e 546 $opts->{append} = 1 ;
635c7876 547 }
548 else {
549
b3b7ff4e 550# no opts hash so insert one with the append mode
635c7876 551
552 splice( @_, 1, 0, { append => 1 } ) ;
553 }
554
555# magic goto the main write_file sub. this overlays the sub without touching
556# the stack or @_
557
558 goto &write_file
559}
560
b3b7ff4e 561# prepend data to the beginning of a file
562
563sub prepend_file {
564
565 my $file_name = shift ;
566
567#print "FILE $file_name\n" ;
568
569 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
570
571# delete unsupported options
572
573 my @bad_opts =
574 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
575
576 delete @{$opts}{@bad_opts} ;
577
578 my $prepend_data = shift ;
579 $prepend_data = '' unless defined $prepend_data ;
580 $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
581
582#print "PRE [$prepend_data]\n" ;
583
b3b7ff4e 584 my $err_mode = delete $opts->{err_mode} ;
585 $opts->{ err_mode } = 'croak' ;
586 $opts->{ scalar_ref } = 1 ;
587
6f9e0c69 588 my $existing_data = eval { read_file( $file_name, $opts ) } ;
b3b7ff4e 589
590 if ( $@ ) {
591
592 @_ = ( { err_mode => $err_mode },
593 "prepend_file '$file_name' - read_file: $!" ) ;
594 goto &_error ;
595 }
596
597#print "EXIST [$$existing_data]\n" ;
598
6f9e0c69 599 $opts->{atomic} = 1 ;
600 my $write_result =
601 eval { write_file( $file_name, $opts,
602 $prepend_data, $$existing_data ) ;
b3b7ff4e 603 } ;
604
605 if ( $@ ) {
606
607 @_ = ( { err_mode => $err_mode },
608 "prepend_file '$file_name' - write_file: $!" ) ;
609 goto &_error ;
610 }
611
612 return $write_result ;
613}
614
6f9e0c69 615# edit a file as a scalar in $_
616
617sub edit_file(&$;$) {
618
619 my( $edit_code, $file_name, $opts ) = @_ ;
620 $opts = {} unless ref $opts eq 'HASH' ;
621
622# my $edit_code = shift ;
623# my $file_name = shift ;
624# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
625
626#print "FILE $file_name\n" ;
627
628# delete unsupported options
629
630 my @bad_opts =
631 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
632
633 delete @{$opts}{@bad_opts} ;
634
635# keep the user err_mode and force croaking on internal errors
636
637 my $err_mode = delete $opts->{err_mode} ;
638 $opts->{ err_mode } = 'croak' ;
639
640# get a scalar ref for speed and slurp the file into a scalar
641
642 $opts->{ scalar_ref } = 1 ;
643 my $existing_data = eval { read_file( $file_name, $opts ) } ;
644
645 if ( $@ ) {
646
647 @_ = ( { err_mode => $err_mode },
648 "edit_file '$file_name' - read_file: $!" ) ;
649 goto &_error ;
650 }
651
652#print "EXIST [$$existing_data]\n" ;
653
654 my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ;
655
656 $opts->{atomic} = 1 ;
657 my $write_result =
658 eval { write_file( $file_name, $opts, $edited_data ) } ;
659
660 if ( $@ ) {
661
662 @_ = ( { err_mode => $err_mode },
663 "edit_file '$file_name' - write_file: $!" ) ;
664 goto &_error ;
665 }
666
667 return $write_result ;
668}
669
670sub edit_file_lines(&$;$) {
671
672 my( $edit_code, $file_name, $opts ) = @_ ;
673 $opts = {} unless ref $opts eq 'HASH' ;
674
675# my $edit_code = shift ;
676# my $file_name = shift ;
677# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
678
679#print "FILE $file_name\n" ;
680
681# delete unsupported options
682
683 my @bad_opts =
684 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
685
686 delete @{$opts}{@bad_opts} ;
687
688# keep the user err_mode and force croaking on internal errors
689
690 my $err_mode = delete $opts->{err_mode} ;
691 $opts->{ err_mode } = 'croak' ;
692
693# get an array ref for speed and slurp the file into lines
694
695 $opts->{ array_ref } = 1 ;
696 my $existing_data = eval { read_file( $file_name, $opts ) } ;
697
698 if ( $@ ) {
699
700 @_ = ( { err_mode => $err_mode },
701 "edit_file_lines '$file_name' - read_file: $!" ) ;
702 goto &_error ;
703 }
704
705#print "EXIST [$$existing_data]\n" ;
706
707 my @edited_data = map { $edit_code->(); $_ } @$existing_data ;
708
709 $opts->{atomic} = 1 ;
710 my $write_result =
711 eval { write_file( $file_name, $opts, @edited_data ) } ;
712
713 if ( $@ ) {
714
715 @_ = ( { err_mode => $err_mode },
716 "edit_file_lines '$file_name' - write_file: $!" ) ;
717 goto &_error ;
718 }
719
720 return $write_result ;
721}
722
723# basic wrapper around opendir/readdir
724
635c7876 725sub read_dir {
726
b3b7ff4e 727 my $dir = shift ;
728 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
635c7876 729
730# this handle will be destroyed upon return
731
732 local(*DIRH);
733
734# open the dir and handle any errors
735
736 unless ( opendir( DIRH, $dir ) ) {
737
b3b7ff4e 738 @_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
635c7876 739 goto &_error ;
740 }
741
742 my @dir_entries = readdir(DIRH) ;
743
744 @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
b3b7ff4e 745 unless $opts->{'keep_dot_dot'} ;
635c7876 746
747 return @dir_entries if wantarray ;
748 return \@dir_entries ;
749}
750
751# error handling section
752#
753# all the error handling uses magic goto so the caller will get the
754# error message as if from their code and not this module. if we just
755# did a call on the error code, the carp/croak would report it from
756# this module since the error sub is one level down on the call stack
757# from read_file/write_file/read_dir.
758
759
760my %err_func = (
761 'carp' => \&carp,
762 'croak' => \&croak,
763) ;
764
765sub _error {
766
b3b7ff4e 767 my( $opts, $err_msg ) = @_ ;
635c7876 768
769# get the error function to use
770
b3b7ff4e 771 my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
635c7876 772
773# if we didn't find it in our error function hash, they must have set
774# it to quiet and we don't do anything.
775
776 return unless $func ;
777
778# call the carp/croak function
779
f02156f2 780 $func->($err_msg) if $func ;
635c7876 781
782# return a hard undef (in list context this will be a single value of
783# undef which is not a legal in-band value)
784
785 return undef ;
786}
787
7881;
789__END__
790
791=head1 NAME
792
6f9e0c69 793File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files
635c7876 794
795=head1 SYNOPSIS
796
797 use File::Slurp;
798
b3b7ff4e 799# read in a whole file into a scalar
800
635c7876 801 my $text = read_file( 'filename' ) ;
b3b7ff4e 802
803# read in a whole file into an array of lines
804
635c7876 805 my @lines = read_file( 'filename' ) ;
806
b3b7ff4e 807# write out a whole file from a scalar
808
809 write_file( 'filename', $text ) ;
810
811# write out a whole file from an array of lines
812
635c7876 813 write_file( 'filename', @lines ) ;
814
b3b7ff4e 815# Here is a simple and fast way to load and save a simple config file
816# made of key=value lines.
817
818 my %conf = read_file( $file_name ) =~ /^(\w+)=(\.*)$/mg ;
819 write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ;
635c7876 820
6f9e0c69 821# insert text at the beginning of a file
822
823 prepend_file( 'filename', $text ) ;
824
b3b7ff4e 825# read in a whole directory of file names (skipping . and ..)
635c7876 826
b3b7ff4e 827 my @files = read_dir( '/path/to/dir' ) ;
635c7876 828
829=head1 DESCRIPTION
830
831This module provides subs that allow you to read or write entire files
832with one simple call. They are designed to be simple to use, have
833flexible ways to pass in or get the file contents and to be very
834efficient. There is also a sub to read in all the files in a
835directory other than C<.> and C<..>
836
b3b7ff4e 837These slurp/spew subs work for files, pipes and sockets, stdio,
838pseudo-files, and the DATA handle. Read more about why slurping files is
839a good thing in the file 'slurp_article.pod' in the extras/ directory.
840
841If you are interested in how fast these calls work, check out the
842slurp_bench.pl program in the extras/ directory. It compares many
843different forms of slurping. You can select the I/O direction, context
844and file sizes. Use the --help option to see how to run it.
635c7876 845
846=head2 B<read_file>
847
848This sub reads in an entire file and returns its contents to the
b3b7ff4e 849caller. In scalar context it returns the entire file as a single
850scalar. In list context it will return a list of lines (using the
635c7876 851current value of $/ as the separator including support for paragraph
b3b7ff4e 852mode when it is set to '').
635c7876 853
854 my $text = read_file( 'filename' ) ;
b3b7ff4e 855 my $bin = read_file( 'filename' { binmode => ':raw' } ) ;
635c7876 856 my @lines = read_file( 'filename' ) ;
b3b7ff4e 857 my $lines = read_file( 'filename', array_ref => 1 ) ;
635c7876 858
b3b7ff4e 859The first argument is the file to slurp in. If the next argument is a
860hash reference, then it is used as the options. Otherwise the rest of
861the argument list are is used as key/value options.
635c7876 862
b3b7ff4e 863If the file argument is a handle (if it is a ref and is an IO or GLOB
864object), then that handle is slurped in. This mode is supported so you
865slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
866an example that does C<open( '-|' )> and the child process spews data
635c7876 867to the parant which slurps it in. All of the options that control how
868the data is returned to the caller still work in this case.
869
b3b7ff4e 870If the first argument is an overloaded object then its stringified value
871is used for the filename and that file is opened. This is a new feature
872in 9999.14. See the stringify.t test for an example.
873
874By default C<read_file> returns an undef in scalar contex or a single
875undef in list context if it encounters an error. Those are both
876impossible to get with a clean read_file call which means you can check
877the return value and always know if you had an error. You can change how
878errors are handled with the C<err_mode> option.
879
6f9e0c69 880Speed Note: If you call read_file and just get a scalar return value
881it is now optimized to handle shorter files. This is only used if no
882options are used, the file is shorter then 100k bytes, the filename is
883a plain scalar and a scalar file is returned. If you want the fastest
884slurping, use the C<buf_ref> or C<scalar_ref> options (see below)
885
635c7876 886NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
887handle. It used to need a sysseek workaround but that is now handled
888when needed by the module itself.
889
890You can optionally request that C<slurp()> is exported to your code. This
891is an alias for read_file and is meant to be forward compatible with
892Perl 6 (which will have slurp() built-in).
893
b3b7ff4e 894The options for C<read_file> are:
635c7876 895
896=head3 binmode
897
b3b7ff4e 898If you set the binmode option, then its value is passed to a call to
899binmode on the opened handle. You can use this to set the file to be
900read in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 901
902 my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
9aab46ab 903 my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
635c7876 904
905=head3 array_ref
906
907If this boolean option is set, the return value (only in scalar
908context) will be an array reference which contains the lines of the
909slurped file. The following two calls are equivalent:
910
911 my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
912 my $lines_ref = [ read_file( $bin_file ) ] ;
913
914=head3 scalar_ref
915
6f9e0c69 916If this boolean option is set, the return value (only in scalar
917context) will be an scalar reference to a string which is the contents
918of the slurped file. This will usually be faster than returning the
919plain scalar. It will also save memory as it will not make a copy of
920the file to return. Run the extras/slurp_bench.pl script to see speed
921comparisons.
635c7876 922
923 my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
924
925=head3 buf_ref
926
927You can use this option to pass in a scalar reference and the slurped
928file contents will be stored in the scalar. This can be used in
f02156f2 929conjunction with any of the other options. This saves an extra copy of
6f9e0c69 930the slurped file and can lower ram usage vs returning the file. It is
931usually the fastest way to read a file into a scalar. Run the
932extras/slurp_bench.pl script to see speed comparisons.
933
635c7876 934
b3b7ff4e 935 read_file( $bin_file, buf_ref => \$buffer ) ;
635c7876 936
937=head3 blk_size
938
b3b7ff4e 939You can use this option to set the block size used when slurping from
940an already open handle (like \*STDIN). It defaults to 1MB.
635c7876 941
942 my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
943 array_ref => 1 ) ;
944
945=head3 err_mode
946
947You can use this option to control how read_file behaves when an error
b3b7ff4e 948occurs. This option defaults to 'croak'. You can set it to 'carp' or to
949'quiet to have no special error handling. This code wants to carp and
950then read another file if it fails.
635c7876 951
952 my $text_ref = read_file( $file, err_mode => 'carp' ) ;
953 unless ( $text_ref ) {
954
955 # read a different file but croak if not found
956 $text_ref = read_file( $another_file ) ;
957 }
958
959 # process ${$text_ref}
960
961=head2 B<write_file>
962
963This sub writes out an entire file in one call.
964
965 write_file( 'filename', @data ) ;
966
967The first argument to C<write_file> is the filename. The next argument
968is an optional hash reference and it contains key/values that can
969modify the behavior of C<write_file>. The rest of the argument list is
970the data to be written to the file.
971
972 write_file( 'filename', {append => 1 }, @data ) ;
b3b7ff4e 973 write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
974
975As a shortcut if the first data argument is a scalar or array reference,
976it is used as the only data to be written to the file. Any following
977arguments in @_ are ignored. This is a faster way to pass in the output
978to be written to the file and is equivalent to the C<buf_ref> option of
979C<read_file>. These following pairs are equivalent but the pass by
980reference call will be faster in most cases (especially with larger
635c7876 981files).
982
983 write_file( 'filename', \$buffer ) ;
984 write_file( 'filename', $buffer ) ;
985
986 write_file( 'filename', \@lines ) ;
987 write_file( 'filename', @lines ) ;
988
b3b7ff4e 989If the first argument is a handle (if it is a ref and is an IO or GLOB
990object), then that handle is written to. This mode is supported so you
991spew to handles such as \*STDOUT. See the test handle.t for an example
992that does C<open( '-|' )> and child process spews data to the parent
993which slurps it in. All of the options that control how the data are
994passed into C<write_file> still work in this case.
995
996If the first argument is an overloaded object then its stringified value
997is used for the filename and that file is opened. This is new feature
998in 9999.14. See the stringify.t test for an example.
635c7876 999
b3b7ff4e 1000By default C<write_file> returns 1 upon successfully writing the file or
1001undef if it encountered an error. You can change how errors are handled
1002with the C<err_mode> option.
635c7876 1003
1004The options are:
1005
1006=head3 binmode
1007
b3b7ff4e 1008If you set the binmode option, then its value is passed to a call to
1009binmode on the opened handle. You can use this to set the file to be
1010read in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 1011
1012 write_file( $bin_file, {binmode => ':raw'}, @data ) ;
b3b7ff4e 1013 write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
1014
1015=head3 perms
635c7876 1016
b3b7ff4e 1017The perms option sets the permissions of newly-created files. This value
1018is modified by your process's umask and defaults to 0666 (same as
1019sysopen).
1020
1021NOTE: this option is new as of File::Slurp version 9999.14;
635c7876 1022
1023=head3 buf_ref
1024
1025You can use this option to pass in a scalar reference which has the
1026data to be written. If this is set then any data arguments (including
1027the scalar reference shortcut) in @_ will be ignored. These are
b3b7ff4e 1028equivalent:
635c7876 1029
1030 write_file( $bin_file, { buf_ref => \$buffer } ) ;
1031 write_file( $bin_file, \$buffer ) ;
1032 write_file( $bin_file, $buffer ) ;
1033
1034=head3 atomic
1035
1036If you set this boolean option, the file will be written to in an
1037atomic fashion. A temporary file name is created by appending the pid
1038($$) to the file name argument and that file is spewed to. After the
1039file is closed it is renamed to the original file name (and rename is
1040an atomic operation on most OS's). If the program using this were to
1041crash in the middle of this, then the file with the pid suffix could
1042be left behind.
1043
1044=head3 append
1045
1046If you set this boolean option, the data will be written at the end of
f02156f2 1047the current file. Internally this sets the sysopen mode flag O_APPEND.
635c7876 1048
1049 write_file( $file, {append => 1}, @data ) ;
1050
b3b7ff4e 1051 You
1052can import append_file and it does the same thing.
635c7876 1053
1054=head3 no_clobber
1055
1056If you set this boolean option, an existing file will not be overwritten.
1057
1058 write_file( $file, {no_clobber => 1}, @data ) ;
1059
1060=head3 err_mode
1061
1062You can use this option to control how C<write_file> behaves when an
1063error occurs. This option defaults to 'croak'. You can set it to
1064'carp' or to 'quiet' to have no error handling other than the return
1065value. If the first call to C<write_file> fails it will carp and then
1066write to another file. If the second call to C<write_file> fails, it
1067will croak.
1068
1069 unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
1070
1071 # write a different file but croak if not found
1072 write_file( $other_file, \$data ) ;
1073 }
1074
1075=head2 overwrite_file
1076
1077This sub is just a typeglob alias to write_file since write_file
1078always overwrites an existing file. This sub is supported for
1079backwards compatibility with the original version of this module. See
1080write_file for its API and behavior.
1081
1082=head2 append_file
1083
1084This sub will write its data to the end of the file. It is a wrapper
1085around write_file and it has the same API so see that for the full
b3b7ff4e 1086documentation. These calls are equivalent:
635c7876 1087
1088 append_file( $file, @data ) ;
1089 write_file( $file, {append => 1}, @data ) ;
1090
b3b7ff4e 1091
1092=head2 prepend_file
1093
1094This sub writes data to the beginning of a file. The previously existing
1095data is written after that so the effect is prepending data in front of
1096a file. It is a counterpart to the append_file sub in this module. It
1097works by first using C<read_file> to slurp in the file and then calling
1098C<write_file> with the new data and the existing file data.
1099
1100The first argument to C<prepend_file> is the filename. The next argument
1101is an optional hash reference and it contains key/values that can modify
1102the behavior of C<prepend_file>. The rest of the argument list is the
1103data to be written to the file and that is passed to C<write_file> as is
1104(see that for allowed data).
1105
1106Only the C<binmode> and C<err_mode> options are supported. The
1107C<write_file> call has the C<atomic> option set so you will always have
1108a consistant file. See above for more about those options.
1109
1110C<prepend_file> is not exported by default, you need to import it
1111explicitly.
1112
1113 use File::Slurp qw( prepend_file ) ;
1114 prepend_file( $file, $header ) ;
1115 prepend_file( $file, \@lines ) ;
1116 prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
1117
635c7876 1118=head2 read_dir
1119
1120This sub reads all the file names from directory and returns them to
1121the caller but C<.> and C<..> are removed by default.
1122
1123 my @files = read_dir( '/path/to/dir' ) ;
1124
b3b7ff4e 1125The first argument is the path to the directory to read. If the next
1126argument is a hash reference, then it is used as the options.
1127Otherwise the rest of the argument list are is used as key/value
1128options.
635c7876 1129
b3b7ff4e 1130In list context C<read_dir> returns a list of the entries in the
635c7876 1131directory. In a scalar context it returns an array reference which has
1132the entries.
1133
b3b7ff4e 1134=head3 err_mode
1135
1136If the C<err_mode> option is set, it selects how errors are handled (see
1137C<err_mode> in C<read_file> or C<write_file>).
1138
635c7876 1139=head3 keep_dot_dot
1140
1141If this boolean option is set, C<.> and C<..> are not removed from the
1142list of files.
1143
1144 my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1145
1146=head2 EXPORT
1147
1148 read_file write_file overwrite_file append_file read_dir
1149
f02156f2 1150=head2 LICENSE
1151
1152 Same as Perl.
1153
635c7876 1154=head2 SEE ALSO
1155
1156An article on file slurping in extras/slurp_article.pod. There is
1157also a benchmarking script in extras/slurp_bench.pl.
1158
1159=head2 BUGS
1160
1161If run under Perl 5.004, slurping from the DATA handle will fail as
1162that requires B.pm which didn't get into core until 5.005.
1163
1164=head1 AUTHOR
1165
b3b7ff4e 1166Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
635c7876 1167
1168=cut