added prefix option to read_dir
[urisagit/Perl-Docs.git] / lib / File / Slurp.pm
CommitLineData
635c7876 1package File::Slurp;
2
6f9e0c69 3use 5.6.2 ;
e2c51d31 4
635c7876 5use strict;
6ccd701d 6use warnings ;
635c7876 7
8use Carp ;
b3b7ff4e 9use Exporter ;
635c7876 10use Fcntl qw( :DEFAULT ) ;
e2c51d31 11use POSIX qw( :fcntl_h ) ;
6ccd701d 12#use Symbol ;
635c7876 13
6f9e0c69 14use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ;
b3b7ff4e 15@ISA = qw( Exporter ) ;
e2c51d31 16
6f9e0c69 17$VERSION = '9999.17';
18
8a1e3264 19@EXPORT_OK = qw(
20 slurp
21 prepend_file
22 edit_file
23 edit_file_lines
24) ;
25
26%EXPORT_TAGS = ( 'all' => [ qw(
27 read_file
28 write_file
29 overwrite_file
30 append_file
31 read_dir ),
32 @EXPORT_OK
33] ) ;
e2c51d31 34@EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
e2c51d31 35
b3b7ff4e 36my $max_fast_slurp_size = 1024 * 100 ;
9aab46ab 37
635c7876 38my $is_win32 = $^O =~ /win32/i ;
39
40# Install subs for various constants that aren't set in older perls
41# (< 5.005). Fcntl on old perls uses Exporter to define subs without a
42# () prototype These can't be overridden with the constant pragma or
43# we get a prototype mismatch. Hence this less than aesthetically
44# appealing BEGIN block:
45
46BEGIN {
8ed110f9 47 unless( defined &SEEK_SET ) {
635c7876 48 *SEEK_SET = sub { 0 };
49 *SEEK_CUR = sub { 1 };
50 *SEEK_END = sub { 2 };
51 }
52
8ed110f9 53 unless( defined &O_BINARY ) {
635c7876 54 *O_BINARY = sub { 0 };
55 *O_RDONLY = sub { 0 };
56 *O_WRONLY = sub { 1 };
57 }
58
f02156f2 59 unless ( defined &O_APPEND ) {
635c7876 60
61 if ( $^O =~ /olaris/ ) {
62 *O_APPEND = sub { 8 };
63 *O_CREAT = sub { 256 };
64 *O_EXCL = sub { 1024 };
65 }
66 elsif ( $^O =~ /inux/ ) {
67 *O_APPEND = sub { 1024 };
68 *O_CREAT = sub { 64 };
69 *O_EXCL = sub { 128 };
70 }
71 elsif ( $^O =~ /BSD/i ) {
72 *O_APPEND = sub { 8 };
73 *O_CREAT = sub { 512 };
74 *O_EXCL = sub { 2048 };
75 }
76 }
77}
78
79# print "OS [$^O]\n" ;
80
81# print "O_BINARY = ", O_BINARY(), "\n" ;
82# print "O_RDONLY = ", O_RDONLY(), "\n" ;
83# print "O_WRONLY = ", O_WRONLY(), "\n" ;
84# print "O_APPEND = ", O_APPEND(), "\n" ;
85# print "O_CREAT ", O_CREAT(), "\n" ;
86# print "O_EXCL ", O_EXCL(), "\n" ;
87
635c7876 88
89*slurp = \&read_file ;
90
91sub read_file {
92
b3b7ff4e 93 my $file_name = shift ;
94 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
635c7876 95
6f9e0c69 96# this is the optimized read_file for shorter files.
97# the test for -s > 0 is to allow pseudo files to be read with the
98# regular loop since they return a size of 0.
99
100 if ( !ref $file_name && -e $file_name && -s _ > 0 &&
101 -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) {
e2c51d31 102
e2c51d31 103
6f9e0c69 104 my $fh ;
105 unless( sysopen( $fh, $file_name, O_RDONLY ) ) {
e2c51d31 106
b3b7ff4e 107 @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
e2c51d31 108 goto &_error ;
109 }
110
6f9e0c69 111 my $read_cnt = sysread( $fh, my $buf, -s _ ) ;
e2c51d31 112
113 unless ( defined $read_cnt ) {
114
b3b7ff4e 115 @_ = ( $opts,
8ed110f9 116 "read_file '$file_name' - small sysread: $!");
e2c51d31 117 goto &_error ;
118 }
119
6f9e0c69 120 $buf =~ s/\015\012/\n/g if $is_win32 ;
e2c51d31 121 return $buf ;
122 }
123
635c7876 124# set the buffer to either the passed in one or ours and init it to the null
125# string
126
127 my $buf ;
b3b7ff4e 128 my $buf_ref = $opts->{'buf_ref'} || \$buf ;
635c7876 129 ${$buf_ref} = '' ;
130
131 my( $read_fh, $size_left, $blk_size ) ;
132
b3b7ff4e 133# deal with ref for a file name
134# it could be an open handle or an overloaded object
635c7876 135
136 if ( ref $file_name ) {
137
b3b7ff4e 138 my $ref_result = _check_ref( $file_name ) ;
635c7876 139
b3b7ff4e 140 if ( ref $ref_result ) {
635c7876 141
b3b7ff4e 142# we got an error, deal with it
635c7876 143
b3b7ff4e 144 @_ = ( $opts, $ref_result ) ;
635c7876 145 goto &_error ;
146 }
147
b3b7ff4e 148 if ( $ref_result ) {
635c7876 149
b3b7ff4e 150# we got an overloaded object and the result is the stringified value
151# use it as the file name
152
153 $file_name = $ref_result ;
154 }
155 else {
156
157# here we have just an open handle. set $read_fh so we don't do a sysopen
635c7876 158
b3b7ff4e 159 $read_fh = $file_name ;
160 $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
161 $size_left = $blk_size ;
635c7876 162 }
163 }
b3b7ff4e 164
165# see if we have a path we need to open
166
167 unless ( $read_fh ) {
635c7876 168
169# a regular file. set the sysopen mode
170
171 my $mode = O_RDONLY ;
635c7876 172
173#printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
174
6ccd701d 175 $read_fh = local( *FH ) ;
176# $read_fh = gensym ;
635c7876 177 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
b3b7ff4e 178 @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
635c7876 179 goto &_error ;
180 }
181
b3b7ff4e 182 if ( my $binmode = $opts->{'binmode'} ) {
cee624ab 183 binmode( $read_fh, $binmode ) ;
184 }
185
635c7876 186# get the size of the file for use in the read loop
187
188 $size_left = -s $read_fh ;
189
f9940db7 190#print "SIZE $size_left\n" ;
8ed110f9 191
f9940db7 192# we need a blk_size if the size is 0 so we can handle pseudofiles like in
193# /proc. these show as 0 size but have data to be slurped.
194
195 unless( $size_left ) {
196
b3b7ff4e 197 $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
f9940db7 198 $size_left = $blk_size ;
199 }
e2c51d31 200 }
201
635c7876 202# infinite read loop. we exit when we are done slurping
203
204 while( 1 ) {
205
206# do the read and see how much we got
207
208 my $read_cnt = sysread( $read_fh, ${$buf_ref},
209 $size_left, length ${$buf_ref} ) ;
210
e2c51d31 211 unless ( defined $read_cnt ) {
212
b3b7ff4e 213 @_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
e2c51d31 214 goto &_error ;
215 }
635c7876 216
217# good read. see if we hit EOF (nothing left to read)
218
e2c51d31 219 last if $read_cnt == 0 ;
635c7876 220
221# loop if we are slurping a handle. we don't track $size_left then.
222
e2c51d31 223 next if $blk_size ;
635c7876 224
225# count down how much we read and loop if we have more to read.
635c7876 226
e2c51d31 227 $size_left -= $read_cnt ;
228 last if $size_left <= 0 ;
635c7876 229 }
230
231# fix up cr/lf to be a newline if this is a windows text file
232
b3b7ff4e 233 ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
635c7876 234
635c7876 235 my $sep = $/ ;
236 $sep = '\n\n+' if defined $sep && $sep eq '' ;
237
b3b7ff4e 238# see if caller wants lines
239
240 if( wantarray || $opts->{'array_ref'} ) {
635c7876 241
6f9e0c69 242 use re 'taint' ;
635c7876 243
6f9e0c69 244 my @lines = length(${$buf_ref}) ?
245 ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ;
635c7876 246
6ccd701d 247 chomp @lines if $opts->{'chomp'} ;
248
6f9e0c69 249# caller wants an array ref
b3b7ff4e 250
6f9e0c69 251 return \@lines if $opts->{'array_ref'} ;
b3b7ff4e 252
6f9e0c69 253# caller wants list of lines
b3b7ff4e 254
b3b7ff4e 255 return @lines ;
256 }
635c7876 257
258# caller wants a scalar ref to the slurped text
259
b3b7ff4e 260 return $buf_ref if $opts->{'scalar_ref'} ;
635c7876 261
262# caller wants a scalar with the slurped text (normal scalar context)
263
264 return ${$buf_ref} if defined wantarray ;
265
266# caller passed in an i/o buffer by reference (normal void context)
267
268 return ;
269}
270
b3b7ff4e 271# errors in this sub are returned as scalar refs
272# a normal IO/GLOB handle is an empty return
273# an overloaded object returns its stringified as a scalarfilename
274
275sub _check_ref {
276
277 my( $handle ) = @_ ;
278
279# check if we are reading from a handle (GLOB or IO object)
280
281 if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) {
282
283# we have a handle. deal with seeking to it if it is DATA
284
285 my $err = _seek_data_handle( $handle ) ;
286
287# return the error string if any
288
289 return \$err if $err ;
290
291# we have good handle
292 return ;
293 }
294
295 eval { require overload } ;
296
297# return an error if we can't load the overload pragma
298# or if the object isn't overloaded
299
300 return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
301 if $@ || !overload::Overloaded( $handle ) ;
302
303# must be overloaded so return its stringified value
304
305 return "$handle" ;
306}
307
308sub _seek_data_handle {
309
310 my( $handle ) = @_ ;
311
312# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
313# glob/handle. only the DATA handle is untainted (since it is from
314# trusted data in the source file). this allows us to test if this is
315# the DATA handle and then to do a sysseek to make sure it gets
316# slurped correctly. on some systems, the buffered i/o pointer is not
317# left at the same place as the fd pointer. this sysseek makes them
318# the same so slurping with sysread will work.
319
320 eval{ require B } ;
321
322 if ( $@ ) {
323
324 return <<ERR ;
325Can't find B.pm with this Perl: $!.
326That module is needed to properly slurp the DATA handle.
327ERR
328 }
329
330 if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
331
332# set the seek position to the current tell.
333
334 unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
335 return "read_file '$handle' - sysseek: $!" ;
336 }
337 }
338
339# seek was successful, return no error string
340
341 return ;
342}
343
344
635c7876 345sub write_file {
346
347 my $file_name = shift ;
348
349# get the optional argument hash ref from @_ or an empty hash ref.
350
b3b7ff4e 351 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
635c7876 352
353 my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
354
355# get the buffer ref - it depends on how the data is passed into write_file
356# after this if/else $buf_ref will have a scalar ref to the data.
357
b3b7ff4e 358 if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
635c7876 359
b3b7ff4e 360# a scalar ref passed in %opts has the data
635c7876 361# note that the data was passed by ref
362
b3b7ff4e 363 $buf_ref = $opts->{'buf_ref'} ;
635c7876 364 $data_is_ref = 1 ;
365 }
366 elsif ( ref $_[0] eq 'SCALAR' ) {
367
368# the first value in @_ is the scalar ref to the data
369# note that the data was passed by ref
370
371 $buf_ref = shift ;
372 $data_is_ref = 1 ;
373 }
374 elsif ( ref $_[0] eq 'ARRAY' ) {
375
376# the first value in @_ is the array ref to the data so join it.
377
378 ${$buf_ref} = join '', @{$_[0]} ;
379 }
380 else {
381
382# good old @_ has all the data so join it.
383
384 ${$buf_ref} = join '', @_ ;
385 }
386
b3b7ff4e 387# deal with ref for a file name
635c7876 388
389 if ( ref $file_name ) {
390
b3b7ff4e 391 my $ref_result = _check_ref( $file_name ) ;
392
393 if ( ref $ref_result ) {
635c7876 394
b3b7ff4e 395# we got an error, deal with it
396
397 @_ = ( $opts, $ref_result ) ;
398 goto &_error ;
399 }
400
401 if ( $ref_result ) {
402
403# we got an overloaded object and the result is the stringified value
404# use it as the file name
405
406 $file_name = $ref_result ;
407 }
408 else {
409
410# we now have a proper handle ref.
411# make sure we don't call truncate on it.
412
413 $write_fh = $file_name ;
414 $no_truncate = 1 ;
415 }
635c7876 416 }
b3b7ff4e 417
418# see if we have a path we need to open
419
420 unless( $write_fh ) {
635c7876 421
422# spew to regular file.
423
b3b7ff4e 424 if ( $opts->{'atomic'} ) {
635c7876 425
426# in atomic mode, we spew to a temp file so make one and save the original
427# file name.
428 $orig_file_name = $file_name ;
429 $file_name .= ".$$" ;
430 }
431
432# set the mode for the sysopen
433
434 my $mode = O_WRONLY | O_CREAT ;
b3b7ff4e 435 $mode |= O_APPEND if $opts->{'append'} ;
436 $mode |= O_EXCL if $opts->{'no_clobber'} ;
635c7876 437
b3b7ff4e 438 my $perms = $opts->{perms} ;
f02156f2 439 $perms = 0666 unless defined $perms ;
440
635c7876 441#printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
442
443# open the file and handle any error.
444
6ccd701d 445 $write_fh = local( *FH ) ;
446# $write_fh = gensym ;
f02156f2 447 unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
b3b7ff4e 448
449 @_ = ( $opts, "write_file '$file_name' - sysopen: $!");
635c7876 450 goto &_error ;
451 }
452 }
453
b3b7ff4e 454 if ( my $binmode = $opts->{'binmode'} ) {
cee624ab 455 binmode( $write_fh, $binmode ) ;
456 }
457
b3b7ff4e 458 sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
635c7876 459
635c7876 460#print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
461
462# fix up newline to write cr/lf if this is a windows text file
463
b3b7ff4e 464 if ( $is_win32 && !$opts->{'binmode'} ) {
635c7876 465
466# copy the write data if it was passed by ref so we don't clobber the
467# caller's data
468 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
469 ${$buf_ref} =~ s/\n/\015\012/g ;
470 }
471
472#print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
473
474# get the size of how much we are writing and init the offset into that buffer
475
476 my $size_left = length( ${$buf_ref} ) ;
477 my $offset = 0 ;
478
479# loop until we have no more data left to write
480
481 do {
482
483# do the write and track how much we just wrote
484
485 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
486 $size_left, $offset ) ;
487
488 unless ( defined $write_cnt ) {
489
b3b7ff4e 490 @_ = ( $opts, "write_file '$file_name' - syswrite: $!");
635c7876 491 goto &_error ;
492 }
493
6f9e0c69 494# track how much left to write and where to write from in the buffer
635c7876 495
496 $size_left -= $write_cnt ;
497 $offset += $write_cnt ;
498
499 } while( $size_left > 0 ) ;
500
501# we truncate regular files in case we overwrite a long file with a shorter file
502# so seek to the current position to get it (same as tell()).
503
504 truncate( $write_fh,
505 sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
506
507 close( $write_fh ) ;
508
509# handle the atomic mode - move the temp file to the original filename.
510
b3b7ff4e 511 if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
e2c51d31 512
b3b7ff4e 513 @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
e2c51d31 514 goto &_error ;
515 }
635c7876 516
517 return 1 ;
518}
519
520# this is for backwards compatibility with the previous File::Slurp module.
521# write_file always overwrites an existing file
522
523*overwrite_file = \&write_file ;
524
525# the current write_file has an append mode so we use that. this
526# supports the same API with an optional second argument which is a
527# hash ref of options.
528
529sub append_file {
530
b3b7ff4e 531# get the optional opts hash ref
532 my $opts = $_[1] ;
533 if ( ref $opts eq 'HASH' ) {
635c7876 534
b3b7ff4e 535# we were passed an opts ref so just mark the append mode
635c7876 536
b3b7ff4e 537 $opts->{append} = 1 ;
635c7876 538 }
539 else {
540
b3b7ff4e 541# no opts hash so insert one with the append mode
635c7876 542
543 splice( @_, 1, 0, { append => 1 } ) ;
544 }
545
546# magic goto the main write_file sub. this overlays the sub without touching
547# the stack or @_
548
549 goto &write_file
550}
551
b3b7ff4e 552# prepend data to the beginning of a file
553
554sub prepend_file {
555
556 my $file_name = shift ;
557
558#print "FILE $file_name\n" ;
559
560 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
561
562# delete unsupported options
563
564 my @bad_opts =
565 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
566
567 delete @{$opts}{@bad_opts} ;
568
569 my $prepend_data = shift ;
570 $prepend_data = '' unless defined $prepend_data ;
571 $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
572
573#print "PRE [$prepend_data]\n" ;
574
b3b7ff4e 575 my $err_mode = delete $opts->{err_mode} ;
576 $opts->{ err_mode } = 'croak' ;
577 $opts->{ scalar_ref } = 1 ;
578
6f9e0c69 579 my $existing_data = eval { read_file( $file_name, $opts ) } ;
b3b7ff4e 580
581 if ( $@ ) {
582
583 @_ = ( { err_mode => $err_mode },
584 "prepend_file '$file_name' - read_file: $!" ) ;
585 goto &_error ;
586 }
587
588#print "EXIST [$$existing_data]\n" ;
589
6f9e0c69 590 $opts->{atomic} = 1 ;
591 my $write_result =
592 eval { write_file( $file_name, $opts,
593 $prepend_data, $$existing_data ) ;
b3b7ff4e 594 } ;
595
596 if ( $@ ) {
597
598 @_ = ( { err_mode => $err_mode },
599 "prepend_file '$file_name' - write_file: $!" ) ;
600 goto &_error ;
601 }
602
603 return $write_result ;
604}
605
6f9e0c69 606# edit a file as a scalar in $_
607
608sub edit_file(&$;$) {
609
610 my( $edit_code, $file_name, $opts ) = @_ ;
611 $opts = {} unless ref $opts eq 'HASH' ;
612
613# my $edit_code = shift ;
614# my $file_name = shift ;
615# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
616
617#print "FILE $file_name\n" ;
618
619# delete unsupported options
620
621 my @bad_opts =
622 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
623
624 delete @{$opts}{@bad_opts} ;
625
626# keep the user err_mode and force croaking on internal errors
627
628 my $err_mode = delete $opts->{err_mode} ;
629 $opts->{ err_mode } = 'croak' ;
630
631# get a scalar ref for speed and slurp the file into a scalar
632
633 $opts->{ scalar_ref } = 1 ;
634 my $existing_data = eval { read_file( $file_name, $opts ) } ;
635
636 if ( $@ ) {
637
638 @_ = ( { err_mode => $err_mode },
639 "edit_file '$file_name' - read_file: $!" ) ;
640 goto &_error ;
641 }
642
643#print "EXIST [$$existing_data]\n" ;
644
645 my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ;
646
647 $opts->{atomic} = 1 ;
648 my $write_result =
649 eval { write_file( $file_name, $opts, $edited_data ) } ;
650
651 if ( $@ ) {
652
653 @_ = ( { err_mode => $err_mode },
654 "edit_file '$file_name' - write_file: $!" ) ;
655 goto &_error ;
656 }
657
658 return $write_result ;
659}
660
661sub edit_file_lines(&$;$) {
662
663 my( $edit_code, $file_name, $opts ) = @_ ;
664 $opts = {} unless ref $opts eq 'HASH' ;
665
666# my $edit_code = shift ;
667# my $file_name = shift ;
668# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
669
670#print "FILE $file_name\n" ;
671
672# delete unsupported options
673
674 my @bad_opts =
675 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
676
677 delete @{$opts}{@bad_opts} ;
678
679# keep the user err_mode and force croaking on internal errors
680
681 my $err_mode = delete $opts->{err_mode} ;
682 $opts->{ err_mode } = 'croak' ;
683
684# get an array ref for speed and slurp the file into lines
685
686 $opts->{ array_ref } = 1 ;
687 my $existing_data = eval { read_file( $file_name, $opts ) } ;
688
689 if ( $@ ) {
690
691 @_ = ( { err_mode => $err_mode },
692 "edit_file_lines '$file_name' - read_file: $!" ) ;
693 goto &_error ;
694 }
695
696#print "EXIST [$$existing_data]\n" ;
697
698 my @edited_data = map { $edit_code->(); $_ } @$existing_data ;
699
700 $opts->{atomic} = 1 ;
701 my $write_result =
702 eval { write_file( $file_name, $opts, @edited_data ) } ;
703
704 if ( $@ ) {
705
706 @_ = ( { err_mode => $err_mode },
707 "edit_file_lines '$file_name' - write_file: $!" ) ;
708 goto &_error ;
709 }
710
711 return $write_result ;
712}
713
714# basic wrapper around opendir/readdir
715
635c7876 716sub read_dir {
717
b3b7ff4e 718 my $dir = shift ;
719 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
635c7876 720
721# this handle will be destroyed upon return
722
723 local(*DIRH);
724
725# open the dir and handle any errors
726
727 unless ( opendir( DIRH, $dir ) ) {
728
b3b7ff4e 729 @_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
635c7876 730 goto &_error ;
731 }
732
733 my @dir_entries = readdir(DIRH) ;
734
735 @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
b3b7ff4e 736 unless $opts->{'keep_dot_dot'} ;
635c7876 737
6ccd701d 738 if ( $opts->{'prefix'} ) {
739
740 substr( $_, 0, 0, "$dir/" ) for @dir_entries ;
741 }
742
635c7876 743 return @dir_entries if wantarray ;
744 return \@dir_entries ;
745}
746
747# error handling section
748#
749# all the error handling uses magic goto so the caller will get the
750# error message as if from their code and not this module. if we just
751# did a call on the error code, the carp/croak would report it from
752# this module since the error sub is one level down on the call stack
753# from read_file/write_file/read_dir.
754
755
756my %err_func = (
757 'carp' => \&carp,
758 'croak' => \&croak,
759) ;
760
761sub _error {
762
b3b7ff4e 763 my( $opts, $err_msg ) = @_ ;
635c7876 764
765# get the error function to use
766
b3b7ff4e 767 my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
635c7876 768
769# if we didn't find it in our error function hash, they must have set
770# it to quiet and we don't do anything.
771
772 return unless $func ;
773
774# call the carp/croak function
775
f02156f2 776 $func->($err_msg) if $func ;
635c7876 777
778# return a hard undef (in list context this will be a single value of
779# undef which is not a legal in-band value)
780
781 return undef ;
782}
783
7841;
785__END__
786
787=head1 NAME
788
6f9e0c69 789File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files
635c7876 790
791=head1 SYNOPSIS
792
793 use File::Slurp;
794
b3b7ff4e 795# read in a whole file into a scalar
635c7876 796 my $text = read_file( 'filename' ) ;
b3b7ff4e 797
798# read in a whole file into an array of lines
635c7876 799 my @lines = read_file( 'filename' ) ;
800
b3b7ff4e 801# write out a whole file from a scalar
b3b7ff4e 802 write_file( 'filename', $text ) ;
803
804# write out a whole file from an array of lines
635c7876 805 write_file( 'filename', @lines ) ;
806
b3b7ff4e 807# Here is a simple and fast way to load and save a simple config file
808# made of key=value lines.
b3b7ff4e 809 my %conf = read_file( $file_name ) =~ /^(\w+)=(\.*)$/mg ;
810 write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ;
635c7876 811
6f9e0c69 812# insert text at the beginning of a file
6f9e0c69 813 prepend_file( 'filename', $text ) ;
814
6ccd701d 815# in-place edit to replace all 'foo' with 'bar' in file
816 edit_file { s/foo/bar/g } 'filename' ;
635c7876 817
6ccd701d 818# in-place edit to delete all lines with 'foo' from file
819 edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
820
821# read in a whole directory of file names (skipping . and ..)
b3b7ff4e 822 my @files = read_dir( '/path/to/dir' ) ;
635c7876 823
824=head1 DESCRIPTION
825
826This module provides subs that allow you to read or write entire files
827with one simple call. They are designed to be simple to use, have
828flexible ways to pass in or get the file contents and to be very
829efficient. There is also a sub to read in all the files in a
830directory other than C<.> and C<..>
831
b3b7ff4e 832These slurp/spew subs work for files, pipes and sockets, stdio,
833pseudo-files, and the DATA handle. Read more about why slurping files is
834a good thing in the file 'slurp_article.pod' in the extras/ directory.
835
836If you are interested in how fast these calls work, check out the
837slurp_bench.pl program in the extras/ directory. It compares many
838different forms of slurping. You can select the I/O direction, context
839and file sizes. Use the --help option to see how to run it.
635c7876 840
841=head2 B<read_file>
842
843This sub reads in an entire file and returns its contents to the
b3b7ff4e 844caller. In scalar context it returns the entire file as a single
845scalar. In list context it will return a list of lines (using the
635c7876 846current value of $/ as the separator including support for paragraph
b3b7ff4e 847mode when it is set to '').
635c7876 848
849 my $text = read_file( 'filename' ) ;
b3b7ff4e 850 my $bin = read_file( 'filename' { binmode => ':raw' } ) ;
635c7876 851 my @lines = read_file( 'filename' ) ;
b3b7ff4e 852 my $lines = read_file( 'filename', array_ref => 1 ) ;
635c7876 853
b3b7ff4e 854The first argument is the file to slurp in. If the next argument is a
855hash reference, then it is used as the options. Otherwise the rest of
856the argument list are is used as key/value options.
635c7876 857
b3b7ff4e 858If the file argument is a handle (if it is a ref and is an IO or GLOB
859object), then that handle is slurped in. This mode is supported so you
860slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
861an example that does C<open( '-|' )> and the child process spews data
635c7876 862to the parant which slurps it in. All of the options that control how
863the data is returned to the caller still work in this case.
864
b3b7ff4e 865If the first argument is an overloaded object then its stringified value
866is used for the filename and that file is opened. This is a new feature
867in 9999.14. See the stringify.t test for an example.
868
869By default C<read_file> returns an undef in scalar contex or a single
870undef in list context if it encounters an error. Those are both
871impossible to get with a clean read_file call which means you can check
872the return value and always know if you had an error. You can change how
873errors are handled with the C<err_mode> option.
874
6f9e0c69 875Speed Note: If you call read_file and just get a scalar return value
876it is now optimized to handle shorter files. This is only used if no
877options are used, the file is shorter then 100k bytes, the filename is
878a plain scalar and a scalar file is returned. If you want the fastest
879slurping, use the C<buf_ref> or C<scalar_ref> options (see below)
880
635c7876 881NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
882handle. It used to need a sysseek workaround but that is now handled
883when needed by the module itself.
884
885You can optionally request that C<slurp()> is exported to your code. This
886is an alias for read_file and is meant to be forward compatible with
887Perl 6 (which will have slurp() built-in).
888
b3b7ff4e 889The options for C<read_file> are:
635c7876 890
891=head3 binmode
892
b3b7ff4e 893If you set the binmode option, then its value is passed to a call to
894binmode on the opened handle. You can use this to set the file to be
895read in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 896
897 my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
9aab46ab 898 my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
635c7876 899
900=head3 array_ref
901
902If this boolean option is set, the return value (only in scalar
903context) will be an array reference which contains the lines of the
904slurped file. The following two calls are equivalent:
905
906 my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
907 my $lines_ref = [ read_file( $bin_file ) ] ;
908
6ccd701d 909=head3 chomp
910
911If this boolean option is set, the lines are chomped. This only
912happens if you are slurping in a list context or using the
913C<array_ref> option.
914
635c7876 915=head3 scalar_ref
916
6f9e0c69 917If this boolean option is set, the return value (only in scalar
918context) will be an scalar reference to a string which is the contents
919of the slurped file. This will usually be faster than returning the
920plain scalar. It will also save memory as it will not make a copy of
921the file to return. Run the extras/slurp_bench.pl script to see speed
922comparisons.
635c7876 923
924 my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
925
926=head3 buf_ref
927
928You can use this option to pass in a scalar reference and the slurped
929file contents will be stored in the scalar. This can be used in
f02156f2 930conjunction with any of the other options. This saves an extra copy of
6f9e0c69 931the slurped file and can lower ram usage vs returning the file. It is
932usually the fastest way to read a file into a scalar. Run the
933extras/slurp_bench.pl script to see speed comparisons.
934
635c7876 935
b3b7ff4e 936 read_file( $bin_file, buf_ref => \$buffer ) ;
635c7876 937
938=head3 blk_size
939
b3b7ff4e 940You can use this option to set the block size used when slurping from
941an already open handle (like \*STDIN). It defaults to 1MB.
635c7876 942
943 my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
944 array_ref => 1 ) ;
945
946=head3 err_mode
947
948You can use this option to control how read_file behaves when an error
b3b7ff4e 949occurs. This option defaults to 'croak'. You can set it to 'carp' or to
950'quiet to have no special error handling. This code wants to carp and
951then read another file if it fails.
635c7876 952
953 my $text_ref = read_file( $file, err_mode => 'carp' ) ;
954 unless ( $text_ref ) {
955
956 # read a different file but croak if not found
957 $text_ref = read_file( $another_file ) ;
958 }
959
960 # process ${$text_ref}
961
962=head2 B<write_file>
963
964This sub writes out an entire file in one call.
965
966 write_file( 'filename', @data ) ;
967
968The first argument to C<write_file> is the filename. The next argument
969is an optional hash reference and it contains key/values that can
970modify the behavior of C<write_file>. The rest of the argument list is
971the data to be written to the file.
972
973 write_file( 'filename', {append => 1 }, @data ) ;
b3b7ff4e 974 write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
975
976As a shortcut if the first data argument is a scalar or array reference,
977it is used as the only data to be written to the file. Any following
978arguments in @_ are ignored. This is a faster way to pass in the output
979to be written to the file and is equivalent to the C<buf_ref> option of
980C<read_file>. These following pairs are equivalent but the pass by
981reference call will be faster in most cases (especially with larger
635c7876 982files).
983
984 write_file( 'filename', \$buffer ) ;
985 write_file( 'filename', $buffer ) ;
986
987 write_file( 'filename', \@lines ) ;
988 write_file( 'filename', @lines ) ;
989
b3b7ff4e 990If the first argument is a handle (if it is a ref and is an IO or GLOB
991object), then that handle is written to. This mode is supported so you
992spew to handles such as \*STDOUT. See the test handle.t for an example
993that does C<open( '-|' )> and child process spews data to the parent
994which slurps it in. All of the options that control how the data are
995passed into C<write_file> still work in this case.
996
997If the first argument is an overloaded object then its stringified value
998is used for the filename and that file is opened. This is new feature
999in 9999.14. See the stringify.t test for an example.
635c7876 1000
b3b7ff4e 1001By default C<write_file> returns 1 upon successfully writing the file or
1002undef if it encountered an error. You can change how errors are handled
1003with the C<err_mode> option.
635c7876 1004
1005The options are:
1006
1007=head3 binmode
1008
b3b7ff4e 1009If you set the binmode option, then its value is passed to a call to
1010binmode on the opened handle. You can use this to set the file to be
1011read in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 1012
1013 write_file( $bin_file, {binmode => ':raw'}, @data ) ;
b3b7ff4e 1014 write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
1015
1016=head3 perms
635c7876 1017
b3b7ff4e 1018The perms option sets the permissions of newly-created files. This value
1019is modified by your process's umask and defaults to 0666 (same as
1020sysopen).
1021
1022NOTE: this option is new as of File::Slurp version 9999.14;
635c7876 1023
1024=head3 buf_ref
1025
1026You can use this option to pass in a scalar reference which has the
1027data to be written. If this is set then any data arguments (including
1028the scalar reference shortcut) in @_ will be ignored. These are
b3b7ff4e 1029equivalent:
635c7876 1030
1031 write_file( $bin_file, { buf_ref => \$buffer } ) ;
1032 write_file( $bin_file, \$buffer ) ;
1033 write_file( $bin_file, $buffer ) ;
1034
1035=head3 atomic
1036
1037If you set this boolean option, the file will be written to in an
1038atomic fashion. A temporary file name is created by appending the pid
1039($$) to the file name argument and that file is spewed to. After the
1040file is closed it is renamed to the original file name (and rename is
1041an atomic operation on most OS's). If the program using this were to
1042crash in the middle of this, then the file with the pid suffix could
1043be left behind.
1044
1045=head3 append
1046
1047If you set this boolean option, the data will be written at the end of
f02156f2 1048the current file. Internally this sets the sysopen mode flag O_APPEND.
635c7876 1049
1050 write_file( $file, {append => 1}, @data ) ;
1051
b3b7ff4e 1052 You
1053can import append_file and it does the same thing.
635c7876 1054
1055=head3 no_clobber
1056
1057If you set this boolean option, an existing file will not be overwritten.
1058
1059 write_file( $file, {no_clobber => 1}, @data ) ;
1060
1061=head3 err_mode
1062
1063You can use this option to control how C<write_file> behaves when an
1064error occurs. This option defaults to 'croak'. You can set it to
1065'carp' or to 'quiet' to have no error handling other than the return
1066value. If the first call to C<write_file> fails it will carp and then
1067write to another file. If the second call to C<write_file> fails, it
1068will croak.
1069
1070 unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
1071
1072 # write a different file but croak if not found
1073 write_file( $other_file, \$data ) ;
1074 }
1075
1076=head2 overwrite_file
1077
1078This sub is just a typeglob alias to write_file since write_file
1079always overwrites an existing file. This sub is supported for
1080backwards compatibility with the original version of this module. See
1081write_file for its API and behavior.
1082
1083=head2 append_file
1084
1085This sub will write its data to the end of the file. It is a wrapper
1086around write_file and it has the same API so see that for the full
b3b7ff4e 1087documentation. These calls are equivalent:
635c7876 1088
1089 append_file( $file, @data ) ;
1090 write_file( $file, {append => 1}, @data ) ;
1091
b3b7ff4e 1092
1093=head2 prepend_file
1094
1095This sub writes data to the beginning of a file. The previously existing
1096data is written after that so the effect is prepending data in front of
1097a file. It is a counterpart to the append_file sub in this module. It
1098works by first using C<read_file> to slurp in the file and then calling
1099C<write_file> with the new data and the existing file data.
1100
1101The first argument to C<prepend_file> is the filename. The next argument
1102is an optional hash reference and it contains key/values that can modify
1103the behavior of C<prepend_file>. The rest of the argument list is the
1104data to be written to the file and that is passed to C<write_file> as is
1105(see that for allowed data).
1106
1107Only the C<binmode> and C<err_mode> options are supported. The
1108C<write_file> call has the C<atomic> option set so you will always have
1109a consistant file. See above for more about those options.
1110
1111C<prepend_file> is not exported by default, you need to import it
1112explicitly.
1113
1114 use File::Slurp qw( prepend_file ) ;
1115 prepend_file( $file, $header ) ;
1116 prepend_file( $file, \@lines ) ;
1117 prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
1118
6ccd701d 1119
1120=head2 edit_file, edit_file_lines
1121
1122These subs read in a file into $_, execute a code block which should
1123modify $_ and then write $_ back to the file. The difference between
1124them is that C<edit_file> reads the whole file into $_ and calls the
1125code block one time. With C<edit_file_lines> each line is read into $_
1126and the code is called for each line. In both cases the code should
1127modify $_ if desired and it will be written back out. These subs are
1128the equivilent of the -pi command line options of Perl but you can
1129call them from inside your program and not fork out a process.
1130
1131The first argument to C<edit_file> and C<edit_file_lines> is a code
1132block or a code reference. The code block is not followed by a comma
1133(as with grep and map) but a code reference is followed by a
1134comma. See the examples below for both styles. The next argument is
1135the filename. The last argument is an optional hash reference and it
1136contains key/values that can modify the behavior of
1137C<prepend_file>.
1138
1139Only the C<binmode> and C<err_mode> options are supported. The
1140C<write_file> call has the C<atomic> option set so you will always
1141have a consistant file. See above for more about those options.
1142
1143Each group of calls below show a Perl command line instance and the
1144equivilent calls to C<edit_file> and C<edit_file_lines>.
1145
1146 perl -0777 -pi -e 's/foo/bar/g' filename
1147 use File::Slurp ;
1148 edit_file { s/foo/bar/g } 'filename' ;
1149 edit_file sub { s/foo/bar/g }, 'filename' ;
1150 edit_file \&replace_foo, 'filename' ;
1151 sub replace_foo { s/foo/bar/g }
1152
1153 perl -pi -e '$_ = '' if /foo/' filename
1154 use File::Slurp ;
1155 edit_file_lines { $_ = '' if /foo/ } 'filename' ;
1156 edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
1157 edit_file \&delete_foo, 'filename' ;
1158 sub delete_foo { $_ = '' if /foo/ }
1159
635c7876 1160=head2 read_dir
1161
1162This sub reads all the file names from directory and returns them to
1163the caller but C<.> and C<..> are removed by default.
1164
1165 my @files = read_dir( '/path/to/dir' ) ;
1166
b3b7ff4e 1167The first argument is the path to the directory to read. If the next
1168argument is a hash reference, then it is used as the options.
1169Otherwise the rest of the argument list are is used as key/value
1170options.
635c7876 1171
b3b7ff4e 1172In list context C<read_dir> returns a list of the entries in the
635c7876 1173directory. In a scalar context it returns an array reference which has
1174the entries.
1175
b3b7ff4e 1176=head3 err_mode
1177
1178If the C<err_mode> option is set, it selects how errors are handled (see
1179C<err_mode> in C<read_file> or C<write_file>).
1180
635c7876 1181=head3 keep_dot_dot
1182
1183If this boolean option is set, C<.> and C<..> are not removed from the
1184list of files.
1185
1186 my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1187
6ccd701d 1188=head3 prefix
1189
1190If this boolean option is set, the string "$dir/" is prefixed to each
1191dir entry. This means you can directly use the results to open
1192files. A common newbie mistake is not putting the directory in front
1193of entries when opening themn.
1194
1195 my @paths = read_dir( '/path/to/dir', prefix => 1 ) ;
1196
635c7876 1197=head2 EXPORT
1198
1199 read_file write_file overwrite_file append_file read_dir
1200
f02156f2 1201=head2 LICENSE
1202
1203 Same as Perl.
1204
635c7876 1205=head2 SEE ALSO
1206
1207An article on file slurping in extras/slurp_article.pod. There is
1208also a benchmarking script in extras/slurp_bench.pl.
1209
1210=head2 BUGS
1211
1212If run under Perl 5.004, slurping from the DATA handle will fail as
1213that requires B.pm which didn't get into core until 5.005.
1214
1215=head1 AUTHOR
1216
b3b7ff4e 1217Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
635c7876 1218
1219=cut