3 new versions checked in finally.
[urisagit/File-Slurp.git] / lib / File / Slurp.pm
CommitLineData
635c7876 1package File::Slurp;
2
7e284d1c 3use 5.006002 ;
e2c51d31 4
635c7876 5use strict;
6ccd701d 6use warnings ;
635c7876 7
8use Carp ;
b3b7ff4e 9use Exporter ;
635c7876 10use Fcntl qw( :DEFAULT ) ;
e2c51d31 11use POSIX qw( :fcntl_h ) ;
7e284d1c 12use Errno ;
6ccd701d 13#use Symbol ;
635c7876 14
6f9e0c69 15use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ;
b3b7ff4e 16@ISA = qw( Exporter ) ;
e2c51d31 17
7e284d1c 18$VERSION = '9999.20';
6f9e0c69 19
7e284d1c 20my @std_export = qw(
8a1e3264 21 read_file
22 write_file
23 overwrite_file
24 append_file
7e284d1c 25 read_dir
26) ;
27
28my @edit_export = qw(
29 edit_file
30 edit_file_lines
31) ;
32
33my @ok_export = qw(
34) ;
35
36my @abbrev_export = qw(
37 rf
38 wf
39 ef
40 efl
41) ;
42
43@EXPORT_OK = (
44 @edit_export,
45 @abbrev_export,
46 qw(
47 slurp
48 prepend_file
49 ),
50) ;
51
52%EXPORT_TAGS = (
53 'all' => [ @std_export, @edit_export, @abbrev_export, @EXPORT_OK ],
54 'edit' => [ @edit_export ],
55 'std' => [ @std_export ],
56 'abr' => [ @abbrev_export ],
57) ;
58
59@EXPORT = @std_export ;
e2c51d31 60
b3b7ff4e 61my $max_fast_slurp_size = 1024 * 100 ;
9aab46ab 62
635c7876 63my $is_win32 = $^O =~ /win32/i ;
64
65# Install subs for various constants that aren't set in older perls
66# (< 5.005). Fcntl on old perls uses Exporter to define subs without a
67# () prototype These can't be overridden with the constant pragma or
68# we get a prototype mismatch. Hence this less than aesthetically
69# appealing BEGIN block:
70
71BEGIN {
8ed110f9 72 unless( defined &SEEK_SET ) {
635c7876 73 *SEEK_SET = sub { 0 };
74 *SEEK_CUR = sub { 1 };
75 *SEEK_END = sub { 2 };
76 }
77
8ed110f9 78 unless( defined &O_BINARY ) {
635c7876 79 *O_BINARY = sub { 0 };
80 *O_RDONLY = sub { 0 };
81 *O_WRONLY = sub { 1 };
82 }
83
f02156f2 84 unless ( defined &O_APPEND ) {
635c7876 85
86 if ( $^O =~ /olaris/ ) {
87 *O_APPEND = sub { 8 };
88 *O_CREAT = sub { 256 };
89 *O_EXCL = sub { 1024 };
90 }
91 elsif ( $^O =~ /inux/ ) {
92 *O_APPEND = sub { 1024 };
93 *O_CREAT = sub { 64 };
94 *O_EXCL = sub { 128 };
95 }
96 elsif ( $^O =~ /BSD/i ) {
97 *O_APPEND = sub { 8 };
98 *O_CREAT = sub { 512 };
99 *O_EXCL = sub { 2048 };
100 }
101 }
102}
103
104# print "OS [$^O]\n" ;
105
106# print "O_BINARY = ", O_BINARY(), "\n" ;
107# print "O_RDONLY = ", O_RDONLY(), "\n" ;
108# print "O_WRONLY = ", O_WRONLY(), "\n" ;
109# print "O_APPEND = ", O_APPEND(), "\n" ;
110# print "O_CREAT ", O_CREAT(), "\n" ;
111# print "O_EXCL ", O_EXCL(), "\n" ;
112
635c7876 113
114*slurp = \&read_file ;
7e284d1c 115*rf = \&read_file ;
635c7876 116
117sub read_file {
118
b3b7ff4e 119 my $file_name = shift ;
120 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
635c7876 121
6f9e0c69 122# this is the optimized read_file for shorter files.
123# the test for -s > 0 is to allow pseudo files to be read with the
124# regular loop since they return a size of 0.
125
126 if ( !ref $file_name && -e $file_name && -s _ > 0 &&
127 -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) {
e2c51d31 128
e2c51d31 129
6f9e0c69 130 my $fh ;
131 unless( sysopen( $fh, $file_name, O_RDONLY ) ) {
e2c51d31 132
b3b7ff4e 133 @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
e2c51d31 134 goto &_error ;
135 }
136
6f9e0c69 137 my $read_cnt = sysread( $fh, my $buf, -s _ ) ;
e2c51d31 138
139 unless ( defined $read_cnt ) {
140
b3b7ff4e 141 @_ = ( $opts,
8ed110f9 142 "read_file '$file_name' - small sysread: $!");
e2c51d31 143 goto &_error ;
144 }
145
6f9e0c69 146 $buf =~ s/\015\012/\n/g if $is_win32 ;
e2c51d31 147 return $buf ;
148 }
149
635c7876 150# set the buffer to either the passed in one or ours and init it to the null
151# string
152
153 my $buf ;
b3b7ff4e 154 my $buf_ref = $opts->{'buf_ref'} || \$buf ;
635c7876 155 ${$buf_ref} = '' ;
156
157 my( $read_fh, $size_left, $blk_size ) ;
158
b3b7ff4e 159# deal with ref for a file name
160# it could be an open handle or an overloaded object
635c7876 161
162 if ( ref $file_name ) {
163
b3b7ff4e 164 my $ref_result = _check_ref( $file_name ) ;
635c7876 165
b3b7ff4e 166 if ( ref $ref_result ) {
635c7876 167
b3b7ff4e 168# we got an error, deal with it
635c7876 169
b3b7ff4e 170 @_ = ( $opts, $ref_result ) ;
635c7876 171 goto &_error ;
172 }
173
b3b7ff4e 174 if ( $ref_result ) {
635c7876 175
b3b7ff4e 176# we got an overloaded object and the result is the stringified value
177# use it as the file name
178
179 $file_name = $ref_result ;
180 }
181 else {
182
183# here we have just an open handle. set $read_fh so we don't do a sysopen
635c7876 184
b3b7ff4e 185 $read_fh = $file_name ;
186 $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
187 $size_left = $blk_size ;
635c7876 188 }
189 }
b3b7ff4e 190
191# see if we have a path we need to open
192
193 unless ( $read_fh ) {
635c7876 194
195# a regular file. set the sysopen mode
196
197 my $mode = O_RDONLY ;
635c7876 198
199#printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
200
6ccd701d 201 $read_fh = local( *FH ) ;
202# $read_fh = gensym ;
635c7876 203 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
b3b7ff4e 204 @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
635c7876 205 goto &_error ;
206 }
207
b3b7ff4e 208 if ( my $binmode = $opts->{'binmode'} ) {
cee624ab 209 binmode( $read_fh, $binmode ) ;
210 }
211
635c7876 212# get the size of the file for use in the read loop
213
214 $size_left = -s $read_fh ;
215
f9940db7 216#print "SIZE $size_left\n" ;
8ed110f9 217
f9940db7 218# we need a blk_size if the size is 0 so we can handle pseudofiles like in
219# /proc. these show as 0 size but have data to be slurped.
220
221 unless( $size_left ) {
222
b3b7ff4e 223 $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
f9940db7 224 $size_left = $blk_size ;
225 }
e2c51d31 226 }
227
635c7876 228# infinite read loop. we exit when we are done slurping
229
230 while( 1 ) {
231
232# do the read and see how much we got
233
234 my $read_cnt = sysread( $read_fh, ${$buf_ref},
235 $size_left, length ${$buf_ref} ) ;
236
7e284d1c 237# since we're using sysread Perl won't automatically restart the call
238# when interrupted by a signal.
239
240 next if $!{EINTR};
241
e2c51d31 242 unless ( defined $read_cnt ) {
243
b3b7ff4e 244 @_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
e2c51d31 245 goto &_error ;
246 }
635c7876 247
248# good read. see if we hit EOF (nothing left to read)
249
e2c51d31 250 last if $read_cnt == 0 ;
635c7876 251
252# loop if we are slurping a handle. we don't track $size_left then.
253
e2c51d31 254 next if $blk_size ;
635c7876 255
256# count down how much we read and loop if we have more to read.
635c7876 257
e2c51d31 258 $size_left -= $read_cnt ;
259 last if $size_left <= 0 ;
635c7876 260 }
261
262# fix up cr/lf to be a newline if this is a windows text file
263
b3b7ff4e 264 ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
635c7876 265
635c7876 266 my $sep = $/ ;
267 $sep = '\n\n+' if defined $sep && $sep eq '' ;
268
b3b7ff4e 269# see if caller wants lines
270
271 if( wantarray || $opts->{'array_ref'} ) {
635c7876 272
6f9e0c69 273 use re 'taint' ;
635c7876 274
6f9e0c69 275 my @lines = length(${$buf_ref}) ?
276 ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ;
635c7876 277
6ccd701d 278 chomp @lines if $opts->{'chomp'} ;
279
6f9e0c69 280# caller wants an array ref
b3b7ff4e 281
6f9e0c69 282 return \@lines if $opts->{'array_ref'} ;
b3b7ff4e 283
6f9e0c69 284# caller wants list of lines
b3b7ff4e 285
b3b7ff4e 286 return @lines ;
287 }
635c7876 288
289# caller wants a scalar ref to the slurped text
290
b3b7ff4e 291 return $buf_ref if $opts->{'scalar_ref'} ;
635c7876 292
293# caller wants a scalar with the slurped text (normal scalar context)
294
295 return ${$buf_ref} if defined wantarray ;
296
297# caller passed in an i/o buffer by reference (normal void context)
298
299 return ;
300}
301
b3b7ff4e 302# errors in this sub are returned as scalar refs
303# a normal IO/GLOB handle is an empty return
304# an overloaded object returns its stringified as a scalarfilename
305
306sub _check_ref {
307
308 my( $handle ) = @_ ;
309
310# check if we are reading from a handle (GLOB or IO object)
311
312 if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) {
313
314# we have a handle. deal with seeking to it if it is DATA
315
316 my $err = _seek_data_handle( $handle ) ;
317
318# return the error string if any
319
320 return \$err if $err ;
321
322# we have good handle
323 return ;
324 }
325
326 eval { require overload } ;
327
328# return an error if we can't load the overload pragma
329# or if the object isn't overloaded
330
331 return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
332 if $@ || !overload::Overloaded( $handle ) ;
333
334# must be overloaded so return its stringified value
335
336 return "$handle" ;
337}
338
339sub _seek_data_handle {
340
341 my( $handle ) = @_ ;
342
343# DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
344# glob/handle. only the DATA handle is untainted (since it is from
345# trusted data in the source file). this allows us to test if this is
346# the DATA handle and then to do a sysseek to make sure it gets
347# slurped correctly. on some systems, the buffered i/o pointer is not
348# left at the same place as the fd pointer. this sysseek makes them
349# the same so slurping with sysread will work.
350
351 eval{ require B } ;
352
353 if ( $@ ) {
354
355 return <<ERR ;
356Can't find B.pm with this Perl: $!.
357That module is needed to properly slurp the DATA handle.
358ERR
359 }
360
361 if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
362
363# set the seek position to the current tell.
364
365 unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
366 return "read_file '$handle' - sysseek: $!" ;
367 }
368 }
369
370# seek was successful, return no error string
371
372 return ;
373}
374
375
7e284d1c 376*wf = \&write_file ;
377
635c7876 378sub write_file {
379
380 my $file_name = shift ;
381
382# get the optional argument hash ref from @_ or an empty hash ref.
383
b3b7ff4e 384 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
635c7876 385
386 my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
387
388# get the buffer ref - it depends on how the data is passed into write_file
389# after this if/else $buf_ref will have a scalar ref to the data.
390
b3b7ff4e 391 if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
635c7876 392
b3b7ff4e 393# a scalar ref passed in %opts has the data
635c7876 394# note that the data was passed by ref
395
b3b7ff4e 396 $buf_ref = $opts->{'buf_ref'} ;
635c7876 397 $data_is_ref = 1 ;
398 }
399 elsif ( ref $_[0] eq 'SCALAR' ) {
400
401# the first value in @_ is the scalar ref to the data
402# note that the data was passed by ref
403
404 $buf_ref = shift ;
405 $data_is_ref = 1 ;
406 }
407 elsif ( ref $_[0] eq 'ARRAY' ) {
408
409# the first value in @_ is the array ref to the data so join it.
410
411 ${$buf_ref} = join '', @{$_[0]} ;
412 }
413 else {
414
415# good old @_ has all the data so join it.
416
417 ${$buf_ref} = join '', @_ ;
418 }
419
b3b7ff4e 420# deal with ref for a file name
635c7876 421
422 if ( ref $file_name ) {
423
b3b7ff4e 424 my $ref_result = _check_ref( $file_name ) ;
425
426 if ( ref $ref_result ) {
635c7876 427
b3b7ff4e 428# we got an error, deal with it
429
430 @_ = ( $opts, $ref_result ) ;
431 goto &_error ;
432 }
433
434 if ( $ref_result ) {
435
436# we got an overloaded object and the result is the stringified value
437# use it as the file name
438
439 $file_name = $ref_result ;
440 }
441 else {
442
443# we now have a proper handle ref.
444# make sure we don't call truncate on it.
445
446 $write_fh = $file_name ;
447 $no_truncate = 1 ;
448 }
635c7876 449 }
b3b7ff4e 450
451# see if we have a path we need to open
452
453 unless( $write_fh ) {
635c7876 454
455# spew to regular file.
456
b3b7ff4e 457 if ( $opts->{'atomic'} ) {
635c7876 458
459# in atomic mode, we spew to a temp file so make one and save the original
460# file name.
461 $orig_file_name = $file_name ;
462 $file_name .= ".$$" ;
463 }
464
465# set the mode for the sysopen
466
467 my $mode = O_WRONLY | O_CREAT ;
b3b7ff4e 468 $mode |= O_APPEND if $opts->{'append'} ;
469 $mode |= O_EXCL if $opts->{'no_clobber'} ;
635c7876 470
b3b7ff4e 471 my $perms = $opts->{perms} ;
f02156f2 472 $perms = 0666 unless defined $perms ;
473
635c7876 474#printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
475
476# open the file and handle any error.
477
6ccd701d 478 $write_fh = local( *FH ) ;
479# $write_fh = gensym ;
f02156f2 480 unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
b3b7ff4e 481
482 @_ = ( $opts, "write_file '$file_name' - sysopen: $!");
635c7876 483 goto &_error ;
484 }
485 }
486
b3b7ff4e 487 if ( my $binmode = $opts->{'binmode'} ) {
cee624ab 488 binmode( $write_fh, $binmode ) ;
489 }
490
b3b7ff4e 491 sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
635c7876 492
635c7876 493#print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
494
495# fix up newline to write cr/lf if this is a windows text file
496
b3b7ff4e 497 if ( $is_win32 && !$opts->{'binmode'} ) {
635c7876 498
499# copy the write data if it was passed by ref so we don't clobber the
500# caller's data
501 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
502 ${$buf_ref} =~ s/\n/\015\012/g ;
503 }
504
505#print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
506
507# get the size of how much we are writing and init the offset into that buffer
508
509 my $size_left = length( ${$buf_ref} ) ;
510 my $offset = 0 ;
511
512# loop until we have no more data left to write
513
514 do {
515
516# do the write and track how much we just wrote
517
518 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
519 $size_left, $offset ) ;
520
7e284d1c 521# since we're using syswrite Perl won't automatically restart the call
522# when interrupted by a signal.
523
524 next if $!{EINTR};
525
635c7876 526 unless ( defined $write_cnt ) {
527
b3b7ff4e 528 @_ = ( $opts, "write_file '$file_name' - syswrite: $!");
635c7876 529 goto &_error ;
530 }
531
6f9e0c69 532# track how much left to write and where to write from in the buffer
635c7876 533
534 $size_left -= $write_cnt ;
535 $offset += $write_cnt ;
536
537 } while( $size_left > 0 ) ;
538
539# we truncate regular files in case we overwrite a long file with a shorter file
540# so seek to the current position to get it (same as tell()).
541
542 truncate( $write_fh,
543 sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
544
545 close( $write_fh ) ;
546
547# handle the atomic mode - move the temp file to the original filename.
548
b3b7ff4e 549 if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
e2c51d31 550
b3b7ff4e 551 @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
e2c51d31 552 goto &_error ;
553 }
635c7876 554
555 return 1 ;
556}
557
558# this is for backwards compatibility with the previous File::Slurp module.
559# write_file always overwrites an existing file
560
561*overwrite_file = \&write_file ;
562
563# the current write_file has an append mode so we use that. this
564# supports the same API with an optional second argument which is a
565# hash ref of options.
566
567sub append_file {
568
b3b7ff4e 569# get the optional opts hash ref
570 my $opts = $_[1] ;
571 if ( ref $opts eq 'HASH' ) {
635c7876 572
b3b7ff4e 573# we were passed an opts ref so just mark the append mode
635c7876 574
b3b7ff4e 575 $opts->{append} = 1 ;
635c7876 576 }
577 else {
578
b3b7ff4e 579# no opts hash so insert one with the append mode
635c7876 580
581 splice( @_, 1, 0, { append => 1 } ) ;
582 }
583
584# magic goto the main write_file sub. this overlays the sub without touching
585# the stack or @_
586
587 goto &write_file
588}
589
b3b7ff4e 590# prepend data to the beginning of a file
591
592sub prepend_file {
593
594 my $file_name = shift ;
595
596#print "FILE $file_name\n" ;
597
598 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
599
600# delete unsupported options
601
602 my @bad_opts =
603 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
604
605 delete @{$opts}{@bad_opts} ;
606
607 my $prepend_data = shift ;
608 $prepend_data = '' unless defined $prepend_data ;
609 $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
610
611#print "PRE [$prepend_data]\n" ;
612
b3b7ff4e 613 my $err_mode = delete $opts->{err_mode} ;
614 $opts->{ err_mode } = 'croak' ;
615 $opts->{ scalar_ref } = 1 ;
616
6f9e0c69 617 my $existing_data = eval { read_file( $file_name, $opts ) } ;
b3b7ff4e 618
619 if ( $@ ) {
620
621 @_ = ( { err_mode => $err_mode },
622 "prepend_file '$file_name' - read_file: $!" ) ;
623 goto &_error ;
624 }
625
626#print "EXIST [$$existing_data]\n" ;
627
6f9e0c69 628 $opts->{atomic} = 1 ;
629 my $write_result =
630 eval { write_file( $file_name, $opts,
631 $prepend_data, $$existing_data ) ;
b3b7ff4e 632 } ;
633
634 if ( $@ ) {
635
636 @_ = ( { err_mode => $err_mode },
637 "prepend_file '$file_name' - write_file: $!" ) ;
638 goto &_error ;
639 }
640
641 return $write_result ;
642}
643
6f9e0c69 644# edit a file as a scalar in $_
645
7e284d1c 646*ef = \&edit_file ;
647
6f9e0c69 648sub edit_file(&$;$) {
649
650 my( $edit_code, $file_name, $opts ) = @_ ;
651 $opts = {} unless ref $opts eq 'HASH' ;
652
653# my $edit_code = shift ;
654# my $file_name = shift ;
655# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
656
657#print "FILE $file_name\n" ;
658
659# delete unsupported options
660
661 my @bad_opts =
662 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
663
664 delete @{$opts}{@bad_opts} ;
665
666# keep the user err_mode and force croaking on internal errors
667
668 my $err_mode = delete $opts->{err_mode} ;
669 $opts->{ err_mode } = 'croak' ;
670
671# get a scalar ref for speed and slurp the file into a scalar
672
673 $opts->{ scalar_ref } = 1 ;
674 my $existing_data = eval { read_file( $file_name, $opts ) } ;
675
676 if ( $@ ) {
677
678 @_ = ( { err_mode => $err_mode },
679 "edit_file '$file_name' - read_file: $!" ) ;
680 goto &_error ;
681 }
682
683#print "EXIST [$$existing_data]\n" ;
684
685 my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ;
686
687 $opts->{atomic} = 1 ;
688 my $write_result =
689 eval { write_file( $file_name, $opts, $edited_data ) } ;
690
691 if ( $@ ) {
692
693 @_ = ( { err_mode => $err_mode },
694 "edit_file '$file_name' - write_file: $!" ) ;
695 goto &_error ;
696 }
697
698 return $write_result ;
699}
700
7e284d1c 701*efl = \&edit_file_lines ;
702
6f9e0c69 703sub edit_file_lines(&$;$) {
704
705 my( $edit_code, $file_name, $opts ) = @_ ;
706 $opts = {} unless ref $opts eq 'HASH' ;
707
708# my $edit_code = shift ;
709# my $file_name = shift ;
710# my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
711
712#print "FILE $file_name\n" ;
713
714# delete unsupported options
715
716 my @bad_opts =
717 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
718
719 delete @{$opts}{@bad_opts} ;
720
721# keep the user err_mode and force croaking on internal errors
722
723 my $err_mode = delete $opts->{err_mode} ;
724 $opts->{ err_mode } = 'croak' ;
725
726# get an array ref for speed and slurp the file into lines
727
728 $opts->{ array_ref } = 1 ;
729 my $existing_data = eval { read_file( $file_name, $opts ) } ;
730
731 if ( $@ ) {
732
733 @_ = ( { err_mode => $err_mode },
734 "edit_file_lines '$file_name' - read_file: $!" ) ;
735 goto &_error ;
736 }
737
738#print "EXIST [$$existing_data]\n" ;
739
740 my @edited_data = map { $edit_code->(); $_ } @$existing_data ;
741
742 $opts->{atomic} = 1 ;
743 my $write_result =
744 eval { write_file( $file_name, $opts, @edited_data ) } ;
745
746 if ( $@ ) {
747
748 @_ = ( { err_mode => $err_mode },
749 "edit_file_lines '$file_name' - write_file: $!" ) ;
750 goto &_error ;
751 }
752
753 return $write_result ;
754}
755
756# basic wrapper around opendir/readdir
757
635c7876 758sub read_dir {
759
b3b7ff4e 760 my $dir = shift ;
761 my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
635c7876 762
763# this handle will be destroyed upon return
764
765 local(*DIRH);
766
767# open the dir and handle any errors
768
769 unless ( opendir( DIRH, $dir ) ) {
770
b3b7ff4e 771 @_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
635c7876 772 goto &_error ;
773 }
774
775 my @dir_entries = readdir(DIRH) ;
776
777 @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
b3b7ff4e 778 unless $opts->{'keep_dot_dot'} ;
635c7876 779
6ccd701d 780 if ( $opts->{'prefix'} ) {
781
782 substr( $_, 0, 0, "$dir/" ) for @dir_entries ;
783 }
784
635c7876 785 return @dir_entries if wantarray ;
786 return \@dir_entries ;
787}
788
789# error handling section
790#
791# all the error handling uses magic goto so the caller will get the
792# error message as if from their code and not this module. if we just
793# did a call on the error code, the carp/croak would report it from
794# this module since the error sub is one level down on the call stack
795# from read_file/write_file/read_dir.
796
797
798my %err_func = (
799 'carp' => \&carp,
800 'croak' => \&croak,
801) ;
802
803sub _error {
804
b3b7ff4e 805 my( $opts, $err_msg ) = @_ ;
635c7876 806
807# get the error function to use
808
b3b7ff4e 809 my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
635c7876 810
811# if we didn't find it in our error function hash, they must have set
812# it to quiet and we don't do anything.
813
814 return unless $func ;
815
816# call the carp/croak function
817
f02156f2 818 $func->($err_msg) if $func ;
635c7876 819
820# return a hard undef (in list context this will be a single value of
821# undef which is not a legal in-band value)
822
823 return undef ;
824}
825
8261;
827__END__
828
829=head1 NAME
830
6f9e0c69 831File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files
635c7876 832
833=head1 SYNOPSIS
834
835 use File::Slurp;
836
b3b7ff4e 837# read in a whole file into a scalar
635c7876 838 my $text = read_file( 'filename' ) ;
b3b7ff4e 839
840# read in a whole file into an array of lines
635c7876 841 my @lines = read_file( 'filename' ) ;
842
b3b7ff4e 843# write out a whole file from a scalar
b3b7ff4e 844 write_file( 'filename', $text ) ;
845
846# write out a whole file from an array of lines
635c7876 847 write_file( 'filename', @lines ) ;
848
b3b7ff4e 849# Here is a simple and fast way to load and save a simple config file
850# made of key=value lines.
7e284d1c 851 my %conf = read_file( $file_name ) =~ /^(\w+)=(.*)$/mg ;
852 write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ) ;
635c7876 853
6f9e0c69 854# insert text at the beginning of a file
6f9e0c69 855 prepend_file( 'filename', $text ) ;
856
6ccd701d 857# in-place edit to replace all 'foo' with 'bar' in file
858 edit_file { s/foo/bar/g } 'filename' ;
635c7876 859
6ccd701d 860# in-place edit to delete all lines with 'foo' from file
861 edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
862
863# read in a whole directory of file names (skipping . and ..)
b3b7ff4e 864 my @files = read_dir( '/path/to/dir' ) ;
635c7876 865
866=head1 DESCRIPTION
867
868This module provides subs that allow you to read or write entire files
869with one simple call. They are designed to be simple to use, have
870flexible ways to pass in or get the file contents and to be very
871efficient. There is also a sub to read in all the files in a
872directory other than C<.> and C<..>
873
b3b7ff4e 874These slurp/spew subs work for files, pipes and sockets, stdio,
875pseudo-files, and the DATA handle. Read more about why slurping files is
876a good thing in the file 'slurp_article.pod' in the extras/ directory.
877
878If you are interested in how fast these calls work, check out the
879slurp_bench.pl program in the extras/ directory. It compares many
880different forms of slurping. You can select the I/O direction, context
881and file sizes. Use the --help option to see how to run it.
635c7876 882
883=head2 B<read_file>
884
885This sub reads in an entire file and returns its contents to the
b3b7ff4e 886caller. In scalar context it returns the entire file as a single
887scalar. In list context it will return a list of lines (using the
635c7876 888current value of $/ as the separator including support for paragraph
b3b7ff4e 889mode when it is set to '').
635c7876 890
891 my $text = read_file( 'filename' ) ;
b3b7ff4e 892 my $bin = read_file( 'filename' { binmode => ':raw' } ) ;
635c7876 893 my @lines = read_file( 'filename' ) ;
b3b7ff4e 894 my $lines = read_file( 'filename', array_ref => 1 ) ;
635c7876 895
b3b7ff4e 896The first argument is the file to slurp in. If the next argument is a
897hash reference, then it is used as the options. Otherwise the rest of
898the argument list are is used as key/value options.
635c7876 899
b3b7ff4e 900If the file argument is a handle (if it is a ref and is an IO or GLOB
901object), then that handle is slurped in. This mode is supported so you
902slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
903an example that does C<open( '-|' )> and the child process spews data
635c7876 904to the parant which slurps it in. All of the options that control how
905the data is returned to the caller still work in this case.
906
b3b7ff4e 907If the first argument is an overloaded object then its stringified value
908is used for the filename and that file is opened. This is a new feature
909in 9999.14. See the stringify.t test for an example.
910
911By default C<read_file> returns an undef in scalar contex or a single
912undef in list context if it encounters an error. Those are both
913impossible to get with a clean read_file call which means you can check
914the return value and always know if you had an error. You can change how
915errors are handled with the C<err_mode> option.
916
6f9e0c69 917Speed Note: If you call read_file and just get a scalar return value
918it is now optimized to handle shorter files. This is only used if no
919options are used, the file is shorter then 100k bytes, the filename is
920a plain scalar and a scalar file is returned. If you want the fastest
921slurping, use the C<buf_ref> or C<scalar_ref> options (see below)
922
635c7876 923NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
924handle. It used to need a sysseek workaround but that is now handled
925when needed by the module itself.
926
927You can optionally request that C<slurp()> is exported to your code. This
928is an alias for read_file and is meant to be forward compatible with
929Perl 6 (which will have slurp() built-in).
930
b3b7ff4e 931The options for C<read_file> are:
635c7876 932
933=head3 binmode
934
b3b7ff4e 935If you set the binmode option, then its value is passed to a call to
936binmode on the opened handle. You can use this to set the file to be
937read in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 938
939 my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
9aab46ab 940 my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
635c7876 941
942=head3 array_ref
943
944If this boolean option is set, the return value (only in scalar
945context) will be an array reference which contains the lines of the
946slurped file. The following two calls are equivalent:
947
948 my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
949 my $lines_ref = [ read_file( $bin_file ) ] ;
950
6ccd701d 951=head3 chomp
952
953If this boolean option is set, the lines are chomped. This only
954happens if you are slurping in a list context or using the
955C<array_ref> option.
956
635c7876 957=head3 scalar_ref
958
6f9e0c69 959If this boolean option is set, the return value (only in scalar
960context) will be an scalar reference to a string which is the contents
961of the slurped file. This will usually be faster than returning the
962plain scalar. It will also save memory as it will not make a copy of
963the file to return. Run the extras/slurp_bench.pl script to see speed
964comparisons.
635c7876 965
966 my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
967
968=head3 buf_ref
969
970You can use this option to pass in a scalar reference and the slurped
971file contents will be stored in the scalar. This can be used in
f02156f2 972conjunction with any of the other options. This saves an extra copy of
6f9e0c69 973the slurped file and can lower ram usage vs returning the file. It is
974usually the fastest way to read a file into a scalar. Run the
975extras/slurp_bench.pl script to see speed comparisons.
976
635c7876 977
b3b7ff4e 978 read_file( $bin_file, buf_ref => \$buffer ) ;
635c7876 979
980=head3 blk_size
981
b3b7ff4e 982You can use this option to set the block size used when slurping from
983an already open handle (like \*STDIN). It defaults to 1MB.
635c7876 984
985 my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
986 array_ref => 1 ) ;
987
988=head3 err_mode
989
990You can use this option to control how read_file behaves when an error
b3b7ff4e 991occurs. This option defaults to 'croak'. You can set it to 'carp' or to
7e284d1c 992'quiet' to have no special error handling. This code wants to carp and
b3b7ff4e 993then read another file if it fails.
635c7876 994
995 my $text_ref = read_file( $file, err_mode => 'carp' ) ;
996 unless ( $text_ref ) {
997
998 # read a different file but croak if not found
999 $text_ref = read_file( $another_file ) ;
1000 }
7e284d1c 1001
635c7876 1002 # process ${$text_ref}
1003
1004=head2 B<write_file>
1005
1006This sub writes out an entire file in one call.
1007
1008 write_file( 'filename', @data ) ;
1009
1010The first argument to C<write_file> is the filename. The next argument
1011is an optional hash reference and it contains key/values that can
1012modify the behavior of C<write_file>. The rest of the argument list is
1013the data to be written to the file.
1014
1015 write_file( 'filename', {append => 1 }, @data ) ;
b3b7ff4e 1016 write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
1017
1018As a shortcut if the first data argument is a scalar or array reference,
1019it is used as the only data to be written to the file. Any following
1020arguments in @_ are ignored. This is a faster way to pass in the output
1021to be written to the file and is equivalent to the C<buf_ref> option of
1022C<read_file>. These following pairs are equivalent but the pass by
1023reference call will be faster in most cases (especially with larger
635c7876 1024files).
1025
1026 write_file( 'filename', \$buffer ) ;
1027 write_file( 'filename', $buffer ) ;
1028
1029 write_file( 'filename', \@lines ) ;
1030 write_file( 'filename', @lines ) ;
1031
b3b7ff4e 1032If the first argument is a handle (if it is a ref and is an IO or GLOB
1033object), then that handle is written to. This mode is supported so you
1034spew to handles such as \*STDOUT. See the test handle.t for an example
1035that does C<open( '-|' )> and child process spews data to the parent
1036which slurps it in. All of the options that control how the data are
1037passed into C<write_file> still work in this case.
1038
1039If the first argument is an overloaded object then its stringified value
1040is used for the filename and that file is opened. This is new feature
1041in 9999.14. See the stringify.t test for an example.
635c7876 1042
b3b7ff4e 1043By default C<write_file> returns 1 upon successfully writing the file or
1044undef if it encountered an error. You can change how errors are handled
1045with the C<err_mode> option.
635c7876 1046
1047The options are:
1048
1049=head3 binmode
1050
b3b7ff4e 1051If you set the binmode option, then its value is passed to a call to
1052binmode on the opened handle. You can use this to set the file to be
1053read in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 1054
1055 write_file( $bin_file, {binmode => ':raw'}, @data ) ;
b3b7ff4e 1056 write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
1057
1058=head3 perms
635c7876 1059
b3b7ff4e 1060The perms option sets the permissions of newly-created files. This value
1061is modified by your process's umask and defaults to 0666 (same as
1062sysopen).
1063
1064NOTE: this option is new as of File::Slurp version 9999.14;
635c7876 1065
1066=head3 buf_ref
1067
1068You can use this option to pass in a scalar reference which has the
1069data to be written. If this is set then any data arguments (including
1070the scalar reference shortcut) in @_ will be ignored. These are
b3b7ff4e 1071equivalent:
635c7876 1072
1073 write_file( $bin_file, { buf_ref => \$buffer } ) ;
1074 write_file( $bin_file, \$buffer ) ;
1075 write_file( $bin_file, $buffer ) ;
1076
1077=head3 atomic
1078
1079If you set this boolean option, the file will be written to in an
1080atomic fashion. A temporary file name is created by appending the pid
1081($$) to the file name argument and that file is spewed to. After the
1082file is closed it is renamed to the original file name (and rename is
1083an atomic operation on most OS's). If the program using this were to
1084crash in the middle of this, then the file with the pid suffix could
1085be left behind.
1086
1087=head3 append
1088
1089If you set this boolean option, the data will be written at the end of
f02156f2 1090the current file. Internally this sets the sysopen mode flag O_APPEND.
635c7876 1091
1092 write_file( $file, {append => 1}, @data ) ;
1093
b3b7ff4e 1094 You
1095can import append_file and it does the same thing.
635c7876 1096
1097=head3 no_clobber
1098
1099If you set this boolean option, an existing file will not be overwritten.
1100
1101 write_file( $file, {no_clobber => 1}, @data ) ;
1102
1103=head3 err_mode
1104
1105You can use this option to control how C<write_file> behaves when an
1106error occurs. This option defaults to 'croak'. You can set it to
1107'carp' or to 'quiet' to have no error handling other than the return
1108value. If the first call to C<write_file> fails it will carp and then
1109write to another file. If the second call to C<write_file> fails, it
1110will croak.
1111
1112 unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
1113
1114 # write a different file but croak if not found
1115 write_file( $other_file, \$data ) ;
1116 }
1117
1118=head2 overwrite_file
1119
1120This sub is just a typeglob alias to write_file since write_file
1121always overwrites an existing file. This sub is supported for
1122backwards compatibility with the original version of this module. See
1123write_file for its API and behavior.
1124
1125=head2 append_file
1126
1127This sub will write its data to the end of the file. It is a wrapper
1128around write_file and it has the same API so see that for the full
b3b7ff4e 1129documentation. These calls are equivalent:
635c7876 1130
1131 append_file( $file, @data ) ;
1132 write_file( $file, {append => 1}, @data ) ;
1133
b3b7ff4e 1134
1135=head2 prepend_file
1136
1137This sub writes data to the beginning of a file. The previously existing
1138data is written after that so the effect is prepending data in front of
1139a file. It is a counterpart to the append_file sub in this module. It
1140works by first using C<read_file> to slurp in the file and then calling
1141C<write_file> with the new data and the existing file data.
1142
1143The first argument to C<prepend_file> is the filename. The next argument
1144is an optional hash reference and it contains key/values that can modify
1145the behavior of C<prepend_file>. The rest of the argument list is the
1146data to be written to the file and that is passed to C<write_file> as is
1147(see that for allowed data).
1148
1149Only the C<binmode> and C<err_mode> options are supported. The
1150C<write_file> call has the C<atomic> option set so you will always have
1151a consistant file. See above for more about those options.
1152
1153C<prepend_file> is not exported by default, you need to import it
1154explicitly.
1155
1156 use File::Slurp qw( prepend_file ) ;
1157 prepend_file( $file, $header ) ;
1158 prepend_file( $file, \@lines ) ;
1159 prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
1160
6ccd701d 1161
1162=head2 edit_file, edit_file_lines
1163
1164These subs read in a file into $_, execute a code block which should
1165modify $_ and then write $_ back to the file. The difference between
1166them is that C<edit_file> reads the whole file into $_ and calls the
1167code block one time. With C<edit_file_lines> each line is read into $_
1168and the code is called for each line. In both cases the code should
1169modify $_ if desired and it will be written back out. These subs are
7e284d1c 1170the equivalent of the -pi command line options of Perl but you can
1171call them from inside your program and not fork out a process. They
1172are in @EXPORT_OK so you need to request them to be imported on the
1173use line or you can import both of them with:
1174
1175 use File::Slurp qw( :edit ) ;
6ccd701d 1176
1177The first argument to C<edit_file> and C<edit_file_lines> is a code
1178block or a code reference. The code block is not followed by a comma
1179(as with grep and map) but a code reference is followed by a
1180comma. See the examples below for both styles. The next argument is
1181the filename. The last argument is an optional hash reference and it
1182contains key/values that can modify the behavior of
1183C<prepend_file>.
1184
1185Only the C<binmode> and C<err_mode> options are supported. The
1186C<write_file> call has the C<atomic> option set so you will always
1187have a consistant file. See above for more about those options.
1188
1189Each group of calls below show a Perl command line instance and the
7e284d1c 1190equivalent calls to C<edit_file> and C<edit_file_lines>.
6ccd701d 1191
1192 perl -0777 -pi -e 's/foo/bar/g' filename
7e284d1c 1193 use File::Slurp qw( edit_file ) ;
6ccd701d 1194 edit_file { s/foo/bar/g } 'filename' ;
1195 edit_file sub { s/foo/bar/g }, 'filename' ;
1196 edit_file \&replace_foo, 'filename' ;
1197 sub replace_foo { s/foo/bar/g }
1198
7e284d1c 1199 perl -pi -e '$_ = "" if /foo/' filename
1200 use File::Slurp qw( edit_file_lines ) ;
6ccd701d 1201 use File::Slurp ;
1202 edit_file_lines { $_ = '' if /foo/ } 'filename' ;
1203 edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
1204 edit_file \&delete_foo, 'filename' ;
1205 sub delete_foo { $_ = '' if /foo/ }
1206
635c7876 1207=head2 read_dir
1208
1209This sub reads all the file names from directory and returns them to
1210the caller but C<.> and C<..> are removed by default.
1211
1212 my @files = read_dir( '/path/to/dir' ) ;
1213
b3b7ff4e 1214The first argument is the path to the directory to read. If the next
1215argument is a hash reference, then it is used as the options.
1216Otherwise the rest of the argument list are is used as key/value
1217options.
635c7876 1218
b3b7ff4e 1219In list context C<read_dir> returns a list of the entries in the
635c7876 1220directory. In a scalar context it returns an array reference which has
1221the entries.
1222
b3b7ff4e 1223=head3 err_mode
1224
1225If the C<err_mode> option is set, it selects how errors are handled (see
1226C<err_mode> in C<read_file> or C<write_file>).
1227
635c7876 1228=head3 keep_dot_dot
1229
1230If this boolean option is set, C<.> and C<..> are not removed from the
1231list of files.
1232
1233 my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1234
6ccd701d 1235=head3 prefix
1236
1237If this boolean option is set, the string "$dir/" is prefixed to each
1238dir entry. This means you can directly use the results to open
1239files. A common newbie mistake is not putting the directory in front
1240of entries when opening themn.
1241
1242 my @paths = read_dir( '/path/to/dir', prefix => 1 ) ;
1243
635c7876 1244=head2 EXPORT
1245
7e284d1c 1246 These are exported by default or with
1247 use File::Slurp qw( :std ) ;
1248
635c7876 1249 read_file write_file overwrite_file append_file read_dir
1250
7e284d1c 1251 These are exported with
1252 use File::Slurp qw( :edit ) ;
1253
1254 edit_file edit_file_lines
1255
1256 You can get all subs in the module exported with
1257 use File::Slurp qw( :all ) ;
1258
f02156f2 1259=head2 LICENSE
1260
1261 Same as Perl.
1262
635c7876 1263=head2 SEE ALSO
1264
1265An article on file slurping in extras/slurp_article.pod. There is
1266also a benchmarking script in extras/slurp_bench.pl.
1267
1268=head2 BUGS
1269
1270If run under Perl 5.004, slurping from the DATA handle will fail as
1271that requires B.pm which didn't get into core until 5.005.
1272
1273=head1 AUTHOR
1274
b3b7ff4e 1275Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
635c7876 1276
1277=cut