added edit_file and edit_file lines. needs testing and pod
[urisagit/Perl-Docs.git] / lib / File / Slurp.pm
CommitLineData
635c7876 1package File::Slurp;
2
6f9e0c69 3use 5.6.2 ;
e2c51d31 4
635c7876 5use strict;
6
7use Carp ;
b3b7ff4e 8use Exporter ;
635c7876 9use Fcntl qw( :DEFAULT ) ;
e2c51d31 10use POSIX qw( :fcntl_h ) ;
635c7876 11use Symbol ;
12
6f9e0c69 13use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ;
b3b7ff4e 14@ISA = qw( Exporter ) ;
e2c51d31 15
6f9e0c69 16$VERSION = '9999.17';
17
# subs that are exported only on request. the edit subs are listed here
# so callers can import them by name.

@EXPORT_OK = qw( slurp prepend_file edit_file edit_file_lines ) ;

# subs that are exported by default (also available as the ':all' tag)

%EXPORT_TAGS = ( 'all' => [
	qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;
@EXPORT = ( @{ $EXPORT_TAGS{'all'} } );

# files smaller than this many bytes can use the fast path in read_file

my $max_fast_slurp_size = 1024 * 100 ;

# true on a windows perl. used to decide on cr/lf <-> newline translation

my $is_win32 = $^O =~ /win32/i ;
26
# Fallback definitions for the seek and open-mode constants. Normally
# Fcntl/POSIX provide these, so nothing happens here. On perls where
# they are missing we install plain subs with no () prototype, since
# the old Fcntl defined them that way and the constant pragma would
# cause a prototype mismatch.

BEGIN {

# install each NAME => VALUE pair as a sub in this package that returns VALUE

	my $install_subs = sub {

		my %value_for = @_ ;

		no strict 'refs' ;

		for my $name ( keys %value_for ) {

			my $value = $value_for{$name} ;
			*{ __PACKAGE__ . "::$name" } = sub { $value } ;
		}
	} ;

	$install_subs->( SEEK_SET => 0, SEEK_CUR => 1, SEEK_END => 2 )
		unless defined &SEEK_SET ;

	$install_subs->( O_BINARY => 0, O_RDONLY => 0, O_WRONLY => 1 )
		unless defined &O_BINARY ;

# the remaining flags differ per OS. only the systems listed here get
# values, on anything else they stay undefined.

	unless ( defined &O_APPEND ) {

		if ( $^O =~ /olaris/ ) {
			$install_subs->(
				O_APPEND => 8, O_CREAT => 256, O_EXCL => 1024 ) ;
		}
		elsif ( $^O =~ /inux/ ) {
			$install_subs->(
				O_APPEND => 1024, O_CREAT => 64, O_EXCL => 128 ) ;
		}
		elsif ( $^O =~ /BSD/i ) {
			$install_subs->(
				O_APPEND => 8, O_CREAT => 512, O_EXCL => 2048 ) ;
		}
	}
}
65
66# print "OS [$^O]\n" ;
67
68# print "O_BINARY = ", O_BINARY(), "\n" ;
69# print "O_RDONLY = ", O_RDONLY(), "\n" ;
70# print "O_WRONLY = ", O_WRONLY(), "\n" ;
71# print "O_APPEND = ", O_APPEND(), "\n" ;
72# print "O_CREAT ", O_CREAT(), "\n" ;
73# print "O_EXCL ", O_EXCL(), "\n" ;
74
635c7876 75
# slurp() is just another name for read_file (exported only on request)

*slurp = \&read_file ;

# read_file( $file, %opts ) or read_file( $file, \%opts )
#
# slurp a whole file and return its contents.
#
# $file is a path, an open handle (GLOB or IO ref) or an overloaded
# object whose stringified value is used as the path.
#
# options used here: binmode, buf_ref, blk_size, array_ref, scalar_ref
# and err_mode (which is handled by _error).
#
# returns:
#	list context		- the lines of the file, split on $/
#	array_ref option	- a reference to the array of lines
#	scalar_ref option	- a reference to the slurped text
#	scalar context		- the slurped text
#	void context		- nothing (the data is in the buf_ref buffer)
#
# all errors go through _error so its return value comes back to the caller.

sub read_file {

	my $file_name = shift ;
	my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;

# this is the optimized read_file for shorter files.
# the test for -s > 0 is to allow pseudo files to be read with the
# regular loop since they return a size of 0.

	if ( !ref $file_name && -e $file_name && -s _ > 0 &&
	     -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) {

		my $fh ;
		unless( sysopen( $fh, $file_name, O_RDONLY ) ) {

			@_ = ( $opts, "read_file '$file_name' - sysopen: $!");
			goto &_error ;
		}

# the _ stat buffer still holds the results of the -e test above

		my $read_cnt = sysread( $fh, my $buf, -s _ ) ;

		unless ( defined $read_cnt ) {

			@_ = ( $opts,
				"read_file '$file_name' - small sysread: $!");
			goto &_error ;
		}

		$buf =~ s/\015\012/\n/g if $is_win32 ;
		return $buf ;
	}

# set the buffer to either the passed in one or ours and init it to the null
# string

	my $buf ;
	my $buf_ref = $opts->{'buf_ref'} || \$buf ;
	${$buf_ref} = '' ;

	my( $read_fh, $size_left, $blk_size ) ;

# deal with ref for a file name
# it could be an open handle or an overloaded object

	if ( ref $file_name ) {

		my $ref_result = _check_ref( $file_name ) ;

		if ( ref $ref_result ) {

# we got an error. _check_ref returns it as a scalar ref so dereference it
# to hand the actual message text to the error handler

			@_ = ( $opts, ${$ref_result} ) ;
			goto &_error ;
		}

		if ( $ref_result ) {

# we got an overloaded object and the result is the stringified value
# use it as the file name

			$file_name = $ref_result ;
		}
		else {

# here we have just an open handle. set $read_fh so we don't do a sysopen

			$read_fh = $file_name ;
			$blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
			$size_left = $blk_size ;
		}
	}

# see if we have a path we need to open

	unless ( $read_fh ) {

# a regular file. set the sysopen mode

		my $mode = O_RDONLY ;

		$read_fh = gensym ;
		unless ( sysopen( $read_fh, $file_name, $mode ) ) {
			@_ = ( $opts, "read_file '$file_name' - sysopen: $!");
			goto &_error ;
		}

		if ( my $binmode = $opts->{'binmode'} ) {
			binmode( $read_fh, $binmode ) ;
		}

# get the size of the file for use in the read loop

		$size_left = -s $read_fh ;

# we need a blk_size if the size is 0 so we can handle pseudofiles like in
# /proc. these show as 0 size but have data to be slurped.

		unless( $size_left ) {

			$blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
			$size_left = $blk_size ;
		}
	}

# infinite read loop. we exit when we are done slurping

	while( 1 ) {

# do the read and see how much we got

		my $read_cnt = sysread( $read_fh, ${$buf_ref},
				$size_left, length ${$buf_ref} ) ;

		unless ( defined $read_cnt ) {

			@_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
			goto &_error ;
		}

# good read. see if we hit EOF (nothing left to read)

		last if $read_cnt == 0 ;

# loop if we are slurping a handle. we don't track $size_left then.

		next if $blk_size ;

# count down how much we read and loop if we have more to read.

		$size_left -= $read_cnt ;
		last if $size_left <= 0 ;
	}

# fix up cr/lf to be a newline if this is a windows text file

	${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;

# see if caller wants lines

	if( wantarray || $opts->{'array_ref'} ) {

		use re 'taint' ;

		my @lines ;

		if ( length ${$buf_ref} ) {

			my $sep = $/ ;

			if ( !defined $sep ) {

# no record separator means the whole file is a single record

				@lines = ( ${$buf_ref} ) ;
			}
			else {

# '' is paragraph mode (records end with 2 or more newlines). any other
# separator is a literal string so it must be escaped before it is
# interpolated into the regex.

				my $sep_re = ( $sep eq '' ) ?
					'\n\n+' : quotemeta $sep ;

				@lines = ${$buf_ref} =~ /(.*?$sep_re|.+)/sg ;
			}
		}

# caller wants an array ref

		return \@lines if $opts->{'array_ref'} ;

# caller wants list of lines

		return @lines ;
	}

# caller wants a scalar ref to the slurped text

	return $buf_ref if $opts->{'scalar_ref'} ;

# caller wants a scalar with the slurped text (normal scalar context)

	return ${$buf_ref} if defined wantarray ;

# caller passed in an i/o buffer by reference (normal void context)

	return ;
}
268
# classify a reference that was passed in place of a file name.
#
# returns:
#	nothing			- it is a usable GLOB/IO handle
#	a scalar ref		- an error. the ref points at the message text
#	a plain string		- the stringified value of an overloaded
#				  object which is to be used as the file name

sub _check_ref {

	my $handle = $_[0] ;

# the eval is needed as the isa call dies on refs that can't take methods

	my $is_io_handle =
		eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ;

	unless ( $is_io_handle ) {

# not a handle so the only other thing we accept is an overloaded
# object. that needs the overload pragma to be loadable.

		my $have_overload = eval { require overload ; 1 } ;

		unless ( $have_overload && overload::Overloaded( $handle ) ) {

			return \"Bad handle '$handle' is not a GLOB or IO object or overloaded" ;
		}

		return "$handle" ;
	}

# a real handle. if it is DATA its position may need to be fixed up

	my $seek_err = _seek_data_handle( $handle ) ;

	if ( $seek_err ) {
		return \$seek_err ;
	}

	return ;
}
305
# make the DATA handle safe to slurp with sysread.
#
# the UNTAINT IO flag (16) is only set on the DATA handle as its
# contents come from the trusted source file. when we see that flag we
# sysseek the handle to where the buffered i/o thinks it is (tell), as
# on some systems the buffered pointer and the fd pointer differ.
#
# returns nothing on success (or for any other handle) and an error
# string on failure.

sub _seek_data_handle {

	my $handle = shift ;

# we need the B module to peek at the IO flags of the handle

	unless ( eval { require B ; 1 } ) {

		return <<ERR ;
Can't find B.pm with this Perl: $!.
That module is needed to properly slurp the DATA handle.
ERR
	}

	my $io_flags = B::svref_2object( $handle )->IO->IoFLAGS ;

# only the DATA handle needs any work

	return unless $io_flags & 16 ;

# sync the fd position with the buffered position. all done if that works

	return if sysseek( $handle, tell( $handle ), SEEK_SET ) ;

	return "read_file '$handle' - sysseek: $!" ;
}
341
342
# write_file( $file, [ \%opts ], @data | \$data | \@data )
#
# spew data to a file in one call.
#
# $file is a path, an open handle (GLOB or IO ref) or an overloaded
# object whose stringified value is used as the path.
#
# options used here: buf_ref, atomic, append, no_clobber, perms, binmode
# and err_mode (which is handled by _error).
#
# returns 1 on success. all errors go through _error so its return
# value comes back to the caller.

sub write_file {

	my $file_name = shift ;

# get the optional argument hash ref from @_ or an empty hash ref.

	my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;

	my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;

# get the buffer ref - it depends on how the data is passed into write_file
# after this if/else $buf_ref will have a scalar ref to the data.

	if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {

# a scalar ref passed in %opts has the data
# note that the data was passed by ref

		$buf_ref = $opts->{'buf_ref'} ;
		$data_is_ref = 1 ;
	}
	elsif ( ref $_[0] eq 'SCALAR' ) {

# the first value in @_ is the scalar ref to the data
# note that the data was passed by ref

		$buf_ref = shift ;
		$data_is_ref = 1 ;
	}
	elsif ( ref $_[0] eq 'ARRAY' ) {

# the first value in @_ is the array ref to the data so join it.

		${$buf_ref} = join '', @{$_[0]} ;
	}
	else {

# good old @_ has all the data so join it.

		${$buf_ref} = join '', @_ ;
	}

# deal with ref for a file name

	if ( ref $file_name ) {

		my $ref_result = _check_ref( $file_name ) ;

		if ( ref $ref_result ) {

# we got an error. _check_ref returns it as a scalar ref so dereference it
# to hand the actual message text to the error handler

			@_ = ( $opts, ${$ref_result} ) ;
			goto &_error ;
		}

		if ( $ref_result ) {

# we got an overloaded object and the result is the stringified value
# use it as the file name

			$file_name = $ref_result ;
		}
		else {

# we now have a proper handle ref.
# make sure we don't call truncate on it.

			$write_fh = $file_name ;
			$no_truncate = 1 ;
		}
	}

# see if we have a path we need to open

	unless( $write_fh ) {

# spew to regular file.

		if ( $opts->{'atomic'} ) {

# in atomic mode, we spew to a temp file so make one and save the original
# file name.
			$orig_file_name = $file_name ;
			$file_name .= ".$$" ;
		}

# set the mode for the sysopen

		my $mode = O_WRONLY | O_CREAT ;
		$mode |= O_APPEND if $opts->{'append'} ;
		$mode |= O_EXCL if $opts->{'no_clobber'} ;

		my $perms = $opts->{perms} ;
		$perms = 0666 unless defined $perms ;

# open the file and handle any error.

		$write_fh = gensym ;
		unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {

			@_ = ( $opts, "write_file '$file_name' - sysopen: $!");
			goto &_error ;
		}
	}

	if ( my $binmode = $opts->{'binmode'} ) {
		binmode( $write_fh, $binmode ) ;
	}

	sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;

# fix up newline to write cr/lf if this is a windows text file

	if ( $is_win32 && !$opts->{'binmode'} ) {

# copy the write data if it was passed by ref so we don't clobber the
# caller's data
		$buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
		${$buf_ref} =~ s/\n/\015\012/g ;
	}

# get the size of how much we are writing and init the offset into that buffer

	my $size_left = length( ${$buf_ref} ) ;
	my $offset = 0 ;

# loop until we have no more data left to write

	do {

# do the write and track how much we just wrote

		my $write_cnt = syswrite( $write_fh, ${$buf_ref},
				$size_left, $offset ) ;

		unless ( defined $write_cnt ) {

			@_ = ( $opts, "write_file '$file_name' - syswrite: $!");
			goto &_error ;
		}

# track how much left to write and where to write from in the buffer

		$size_left -= $write_cnt ;
		$offset += $write_cnt ;

	} while( $size_left > 0 ) ;

# we truncate regular files in case we overwrite a long file with a shorter file
# so seek to the current position to get it (same as tell()).

	truncate( $write_fh,
		  sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;

	close( $write_fh ) ;

# handle the atomic mode - move the temp file to the original filename.

	if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {

		@_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
		goto &_error ;
	}

	return 1 ;
}

# this is for backwards compatibility with the previous File::Slurp module.
# write_file always overwrites an existing file

*overwrite_file = \&write_file ;
521
# append_file( $file, [ \%opts ], @data | \$data | \@data )
#
# write data at the end of a file. this has the same API as write_file
# and just forces its append option on. the caller's options hash is
# never modified - we pass write_file a copy with append set, so the
# caller can safely reuse the same hash in a later write_file call.

sub append_file {

# get the optional opts hash ref

	my $opts = $_[1] ;

	if ( ref $opts eq 'HASH' ) {

# we were passed an opts ref so replace it in @_ with a copy that has
# the append mode set. splice swaps the slot in @_ and doesn't assign
# to the caller's variable.

		splice( @_, 1, 1, { %{$opts}, append => 1 } ) ;
	}
	else {

# no opts hash so insert one with the append mode

		splice( @_, 1, 0, { append => 1 } ) ;
	}

# magic goto the main write_file sub. this overlays the sub so errors
# are reported from the perspective of our caller

	goto &write_file ;
}
548
# prepend_file( $file, [ \%opts ], $data | \$data )
#
# insert data at the beginning of a file. the existing file is slurped
# and then rewritten (atomically) as the new data followed by the old
# contents.
#
# only the err_mode and binmode options are supported, any others are
# ignored. the caller's options hash is never modified so it can be
# reused for later calls.
#
# returns the result of write_file on success. errors go through
# _error using the caller's err_mode.

sub prepend_file {

	my $file_name = shift ;

	my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;

	my $prepend_data = shift ;
	$prepend_data = '' unless defined $prepend_data ;
	$prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;

# keep the user err_mode for our own error reporting

	my $err_mode = $opts->{err_mode} ;

# build private options for the internal calls. they always croak so
# we can trap their errors with eval

	my %io_opts = ( err_mode => 'croak' ) ;
	$io_opts{binmode} = $opts->{binmode} if exists $opts->{binmode} ;

# get a scalar ref for speed and slurp the file into a scalar

	my $existing_data =
		eval { read_file( $file_name, { %io_opts, scalar_ref => 1 } ) } ;

	if ( $@ ) {

		@_ = ( { err_mode => $err_mode },
			"prepend_file '$file_name' - read_file: $!" ) ;
		goto &_error ;
	}

# write the new data and then the old data in atomic mode so a failure
# can't leave a partly written file

	my $write_result =
		eval { write_file( $file_name, { %io_opts, atomic => 1 },
				$prepend_data, $$existing_data ) ;
	} ;

	if ( $@ ) {

		@_ = ( { err_mode => $err_mode },
			"prepend_file '$file_name' - write_file: $!" ) ;
		goto &_error ;
	}

	return $write_result ;
}
602
# edit_file { CODE } $file, [ \%opts ]
#
# edit a file as a single scalar. the whole file is slurped into $_,
# the code block is called once to modify $_ and the result is written
# back to the file atomically.
#
# only the err_mode and binmode options are supported, any others are
# ignored. the caller's options hash is never modified so it can be
# reused for later calls.
#
# returns the result of write_file on success. errors go through
# _error using the caller's err_mode.

sub edit_file(&$;$) {

	my( $edit_code, $file_name, $opts ) = @_ ;
	$opts = {} unless ref $opts eq 'HASH' ;

# keep the user err_mode for our own error reporting

	my $err_mode = $opts->{err_mode} ;

# build private options for the internal calls. they always croak so
# we can trap their errors with eval

	my %io_opts = ( err_mode => 'croak' ) ;
	$io_opts{binmode} = $opts->{binmode} if exists $opts->{binmode} ;

# get a scalar ref for speed and slurp the file into a scalar

	my $existing_data =
		eval { read_file( $file_name, { %io_opts, scalar_ref => 1 } ) } ;

	if ( $@ ) {

		@_ = ( { err_mode => $err_mode },
			"edit_file '$file_name' - read_file: $!" ) ;
		goto &_error ;
	}

# map aliases $_ to the slurped text so the edit code changes it in place

	my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ;

	my $write_result =
		eval { write_file( $file_name, { %io_opts, atomic => 1 },
				$edited_data ) } ;

	if ( $@ ) {

		@_ = ( { err_mode => $err_mode },
			"edit_file '$file_name' - write_file: $!" ) ;
		goto &_error ;
	}

	return $write_result ;
}
657
# edit_file_lines { CODE } $file, [ \%opts ]
#
# edit a file line by line. the file is slurped into a list of lines
# (split on $/), the code block is called once per line with the line
# in $_ and the possibly modified lines are written back to the file
# atomically.
#
# only the err_mode and binmode options are supported, any others are
# ignored. the caller's options hash is never modified so it can be
# reused for later calls.
#
# returns the result of write_file on success. errors go through
# _error using the caller's err_mode.

sub edit_file_lines(&$;$) {

	my( $edit_code, $file_name, $opts ) = @_ ;
	$opts = {} unless ref $opts eq 'HASH' ;

# keep the user err_mode for our own error reporting

	my $err_mode = $opts->{err_mode} ;

# build private options for the internal calls. they always croak so
# we can trap their errors with eval

	my %io_opts = ( err_mode => 'croak' ) ;
	$io_opts{binmode} = $opts->{binmode} if exists $opts->{binmode} ;

# get an array ref for speed and slurp the file into lines

	my $existing_data =
		eval { read_file( $file_name, { %io_opts, array_ref => 1 } ) } ;

	if ( $@ ) {

		@_ = ( { err_mode => $err_mode },
			"edit_file_lines '$file_name' - read_file: $!" ) ;
		goto &_error ;
	}

# map aliases $_ to each line so the edit code changes it in place

	my @edited_data = map { $edit_code->(); $_ } @$existing_data ;

	my $write_result =
		eval { write_file( $file_name, { %io_opts, atomic => 1 },
				@edited_data ) } ;

	if ( $@ ) {

		@_ = ( { err_mode => $err_mode },
			"edit_file_lines '$file_name' - write_file: $!" ) ;
		goto &_error ;
	}

	return $write_result ;
}
710
# read_dir( $dir, %opts ) or read_dir( $dir, \%opts )
#
# return the names of the entries in a directory - as a list in list
# context and as an array ref otherwise. the . and .. entries are
# dropped unless the keep_dot_dot option is true. an opendir failure
# goes through _error.

sub read_dir {

	my( $dir, @rest ) = @_ ;
	my $opts = ( ref $rest[0] eq 'HASH' ) ? $rest[0] : { @rest } ;

# a lexical handle is closed for us when it goes out of scope

	my $dir_handle ;

	unless ( opendir( $dir_handle, $dir ) ) {

		@_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
		goto &_error ;
	}

# grab all the entries and filter out the dot dirs if we need to

	my @entries = $opts->{'keep_dot_dot'} ?
		readdir( $dir_handle ) :
		grep { $_ ne '.' && $_ ne '..' } readdir( $dir_handle ) ;

	return wantarray ? @entries : \@entries ;
}
738
# error handling section
#
# the public subs get here with a magic goto ( @_ = ( $opts, $msg ) ;
# goto &_error ) instead of a normal call. that way carp/croak report
# the error from the point of view of the user's code and not from
# inside this module.

# the err_mode values that do something. any other mode (such as
# 'quiet') has no entry and so is silent.

my %err_func = (
	'croak'	=> \&croak,
	'carp'	=> \&carp,
) ;

# _error( $opts, $err_msg )
#
# report $err_msg according to $opts->{'err_mode'} (default is 'croak').
# croak mode dies. carp mode warns and returns a single undef. any
# other mode returns an empty value without reporting anything.

sub _error {

	my( $opts, $err_msg ) = @_ ;

	my $mode = $opts->{'err_mode'} || 'croak' ;

# no handler for this mode means be quiet and do nothing

	my $handler = $err_func{$mode} or return ;

	$handler->($err_msg) ;

# return a hard undef (in list context this will be a single value of
# undef which is not a legal in-band value)

	return undef ;
}
775
7761;
777__END__
778
779=head1 NAME
780
6f9e0c69 781File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files
635c7876 782
783=head1 SYNOPSIS
784
785 use File::Slurp;
786
b3b7ff4e 787# read in a whole file into a scalar
788
635c7876 789 my $text = read_file( 'filename' ) ;
b3b7ff4e 790
791# read in a whole file into an array of lines
792
635c7876 793 my @lines = read_file( 'filename' ) ;
794
b3b7ff4e 795# write out a whole file from a scalar
796
797 write_file( 'filename', $text ) ;
798
799# write out a whole file from an array of lines
800
635c7876 801 write_file( 'filename', @lines ) ;
802
b3b7ff4e 803# Here is a simple and fast way to load and save a simple config file
804# made of key=value lines.
805
806 my %conf = read_file( $file_name ) =~ /^(\w+)=(\.*)$/mg ;
807 write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ;
635c7876 808
6f9e0c69 809# insert text at the beginning of a file
810
811 prepend_file( 'filename', $text ) ;
812
b3b7ff4e 813# read in a whole directory of file names (skipping . and ..)
635c7876 814
b3b7ff4e 815 my @files = read_dir( '/path/to/dir' ) ;
635c7876 816
817=head1 DESCRIPTION
818
819This module provides subs that allow you to read or write entire files
820with one simple call. They are designed to be simple to use, have
821flexible ways to pass in or get the file contents and to be very
822efficient. There is also a sub to read in all the files in a
823directory other than C<.> and C<..>
824
b3b7ff4e 825These slurp/spew subs work for files, pipes and sockets, stdio,
826pseudo-files, and the DATA handle. Read more about why slurping files is
827a good thing in the file 'slurp_article.pod' in the extras/ directory.
828
829If you are interested in how fast these calls work, check out the
830slurp_bench.pl program in the extras/ directory. It compares many
831different forms of slurping. You can select the I/O direction, context
832and file sizes. Use the --help option to see how to run it.
635c7876 833
834=head2 B<read_file>
835
836This sub reads in an entire file and returns its contents to the
b3b7ff4e 837caller. In scalar context it returns the entire file as a single
838scalar. In list context it will return a list of lines (using the
635c7876 839current value of $/ as the separator including support for paragraph
b3b7ff4e 840mode when it is set to '').
635c7876 841
842 my $text = read_file( 'filename' ) ;
  my $bin = read_file( 'filename', { binmode => ':raw' } ) ;
635c7876 844 my @lines = read_file( 'filename' ) ;
b3b7ff4e 845 my $lines = read_file( 'filename', array_ref => 1 ) ;
635c7876 846
The first argument is the file to slurp in. If the next argument is a
hash reference, then it is used as the options. Otherwise the rest of
the argument list is used as key/value options.
635c7876 850
If the file argument is a handle (if it is a ref and is an IO or GLOB
object), then that handle is slurped in. This mode is supported so you
can slurp handles such as C<DATA> and C<STDIN>. See the test handle.t
for an example that does C<open( '-|' )> and the child process spews
data to the parent which slurps it in. All of the options that control
how the data is returned to the caller still work in this case.
857
b3b7ff4e 858If the first argument is an overloaded object then its stringified value
859is used for the filename and that file is opened. This is a new feature
860in 9999.14. See the stringify.t test for an example.
861
By default C<read_file> returns an undef in scalar context or a single
undef in list context if it encounters an error. Those are both
impossible to get with a clean read_file call which means you can check
the return value and always know if you had an error. You can change how
errors are handled with the C<err_mode> option.
867
Speed Note: If you call read_file and just get a scalar return value
it is now optimized to handle shorter files. This is only used if no
options are used, the file is shorter than 100k bytes, the filename is
a plain scalar and a scalar file is returned. If you want the fastest
slurping, use the C<buf_ref> or C<scalar_ref> options (see below).
873
635c7876 874NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
875handle. It used to need a sysseek workaround but that is now handled
876when needed by the module itself.
877
878You can optionally request that C<slurp()> is exported to your code. This
879is an alias for read_file and is meant to be forward compatible with
880Perl 6 (which will have slurp() built-in).
881
b3b7ff4e 882The options for C<read_file> are:
635c7876 883
884=head3 binmode
885
b3b7ff4e 886If you set the binmode option, then its value is passed to a call to
887binmode on the opened handle. You can use this to set the file to be
888read in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 889
890 my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
9aab46ab 891 my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
635c7876 892
893=head3 array_ref
894
895If this boolean option is set, the return value (only in scalar
896context) will be an array reference which contains the lines of the
897slurped file. The following two calls are equivalent:
898
899 my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
900 my $lines_ref = [ read_file( $bin_file ) ] ;
901
902=head3 scalar_ref
903
If this boolean option is set, the return value (only in scalar
context) will be a scalar reference to a string which is the contents
of the slurped file. This will usually be faster than returning the
plain scalar. It will also save memory as it will not make a copy of
the file to return. Run the extras/slurp_bench.pl script to see speed
comparisons.
635c7876 910
911 my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
912
913=head3 buf_ref
914
915You can use this option to pass in a scalar reference and the slurped
916file contents will be stored in the scalar. This can be used in
f02156f2 917conjunction with any of the other options. This saves an extra copy of
6f9e0c69 918the slurped file and can lower ram usage vs returning the file. It is
919usually the fastest way to read a file into a scalar. Run the
920extras/slurp_bench.pl script to see speed comparisons.
921
635c7876 922
b3b7ff4e 923 read_file( $bin_file, buf_ref => \$buffer ) ;
635c7876 924
925=head3 blk_size
926
b3b7ff4e 927You can use this option to set the block size used when slurping from
928an already open handle (like \*STDIN). It defaults to 1MB.
635c7876 929
930 my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
931 array_ref => 1 ) ;
932
933=head3 err_mode
934
935You can use this option to control how read_file behaves when an error
occurs. This option defaults to 'croak'. You can set it to 'carp' or to
'quiet' to have no special error handling. This code wants to carp and
then read another file if it fails.
635c7876 939
940 my $text_ref = read_file( $file, err_mode => 'carp' ) ;
941 unless ( $text_ref ) {
942
943 # read a different file but croak if not found
944 $text_ref = read_file( $another_file ) ;
945 }
946
947 # process ${$text_ref}
948
949=head2 B<write_file>
950
951This sub writes out an entire file in one call.
952
953 write_file( 'filename', @data ) ;
954
955The first argument to C<write_file> is the filename. The next argument
956is an optional hash reference and it contains key/values that can
957modify the behavior of C<write_file>. The rest of the argument list is
958the data to be written to the file.
959
960 write_file( 'filename', {append => 1 }, @data ) ;
b3b7ff4e 961 write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
962
963As a shortcut if the first data argument is a scalar or array reference,
964it is used as the only data to be written to the file. Any following
965arguments in @_ are ignored. This is a faster way to pass in the output
966to be written to the file and is equivalent to the C<buf_ref> option of
967C<read_file>. These following pairs are equivalent but the pass by
968reference call will be faster in most cases (especially with larger
635c7876 969files).
970
971 write_file( 'filename', \$buffer ) ;
972 write_file( 'filename', $buffer ) ;
973
974 write_file( 'filename', \@lines ) ;
975 write_file( 'filename', @lines ) ;
976
b3b7ff4e 977If the first argument is a handle (if it is a ref and is an IO or GLOB
978object), then that handle is written to. This mode is supported so you
979spew to handles such as \*STDOUT. See the test handle.t for an example
980that does C<open( '-|' )> and child process spews data to the parent
981which slurps it in. All of the options that control how the data are
982passed into C<write_file> still work in this case.
983
If the first argument is an overloaded object then its stringified value
is used for the filename and that file is opened. This is a new feature
in 9999.14. See the stringify.t test for an example.
635c7876 987
b3b7ff4e 988By default C<write_file> returns 1 upon successfully writing the file or
989undef if it encountered an error. You can change how errors are handled
990with the C<err_mode> option.
635c7876 991
992The options are:
993
994=head3 binmode
995
If you set the binmode option, then its value is passed to a call to
binmode on the opened handle. You can use this to set the file to be
written in binary mode, utf8, etc. See perldoc -f binmode for more.
635c7876 999
1000 write_file( $bin_file, {binmode => ':raw'}, @data ) ;
b3b7ff4e 1001 write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
1002
1003=head3 perms
635c7876 1004
b3b7ff4e 1005The perms option sets the permissions of newly-created files. This value
1006is modified by your process's umask and defaults to 0666 (same as
1007sysopen).
1008
1009NOTE: this option is new as of File::Slurp version 9999.14;
635c7876 1010
1011=head3 buf_ref
1012
1013You can use this option to pass in a scalar reference which has the
1014data to be written. If this is set then any data arguments (including
1015the scalar reference shortcut) in @_ will be ignored. These are
b3b7ff4e 1016equivalent:
635c7876 1017
1018 write_file( $bin_file, { buf_ref => \$buffer } ) ;
1019 write_file( $bin_file, \$buffer ) ;
1020 write_file( $bin_file, $buffer ) ;
1021
1022=head3 atomic
1023
1024If you set this boolean option, the file will be written to in an
1025atomic fashion. A temporary file name is created by appending the pid
1026($$) to the file name argument and that file is spewed to. After the
1027file is closed it is renamed to the original file name (and rename is
1028an atomic operation on most OS's). If the program using this were to
1029crash in the middle of this, then the file with the pid suffix could
1030be left behind.
1031
1032=head3 append
1033
1034If you set this boolean option, the data will be written at the end of
f02156f2 1035the current file. Internally this sets the sysopen mode flag O_APPEND.
635c7876 1036
1037 write_file( $file, {append => 1}, @data ) ;
1038
You can import append_file and it does the same thing.
635c7876 1041
1042=head3 no_clobber
1043
1044If you set this boolean option, an existing file will not be overwritten.
1045
1046 write_file( $file, {no_clobber => 1}, @data ) ;
1047
1048=head3 err_mode
1049
1050You can use this option to control how C<write_file> behaves when an
1051error occurs. This option defaults to 'croak'. You can set it to
1052'carp' or to 'quiet' to have no error handling other than the return
1053value. In the example below, if the first call to C<write_file> fails
1054it will carp and then write to another file. If the second call to
1055C<write_file> fails, it will croak.
1056
1057 unless ( write_file( $file, { err_mode => 'carp' }, \$data ) ) {
1058
1059 # write a different file but croak if that fails
1060 write_file( $other_file, \$data ) ;
1061 }
1062
1063=head2 overwrite_file
1064
1065This sub is just a typeglob alias to write_file since write_file
1066always overwrites an existing file. This sub is supported for
1067backwards compatibility with the original version of this module. See
1068write_file for its API and behavior.
1069
1070=head2 append_file
1071
1072This sub will write its data to the end of the file. It is a wrapper
1073around write_file and it has the same API so see that for the full
b3b7ff4e 1074documentation. These calls are equivalent:
635c7876 1075
1076 append_file( $file, @data ) ;
1077 write_file( $file, {append => 1}, @data ) ;
1078
b3b7ff4e 1079
1080=head2 prepend_file
1081
1082This sub writes data to the beginning of a file. The previously existing
1083data is written after that so the effect is prepending data in front of
1084a file. It is a counterpart to the append_file sub in this module. It
1085works by first using C<read_file> to slurp in the file and then calling
1086C<write_file> with the new data and the existing file data.
1087
1088The first argument to C<prepend_file> is the filename. The next argument
1089is an optional hash reference and it contains key/values that can modify
1090the behavior of C<prepend_file>. The rest of the argument list is the
1091data to be written to the file and that is passed to C<write_file> as is
1092(see that for allowed data).
1093
1094Only the C<binmode> and C<err_mode> options are supported. The
1095C<write_file> call has the C<atomic> option set so you will always have
1096a consistent file. See above for more about those options.
1097
1098C<prepend_file> is not exported by default; you need to import it
1099explicitly.
1100
1101 use File::Slurp qw( prepend_file ) ;
1102 prepend_file( $file, $header ) ;
1103 prepend_file( $file, \@lines ) ;
1104 prepend_file( $file, { binmode => ':raw'}, $bin_data ) ;
1105
635c7876 1106=head2 read_dir
1107
1108This sub reads all the file names from a directory and returns them to
1109the caller but C<.> and C<..> are removed by default.
1110
1111 my @files = read_dir( '/path/to/dir' ) ;
1112
b3b7ff4e 1113The first argument is the path to the directory to read. If the next
1114argument is a hash reference, then it is used as the options.
1115Otherwise the rest of the argument list is used as key/value
1116options.
635c7876 1117
b3b7ff4e 1118In list context C<read_dir> returns a list of the entries in the
635c7876 1119directory. In a scalar context it returns an array reference which has
1120the entries.
1121
b3b7ff4e 1122=head3 err_mode
1123
1124If the C<err_mode> option is set, it selects how errors are handled (see
1125C<err_mode> in C<read_file> or C<write_file>).
1126
635c7876 1127=head3 keep_dot_dot
1128
1129If this boolean option is set, C<.> and C<..> are not removed from the
1130list of files.
1131
1132 my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1133
1134=head2 EXPORT
1135
1136 read_file write_file overwrite_file append_file read_dir
1137
f02156f2 1138=head2 LICENSE
1139
1140 Same as Perl.
1141
635c7876 1142=head2 SEE ALSO
1143
1144An article on file slurping in extras/slurp_article.pod. There is
1145also a benchmarking script in extras/slurp_bench.pl.
1146
1147=head2 BUGS
1148
1149If run under Perl 5.004, slurping from the DATA handle will fail as
1150that requires B.pm which didn't get into core until 5.005.
1151
1152=head1 AUTHOR
1153
b3b7ff4e 1154Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
635c7876 1155
1156=cut