lib/File/Slurp.pm

   1 package File::Slurp;
   2
   3 my $printed ;
   4
   5 use strict;
   6
   7 use Carp ;
   8 use Exporter ;
   9 use Fcntl qw( :DEFAULT ) ;
  10 use POSIX qw( :fcntl_h ) ;
  11 use Symbol ;
  12 use UNIVERSAL ;
  13
  14 use vars qw( @ISA %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT ) ;
  15 @ISA = qw( Exporter ) ;
  16
  17 %EXPORT_TAGS = ( 'all' => [
  18         qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;
  19
  20 @EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
  21 @EXPORT_OK = qw( slurp prepend_file ) ;
  22
  23 $VERSION = '9999.16';
  24
  25 my $max_fast_slurp_size = 1024 * 100 ;
  26
  27 my $is_win32 = $^O =~ /win32/i ;
  28
  29 # Install subs for various constants that aren't set in older perls
  30 # (< 5.005).  Fcntl on old perls uses Exporter to define subs without a
  31 # () prototype These can't be overridden with the constant pragma or
  32 # we get a prototype mismatch.  Hence this less than aesthetically
  33 # appealing BEGIN block:
  34
  35 BEGIN {
  36         unless( defined &SEEK_SET ) {
  37                 *SEEK_SET = sub { 0 };
  38                 *SEEK_CUR = sub { 1 };
  39                 *SEEK_END = sub { 2 };
  40         }
  41
  42         unless( defined &O_BINARY ) {
  43                 *O_BINARY = sub { 0 };
  44                 *O_RDONLY = sub { 0 };
  45                 *O_WRONLY = sub { 1 };
  46         }
  47
  48         unless ( defined &O_APPEND ) {
  49
  50                 if ( $^O =~ /olaris/ ) {
  51                         *O_APPEND = sub { 8 };
  52                         *O_CREAT = sub { 256 };
  53                         *O_EXCL = sub { 1024 };
  54                 }
  55                 elsif ( $^O =~ /inux/ ) {
  56                         *O_APPEND = sub { 1024 };
  57                         *O_CREAT = sub { 64 };
  58                         *O_EXCL = sub { 128 };
  59                 }
  60                 elsif ( $^O =~ /BSD/i ) {
  61                         *O_APPEND = sub { 8 };
  62                         *O_CREAT = sub { 512 };
  63                         *O_EXCL = sub { 2048 };
  64                 }
  65         }
  66 }
  67
  68 # print "OS [$^O]\n" ;
  69
  70 # print "O_BINARY = ", O_BINARY(), "\n" ;
  71 # print "O_RDONLY = ", O_RDONLY(), "\n" ;
  72 # print "O_WRONLY = ", O_WRONLY(), "\n" ;
  73 # print "O_APPEND = ", O_APPEND(), "\n" ;
  74 # print "O_CREAT   ", O_CREAT(), "\n" ;
  75 # print "O_EXCL   ", O_EXCL(), "\n" ;
  76
  77
  78 *slurp = \&read_file ;
  79
  80 sub read_file {
  81
  82         my $file_name = shift ;
  83         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
  84
  85         if ( !ref $file_name && 0 &&
  86              -e $file_name && -s _ < $max_fast_slurp_size && ! %{$opts} && !wantarray ) {
  87
  88                 local( *FH ) ;
  89
  90                 unless( open( FH, $file_name ) ) {
  91
  92                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
  93                         goto &_error ;
  94                 }
  95
  96                 my $read_cnt = sysread( FH, my $buf, -s _ ) ;
  97
  98                 unless ( defined $read_cnt ) {
  99
 100 # handle the read error
 101
 102                         @_ = ( $opts,
 103                                 "read_file '$file_name' - small sysread: $!");
 104                         goto &_error ;
 105                 }
 106
 107                 return $buf ;
 108         }
 109
 110 # set the buffer to either the passed in one or ours and init it to the null
 111 # string
 112
 113         my $buf ;
 114         my $buf_ref = $opts->{'buf_ref'} || \$buf ;
 115         ${$buf_ref} = '' ;
 116
 117         my( $read_fh, $size_left, $blk_size ) ;
 118
 119 # deal with ref for a file name
 120 # it could be an open handle or an overloaded object
 121
 122         if ( ref $file_name ) {
 123
 124                 my $ref_result = _check_ref( $file_name ) ;
 125
 126                 if ( ref $ref_result ) {
 127
 128 # we got an error, deal with it
 129
 130                         @_ = ( $opts, $ref_result ) ;
 131                         goto &_error ;
 132                 }
 133
 134                 if ( $ref_result ) {
 135
 136 # we got an overloaded object and the result is the stringified value
 137 # use it as the file name
 138
 139                         $file_name = $ref_result ;
 140                 }
 141                 else {
 142
 143 # here we have just an open handle. set $read_fh so we don't do a sysopen
 144
 145                         $read_fh = $file_name ;
 146                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 147                         $size_left = $blk_size ;
 148                 }
 149         }
 150
 151 # see if we have a path we need to open
 152
 153         unless ( $read_fh ) {
 154
 155 # a regular file. set the sysopen mode
 156
 157                 my $mode = O_RDONLY ;
 158
 159 #printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
 160
 161 # open the file and handle any error
 162
 163                 $read_fh = gensym ;
 164                 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
 165                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
 166                         goto &_error ;
 167                 }
 168
 169                 if ( my $binmode = $opts->{'binmode'} ) {
 170                         binmode( $read_fh, $binmode ) ;
 171                 }
 172
 173 # get the size of the file for use in the read loop
 174
 175                 $size_left = -s $read_fh ;
 176
 177 #print "SIZE $size_left\n" ;
 178
 179
 180 # we need a blk_size if the size is 0 so we can handle pseudofiles like in
 181 # /proc. these show as 0 size but have data to be slurped.
 182
 183                 unless( $size_left ) {
 184
 185                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 186                         $size_left = $blk_size ;
 187                 }
 188         }
 189
 190
 191 #       if ( $size_left < 10000 && keys %{$opts} == 0 && !wantarray ) {
 192
 193 # #print "OPT\n" and $printed++ unless $printed ;
 194
 195 #               my $read_cnt = sysread( $read_fh, my $buf, $size_left ) ;
 196
 197 #               unless ( defined $read_cnt ) {
 198
 199 # # handle the read error
 200
 201 #                       @_ = ( $opts, "read_file '$file_name' - small2 sysread: $!");
 202 #                       goto &_error ;
 203 #               }
 204
 205 #               return $buf ;
 206 #       }
 207
 208 # infinite read loop. we exit when we are done slurping
 209
 210         while( 1 ) {
 211
 212 # do the read and see how much we got
 213
 214                 my $read_cnt = sysread( $read_fh, ${$buf_ref},
 215                                 $size_left, length ${$buf_ref} ) ;
 216
 217                 unless ( defined $read_cnt ) {
 218
 219 # handle the read error
 220
 221                         @_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
 222                         goto &_error ;
 223                 }
 224
 225 # good read. see if we hit EOF (nothing left to read)
 226
 227                 last if $read_cnt == 0 ;
 228
 229 # loop if we are slurping a handle. we don't track $size_left then.
 230
 231                 next if $blk_size ;
 232
 233 # count down how much we read and loop if we have more to read.
 234
 235                 $size_left -= $read_cnt ;
 236                 last if $size_left <= 0 ;
 237         }
 238
 239 # fix up cr/lf to be a newline if this is a windows text file
 240
 241         ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
 242
 243 # this is the 5 returns in a row. each handles one possible
 244 # combination of caller context and requested return type
 245
 246         my $sep = $/ ;
 247         $sep = '\n\n+' if defined $sep && $sep eq '' ;
 248
 249 # see if caller wants lines
 250
 251         if( wantarray || $opts->{'array_ref'} ) {
 252
 253                 my @parts = split m/($sep)/, ${$buf_ref}, -1;
 254
 255                 my @lines ;
 256
 257                 while( @parts > 2 ) {
 258
 259                         my( $line, $sep ) = splice( @parts, 0, 2 ) ;
 260                         push @lines, "$line$sep" ;
 261                 }
 262
 263                 push @lines, shift @parts if @parts && length $parts[0] ;
 264
 265                 return \@lines if $opts->{'array_ref'} ;
 266                 return @lines ;
 267         }
 268
 269 # caller wants a scalar ref to the slurped text
 270
 271         return $buf_ref if $opts->{'scalar_ref'} ;
 272
 273 # caller wants a scalar with the slurped text (normal scalar context)
 274
 275         return ${$buf_ref} if defined wantarray ;
 276
 277 # caller passed in an i/o buffer by reference (normal void context)
 278
 279         return ;
 280
 281
 282 # # caller wants to get an array ref of lines
 283
 284 # # this split doesn't work since it tries to use variable length lookbehind
 285 # # the m// line works.
 286 # #     return [ split( m|(?<=$sep)|, ${$buf_ref} ) ] if $opts->{'array_ref'}  ;
 287 #       return [ length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ]
 288 #               if $opts->{'array_ref'}  ;
 289
 290 # # caller wants a list of lines (normal list context)
 291
 292 # # same problem with this split as before.
 293 # #     return split( m|(?<=$sep)|, ${$buf_ref} ) if wantarray ;
 294 #       return length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : ()
 295 #               if wantarray ;
 296
 297 # # caller wants a scalar ref to the slurped text
 298
 299 #       return $buf_ref if $opts->{'scalar_ref'} ;
 300
 301 # # caller wants a scalar with the slurped text (normal scalar context)
 302
 303 #       return ${$buf_ref} if defined wantarray ;
 304
 305 # # caller passed in an i/o buffer by reference (normal void context)
 306
 307 #       return ;
 308 }
 309
 310
 311 # errors in this sub are returned as scalar refs
 312 # a normal IO/GLOB handle is an empty return
 313 # an overloaded object returns its stringified as a scalarfilename
 314
 315 sub _check_ref {
 316
 317         my( $handle ) = @_ ;
 318
 319 # check if we are reading from a handle (GLOB or IO object)
 320
 321         if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) {
 322
 323 # we have a handle. deal with seeking to it if it is DATA
 324
 325                 my $err = _seek_data_handle( $handle ) ;
 326
 327 # return the error string if any
 328
 329                 return \$err if $err ;
 330
 331 # we have good handle
 332                 return ;
 333         }
 334
 335         eval { require overload } ;
 336
 337 # return an error if we can't load the overload pragma
 338 # or if the object isn't overloaded
 339
 340         return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
 341                  if $@ || !overload::Overloaded( $handle ) ;
 342
 343 # must be overloaded so return its stringified value
 344
 345         return "$handle" ;
 346 }
 347
 348 sub _seek_data_handle {
 349
 350         my( $handle ) = @_ ;
 351
 352 # DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
 353 # glob/handle. only the DATA handle is untainted (since it is from
 354 # trusted data in the source file). this allows us to test if this is
 355 # the DATA handle and then to do a sysseek to make sure it gets
 356 # slurped correctly. on some systems, the buffered i/o pointer is not
 357 # left at the same place as the fd pointer. this sysseek makes them
 358 # the same so slurping with sysread will work.
 359
 360         eval{ require B } ;
 361
 362         if ( $@ ) {
 363
 364                 return <<ERR ;
 365 Can't find B.pm with this Perl: $!.
 366 That module is needed to properly slurp the DATA handle.
 367 ERR
 368         }
 369
 370         if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
 371
 372 # set the seek position to the current tell.
 373
 374                 unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
 375                         return "read_file '$handle' - sysseek: $!" ;
 376                 }
 377         }
 378
 379 # seek was successful, return no error string
 380
 381         return ;
 382 }
 383
 384
 385 sub write_file {
 386
 387         my $file_name = shift ;
 388
 389 # get the optional argument hash ref from @_ or an empty hash ref.
 390
 391         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 392
 393         my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
 394
 395 # get the buffer ref - it depends on how the data is passed into write_file
 396 # after this if/else $buf_ref will have a scalar ref to the data.
 397
 398         if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
 399
 400 # a scalar ref passed in %opts has the data
 401 # note that the data was passed by ref
 402
 403                 $buf_ref = $opts->{'buf_ref'} ;
 404                 $data_is_ref = 1 ;
 405         }
 406         elsif ( ref $_[0] eq 'SCALAR' ) {
 407
 408 # the first value in @_ is the scalar ref to the data
 409 # note that the data was passed by ref
 410
 411                 $buf_ref = shift ;
 412                 $data_is_ref = 1 ;
 413         }
 414         elsif ( ref $_[0] eq 'ARRAY' ) {
 415
 416 # the first value in @_ is the array ref to the data so join it.
 417
 418                 ${$buf_ref} = join '', @{$_[0]} ;
 419         }
 420         else {
 421
 422 # good old @_ has all the data so join it.
 423
 424                 ${$buf_ref} = join '', @_ ;
 425         }
 426
 427 # deal with ref for a file name
 428
 429         if ( ref $file_name ) {
 430
 431                 my $ref_result = _check_ref( $file_name ) ;
 432
 433                 if ( ref $ref_result ) {
 434
 435 # we got an error, deal with it
 436
 437                         @_ = ( $opts, $ref_result ) ;
 438                         goto &_error ;
 439                 }
 440
 441                 if ( $ref_result ) {
 442
 443 # we got an overloaded object and the result is the stringified value
 444 # use it as the file name
 445
 446                         $file_name = $ref_result ;
 447                 }
 448                 else {
 449
 450 # we now have a proper handle ref.
 451 # make sure we don't call truncate on it.
 452
 453                         $write_fh = $file_name ;
 454                         $no_truncate = 1 ;
 455                 }
 456         }
 457
 458 # see if we have a path we need to open
 459
 460         unless( $write_fh ) {
 461
 462 # spew to regular file.
 463
 464                 if ( $opts->{'atomic'} ) {
 465
 466 # in atomic mode, we spew to a temp file so make one and save the original
 467 # file name.
 468                         $orig_file_name = $file_name ;
 469                         $file_name .= ".$$" ;
 470                 }
 471
 472 # set the mode for the sysopen
 473
 474                 my $mode = O_WRONLY | O_CREAT ;
 475                 $mode |= O_APPEND if $opts->{'append'} ;
 476                 $mode |= O_EXCL if $opts->{'no_clobber'} ;
 477
 478                 my $perms = $opts->{perms} ;
 479                 $perms = 0666 unless defined $perms ;
 480
 481 #printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
 482
 483 # open the file and handle any error.
 484
 485                 $write_fh = gensym ;
 486                 unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
 487
 488                         @_ = ( $opts, "write_file '$file_name' - sysopen: $!");
 489                         goto &_error ;
 490                 }
 491         }
 492
 493         if ( my $binmode = $opts->{'binmode'} ) {
 494                 binmode( $write_fh, $binmode ) ;
 495         }
 496
 497         sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
 498
 499
 500 #print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 501
 502 # fix up newline to write cr/lf if this is a windows text file
 503
 504         if ( $is_win32 && !$opts->{'binmode'} ) {
 505
 506 # copy the write data if it was passed by ref so we don't clobber the
 507 # caller's data
 508                 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
 509                 ${$buf_ref} =~ s/\n/\015\012/g ;
 510         }
 511
 512 #print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 513
 514 # get the size of how much we are writing and init the offset into that buffer
 515
 516         my $size_left = length( ${$buf_ref} ) ;
 517         my $offset = 0 ;
 518
 519 # loop until we have no more data left to write
 520
 521         do {
 522
 523 # do the write and track how much we just wrote
 524
 525                 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
 526                                 $size_left, $offset ) ;
 527
 528                 unless ( defined $write_cnt ) {
 529
 530 # the write failed
 531                         @_ = ( $opts, "write_file '$file_name' - syswrite: $!");
 532                         goto &_error ;
 533                 }
 534
 535 # track much left to write and where to write from in the buffer
 536
 537                 $size_left -= $write_cnt ;
 538                 $offset += $write_cnt ;
 539
 540         } while( $size_left > 0 ) ;
 541
 542 # we truncate regular files in case we overwrite a long file with a shorter file
 543 # so seek to the current position to get it (same as tell()).
 544
 545         truncate( $write_fh,
 546                   sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
 547
 548         close( $write_fh ) ;
 549
 550 # handle the atomic mode - move the temp file to the original filename.
 551
 552         if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
 553
 554                 @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
 555                 goto &_error ;
 556         }
 557
 558         return 1 ;
 559 }
 560
 561 # this is for backwards compatibility with the previous File::Slurp module.
 562 # write_file always overwrites an existing file
 563
 564 *overwrite_file = \&write_file ;
 565
 566 # the current write_file has an append mode so we use that. this
 567 # supports the same API with an optional second argument which is a
 568 # hash ref of options.
 569
 570 sub append_file {
 571
 572 # get the optional opts hash ref
 573         my $opts = $_[1] ;
 574         if ( ref $opts eq 'HASH' ) {
 575
 576 # we were passed an opts ref so just mark the append mode
 577
 578                 $opts->{append} = 1 ;
 579         }
 580         else {
 581
 582 # no opts hash so insert one with the append mode
 583
 584                 splice( @_, 1, 0, { append => 1 } ) ;
 585         }
 586
 587 # magic goto the main write_file sub. this overlays the sub without touching
 588 # the stack or @_
 589
 590         goto &write_file
 591 }
 592
 593 # basic wrapper around opendir/readdir
 594
 595 # prepend data to the beginning of a file
 596
 597 sub prepend_file {
 598
 599         my $file_name = shift ;
 600
 601 #print "FILE $file_name\n" ;
 602
 603         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 604
 605 # delete unsupported options
 606
 607         my @bad_opts =
 608                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 609
 610         delete @{$opts}{@bad_opts} ;
 611
 612         my $prepend_data = shift ;
 613         $prepend_data = '' unless defined $prepend_data ;
 614         $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
 615
 616 #print "PRE [$prepend_data]\n" ;
 617
 618
 619 ###### set croak as error_mode
 620 ###### wrap in eval
 621
 622         my $err_mode = delete $opts->{err_mode} ;
 623         $opts->{ err_mode } = 'croak' ;
 624         $opts->{ scalar_ref } = 1 ;
 625
 626         my $existing_data ;
 627         eval { $existing_data = read_file( $file_name, $opts ) } ;
 628
 629         if ( $@ ) {
 630
 631                 @_ = ( { err_mode => $err_mode },
 632                         "prepend_file '$file_name' - read_file: $!" ) ;
 633                 goto &_error ;
 634         }
 635
 636 #print "EXIST [$$existing_data]\n" ;
 637
 638         $opts->{ atomic } = 1 ;
 639
 640         my $write_result = eval {
 641                 write_file( $file_name, $opts,
 642                         $prepend_data, $$existing_data ) ;
 643         } ;
 644
 645         if ( $@ ) {
 646
 647                 @_ = ( { err_mode => $err_mode },
 648                         "prepend_file '$file_name' - write_file: $!" ) ;
 649                 goto &_error ;
 650         }
 651
 652         return $write_result ;
 653 }
 654
 655 sub read_dir {
 656
 657         my $dir = shift ;
 658         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
 659
 660 # this handle will be destroyed upon return
 661
 662         local(*DIRH);
 663
 664 # open the dir and handle any errors
 665
 666         unless ( opendir( DIRH, $dir ) ) {
 667
 668                 @_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
 669                 goto &_error ;
 670         }
 671
 672         my @dir_entries = readdir(DIRH) ;
 673
 674         @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
 675                 unless $opts->{'keep_dot_dot'} ;
 676
 677         return @dir_entries if wantarray ;
 678         return \@dir_entries ;
 679 }
 680
 681 # error handling section
 682 #
 683 # all the error handling uses magic goto so the caller will get the
 684 # error message as if from their code and not this module. if we just
 685 # did a call on the error code, the carp/croak would report it from
 686 # this module since the error sub is one level down on the call stack
 687 # from read_file/write_file/read_dir.
 688
 689
 690 my %err_func = (
 691         'carp'  => \&carp,
 692         'croak' => \&croak,
 693 ) ;
 694
 695 sub _error {
 696
 697         my( $opts, $err_msg ) = @_ ;
 698
 699 # get the error function to use
 700
 701         my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
 702
 703 # if we didn't find it in our error function hash, they must have set
 704 # it to quiet and we don't do anything.
 705
 706         return unless $func ;
 707
 708 # call the carp/croak function
 709
 710         $func->($err_msg) if $func ;
 711
 712 # return a hard undef (in list context this will be a single value of
 713 # undef which is not a legal in-band value)
 714
 715         return undef ;
 716 }
 717
 718 1;
 719 __END__
 720
 721 =head1 NAME
 722
 723 File::Slurp - Simple and Efficient Reading/Writing of Complete Files
 724
 725 =head1 SYNOPSIS
 726
 727   use File::Slurp;
 728
 729 # read in a whole file into a scalar
 730
 731   my $text = read_file( 'filename' ) ;
 732
 733 # read in a whole file into an array of lines
 734
 735   my @lines = read_file( 'filename' ) ;
 736
 737 # write out a whole file from a scalar
 738
 739   write_file( 'filename', $text ) ;
 740
 741 # write out a whole file from an array of lines
 742
 743   write_file( 'filename', @lines ) ;
 744
 745 # Here is a simple and fast way to load and save a simple config file
 746 # made of key=value lines.
 747
 748   my %conf = read_file( $file_name ) =~ /^(\w+)=(\.*)$/mg ;
 749   write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ;
 750
 751 # read in a whole directory of file names (skipping . and ..)
 752
 753   my @files = read_dir( '/path/to/dir' ) ;
 754
 755 =head1 DESCRIPTION
 756
 757 This module provides subs that allow you to read or write entire files
 758 with one simple call. They are designed to be simple to use, have
 759 flexible ways to pass in or get the file contents and to be very
 760 efficient.  There is also a sub to read in all the files in a
 761 directory other than C<.> and C<..>
 762
 763 These slurp/spew subs work for files, pipes and sockets, stdio,
 764 pseudo-files, and the DATA handle. Read more about why slurping files is
 765 a good thing in the file 'slurp_article.pod' in the extras/ directory.
 766
 767 If you are interested in how fast these calls work, check out the
 768 slurp_bench.pl program in the extras/ directory. It compares many
 769 different forms of slurping. You can select the I/O direction, context
 770 and file sizes. Use the --help option to see how to run it.
 771
 772 =head2 B<read_file>
 773
 774 This sub reads in an entire file and returns its contents to the
 775 caller.  In scalar context it returns the entire file as a single
 776 scalar. In list context it will return a list of lines (using the
 777 current value of $/ as the separator including support for paragraph
 778 mode when it is set to '').
 779
 780   my $text = read_file( 'filename' ) ;
 781   my $bin = read_file( 'filename' { binmode => ':raw' } ) ;
 782   my @lines = read_file( 'filename' ) ;
 783   my $lines = read_file( 'filename', array_ref => 1 ) ;
 784
 785 The first argument is the file to slurp in. If the next argument is a
 786 hash reference, then it is used as the options. Otherwise the rest of
 787 the argument list are is used as key/value options.
 788
 789 If the file argument is a handle (if it is a ref and is an IO or GLOB
 790 object), then that handle is slurped in. This mode is supported so you
 791 slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
 792 an example that does C<open( '-|' )> and the child process spews data
 793 to the parant which slurps it in.  All of the options that control how
 794 the data is returned to the caller still work in this case.
 795
 796 If the first argument is an overloaded object then its stringified value
 797 is used for the filename and that file is opened.  This is a new feature
 798 in 9999.14. See the stringify.t test for an example.
 799
 800 By default C<read_file> returns an undef in scalar contex or a single
 801 undef in list context if it encounters an error. Those are both
 802 impossible to get with a clean read_file call which means you can check
 803 the return value and always know if you had an error. You can change how
 804 errors are handled with the C<err_mode> option.
 805
 806 NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
 807 handle. It used to need a sysseek workaround but that is now handled
 808 when needed by the module itself.
 809
 810 You can optionally request that C<slurp()> is exported to your code. This
 811 is an alias for read_file and is meant to be forward compatible with
 812 Perl 6 (which will have slurp() built-in).
 813
 814 The options for C<read_file> are:
 815
 816 =head3 binmode
 817
 818 If you set the binmode option, then its value is passed to a call to
 819 binmode on the opened handle. You can use this to set the file to be
 820 read in binary mode, utf8, etc. See perldoc -f binmode for more.
 821
 822         my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
 823         my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
 824
 825 =head3 array_ref
 826
 827 If this boolean option is set, the return value (only in scalar
 828 context) will be an array reference which contains the lines of the
 829 slurped file. The following two calls are equivalent:
 830
 831         my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
 832         my $lines_ref = [ read_file( $bin_file ) ] ;
 833
 834 =head3 scalar_ref
 835
 836 If this boolean option is set, the return value (only in scalar context)
 837 will be an scalar reference to a string which is the contents of the
 838 slurped file. This will usually be faster than returning the plain
 839 scalar. It will also save memory as it will not make a copy of the file
 840 to return.
 841
 842         my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
 843
 844 =head3 buf_ref
 845
 846 You can use this option to pass in a scalar reference and the slurped
 847 file contents will be stored in the scalar. This can be used in
 848 conjunction with any of the other options. This saves an extra copy of
 849 the slurped file and can lower ram usage vs returning the file.
 850
 851         read_file( $bin_file, buf_ref => \$buffer ) ;
 852
 853 =head3 blk_size
 854
 855 You can use this option to set the block size used when slurping from
 856 an already open handle (like \*STDIN). It defaults to 1MB.
 857
 858         my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
 859                                              array_ref => 1 ) ;
 860
 861 =head3 err_mode
 862
 863 You can use this option to control how read_file behaves when an error
 864 occurs. This option defaults to 'croak'. You can set it to 'carp' or to
 865 'quiet to have no special error handling. This code wants to carp and
 866 then read another file if it fails.
 867
 868         my $text_ref = read_file( $file, err_mode => 'carp' ) ;
 869         unless ( $text_ref ) {
 870
 871                 # read a different file but croak if not found
 872                 $text_ref = read_file( $another_file ) ;
 873         }
 874
 875         # process ${$text_ref}
 876
 877 =head2 B<write_file>
 878
 879 This sub writes out an entire file in one call.
 880
 881   write_file( 'filename', @data ) ;
 882
 883 The first argument to C<write_file> is the filename. The next argument
 884 is an optional hash reference and it contains key/values that can
 885 modify the behavior of C<write_file>. The rest of the argument list is
 886 the data to be written to the file.
 887
 888   write_file( 'filename', {append => 1 }, @data ) ;
 889   write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
 890
 891 As a shortcut if the first data argument is a scalar or array reference,
 892 it is used as the only data to be written to the file. Any following
 893 arguments in @_ are ignored. This is a faster way to pass in the output
 894 to be written to the file and is equivalent to the C<buf_ref> option of
 895 C<read_file>. These following pairs are equivalent but the pass by
 896 reference call will be faster in most cases (especially with larger
 897 files).
 898
 899   write_file( 'filename', \$buffer ) ;
 900   write_file( 'filename', $buffer ) ;
 901
 902   write_file( 'filename', \@lines ) ;
 903   write_file( 'filename', @lines ) ;
 904
 905 If the first argument is a handle (if it is a ref and is an IO or GLOB
 906 object), then that handle is written to. This mode is supported so you
 907 spew to handles such as \*STDOUT. See the test handle.t for an example
 908 that does C<open( '-|' )> and child process spews data to the parent
 909 which slurps it in.  All of the options that control how the data are
 910 passed into C<write_file> still work in this case.
 911
 912 If the first argument is an overloaded object then its stringified value
 913 is used for the filename and that file is opened.  This is new feature
 914 in 9999.14. See the stringify.t test for an example.
 915
 916 By default C<write_file> returns 1 upon successfully writing the file or
 917 undef if it encountered an error. You can change how errors are handled
 918 with the C<err_mode> option.
 919
 920 The options are:
 921
 922 =head3 binmode
 923
 924 If you set the binmode option, then its value is passed to a call to
 925 binmode on the opened handle. You can use this to set the file to be
 926 read in binary mode, utf8, etc. See perldoc -f binmode for more.
 927
 928         write_file( $bin_file, {binmode => ':raw'}, @data ) ;
 929         write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
 930
 931 =head3 perms
 932
 933 The perms option sets the permissions of newly-created files. This value
 934 is modified by your process's umask and defaults to 0666 (same as
 935 sysopen).
 936
 937 NOTE: this option is new as of File::Slurp version 9999.14;
 938
 939 =head3 buf_ref
 940
 941 You can use this option to pass in a scalar reference which has the
 942 data to be written. If this is set then any data arguments (including
 943 the scalar reference shortcut) in @_ will be ignored. These are
 944 equivalent:
 945
 946         write_file( $bin_file, { buf_ref => \$buffer } ) ;
 947         write_file( $bin_file, \$buffer ) ;
 948         write_file( $bin_file, $buffer ) ;
 949
 950 =head3 atomic
 951
 952 If you set this boolean option, the file will be written to in an
 953 atomic fashion. A temporary file name is created by appending the pid
 954 ($$) to the file name argument and that file is spewed to. After the
 955 file is closed it is renamed to the original file name (and rename is
 956 an atomic operation on most OS's). If the program using this were to
 957 crash in the middle of this, then the file with the pid suffix could
 958 be left behind.
 959
 960 =head3 append
 961
 962 If you set this boolean option, the data will be written at the end of
 963 the current file. Internally this sets the sysopen mode flag O_APPEND.
 964
 965         write_file( $file, {append => 1}, @data ) ;
 966
 967  You
 968 can import append_file and it does the same thing.
 969
 970 =head3 no_clobber
 971
 972 If you set this boolean option, an existing file will not be overwritten.
 973
 974         write_file( $file, {no_clobber => 1}, @data ) ;
 975
 976 =head3 err_mode
 977
 978 You can use this option to control how C<write_file> behaves when an
 979 error occurs. This option defaults to 'croak'. You can set it to
 980 'carp' or to 'quiet' to have no error handling other than the return
 981 value. If the first call to C<write_file> fails it will carp and then
 982 write to another file. If the second call to C<write_file> fails, it
 983 will croak.
 984
 985         unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
 986
 987                 # write a different file but croak if not found
 988                 write_file( $other_file, \$data ) ;
 989         }
 990
 991 =head2 overwrite_file
 992
 993 This sub is just a typeglob alias to write_file since write_file
 994 always overwrites an existing file. This sub is supported for
 995 backwards compatibility with the original version of this module. See
 996 write_file for its API and behavior.
 997
 998 =head2 append_file
 999
1000 This sub will write its data to the end of the file. It is a wrapper
1001 around write_file and it has the same API so see that for the full
1002 documentation. These calls are equivalent:
1003
1004         append_file( $file, @data ) ;
1005         write_file( $file, {append => 1}, @data ) ;
1006
1007
1008 =head2 prepend_file
1009
1010 This sub writes data to the beginning of a file. The previously existing
1011 data is written after that so the effect is prepending data in front of
1012 a file. It is a counterpart to the append_file sub in this module. It
1013 works by first using C<read_file> to slurp in the file and then calling
1014 C<write_file> with the new data and the existing file data.
1015
1016 The first argument to C<prepend_file> is the filename. The next argument
1017 is an optional hash reference and it contains key/values that can modify
1018 the behavior of C<prepend_file>. The rest of the argument list is the
1019 data to be written to the file and that is passed to C<write_file> as is
1020 (see that for allowed data).
1021
1022 Only the C<binmode> and C<err_mode> options are supported. The
1023 C<write_file> call has the C<atomic> option set so you will always have
1024 a consistant file. See above for more about those options.
1025
1026 C<prepend_file> is not exported by default, you need to import it
1027 explicitly.
1028
1029         use File::Slurp qw( prepend_file ) ;
1030         prepend_file( $file, $header ) ;
1031         prepend_file( $file, \@lines ) ;
1032         prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
1033
1034 =head2 read_dir
1035
1036 This sub reads all the file names from directory and returns them to
1037 the caller but C<.> and C<..> are removed by default.
1038
1039         my @files = read_dir( '/path/to/dir' ) ;
1040
1041 The first argument is the path to the directory to read.  If the next
1042 argument is a hash reference, then it is used as the options.
1043 Otherwise the rest of the argument list are is used as key/value
1044 options.
1045
1046 In list context C<read_dir> returns a list of the entries in the
1047 directory. In a scalar context it returns an array reference which has
1048 the entries.
1049
1050 =head3 err_mode
1051
1052 If the C<err_mode> option is set, it selects how errors are handled (see
1053 C<err_mode> in C<read_file> or C<write_file>).
1054
1055 =head3 keep_dot_dot
1056
1057 If this boolean option is set, C<.> and C<..> are not removed from the
1058 list of files.
1059
1060         my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1061
1062 =head2 EXPORT
1063
1064   read_file write_file overwrite_file append_file read_dir
1065
1066 =head2 LICENSE
1067
1068   Same as Perl.
1069
1070 =head2 SEE ALSO
1071
1072 An article on file slurping in extras/slurp_article.pod. There is
1073 also a benchmarking script in extras/slurp_bench.pl.
1074
1075 =head2 BUGS
1076
1077 If run under Perl 5.004, slurping from the DATA handle will fail as
1078 that requires B.pm which didn't get into core until 5.005.
1079
1080 =head1 AUTHOR
1081
1082 Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
1083
1084 =cut