lib/File/Slurp.pm

   1 package File::Slurp;
   2
   3 use 5.6.2 ;
   4
   5 use strict;
   6
   7 use Carp ;
   8 use Exporter ;
   9 use Fcntl qw( :DEFAULT ) ;
  10 use POSIX qw( :fcntl_h ) ;
  11 use Symbol ;
  12
  13 use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ;
  14 @ISA = qw( Exporter ) ;
  15
  16 $VERSION = '9999.17';
  17
  18 @EXPORT_OK = qw(
  19         slurp
  20         prepend_file
  21         edit_file
  22         edit_file_lines
  23 ) ;
  24
  25 %EXPORT_TAGS = ( 'all' => [ qw(
  26         read_file
  27         write_file
  28         overwrite_file
  29         append_file
  30         read_dir ),
  31         @EXPORT_OK
  32 ] ) ;
  33 @EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
  34
  35 my $max_fast_slurp_size = 1024 * 100 ;
  36
  37 my $is_win32 = $^O =~ /win32/i ;
  38
  39 # Install subs for various constants that aren't set in older perls
  40 # (< 5.005).  Fcntl on old perls uses Exporter to define subs without a
  41 # () prototype These can't be overridden with the constant pragma or
  42 # we get a prototype mismatch.  Hence this less than aesthetically
  43 # appealing BEGIN block:
  44
  45 BEGIN {
  46         unless( defined &SEEK_SET ) {
  47                 *SEEK_SET = sub { 0 };
  48                 *SEEK_CUR = sub { 1 };
  49                 *SEEK_END = sub { 2 };
  50         }
  51
  52         unless( defined &O_BINARY ) {
  53                 *O_BINARY = sub { 0 };
  54                 *O_RDONLY = sub { 0 };
  55                 *O_WRONLY = sub { 1 };
  56         }
  57
  58         unless ( defined &O_APPEND ) {
  59
  60                 if ( $^O =~ /olaris/ ) {
  61                         *O_APPEND = sub { 8 };
  62                         *O_CREAT = sub { 256 };
  63                         *O_EXCL = sub { 1024 };
  64                 }
  65                 elsif ( $^O =~ /inux/ ) {
  66                         *O_APPEND = sub { 1024 };
  67                         *O_CREAT = sub { 64 };
  68                         *O_EXCL = sub { 128 };
  69                 }
  70                 elsif ( $^O =~ /BSD/i ) {
  71                         *O_APPEND = sub { 8 };
  72                         *O_CREAT = sub { 512 };
  73                         *O_EXCL = sub { 2048 };
  74                 }
  75         }
  76 }
  77
  78 # print "OS [$^O]\n" ;
  79
  80 # print "O_BINARY = ", O_BINARY(), "\n" ;
  81 # print "O_RDONLY = ", O_RDONLY(), "\n" ;
  82 # print "O_WRONLY = ", O_WRONLY(), "\n" ;
  83 # print "O_APPEND = ", O_APPEND(), "\n" ;
  84 # print "O_CREAT   ", O_CREAT(), "\n" ;
  85 # print "O_EXCL   ", O_EXCL(), "\n" ;
  86
  87
  88 *slurp = \&read_file ;
  89
  90 sub read_file {
  91
  92         my $file_name = shift ;
  93         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
  94
  95 # this is the optimized read_file for shorter files.
  96 # the test for -s > 0 is to allow pseudo files to be read with the
  97 # regular loop since they return a size of 0.
  98
  99         if ( !ref $file_name && -e $file_name && -s _ > 0 &&
 100              -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) {
 101
 102
 103                 my $fh ;
 104                 unless( sysopen( $fh, $file_name, O_RDONLY ) ) {
 105
 106                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
 107                         goto &_error ;
 108                 }
 109
 110                 my $read_cnt = sysread( $fh, my $buf, -s _ ) ;
 111
 112                 unless ( defined $read_cnt ) {
 113
 114                         @_ = ( $opts,
 115                                 "read_file '$file_name' - small sysread: $!");
 116                         goto &_error ;
 117                 }
 118
 119                 $buf =~ s/\015\012/\n/g if $is_win32 ;
 120                 return $buf ;
 121         }
 122
 123 # set the buffer to either the passed in one or ours and init it to the null
 124 # string
 125
 126         my $buf ;
 127         my $buf_ref = $opts->{'buf_ref'} || \$buf ;
 128         ${$buf_ref} = '' ;
 129
 130         my( $read_fh, $size_left, $blk_size ) ;
 131
 132 # deal with ref for a file name
 133 # it could be an open handle or an overloaded object
 134
 135         if ( ref $file_name ) {
 136
 137                 my $ref_result = _check_ref( $file_name ) ;
 138
 139                 if ( ref $ref_result ) {
 140
 141 # we got an error, deal with it
 142
 143                         @_ = ( $opts, $ref_result ) ;
 144                         goto &_error ;
 145                 }
 146
 147                 if ( $ref_result ) {
 148
 149 # we got an overloaded object and the result is the stringified value
 150 # use it as the file name
 151
 152                         $file_name = $ref_result ;
 153                 }
 154                 else {
 155
 156 # here we have just an open handle. set $read_fh so we don't do a sysopen
 157
 158                         $read_fh = $file_name ;
 159                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 160                         $size_left = $blk_size ;
 161                 }
 162         }
 163
 164 # see if we have a path we need to open
 165
 166         unless ( $read_fh ) {
 167
 168 # a regular file. set the sysopen mode
 169
 170                 my $mode = O_RDONLY ;
 171
 172 #printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
 173
 174                 $read_fh = gensym ;
 175                 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
 176                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
 177                         goto &_error ;
 178                 }
 179
 180                 if ( my $binmode = $opts->{'binmode'} ) {
 181                         binmode( $read_fh, $binmode ) ;
 182                 }
 183
 184 # get the size of the file for use in the read loop
 185
 186                 $size_left = -s $read_fh ;
 187
 188 #print "SIZE $size_left\n" ;
 189
 190 # we need a blk_size if the size is 0 so we can handle pseudofiles like in
 191 # /proc. these show as 0 size but have data to be slurped.
 192
 193                 unless( $size_left ) {
 194
 195                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 196                         $size_left = $blk_size ;
 197                 }
 198         }
 199
 200
 201 #       if ( $size_left < 10000 && keys %{$opts} == 0 && !wantarray ) {
 202
 203 #               my $read_cnt = sysread( $read_fh, my $buf, $size_left ) ;
 204
 205 #               unless ( defined $read_cnt ) {
 206
 207 #                       @_ = ( $opts, "read_file '$file_name' - small2 sysread: $!");
 208 #                       goto &_error ;
 209 #               }
 210
 211 #               return $buf ;
 212 #       }
 213
 214 # infinite read loop. we exit when we are done slurping
 215
 216         while( 1 ) {
 217
 218 # do the read and see how much we got
 219
 220                 my $read_cnt = sysread( $read_fh, ${$buf_ref},
 221                                 $size_left, length ${$buf_ref} ) ;
 222
 223                 unless ( defined $read_cnt ) {
 224
 225                         @_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
 226                         goto &_error ;
 227                 }
 228
 229 # good read. see if we hit EOF (nothing left to read)
 230
 231                 last if $read_cnt == 0 ;
 232
 233 # loop if we are slurping a handle. we don't track $size_left then.
 234
 235                 next if $blk_size ;
 236
 237 # count down how much we read and loop if we have more to read.
 238
 239                 $size_left -= $read_cnt ;
 240                 last if $size_left <= 0 ;
 241         }
 242
 243 # fix up cr/lf to be a newline if this is a windows text file
 244
 245         ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
 246
 247         my $sep = $/ ;
 248         $sep = '\n\n+' if defined $sep && $sep eq '' ;
 249
 250 # see if caller wants lines
 251
 252         if( wantarray || $opts->{'array_ref'} ) {
 253
 254                 use re 'taint' ;
 255
 256                 my @lines = length(${$buf_ref}) ?
 257                         ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ;
 258
 259 # caller wants an array ref
 260
 261                 return \@lines if $opts->{'array_ref'} ;
 262
 263 # caller wants list of lines
 264
 265                 return @lines ;
 266         }
 267
 268 # caller wants a scalar ref to the slurped text
 269
 270         return $buf_ref if $opts->{'scalar_ref'} ;
 271
 272 # caller wants a scalar with the slurped text (normal scalar context)
 273
 274         return ${$buf_ref} if defined wantarray ;
 275
 276 # caller passed in an i/o buffer by reference (normal void context)
 277
 278         return ;
 279 }
 280
 281 # errors in this sub are returned as scalar refs
 282 # a normal IO/GLOB handle is an empty return
 283 # an overloaded object returns its stringified as a scalarfilename
 284
 285 sub _check_ref {
 286
 287         my( $handle ) = @_ ;
 288
 289 # check if we are reading from a handle (GLOB or IO object)
 290
 291         if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) {
 292
 293 # we have a handle. deal with seeking to it if it is DATA
 294
 295                 my $err = _seek_data_handle( $handle ) ;
 296
 297 # return the error string if any
 298
 299                 return \$err if $err ;
 300
 301 # we have good handle
 302                 return ;
 303         }
 304
 305         eval { require overload } ;
 306
 307 # return an error if we can't load the overload pragma
 308 # or if the object isn't overloaded
 309
 310         return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
 311                  if $@ || !overload::Overloaded( $handle ) ;
 312
 313 # must be overloaded so return its stringified value
 314
 315         return "$handle" ;
 316 }
 317
 318 sub _seek_data_handle {
 319
 320         my( $handle ) = @_ ;
 321
 322 # DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
 323 # glob/handle. only the DATA handle is untainted (since it is from
 324 # trusted data in the source file). this allows us to test if this is
 325 # the DATA handle and then to do a sysseek to make sure it gets
 326 # slurped correctly. on some systems, the buffered i/o pointer is not
 327 # left at the same place as the fd pointer. this sysseek makes them
 328 # the same so slurping with sysread will work.
 329
 330         eval{ require B } ;
 331
 332         if ( $@ ) {
 333
 334                 return <<ERR ;
 335 Can't find B.pm with this Perl: $!.
 336 That module is needed to properly slurp the DATA handle.
 337 ERR
 338         }
 339
 340         if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
 341
 342 # set the seek position to the current tell.
 343
 344                 unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
 345                         return "read_file '$handle' - sysseek: $!" ;
 346                 }
 347         }
 348
 349 # seek was successful, return no error string
 350
 351         return ;
 352 }
 353
 354
 355 sub write_file {
 356
 357         my $file_name = shift ;
 358
 359 # get the optional argument hash ref from @_ or an empty hash ref.
 360
 361         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 362
 363         my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
 364
 365 # get the buffer ref - it depends on how the data is passed into write_file
 366 # after this if/else $buf_ref will have a scalar ref to the data.
 367
 368         if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
 369
 370 # a scalar ref passed in %opts has the data
 371 # note that the data was passed by ref
 372
 373                 $buf_ref = $opts->{'buf_ref'} ;
 374                 $data_is_ref = 1 ;
 375         }
 376         elsif ( ref $_[0] eq 'SCALAR' ) {
 377
 378 # the first value in @_ is the scalar ref to the data
 379 # note that the data was passed by ref
 380
 381                 $buf_ref = shift ;
 382                 $data_is_ref = 1 ;
 383         }
 384         elsif ( ref $_[0] eq 'ARRAY' ) {
 385
 386 # the first value in @_ is the array ref to the data so join it.
 387
 388                 ${$buf_ref} = join '', @{$_[0]} ;
 389         }
 390         else {
 391
 392 # good old @_ has all the data so join it.
 393
 394                 ${$buf_ref} = join '', @_ ;
 395         }
 396
 397 # deal with ref for a file name
 398
 399         if ( ref $file_name ) {
 400
 401                 my $ref_result = _check_ref( $file_name ) ;
 402
 403                 if ( ref $ref_result ) {
 404
 405 # we got an error, deal with it
 406
 407                         @_ = ( $opts, $ref_result ) ;
 408                         goto &_error ;
 409                 }
 410
 411                 if ( $ref_result ) {
 412
 413 # we got an overloaded object and the result is the stringified value
 414 # use it as the file name
 415
 416                         $file_name = $ref_result ;
 417                 }
 418                 else {
 419
 420 # we now have a proper handle ref.
 421 # make sure we don't call truncate on it.
 422
 423                         $write_fh = $file_name ;
 424                         $no_truncate = 1 ;
 425                 }
 426         }
 427
 428 # see if we have a path we need to open
 429
 430         unless( $write_fh ) {
 431
 432 # spew to regular file.
 433
 434                 if ( $opts->{'atomic'} ) {
 435
 436 # in atomic mode, we spew to a temp file so make one and save the original
 437 # file name.
 438                         $orig_file_name = $file_name ;
 439                         $file_name .= ".$$" ;
 440                 }
 441
 442 # set the mode for the sysopen
 443
 444                 my $mode = O_WRONLY | O_CREAT ;
 445                 $mode |= O_APPEND if $opts->{'append'} ;
 446                 $mode |= O_EXCL if $opts->{'no_clobber'} ;
 447
 448                 my $perms = $opts->{perms} ;
 449                 $perms = 0666 unless defined $perms ;
 450
 451 #printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
 452
 453 # open the file and handle any error.
 454
 455                 $write_fh = gensym ;
 456                 unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
 457
 458                         @_ = ( $opts, "write_file '$file_name' - sysopen: $!");
 459                         goto &_error ;
 460                 }
 461         }
 462
 463         if ( my $binmode = $opts->{'binmode'} ) {
 464                 binmode( $write_fh, $binmode ) ;
 465         }
 466
 467         sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
 468
 469 #print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 470
 471 # fix up newline to write cr/lf if this is a windows text file
 472
 473         if ( $is_win32 && !$opts->{'binmode'} ) {
 474
 475 # copy the write data if it was passed by ref so we don't clobber the
 476 # caller's data
 477                 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
 478                 ${$buf_ref} =~ s/\n/\015\012/g ;
 479         }
 480
 481 #print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 482
 483 # get the size of how much we are writing and init the offset into that buffer
 484
 485         my $size_left = length( ${$buf_ref} ) ;
 486         my $offset = 0 ;
 487
 488 # loop until we have no more data left to write
 489
 490         do {
 491
 492 # do the write and track how much we just wrote
 493
 494                 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
 495                                 $size_left, $offset ) ;
 496
 497                 unless ( defined $write_cnt ) {
 498
 499                         @_ = ( $opts, "write_file '$file_name' - syswrite: $!");
 500                         goto &_error ;
 501                 }
 502
 503 # track how much left to write and where to write from in the buffer
 504
 505                 $size_left -= $write_cnt ;
 506                 $offset += $write_cnt ;
 507
 508         } while( $size_left > 0 ) ;
 509
 510 # we truncate regular files in case we overwrite a long file with a shorter file
 511 # so seek to the current position to get it (same as tell()).
 512
 513         truncate( $write_fh,
 514                   sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
 515
 516         close( $write_fh ) ;
 517
 518 # handle the atomic mode - move the temp file to the original filename.
 519
 520         if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
 521
 522                 @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
 523                 goto &_error ;
 524         }
 525
 526         return 1 ;
 527 }
 528
 529 # this is for backwards compatibility with the previous File::Slurp module.
 530 # write_file always overwrites an existing file
 531
 532 *overwrite_file = \&write_file ;
 533
 534 # the current write_file has an append mode so we use that. this
 535 # supports the same API with an optional second argument which is a
 536 # hash ref of options.
 537
 538 sub append_file {
 539
 540 # get the optional opts hash ref
 541         my $opts = $_[1] ;
 542         if ( ref $opts eq 'HASH' ) {
 543
 544 # we were passed an opts ref so just mark the append mode
 545
 546                 $opts->{append} = 1 ;
 547         }
 548         else {
 549
 550 # no opts hash so insert one with the append mode
 551
 552                 splice( @_, 1, 0, { append => 1 } ) ;
 553         }
 554
 555 # magic goto the main write_file sub. this overlays the sub without touching
 556 # the stack or @_
 557
 558         goto &write_file
 559 }
 560
 561 # prepend data to the beginning of a file
 562
 563 sub prepend_file {
 564
 565         my $file_name = shift ;
 566
 567 #print "FILE $file_name\n" ;
 568
 569         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 570
 571 # delete unsupported options
 572
 573         my @bad_opts =
 574                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 575
 576         delete @{$opts}{@bad_opts} ;
 577
 578         my $prepend_data = shift ;
 579         $prepend_data = '' unless defined $prepend_data ;
 580         $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
 581
 582 #print "PRE [$prepend_data]\n" ;
 583
 584         my $err_mode = delete $opts->{err_mode} ;
 585         $opts->{ err_mode } = 'croak' ;
 586         $opts->{ scalar_ref } = 1 ;
 587
 588         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 589
 590         if ( $@ ) {
 591
 592                 @_ = ( { err_mode => $err_mode },
 593                         "prepend_file '$file_name' - read_file: $!" ) ;
 594                 goto &_error ;
 595         }
 596
 597 #print "EXIST [$$existing_data]\n" ;
 598
 599         $opts->{atomic} = 1 ;
 600         my $write_result =
 601                 eval { write_file( $file_name, $opts,
 602                        $prepend_data, $$existing_data ) ;
 603         } ;
 604
 605         if ( $@ ) {
 606
 607                 @_ = ( { err_mode => $err_mode },
 608                         "prepend_file '$file_name' - write_file: $!" ) ;
 609                 goto &_error ;
 610         }
 611
 612         return $write_result ;
 613 }
 614
 615 # edit a file as a scalar in $_
 616
 617 sub edit_file(&$;$) {
 618
 619         my( $edit_code, $file_name, $opts ) = @_ ;
 620         $opts = {} unless ref $opts eq 'HASH' ;
 621
 622 #       my $edit_code = shift ;
 623 #       my $file_name = shift ;
 624 #       my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 625
 626 #print "FILE $file_name\n" ;
 627
 628 # delete unsupported options
 629
 630         my @bad_opts =
 631                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 632
 633         delete @{$opts}{@bad_opts} ;
 634
 635 # keep the user err_mode and force croaking on internal errors
 636
 637         my $err_mode = delete $opts->{err_mode} ;
 638         $opts->{ err_mode } = 'croak' ;
 639
 640 # get a scalar ref for speed and slurp the file into a scalar
 641
 642         $opts->{ scalar_ref } = 1 ;
 643         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 644
 645         if ( $@ ) {
 646
 647                 @_ = ( { err_mode => $err_mode },
 648                         "edit_file '$file_name' - read_file: $!" ) ;
 649                 goto &_error ;
 650         }
 651
 652 #print "EXIST [$$existing_data]\n" ;
 653
 654         my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ;
 655
 656         $opts->{atomic} = 1 ;
 657         my $write_result =
 658                 eval { write_file( $file_name, $opts, $edited_data ) } ;
 659
 660         if ( $@ ) {
 661
 662                 @_ = ( { err_mode => $err_mode },
 663                         "edit_file '$file_name' - write_file: $!" ) ;
 664                 goto &_error ;
 665         }
 666
 667         return $write_result ;
 668 }
 669
 670 sub edit_file_lines(&$;$) {
 671
 672         my( $edit_code, $file_name, $opts ) = @_ ;
 673         $opts = {} unless ref $opts eq 'HASH' ;
 674
 675 #       my $edit_code = shift ;
 676 #       my $file_name = shift ;
 677 #       my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 678
 679 #print "FILE $file_name\n" ;
 680
 681 # delete unsupported options
 682
 683         my @bad_opts =
 684                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 685
 686         delete @{$opts}{@bad_opts} ;
 687
 688 # keep the user err_mode and force croaking on internal errors
 689
 690         my $err_mode = delete $opts->{err_mode} ;
 691         $opts->{ err_mode } = 'croak' ;
 692
 693 # get an array ref for speed and slurp the file into lines
 694
 695         $opts->{ array_ref } = 1 ;
 696         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 697
 698         if ( $@ ) {
 699
 700                 @_ = ( { err_mode => $err_mode },
 701                         "edit_file_lines '$file_name' - read_file: $!" ) ;
 702                 goto &_error ;
 703         }
 704
 705 #print "EXIST [$$existing_data]\n" ;
 706
 707         my @edited_data = map { $edit_code->(); $_ } @$existing_data ;
 708
 709         $opts->{atomic} = 1 ;
 710         my $write_result =
 711                 eval { write_file( $file_name, $opts, @edited_data ) } ;
 712
 713         if ( $@ ) {
 714
 715                 @_ = ( { err_mode => $err_mode },
 716                         "edit_file_lines '$file_name' - write_file: $!" ) ;
 717                 goto &_error ;
 718         }
 719
 720         return $write_result ;
 721 }
 722
 723 # basic wrapper around opendir/readdir
 724
 725 sub read_dir {
 726
 727         my $dir = shift ;
 728         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
 729
 730 # this handle will be destroyed upon return
 731
 732         local(*DIRH);
 733
 734 # open the dir and handle any errors
 735
 736         unless ( opendir( DIRH, $dir ) ) {
 737
 738                 @_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
 739                 goto &_error ;
 740         }
 741
 742         my @dir_entries = readdir(DIRH) ;
 743
 744         @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
 745                 unless $opts->{'keep_dot_dot'} ;
 746
 747         return @dir_entries if wantarray ;
 748         return \@dir_entries ;
 749 }
 750
 751 # error handling section
 752 #
 753 # all the error handling uses magic goto so the caller will get the
 754 # error message as if from their code and not this module. if we just
 755 # did a call on the error code, the carp/croak would report it from
 756 # this module since the error sub is one level down on the call stack
 757 # from read_file/write_file/read_dir.
 758
 759
 760 my %err_func = (
 761         'carp'  => \&carp,
 762         'croak' => \&croak,
 763 ) ;
 764
 765 sub _error {
 766
 767         my( $opts, $err_msg ) = @_ ;
 768
 769 # get the error function to use
 770
 771         my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
 772
 773 # if we didn't find it in our error function hash, they must have set
 774 # it to quiet and we don't do anything.
 775
 776         return unless $func ;
 777
 778 # call the carp/croak function
 779
 780         $func->($err_msg) if $func ;
 781
 782 # return a hard undef (in list context this will be a single value of
 783 # undef which is not a legal in-band value)
 784
 785         return undef ;
 786 }
 787
 788 1;
 789 __END__
 790
 791 =head1 NAME
 792
 793 File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files
 794
 795 =head1 SYNOPSIS
 796
 797   use File::Slurp;
 798
 799 # read in a whole file into a scalar
 800
 801   my $text = read_file( 'filename' ) ;
 802
 803 # read in a whole file into an array of lines
 804
 805   my @lines = read_file( 'filename' ) ;
 806
 807 # write out a whole file from a scalar
 808
 809   write_file( 'filename', $text ) ;
 810
 811 # write out a whole file from an array of lines
 812
 813   write_file( 'filename', @lines ) ;
 814
 815 # Here is a simple and fast way to load and save a simple config file
 816 # made of key=value lines.
 817
 818   my %conf = read_file( $file_name ) =~ /^(\w+)=(\.*)$/mg ;
 819   write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ;
 820
 821 # insert text at the beginning of a file
 822
 823   prepend_file( 'filename', $text ) ;
 824
 825 # read in a whole directory of file names (skipping . and ..)
 826
 827   my @files = read_dir( '/path/to/dir' ) ;
 828
 829 =head1 DESCRIPTION
 830
 831 This module provides subs that allow you to read or write entire files
 832 with one simple call. They are designed to be simple to use, have
 833 flexible ways to pass in or get the file contents and to be very
 834 efficient.  There is also a sub to read in all the files in a
 835 directory other than C<.> and C<..>
 836
 837 These slurp/spew subs work for files, pipes and sockets, stdio,
 838 pseudo-files, and the DATA handle. Read more about why slurping files is
 839 a good thing in the file 'slurp_article.pod' in the extras/ directory.
 840
 841 If you are interested in how fast these calls work, check out the
 842 slurp_bench.pl program in the extras/ directory. It compares many
 843 different forms of slurping. You can select the I/O direction, context
 844 and file sizes. Use the --help option to see how to run it.
 845
 846 =head2 B<read_file>
 847
 848 This sub reads in an entire file and returns its contents to the
 849 caller.  In scalar context it returns the entire file as a single
 850 scalar. In list context it will return a list of lines (using the
 851 current value of $/ as the separator including support for paragraph
 852 mode when it is set to '').
 853
 854   my $text = read_file( 'filename' ) ;
 855   my $bin = read_file( 'filename' { binmode => ':raw' } ) ;
 856   my @lines = read_file( 'filename' ) ;
 857   my $lines = read_file( 'filename', array_ref => 1 ) ;
 858
 859 The first argument is the file to slurp in. If the next argument is a
 860 hash reference, then it is used as the options. Otherwise the rest of
 861 the argument list are is used as key/value options.
 862
 863 If the file argument is a handle (if it is a ref and is an IO or GLOB
 864 object), then that handle is slurped in. This mode is supported so you
 865 slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
 866 an example that does C<open( '-|' )> and the child process spews data
 867 to the parant which slurps it in.  All of the options that control how
 868 the data is returned to the caller still work in this case.
 869
 870 If the first argument is an overloaded object then its stringified value
 871 is used for the filename and that file is opened.  This is a new feature
 872 in 9999.14. See the stringify.t test for an example.
 873
 874 By default C<read_file> returns an undef in scalar contex or a single
 875 undef in list context if it encounters an error. Those are both
 876 impossible to get with a clean read_file call which means you can check
 877 the return value and always know if you had an error. You can change how
 878 errors are handled with the C<err_mode> option.
 879
 880 Speed Note: If you call read_file and just get a scalar return value
 881 it is now optimized to handle shorter files. This is only used if no
 882 options are used, the file is shorter then 100k bytes, the filename is
 883 a plain scalar and a scalar file is returned. If you want the fastest
 884 slurping, use the C<buf_ref> or C<scalar_ref> options (see below)
 885
 886 NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
 887 handle. It used to need a sysseek workaround but that is now handled
 888 when needed by the module itself.
 889
 890 You can optionally request that C<slurp()> is exported to your code. This
 891 is an alias for read_file and is meant to be forward compatible with
 892 Perl 6 (which will have slurp() built-in).
 893
 894 The options for C<read_file> are:
 895
 896 =head3 binmode
 897
 898 If you set the binmode option, then its value is passed to a call to
 899 binmode on the opened handle. You can use this to set the file to be
 900 read in binary mode, utf8, etc. See perldoc -f binmode for more.
 901
 902         my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
 903         my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
 904
 905 =head3 array_ref
 906
 907 If this boolean option is set, the return value (only in scalar
 908 context) will be an array reference which contains the lines of the
 909 slurped file. The following two calls are equivalent:
 910
 911         my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
 912         my $lines_ref = [ read_file( $bin_file ) ] ;
 913
 914 =head3 scalar_ref
 915
 916 If this boolean option is set, the return value (only in scalar
 917 context) will be an scalar reference to a string which is the contents
 918 of the slurped file. This will usually be faster than returning the
 919 plain scalar. It will also save memory as it will not make a copy of
 920 the file to return. Run the extras/slurp_bench.pl script to see speed
 921 comparisons.
 922
 923         my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
 924
 925 =head3 buf_ref
 926
 927 You can use this option to pass in a scalar reference and the slurped
 928 file contents will be stored in the scalar. This can be used in
 929 conjunction with any of the other options. This saves an extra copy of
 930 the slurped file and can lower ram usage vs returning the file. It is
 931 usually the fastest way to read a file into a scalar. Run the
 932 extras/slurp_bench.pl script to see speed comparisons.
 933
 934
 935         read_file( $bin_file, buf_ref => \$buffer ) ;
 936
 937 =head3 blk_size
 938
 939 You can use this option to set the block size used when slurping from
 940 an already open handle (like \*STDIN). It defaults to 1MB.
 941
 942         my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
 943                                              array_ref => 1 ) ;
 944
 945 =head3 err_mode
 946
 947 You can use this option to control how read_file behaves when an error
 948 occurs. This option defaults to 'croak'. You can set it to 'carp' or to
 949 'quiet to have no special error handling. This code wants to carp and
 950 then read another file if it fails.
 951
 952         my $text_ref = read_file( $file, err_mode => 'carp' ) ;
 953         unless ( $text_ref ) {
 954
 955                 # read a different file but croak if not found
 956                 $text_ref = read_file( $another_file ) ;
 957         }
 958
 959         # process ${$text_ref}
 960
 961 =head2 B<write_file>
 962
 963 This sub writes out an entire file in one call.
 964
 965   write_file( 'filename', @data ) ;
 966
 967 The first argument to C<write_file> is the filename. The next argument
 968 is an optional hash reference and it contains key/values that can
 969 modify the behavior of C<write_file>. The rest of the argument list is
 970 the data to be written to the file.
 971
 972   write_file( 'filename', {append => 1 }, @data ) ;
 973   write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
 974
 975 As a shortcut if the first data argument is a scalar or array reference,
 976 it is used as the only data to be written to the file. Any following
 977 arguments in @_ are ignored. This is a faster way to pass in the output
 978 to be written to the file and is equivalent to the C<buf_ref> option of
 979 C<read_file>. These following pairs are equivalent but the pass by
 980 reference call will be faster in most cases (especially with larger
 981 files).
 982
 983   write_file( 'filename', \$buffer ) ;
 984   write_file( 'filename', $buffer ) ;
 985
 986   write_file( 'filename', \@lines ) ;
 987   write_file( 'filename', @lines ) ;
 988
 989 If the first argument is a handle (if it is a ref and is an IO or GLOB
 990 object), then that handle is written to. This mode is supported so you
 991 spew to handles such as \*STDOUT. See the test handle.t for an example
 992 that does C<open( '-|' )> and child process spews data to the parent
 993 which slurps it in.  All of the options that control how the data are
 994 passed into C<write_file> still work in this case.
 995
 996 If the first argument is an overloaded object then its stringified value
 997 is used for the filename and that file is opened.  This is new feature
 998 in 9999.14. See the stringify.t test for an example.
 999
1000 By default C<write_file> returns 1 upon successfully writing the file or
1001 undef if it encountered an error. You can change how errors are handled
1002 with the C<err_mode> option.
1003
1004 The options are:
1005
1006 =head3 binmode
1007
1008 If you set the binmode option, then its value is passed to a call to
1009 binmode on the opened handle. You can use this to set the file to be
1010 read in binary mode, utf8, etc. See perldoc -f binmode for more.
1011
1012         write_file( $bin_file, {binmode => ':raw'}, @data ) ;
1013         write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
1014
1015 =head3 perms
1016
1017 The perms option sets the permissions of newly-created files. This value
1018 is modified by your process's umask and defaults to 0666 (same as
1019 sysopen).
1020
1021 NOTE: this option is new as of File::Slurp version 9999.14;
1022
1023 =head3 buf_ref
1024
1025 You can use this option to pass in a scalar reference which has the
1026 data to be written. If this is set then any data arguments (including
1027 the scalar reference shortcut) in @_ will be ignored. These are
1028 equivalent:
1029
1030         write_file( $bin_file, { buf_ref => \$buffer } ) ;
1031         write_file( $bin_file, \$buffer ) ;
1032         write_file( $bin_file, $buffer ) ;
1033
1034 =head3 atomic
1035
1036 If you set this boolean option, the file will be written to in an
1037 atomic fashion. A temporary file name is created by appending the pid
1038 ($$) to the file name argument and that file is spewed to. After the
1039 file is closed it is renamed to the original file name (and rename is
1040 an atomic operation on most OS's). If the program using this were to
1041 crash in the middle of this, then the file with the pid suffix could
1042 be left behind.
1043
1044 =head3 append
1045
1046 If you set this boolean option, the data will be written at the end of
1047 the current file. Internally this sets the sysopen mode flag O_APPEND.
1048
1049         write_file( $file, {append => 1}, @data ) ;
1050
1051  You
1052 can import append_file and it does the same thing.
1053
1054 =head3 no_clobber
1055
1056 If you set this boolean option, an existing file will not be overwritten.
1057
1058         write_file( $file, {no_clobber => 1}, @data ) ;
1059
1060 =head3 err_mode
1061
1062 You can use this option to control how C<write_file> behaves when an
1063 error occurs. This option defaults to 'croak'. You can set it to
1064 'carp' or to 'quiet' to have no error handling other than the return
1065 value. If the first call to C<write_file> fails it will carp and then
1066 write to another file. If the second call to C<write_file> fails, it
1067 will croak.
1068
1069         unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
1070
1071                 # write a different file but croak if not found
1072                 write_file( $other_file, \$data ) ;
1073         }
1074
1075 =head2 overwrite_file
1076
1077 This sub is just a typeglob alias to write_file since write_file
1078 always overwrites an existing file. This sub is supported for
1079 backwards compatibility with the original version of this module. See
1080 write_file for its API and behavior.
1081
1082 =head2 append_file
1083
1084 This sub will write its data to the end of the file. It is a wrapper
1085 around write_file and it has the same API so see that for the full
1086 documentation. These calls are equivalent:
1087
1088         append_file( $file, @data ) ;
1089         write_file( $file, {append => 1}, @data ) ;
1090
1091
1092 =head2 prepend_file
1093
1094 This sub writes data to the beginning of a file. The previously existing
1095 data is written after that so the effect is prepending data in front of
1096 a file. It is a counterpart to the append_file sub in this module. It
1097 works by first using C<read_file> to slurp in the file and then calling
1098 C<write_file> with the new data and the existing file data.
1099
1100 The first argument to C<prepend_file> is the filename. The next argument
1101 is an optional hash reference and it contains key/values that can modify
1102 the behavior of C<prepend_file>. The rest of the argument list is the
1103 data to be written to the file and that is passed to C<write_file> as is
1104 (see that for allowed data).
1105
1106 Only the C<binmode> and C<err_mode> options are supported. The
1107 C<write_file> call has the C<atomic> option set so you will always have
1108 a consistant file. See above for more about those options.
1109
1110 C<prepend_file> is not exported by default, you need to import it
1111 explicitly.
1112
1113         use File::Slurp qw( prepend_file ) ;
1114         prepend_file( $file, $header ) ;
1115         prepend_file( $file, \@lines ) ;
1116         prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
1117
1118 =head2 read_dir
1119
1120 This sub reads all the file names from directory and returns them to
1121 the caller but C<.> and C<..> are removed by default.
1122
1123         my @files = read_dir( '/path/to/dir' ) ;
1124
1125 The first argument is the path to the directory to read.  If the next
1126 argument is a hash reference, then it is used as the options.
1127 Otherwise the rest of the argument list are is used as key/value
1128 options.
1129
1130 In list context C<read_dir> returns a list of the entries in the
1131 directory. In a scalar context it returns an array reference which has
1132 the entries.
1133
1134 =head3 err_mode
1135
1136 If the C<err_mode> option is set, it selects how errors are handled (see
1137 C<err_mode> in C<read_file> or C<write_file>).
1138
1139 =head3 keep_dot_dot
1140
1141 If this boolean option is set, C<.> and C<..> are not removed from the
1142 list of files.
1143
1144         my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1145
1146 =head2 EXPORT
1147
1148   read_file write_file overwrite_file append_file read_dir
1149
1150 =head2 LICENSE
1151
1152   Same as Perl.
1153
1154 =head2 SEE ALSO
1155
1156 An article on file slurping in extras/slurp_article.pod. There is
1157 also a benchmarking script in extras/slurp_bench.pl.
1158
1159 =head2 BUGS
1160
1161 If run under Perl 5.004, slurping from the DATA handle will fail as
1162 that requires B.pm which didn't get into core until 5.005.
1163
1164 =head1 AUTHOR
1165
1166 Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
1167
1168 =cut