lib/File/Slurp.pm

   1 package File::Slurp;
   2
   3 use 5.6.2 ;
   4
   5 use strict;
   6
   7 use Carp ;
   8 use Exporter ;
   9 use Fcntl qw( :DEFAULT ) ;
  10 use POSIX qw( :fcntl_h ) ;
  11 use Symbol ;
  12
  13 use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ;
  14 @ISA = qw( Exporter ) ;
  15
  16 $VERSION = '9999.17';
  17
  18 @EXPORT_OK = qw( slurp prepend_file ) ;
  19 %EXPORT_TAGS = ( 'all' => [
  20         qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;
  21 @EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
  22
  23 my $max_fast_slurp_size = 1024 * 100 ;
  24
  25 my $is_win32 = $^O =~ /win32/i ;
  26
  27 # Install subs for various constants that aren't set in older perls
  28 # (< 5.005).  Fcntl on old perls uses Exporter to define subs without a
  29 # () prototype These can't be overridden with the constant pragma or
  30 # we get a prototype mismatch.  Hence this less than aesthetically
  31 # appealing BEGIN block:
  32
  33 BEGIN {
  34         unless( defined &SEEK_SET ) {
  35                 *SEEK_SET = sub { 0 };
  36                 *SEEK_CUR = sub { 1 };
  37                 *SEEK_END = sub { 2 };
  38         }
  39
  40         unless( defined &O_BINARY ) {
  41                 *O_BINARY = sub { 0 };
  42                 *O_RDONLY = sub { 0 };
  43                 *O_WRONLY = sub { 1 };
  44         }
  45
  46         unless ( defined &O_APPEND ) {
  47
  48                 if ( $^O =~ /olaris/ ) {
  49                         *O_APPEND = sub { 8 };
  50                         *O_CREAT = sub { 256 };
  51                         *O_EXCL = sub { 1024 };
  52                 }
  53                 elsif ( $^O =~ /inux/ ) {
  54                         *O_APPEND = sub { 1024 };
  55                         *O_CREAT = sub { 64 };
  56                         *O_EXCL = sub { 128 };
  57                 }
  58                 elsif ( $^O =~ /BSD/i ) {
  59                         *O_APPEND = sub { 8 };
  60                         *O_CREAT = sub { 512 };
  61                         *O_EXCL = sub { 2048 };
  62                 }
  63         }
  64 }
  65
  66 # print "OS [$^O]\n" ;
  67
  68 # print "O_BINARY = ", O_BINARY(), "\n" ;
  69 # print "O_RDONLY = ", O_RDONLY(), "\n" ;
  70 # print "O_WRONLY = ", O_WRONLY(), "\n" ;
  71 # print "O_APPEND = ", O_APPEND(), "\n" ;
  72 # print "O_CREAT   ", O_CREAT(), "\n" ;
  73 # print "O_EXCL   ", O_EXCL(), "\n" ;
  74
  75
  76 *slurp = \&read_file ;
  77
  78 sub read_file {
  79
  80         my $file_name = shift ;
  81         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
  82
  83 # this is the optimized read_file for shorter files.
  84 # the test for -s > 0 is to allow pseudo files to be read with the
  85 # regular loop since they return a size of 0.
  86
  87         if ( !ref $file_name && -e $file_name && -s _ > 0 &&
  88              -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) {
  89
  90
  91                 my $fh ;
  92                 unless( sysopen( $fh, $file_name, O_RDONLY ) ) {
  93
  94                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
  95                         goto &_error ;
  96                 }
  97
  98                 my $read_cnt = sysread( $fh, my $buf, -s _ ) ;
  99
 100                 unless ( defined $read_cnt ) {
 101
 102                         @_ = ( $opts,
 103                                 "read_file '$file_name' - small sysread: $!");
 104                         goto &_error ;
 105                 }
 106
 107                 $buf =~ s/\015\012/\n/g if $is_win32 ;
 108                 return $buf ;
 109         }
 110
 111 # set the buffer to either the passed in one or ours and init it to the null
 112 # string
 113
 114         my $buf ;
 115         my $buf_ref = $opts->{'buf_ref'} || \$buf ;
 116         ${$buf_ref} = '' ;
 117
 118         my( $read_fh, $size_left, $blk_size ) ;
 119
 120 # deal with ref for a file name
 121 # it could be an open handle or an overloaded object
 122
 123         if ( ref $file_name ) {
 124
 125                 my $ref_result = _check_ref( $file_name ) ;
 126
 127                 if ( ref $ref_result ) {
 128
 129 # we got an error, deal with it
 130
 131                         @_ = ( $opts, $ref_result ) ;
 132                         goto &_error ;
 133                 }
 134
 135                 if ( $ref_result ) {
 136
 137 # we got an overloaded object and the result is the stringified value
 138 # use it as the file name
 139
 140                         $file_name = $ref_result ;
 141                 }
 142                 else {
 143
 144 # here we have just an open handle. set $read_fh so we don't do a sysopen
 145
 146                         $read_fh = $file_name ;
 147                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 148                         $size_left = $blk_size ;
 149                 }
 150         }
 151
 152 # see if we have a path we need to open
 153
 154         unless ( $read_fh ) {
 155
 156 # a regular file. set the sysopen mode
 157
 158                 my $mode = O_RDONLY ;
 159
 160 #printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
 161
 162                 $read_fh = gensym ;
 163                 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
 164                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
 165                         goto &_error ;
 166                 }
 167
 168                 if ( my $binmode = $opts->{'binmode'} ) {
 169                         binmode( $read_fh, $binmode ) ;
 170                 }
 171
 172 # get the size of the file for use in the read loop
 173
 174                 $size_left = -s $read_fh ;
 175
 176 #print "SIZE $size_left\n" ;
 177
 178 # we need a blk_size if the size is 0 so we can handle pseudofiles like in
 179 # /proc. these show as 0 size but have data to be slurped.
 180
 181                 unless( $size_left ) {
 182
 183                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 184                         $size_left = $blk_size ;
 185                 }
 186         }
 187
 188
 189 #       if ( $size_left < 10000 && keys %{$opts} == 0 && !wantarray ) {
 190
 191 #               my $read_cnt = sysread( $read_fh, my $buf, $size_left ) ;
 192
 193 #               unless ( defined $read_cnt ) {
 194
 195 #                       @_ = ( $opts, "read_file '$file_name' - small2 sysread: $!");
 196 #                       goto &_error ;
 197 #               }
 198
 199 #               return $buf ;
 200 #       }
 201
 202 # infinite read loop. we exit when we are done slurping
 203
 204         while( 1 ) {
 205
 206 # do the read and see how much we got
 207
 208                 my $read_cnt = sysread( $read_fh, ${$buf_ref},
 209                                 $size_left, length ${$buf_ref} ) ;
 210
 211                 unless ( defined $read_cnt ) {
 212
 213                         @_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
 214                         goto &_error ;
 215                 }
 216
 217 # good read. see if we hit EOF (nothing left to read)
 218
 219                 last if $read_cnt == 0 ;
 220
 221 # loop if we are slurping a handle. we don't track $size_left then.
 222
 223                 next if $blk_size ;
 224
 225 # count down how much we read and loop if we have more to read.
 226
 227                 $size_left -= $read_cnt ;
 228                 last if $size_left <= 0 ;
 229         }
 230
 231 # fix up cr/lf to be a newline if this is a windows text file
 232
 233         ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
 234
 235         my $sep = $/ ;
 236         $sep = '\n\n+' if defined $sep && $sep eq '' ;
 237
 238 # see if caller wants lines
 239
 240         if( wantarray || $opts->{'array_ref'} ) {
 241
 242                 use re 'taint' ;
 243
 244                 my @lines = length(${$buf_ref}) ?
 245                         ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ;
 246
 247 # caller wants an array ref
 248
 249                 return \@lines if $opts->{'array_ref'} ;
 250
 251 # caller wants list of lines
 252
 253                 return @lines ;
 254         }
 255
 256 # caller wants a scalar ref to the slurped text
 257
 258         return $buf_ref if $opts->{'scalar_ref'} ;
 259
 260 # caller wants a scalar with the slurped text (normal scalar context)
 261
 262         return ${$buf_ref} if defined wantarray ;
 263
 264 # caller passed in an i/o buffer by reference (normal void context)
 265
 266         return ;
 267 }
 268
 269 # errors in this sub are returned as scalar refs
 270 # a normal IO/GLOB handle is an empty return
 271 # an overloaded object returns its stringified as a scalarfilename
 272
 273 sub _check_ref {
 274
 275         my( $handle ) = @_ ;
 276
 277 # check if we are reading from a handle (GLOB or IO object)
 278
 279         if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) {
 280
 281 # we have a handle. deal with seeking to it if it is DATA
 282
 283                 my $err = _seek_data_handle( $handle ) ;
 284
 285 # return the error string if any
 286
 287                 return \$err if $err ;
 288
 289 # we have good handle
 290                 return ;
 291         }
 292
 293         eval { require overload } ;
 294
 295 # return an error if we can't load the overload pragma
 296 # or if the object isn't overloaded
 297
 298         return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
 299                  if $@ || !overload::Overloaded( $handle ) ;
 300
 301 # must be overloaded so return its stringified value
 302
 303         return "$handle" ;
 304 }
 305
 306 sub _seek_data_handle {
 307
 308         my( $handle ) = @_ ;
 309
 310 # DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
 311 # glob/handle. only the DATA handle is untainted (since it is from
 312 # trusted data in the source file). this allows us to test if this is
 313 # the DATA handle and then to do a sysseek to make sure it gets
 314 # slurped correctly. on some systems, the buffered i/o pointer is not
 315 # left at the same place as the fd pointer. this sysseek makes them
 316 # the same so slurping with sysread will work.
 317
 318         eval{ require B } ;
 319
 320         if ( $@ ) {
 321
 322                 return <<ERR ;
 323 Can't find B.pm with this Perl: $!.
 324 That module is needed to properly slurp the DATA handle.
 325 ERR
 326         }
 327
 328         if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
 329
 330 # set the seek position to the current tell.
 331
 332                 unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
 333                         return "read_file '$handle' - sysseek: $!" ;
 334                 }
 335         }
 336
 337 # seek was successful, return no error string
 338
 339         return ;
 340 }
 341
 342
 343 sub write_file {
 344
 345         my $file_name = shift ;
 346
 347 # get the optional argument hash ref from @_ or an empty hash ref.
 348
 349         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 350
 351         my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
 352
 353 # get the buffer ref - it depends on how the data is passed into write_file
 354 # after this if/else $buf_ref will have a scalar ref to the data.
 355
 356         if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
 357
 358 # a scalar ref passed in %opts has the data
 359 # note that the data was passed by ref
 360
 361                 $buf_ref = $opts->{'buf_ref'} ;
 362                 $data_is_ref = 1 ;
 363         }
 364         elsif ( ref $_[0] eq 'SCALAR' ) {
 365
 366 # the first value in @_ is the scalar ref to the data
 367 # note that the data was passed by ref
 368
 369                 $buf_ref = shift ;
 370                 $data_is_ref = 1 ;
 371         }
 372         elsif ( ref $_[0] eq 'ARRAY' ) {
 373
 374 # the first value in @_ is the array ref to the data so join it.
 375
 376                 ${$buf_ref} = join '', @{$_[0]} ;
 377         }
 378         else {
 379
 380 # good old @_ has all the data so join it.
 381
 382                 ${$buf_ref} = join '', @_ ;
 383         }
 384
 385 # deal with ref for a file name
 386
 387         if ( ref $file_name ) {
 388
 389                 my $ref_result = _check_ref( $file_name ) ;
 390
 391                 if ( ref $ref_result ) {
 392
 393 # we got an error, deal with it
 394
 395                         @_ = ( $opts, $ref_result ) ;
 396                         goto &_error ;
 397                 }
 398
 399                 if ( $ref_result ) {
 400
 401 # we got an overloaded object and the result is the stringified value
 402 # use it as the file name
 403
 404                         $file_name = $ref_result ;
 405                 }
 406                 else {
 407
 408 # we now have a proper handle ref.
 409 # make sure we don't call truncate on it.
 410
 411                         $write_fh = $file_name ;
 412                         $no_truncate = 1 ;
 413                 }
 414         }
 415
 416 # see if we have a path we need to open
 417
 418         unless( $write_fh ) {
 419
 420 # spew to regular file.
 421
 422                 if ( $opts->{'atomic'} ) {
 423
 424 # in atomic mode, we spew to a temp file so make one and save the original
 425 # file name.
 426                         $orig_file_name = $file_name ;
 427                         $file_name .= ".$$" ;
 428                 }
 429
 430 # set the mode for the sysopen
 431
 432                 my $mode = O_WRONLY | O_CREAT ;
 433                 $mode |= O_APPEND if $opts->{'append'} ;
 434                 $mode |= O_EXCL if $opts->{'no_clobber'} ;
 435
 436                 my $perms = $opts->{perms} ;
 437                 $perms = 0666 unless defined $perms ;
 438
 439 #printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
 440
 441 # open the file and handle any error.
 442
 443                 $write_fh = gensym ;
 444                 unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
 445
 446                         @_ = ( $opts, "write_file '$file_name' - sysopen: $!");
 447                         goto &_error ;
 448                 }
 449         }
 450
 451         if ( my $binmode = $opts->{'binmode'} ) {
 452                 binmode( $write_fh, $binmode ) ;
 453         }
 454
 455         sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
 456
 457 #print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 458
 459 # fix up newline to write cr/lf if this is a windows text file
 460
 461         if ( $is_win32 && !$opts->{'binmode'} ) {
 462
 463 # copy the write data if it was passed by ref so we don't clobber the
 464 # caller's data
 465                 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
 466                 ${$buf_ref} =~ s/\n/\015\012/g ;
 467         }
 468
 469 #print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 470
 471 # get the size of how much we are writing and init the offset into that buffer
 472
 473         my $size_left = length( ${$buf_ref} ) ;
 474         my $offset = 0 ;
 475
 476 # loop until we have no more data left to write
 477
 478         do {
 479
 480 # do the write and track how much we just wrote
 481
 482                 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
 483                                 $size_left, $offset ) ;
 484
 485                 unless ( defined $write_cnt ) {
 486
 487                         @_ = ( $opts, "write_file '$file_name' - syswrite: $!");
 488                         goto &_error ;
 489                 }
 490
 491 # track how much left to write and where to write from in the buffer
 492
 493                 $size_left -= $write_cnt ;
 494                 $offset += $write_cnt ;
 495
 496         } while( $size_left > 0 ) ;
 497
 498 # we truncate regular files in case we overwrite a long file with a shorter file
 499 # so seek to the current position to get it (same as tell()).
 500
 501         truncate( $write_fh,
 502                   sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
 503
 504         close( $write_fh ) ;
 505
 506 # handle the atomic mode - move the temp file to the original filename.
 507
 508         if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
 509
 510                 @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
 511                 goto &_error ;
 512         }
 513
 514         return 1 ;
 515 }
 516
 517 # this is for backwards compatibility with the previous File::Slurp module.
 518 # write_file always overwrites an existing file
 519
 520 *overwrite_file = \&write_file ;
 521
 522 # the current write_file has an append mode so we use that. this
 523 # supports the same API with an optional second argument which is a
 524 # hash ref of options.
 525
 526 sub append_file {
 527
 528 # get the optional opts hash ref
 529         my $opts = $_[1] ;
 530         if ( ref $opts eq 'HASH' ) {
 531
 532 # we were passed an opts ref so just mark the append mode
 533
 534                 $opts->{append} = 1 ;
 535         }
 536         else {
 537
 538 # no opts hash so insert one with the append mode
 539
 540                 splice( @_, 1, 0, { append => 1 } ) ;
 541         }
 542
 543 # magic goto the main write_file sub. this overlays the sub without touching
 544 # the stack or @_
 545
 546         goto &write_file
 547 }
 548
 549 # prepend data to the beginning of a file
 550
 551 sub prepend_file {
 552
 553         my $file_name = shift ;
 554
 555 #print "FILE $file_name\n" ;
 556
 557         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 558
 559 # delete unsupported options
 560
 561         my @bad_opts =
 562                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 563
 564         delete @{$opts}{@bad_opts} ;
 565
 566         my $prepend_data = shift ;
 567         $prepend_data = '' unless defined $prepend_data ;
 568         $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
 569
 570 #print "PRE [$prepend_data]\n" ;
 571
 572         my $err_mode = delete $opts->{err_mode} ;
 573         $opts->{ err_mode } = 'croak' ;
 574         $opts->{ scalar_ref } = 1 ;
 575
 576         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 577
 578         if ( $@ ) {
 579
 580                 @_ = ( { err_mode => $err_mode },
 581                         "prepend_file '$file_name' - read_file: $!" ) ;
 582                 goto &_error ;
 583         }
 584
 585 #print "EXIST [$$existing_data]\n" ;
 586
 587         $opts->{atomic} = 1 ;
 588         my $write_result =
 589                 eval { write_file( $file_name, $opts,
 590                        $prepend_data, $$existing_data ) ;
 591         } ;
 592
 593         if ( $@ ) {
 594
 595                 @_ = ( { err_mode => $err_mode },
 596                         "prepend_file '$file_name' - write_file: $!" ) ;
 597                 goto &_error ;
 598         }
 599
 600         return $write_result ;
 601 }
 602
 603 # edit a file as a scalar in $_
 604
 605 sub edit_file(&$;$) {
 606
 607         my( $edit_code, $file_name, $opts ) = @_ ;
 608         $opts = {} unless ref $opts eq 'HASH' ;
 609
 610 #       my $edit_code = shift ;
 611 #       my $file_name = shift ;
 612 #       my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 613
 614 #print "FILE $file_name\n" ;
 615
 616 # delete unsupported options
 617
 618         my @bad_opts =
 619                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 620
 621         delete @{$opts}{@bad_opts} ;
 622
 623 # keep the user err_mode and force croaking on internal errors
 624
 625         my $err_mode = delete $opts->{err_mode} ;
 626         $opts->{ err_mode } = 'croak' ;
 627
 628 # get a scalar ref for speed and slurp the file into a scalar
 629
 630         $opts->{ scalar_ref } = 1 ;
 631         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 632
 633         if ( $@ ) {
 634
 635                 @_ = ( { err_mode => $err_mode },
 636                         "edit_file '$file_name' - read_file: $!" ) ;
 637                 goto &_error ;
 638         }
 639
 640 #print "EXIST [$$existing_data]\n" ;
 641
 642         my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ;
 643
 644         $opts->{atomic} = 1 ;
 645         my $write_result =
 646                 eval { write_file( $file_name, $opts, $edited_data ) } ;
 647
 648         if ( $@ ) {
 649
 650                 @_ = ( { err_mode => $err_mode },
 651                         "edit_file '$file_name' - write_file: $!" ) ;
 652                 goto &_error ;
 653         }
 654
 655         return $write_result ;
 656 }
 657
 658 sub edit_file_lines(&$;$) {
 659
 660         my( $edit_code, $file_name, $opts ) = @_ ;
 661         $opts = {} unless ref $opts eq 'HASH' ;
 662
 663 #       my $edit_code = shift ;
 664 #       my $file_name = shift ;
 665 #       my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 666
 667 #print "FILE $file_name\n" ;
 668
 669 # delete unsupported options
 670
 671         my @bad_opts =
 672                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 673
 674         delete @{$opts}{@bad_opts} ;
 675
 676 # keep the user err_mode and force croaking on internal errors
 677
 678         my $err_mode = delete $opts->{err_mode} ;
 679         $opts->{ err_mode } = 'croak' ;
 680
 681 # get an array ref for speed and slurp the file into lines
 682
 683         $opts->{ array_ref } = 1 ;
 684         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 685
 686         if ( $@ ) {
 687
 688                 @_ = ( { err_mode => $err_mode },
 689                         "edit_file_lines '$file_name' - read_file: $!" ) ;
 690                 goto &_error ;
 691         }
 692
 693 #print "EXIST [$$existing_data]\n" ;
 694
 695         my @edited_data = map { $edit_code->(); $_ } @$existing_data ;
 696
 697         $opts->{atomic} = 1 ;
 698         my $write_result =
 699                 eval { write_file( $file_name, $opts, @edited_data ) } ;
 700
 701         if ( $@ ) {
 702
 703                 @_ = ( { err_mode => $err_mode },
 704                         "edit_file_lines '$file_name' - write_file: $!" ) ;
 705                 goto &_error ;
 706         }
 707
 708         return $write_result ;
 709 }
 710
 711 # basic wrapper around opendir/readdir
 712
 713 sub read_dir {
 714
 715         my $dir = shift ;
 716         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
 717
 718 # this handle will be destroyed upon return
 719
 720         local(*DIRH);
 721
 722 # open the dir and handle any errors
 723
 724         unless ( opendir( DIRH, $dir ) ) {
 725
 726                 @_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
 727                 goto &_error ;
 728         }
 729
 730         my @dir_entries = readdir(DIRH) ;
 731
 732         @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
 733                 unless $opts->{'keep_dot_dot'} ;
 734
 735         return @dir_entries if wantarray ;
 736         return \@dir_entries ;
 737 }
 738
 739 # error handling section
 740 #
 741 # all the error handling uses magic goto so the caller will get the
 742 # error message as if from their code and not this module. if we just
 743 # did a call on the error code, the carp/croak would report it from
 744 # this module since the error sub is one level down on the call stack
 745 # from read_file/write_file/read_dir.
 746
 747
 748 my %err_func = (
 749         'carp'  => \&carp,
 750         'croak' => \&croak,
 751 ) ;
 752
 753 sub _error {
 754
 755         my( $opts, $err_msg ) = @_ ;
 756
 757 # get the error function to use
 758
 759         my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
 760
 761 # if we didn't find it in our error function hash, they must have set
 762 # it to quiet and we don't do anything.
 763
 764         return unless $func ;
 765
 766 # call the carp/croak function
 767
 768         $func->($err_msg) if $func ;
 769
 770 # return a hard undef (in list context this will be a single value of
 771 # undef which is not a legal in-band value)
 772
 773         return undef ;
 774 }
 775
 776 1;
 777 __END__
 778
 779 =head1 NAME
 780
 781 File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files
 782
 783 =head1 SYNOPSIS
 784
 785   use File::Slurp;
 786
 787 # read in a whole file into a scalar
 788
 789   my $text = read_file( 'filename' ) ;
 790
 791 # read in a whole file into an array of lines
 792
 793   my @lines = read_file( 'filename' ) ;
 794
 795 # write out a whole file from a scalar
 796
 797   write_file( 'filename', $text ) ;
 798
 799 # write out a whole file from an array of lines
 800
 801   write_file( 'filename', @lines ) ;
 802
 803 # Here is a simple and fast way to load and save a simple config file
 804 # made of key=value lines.
 805
 806   my %conf = read_file( $file_name ) =~ /^(\w+)=(\.*)$/mg ;
 807   write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ;
 808
 809 # insert text at the beginning of a file
 810
 811   prepend_file( 'filename', $text ) ;
 812
 813 # read in a whole directory of file names (skipping . and ..)
 814
 815   my @files = read_dir( '/path/to/dir' ) ;
 816
 817 =head1 DESCRIPTION
 818
 819 This module provides subs that allow you to read or write entire files
 820 with one simple call. They are designed to be simple to use, have
 821 flexible ways to pass in or get the file contents and to be very
 822 efficient.  There is also a sub to read in all the files in a
 823 directory other than C<.> and C<..>
 824
 825 These slurp/spew subs work for files, pipes and sockets, stdio,
 826 pseudo-files, and the DATA handle. Read more about why slurping files is
 827 a good thing in the file 'slurp_article.pod' in the extras/ directory.
 828
 829 If you are interested in how fast these calls work, check out the
 830 slurp_bench.pl program in the extras/ directory. It compares many
 831 different forms of slurping. You can select the I/O direction, context
 832 and file sizes. Use the --help option to see how to run it.
 833
 834 =head2 B<read_file>
 835
 836 This sub reads in an entire file and returns its contents to the
 837 caller.  In scalar context it returns the entire file as a single
 838 scalar. In list context it will return a list of lines (using the
 839 current value of $/ as the separator including support for paragraph
 840 mode when it is set to '').
 841
 842   my $text = read_file( 'filename' ) ;
 843   my $bin = read_file( 'filename' { binmode => ':raw' } ) ;
 844   my @lines = read_file( 'filename' ) ;
 845   my $lines = read_file( 'filename', array_ref => 1 ) ;
 846
 847 The first argument is the file to slurp in. If the next argument is a
 848 hash reference, then it is used as the options. Otherwise the rest of
 849 the argument list are is used as key/value options.
 850
 851 If the file argument is a handle (if it is a ref and is an IO or GLOB
 852 object), then that handle is slurped in. This mode is supported so you
 853 slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
 854 an example that does C<open( '-|' )> and the child process spews data
 855 to the parant which slurps it in.  All of the options that control how
 856 the data is returned to the caller still work in this case.
 857
 858 If the first argument is an overloaded object then its stringified value
 859 is used for the filename and that file is opened.  This is a new feature
 860 in 9999.14. See the stringify.t test for an example.
 861
 862 By default C<read_file> returns an undef in scalar contex or a single
 863 undef in list context if it encounters an error. Those are both
 864 impossible to get with a clean read_file call which means you can check
 865 the return value and always know if you had an error. You can change how
 866 errors are handled with the C<err_mode> option.
 867
 868 Speed Note: If you call read_file and just get a scalar return value
 869 it is now optimized to handle shorter files. This is only used if no
 870 options are used, the file is shorter then 100k bytes, the filename is
 871 a plain scalar and a scalar file is returned. If you want the fastest
 872 slurping, use the C<buf_ref> or C<scalar_ref> options (see below)
 873
 874 NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
 875 handle. It used to need a sysseek workaround but that is now handled
 876 when needed by the module itself.
 877
 878 You can optionally request that C<slurp()> is exported to your code. This
 879 is an alias for read_file and is meant to be forward compatible with
 880 Perl 6 (which will have slurp() built-in).
 881
 882 The options for C<read_file> are:
 883
 884 =head3 binmode
 885
 886 If you set the binmode option, then its value is passed to a call to
 887 binmode on the opened handle. You can use this to set the file to be
 888 read in binary mode, utf8, etc. See perldoc -f binmode for more.
 889
 890         my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
 891         my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
 892
 893 =head3 array_ref
 894
 895 If this boolean option is set, the return value (only in scalar
 896 context) will be an array reference which contains the lines of the
 897 slurped file. The following two calls are equivalent:
 898
 899         my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
 900         my $lines_ref = [ read_file( $bin_file ) ] ;
 901
 902 =head3 scalar_ref
 903
 904 If this boolean option is set, the return value (only in scalar
 905 context) will be an scalar reference to a string which is the contents
 906 of the slurped file. This will usually be faster than returning the
 907 plain scalar. It will also save memory as it will not make a copy of
 908 the file to return. Run the extras/slurp_bench.pl script to see speed
 909 comparisons.
 910
 911         my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
 912
 913 =head3 buf_ref
 914
 915 You can use this option to pass in a scalar reference and the slurped
 916 file contents will be stored in the scalar. This can be used in
 917 conjunction with any of the other options. This saves an extra copy of
 918 the slurped file and can lower ram usage vs returning the file. It is
 919 usually the fastest way to read a file into a scalar. Run the
 920 extras/slurp_bench.pl script to see speed comparisons.
 921
 922
 923         read_file( $bin_file, buf_ref => \$buffer ) ;
 924
 925 =head3 blk_size
 926
 927 You can use this option to set the block size used when slurping from
 928 an already open handle (like \*STDIN). It defaults to 1MB.
 929
 930         my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
 931                                              array_ref => 1 ) ;
 932
 933 =head3 err_mode
 934
 935 You can use this option to control how read_file behaves when an error
 936 occurs. This option defaults to 'croak'. You can set it to 'carp' or to
 937 'quiet to have no special error handling. This code wants to carp and
 938 then read another file if it fails.
 939
 940         my $text_ref = read_file( $file, err_mode => 'carp' ) ;
 941         unless ( $text_ref ) {
 942
 943                 # read a different file but croak if not found
 944                 $text_ref = read_file( $another_file ) ;
 945         }
 946
 947         # process ${$text_ref}
 948
 949 =head2 B<write_file>
 950
 951 This sub writes out an entire file in one call.
 952
 953   write_file( 'filename', @data ) ;
 954
 955 The first argument to C<write_file> is the filename. The next argument
 956 is an optional hash reference and it contains key/values that can
 957 modify the behavior of C<write_file>. The rest of the argument list is
 958 the data to be written to the file.
 959
 960   write_file( 'filename', {append => 1 }, @data ) ;
 961   write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
 962
 963 As a shortcut if the first data argument is a scalar or array reference,
 964 it is used as the only data to be written to the file. Any following
 965 arguments in @_ are ignored. This is a faster way to pass in the output
 966 to be written to the file and is equivalent to the C<buf_ref> option of
 967 C<read_file>. These following pairs are equivalent but the pass by
 968 reference call will be faster in most cases (especially with larger
 969 files).
 970
 971   write_file( 'filename', \$buffer ) ;
 972   write_file( 'filename', $buffer ) ;
 973
 974   write_file( 'filename', \@lines ) ;
 975   write_file( 'filename', @lines ) ;
 976
 977 If the first argument is a handle (if it is a ref and is an IO or GLOB
 978 object), then that handle is written to. This mode is supported so you
 979 spew to handles such as \*STDOUT. See the test handle.t for an example
 980 that does C<open( '-|' )> and child process spews data to the parent
 981 which slurps it in.  All of the options that control how the data are
 982 passed into C<write_file> still work in this case.
 983
 984 If the first argument is an overloaded object then its stringified value
 985 is used for the filename and that file is opened.  This is new feature
 986 in 9999.14. See the stringify.t test for an example.
 987
 988 By default C<write_file> returns 1 upon successfully writing the file or
 989 undef if it encountered an error. You can change how errors are handled
 990 with the C<err_mode> option.
 991
 992 The options are:
 993
 994 =head3 binmode
 995
 996 If you set the binmode option, then its value is passed to a call to
 997 binmode on the opened handle. You can use this to set the file to be
 998 read in binary mode, utf8, etc. See perldoc -f binmode for more.
 999
1000         write_file( $bin_file, {binmode => ':raw'}, @data ) ;
1001         write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
1002
1003 =head3 perms
1004
1005 The perms option sets the permissions of newly-created files. This value
1006 is modified by your process's umask and defaults to 0666 (same as
1007 sysopen).
1008
1009 NOTE: this option is new as of File::Slurp version 9999.14;
1010
1011 =head3 buf_ref
1012
1013 You can use this option to pass in a scalar reference which has the
1014 data to be written. If this is set then any data arguments (including
1015 the scalar reference shortcut) in @_ will be ignored. These are
1016 equivalent:
1017
1018         write_file( $bin_file, { buf_ref => \$buffer } ) ;
1019         write_file( $bin_file, \$buffer ) ;
1020         write_file( $bin_file, $buffer ) ;
1021
1022 =head3 atomic
1023
1024 If you set this boolean option, the file will be written to in an
1025 atomic fashion. A temporary file name is created by appending the pid
1026 ($$) to the file name argument and that file is spewed to. After the
1027 file is closed it is renamed to the original file name (and rename is
1028 an atomic operation on most OS's). If the program using this were to
1029 crash in the middle of this, then the file with the pid suffix could
1030 be left behind.
1031
1032 =head3 append
1033
1034 If you set this boolean option, the data will be written at the end of
1035 the current file. Internally this sets the sysopen mode flag O_APPEND.
1036
1037         write_file( $file, {append => 1}, @data ) ;
1038
1039  You
1040 can import append_file and it does the same thing.
1041
1042 =head3 no_clobber
1043
1044 If you set this boolean option, an existing file will not be overwritten.
1045
1046         write_file( $file, {no_clobber => 1}, @data ) ;
1047
1048 =head3 err_mode
1049
1050 You can use this option to control how C<write_file> behaves when an
1051 error occurs. This option defaults to 'croak'. You can set it to
1052 'carp' or to 'quiet' to have no error handling other than the return
1053 value. If the first call to C<write_file> fails it will carp and then
1054 write to another file. If the second call to C<write_file> fails, it
1055 will croak.
1056
1057         unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
1058
1059                 # write a different file but croak if not found
1060                 write_file( $other_file, \$data ) ;
1061         }
1062
1063 =head2 overwrite_file
1064
1065 This sub is just a typeglob alias to write_file since write_file
1066 always overwrites an existing file. This sub is supported for
1067 backwards compatibility with the original version of this module. See
1068 write_file for its API and behavior.
1069
1070 =head2 append_file
1071
1072 This sub will write its data to the end of the file. It is a wrapper
1073 around write_file and it has the same API so see that for the full
1074 documentation. These calls are equivalent:
1075
1076         append_file( $file, @data ) ;
1077         write_file( $file, {append => 1}, @data ) ;
1078
1079
1080 =head2 prepend_file
1081
1082 This sub writes data to the beginning of a file. The previously existing
1083 data is written after that so the effect is prepending data in front of
1084 a file. It is a counterpart to the append_file sub in this module. It
1085 works by first using C<read_file> to slurp in the file and then calling
1086 C<write_file> with the new data and the existing file data.
1087
1088 The first argument to C<prepend_file> is the filename. The next argument
1089 is an optional hash reference and it contains key/values that can modify
1090 the behavior of C<prepend_file>. The rest of the argument list is the
1091 data to be written to the file and that is passed to C<write_file> as is
1092 (see that for allowed data).
1093
1094 Only the C<binmode> and C<err_mode> options are supported. The
1095 C<write_file> call has the C<atomic> option set so you will always have
1096 a consistant file. See above for more about those options.
1097
1098 C<prepend_file> is not exported by default, you need to import it
1099 explicitly.
1100
1101         use File::Slurp qw( prepend_file ) ;
1102         prepend_file( $file, $header ) ;
1103         prepend_file( $file, \@lines ) ;
1104         prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
1105
1106 =head2 read_dir
1107
1108 This sub reads all the file names from directory and returns them to
1109 the caller but C<.> and C<..> are removed by default.
1110
1111         my @files = read_dir( '/path/to/dir' ) ;
1112
1113 The first argument is the path to the directory to read.  If the next
1114 argument is a hash reference, then it is used as the options.
1115 Otherwise the rest of the argument list are is used as key/value
1116 options.
1117
1118 In list context C<read_dir> returns a list of the entries in the
1119 directory. In a scalar context it returns an array reference which has
1120 the entries.
1121
1122 =head3 err_mode
1123
1124 If the C<err_mode> option is set, it selects how errors are handled (see
1125 C<err_mode> in C<read_file> or C<write_file>).
1126
1127 =head3 keep_dot_dot
1128
1129 If this boolean option is set, C<.> and C<..> are not removed from the
1130 list of files.
1131
1132         my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1133
1134 =head2 EXPORT
1135
1136   read_file write_file overwrite_file append_file read_dir
1137
1138 =head2 LICENSE
1139
1140   Same as Perl.
1141
1142 =head2 SEE ALSO
1143
1144 An article on file slurping in extras/slurp_article.pod. There is
1145 also a benchmarking script in extras/slurp_bench.pl.
1146
1147 =head2 BUGS
1148
1149 If run under Perl 5.004, slurping from the DATA handle will fail as
1150 that requires B.pm which didn't get into core until 5.005.
1151
1152 =head1 AUTHOR
1153
1154 Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
1155
1156 =cut