lib/File/Slurp.pm

   1 package File::Slurp;
   2
   3 use 5.6.2 ;
   4
   5 use strict;
   6 use warnings ;
   7
   8 use Carp ;
   9 use Exporter ;
  10 use Fcntl qw( :DEFAULT ) ;
  11 use POSIX qw( :fcntl_h ) ;
  12 #use Symbol ;
  13
  14 use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ;
  15 @ISA = qw( Exporter ) ;
  16
  17 $VERSION = '9999.17';
  18
  19 @EXPORT_OK = qw(
  20         slurp
  21         prepend_file
  22         edit_file
  23         edit_file_lines
  24 ) ;
  25
  26 %EXPORT_TAGS = ( 'all' => [ qw(
  27         read_file
  28         write_file
  29         overwrite_file
  30         append_file
  31         read_dir ),
  32         @EXPORT_OK
  33 ] ) ;
  34 @EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
  35
  36 my $max_fast_slurp_size = 1024 * 100 ;
  37
  38 my $is_win32 = $^O =~ /win32/i ;
  39
  40 # Install subs for various constants that aren't set in older perls
  41 # (< 5.005).  Fcntl on old perls uses Exporter to define subs without a
  42 # () prototype These can't be overridden with the constant pragma or
  43 # we get a prototype mismatch.  Hence this less than aesthetically
  44 # appealing BEGIN block:
  45
  46 BEGIN {
  47         unless( defined &SEEK_SET ) {
  48                 *SEEK_SET = sub { 0 };
  49                 *SEEK_CUR = sub { 1 };
  50                 *SEEK_END = sub { 2 };
  51         }
  52
  53         unless( defined &O_BINARY ) {
  54                 *O_BINARY = sub { 0 };
  55                 *O_RDONLY = sub { 0 };
  56                 *O_WRONLY = sub { 1 };
  57         }
  58
  59         unless ( defined &O_APPEND ) {
  60
  61                 if ( $^O =~ /olaris/ ) {
  62                         *O_APPEND = sub { 8 };
  63                         *O_CREAT = sub { 256 };
  64                         *O_EXCL = sub { 1024 };
  65                 }
  66                 elsif ( $^O =~ /inux/ ) {
  67                         *O_APPEND = sub { 1024 };
  68                         *O_CREAT = sub { 64 };
  69                         *O_EXCL = sub { 128 };
  70                 }
  71                 elsif ( $^O =~ /BSD/i ) {
  72                         *O_APPEND = sub { 8 };
  73                         *O_CREAT = sub { 512 };
  74                         *O_EXCL = sub { 2048 };
  75                 }
  76         }
  77 }
  78
  79 # print "OS [$^O]\n" ;
  80
  81 # print "O_BINARY = ", O_BINARY(), "\n" ;
  82 # print "O_RDONLY = ", O_RDONLY(), "\n" ;
  83 # print "O_WRONLY = ", O_WRONLY(), "\n" ;
  84 # print "O_APPEND = ", O_APPEND(), "\n" ;
  85 # print "O_CREAT   ", O_CREAT(), "\n" ;
  86 # print "O_EXCL   ", O_EXCL(), "\n" ;
  87
  88
  89 *slurp = \&read_file ;
  90
  91 sub read_file {
  92
  93         my $file_name = shift ;
  94         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
  95
  96 # this is the optimized read_file for shorter files.
  97 # the test for -s > 0 is to allow pseudo files to be read with the
  98 # regular loop since they return a size of 0.
  99
 100         if ( !ref $file_name && -e $file_name && -s _ > 0 &&
 101              -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) {
 102
 103
 104                 my $fh ;
 105                 unless( sysopen( $fh, $file_name, O_RDONLY ) ) {
 106
 107                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
 108                         goto &_error ;
 109                 }
 110
 111                 my $read_cnt = sysread( $fh, my $buf, -s _ ) ;
 112
 113                 unless ( defined $read_cnt ) {
 114
 115                         @_ = ( $opts,
 116                                 "read_file '$file_name' - small sysread: $!");
 117                         goto &_error ;
 118                 }
 119
 120                 $buf =~ s/\015\012/\n/g if $is_win32 ;
 121                 return $buf ;
 122         }
 123
 124 # set the buffer to either the passed in one or ours and init it to the null
 125 # string
 126
 127         my $buf ;
 128         my $buf_ref = $opts->{'buf_ref'} || \$buf ;
 129         ${$buf_ref} = '' ;
 130
 131         my( $read_fh, $size_left, $blk_size ) ;
 132
 133 # deal with ref for a file name
 134 # it could be an open handle or an overloaded object
 135
 136         if ( ref $file_name ) {
 137
 138                 my $ref_result = _check_ref( $file_name ) ;
 139
 140                 if ( ref $ref_result ) {
 141
 142 # we got an error, deal with it
 143
 144                         @_ = ( $opts, $ref_result ) ;
 145                         goto &_error ;
 146                 }
 147
 148                 if ( $ref_result ) {
 149
 150 # we got an overloaded object and the result is the stringified value
 151 # use it as the file name
 152
 153                         $file_name = $ref_result ;
 154                 }
 155                 else {
 156
 157 # here we have just an open handle. set $read_fh so we don't do a sysopen
 158
 159                         $read_fh = $file_name ;
 160                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 161                         $size_left = $blk_size ;
 162                 }
 163         }
 164
 165 # see if we have a path we need to open
 166
 167         unless ( $read_fh ) {
 168
 169 # a regular file. set the sysopen mode
 170
 171                 my $mode = O_RDONLY ;
 172
 173 #printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
 174
 175                 $read_fh = local( *FH ) ;
 176 #               $read_fh = gensym ;
 177                 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
 178                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
 179                         goto &_error ;
 180                 }
 181
 182                 if ( my $binmode = $opts->{'binmode'} ) {
 183                         binmode( $read_fh, $binmode ) ;
 184                 }
 185
 186 # get the size of the file for use in the read loop
 187
 188                 $size_left = -s $read_fh ;
 189
 190 #print "SIZE $size_left\n" ;
 191
 192 # we need a blk_size if the size is 0 so we can handle pseudofiles like in
 193 # /proc. these show as 0 size but have data to be slurped.
 194
 195                 unless( $size_left ) {
 196
 197                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 198                         $size_left = $blk_size ;
 199                 }
 200         }
 201
 202 # infinite read loop. we exit when we are done slurping
 203
 204         while( 1 ) {
 205
 206 # do the read and see how much we got
 207
 208                 my $read_cnt = sysread( $read_fh, ${$buf_ref},
 209                                 $size_left, length ${$buf_ref} ) ;
 210
 211                 unless ( defined $read_cnt ) {
 212
 213                         @_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
 214                         goto &_error ;
 215                 }
 216
 217 # good read. see if we hit EOF (nothing left to read)
 218
 219                 last if $read_cnt == 0 ;
 220
 221 # loop if we are slurping a handle. we don't track $size_left then.
 222
 223                 next if $blk_size ;
 224
 225 # count down how much we read and loop if we have more to read.
 226
 227                 $size_left -= $read_cnt ;
 228                 last if $size_left <= 0 ;
 229         }
 230
 231 # fix up cr/lf to be a newline if this is a windows text file
 232
 233         ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
 234
 235         my $sep = $/ ;
 236         $sep = '\n\n+' if defined $sep && $sep eq '' ;
 237
 238 # see if caller wants lines
 239
 240         if( wantarray || $opts->{'array_ref'} ) {
 241
 242                 use re 'taint' ;
 243
 244                 my @lines = length(${$buf_ref}) ?
 245                         ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ;
 246
 247                 chomp @lines if $opts->{'chomp'} ;
 248
 249 # caller wants an array ref
 250
 251                 return \@lines if $opts->{'array_ref'} ;
 252
 253 # caller wants list of lines
 254
 255                 return @lines ;
 256         }
 257
 258 # caller wants a scalar ref to the slurped text
 259
 260         return $buf_ref if $opts->{'scalar_ref'} ;
 261
 262 # caller wants a scalar with the slurped text (normal scalar context)
 263
 264         return ${$buf_ref} if defined wantarray ;
 265
 266 # caller passed in an i/o buffer by reference (normal void context)
 267
 268         return ;
 269 }
 270
 271 # errors in this sub are returned as scalar refs
 272 # a normal IO/GLOB handle is an empty return
 273 # an overloaded object returns its stringified as a scalarfilename
 274
 275 sub _check_ref {
 276
 277         my( $handle ) = @_ ;
 278
 279 # check if we are reading from a handle (GLOB or IO object)
 280
 281         if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) {
 282
 283 # we have a handle. deal with seeking to it if it is DATA
 284
 285                 my $err = _seek_data_handle( $handle ) ;
 286
 287 # return the error string if any
 288
 289                 return \$err if $err ;
 290
 291 # we have good handle
 292                 return ;
 293         }
 294
 295         eval { require overload } ;
 296
 297 # return an error if we can't load the overload pragma
 298 # or if the object isn't overloaded
 299
 300         return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
 301                  if $@ || !overload::Overloaded( $handle ) ;
 302
 303 # must be overloaded so return its stringified value
 304
 305         return "$handle" ;
 306 }
 307
 308 sub _seek_data_handle {
 309
 310         my( $handle ) = @_ ;
 311
 312 # DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
 313 # glob/handle. only the DATA handle is untainted (since it is from
 314 # trusted data in the source file). this allows us to test if this is
 315 # the DATA handle and then to do a sysseek to make sure it gets
 316 # slurped correctly. on some systems, the buffered i/o pointer is not
 317 # left at the same place as the fd pointer. this sysseek makes them
 318 # the same so slurping with sysread will work.
 319
 320         eval{ require B } ;
 321
 322         if ( $@ ) {
 323
 324                 return <<ERR ;
 325 Can't find B.pm with this Perl: $!.
 326 That module is needed to properly slurp the DATA handle.
 327 ERR
 328         }
 329
 330         if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
 331
 332 # set the seek position to the current tell.
 333
 334                 unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
 335                         return "read_file '$handle' - sysseek: $!" ;
 336                 }
 337         }
 338
 339 # seek was successful, return no error string
 340
 341         return ;
 342 }
 343
 344
 345 sub write_file {
 346
 347         my $file_name = shift ;
 348
 349 # get the optional argument hash ref from @_ or an empty hash ref.
 350
 351         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 352
 353         my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
 354
 355 # get the buffer ref - it depends on how the data is passed into write_file
 356 # after this if/else $buf_ref will have a scalar ref to the data.
 357
 358         if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
 359
 360 # a scalar ref passed in %opts has the data
 361 # note that the data was passed by ref
 362
 363                 $buf_ref = $opts->{'buf_ref'} ;
 364                 $data_is_ref = 1 ;
 365         }
 366         elsif ( ref $_[0] eq 'SCALAR' ) {
 367
 368 # the first value in @_ is the scalar ref to the data
 369 # note that the data was passed by ref
 370
 371                 $buf_ref = shift ;
 372                 $data_is_ref = 1 ;
 373         }
 374         elsif ( ref $_[0] eq 'ARRAY' ) {
 375
 376 # the first value in @_ is the array ref to the data so join it.
 377
 378                 ${$buf_ref} = join '', @{$_[0]} ;
 379         }
 380         else {
 381
 382 # good old @_ has all the data so join it.
 383
 384                 ${$buf_ref} = join '', @_ ;
 385         }
 386
 387 # deal with ref for a file name
 388
 389         if ( ref $file_name ) {
 390
 391                 my $ref_result = _check_ref( $file_name ) ;
 392
 393                 if ( ref $ref_result ) {
 394
 395 # we got an error, deal with it
 396
 397                         @_ = ( $opts, $ref_result ) ;
 398                         goto &_error ;
 399                 }
 400
 401                 if ( $ref_result ) {
 402
 403 # we got an overloaded object and the result is the stringified value
 404 # use it as the file name
 405
 406                         $file_name = $ref_result ;
 407                 }
 408                 else {
 409
 410 # we now have a proper handle ref.
 411 # make sure we don't call truncate on it.
 412
 413                         $write_fh = $file_name ;
 414                         $no_truncate = 1 ;
 415                 }
 416         }
 417
 418 # see if we have a path we need to open
 419
 420         unless( $write_fh ) {
 421
 422 # spew to regular file.
 423
 424                 if ( $opts->{'atomic'} ) {
 425
 426 # in atomic mode, we spew to a temp file so make one and save the original
 427 # file name.
 428                         $orig_file_name = $file_name ;
 429                         $file_name .= ".$$" ;
 430                 }
 431
 432 # set the mode for the sysopen
 433
 434                 my $mode = O_WRONLY | O_CREAT ;
 435                 $mode |= O_APPEND if $opts->{'append'} ;
 436                 $mode |= O_EXCL if $opts->{'no_clobber'} ;
 437
 438                 my $perms = $opts->{perms} ;
 439                 $perms = 0666 unless defined $perms ;
 440
 441 #printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
 442
 443 # open the file and handle any error.
 444
 445                 $write_fh = local( *FH ) ;
 446 #               $write_fh = gensym ;
 447                 unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
 448
 449                         @_ = ( $opts, "write_file '$file_name' - sysopen: $!");
 450                         goto &_error ;
 451                 }
 452         }
 453
 454         if ( my $binmode = $opts->{'binmode'} ) {
 455                 binmode( $write_fh, $binmode ) ;
 456         }
 457
 458         sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
 459
 460 #print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 461
 462 # fix up newline to write cr/lf if this is a windows text file
 463
 464         if ( $is_win32 && !$opts->{'binmode'} ) {
 465
 466 # copy the write data if it was passed by ref so we don't clobber the
 467 # caller's data
 468                 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
 469                 ${$buf_ref} =~ s/\n/\015\012/g ;
 470         }
 471
 472 #print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 473
 474 # get the size of how much we are writing and init the offset into that buffer
 475
 476         my $size_left = length( ${$buf_ref} ) ;
 477         my $offset = 0 ;
 478
 479 # loop until we have no more data left to write
 480
 481         do {
 482
 483 # do the write and track how much we just wrote
 484
 485                 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
 486                                 $size_left, $offset ) ;
 487
 488                 unless ( defined $write_cnt ) {
 489
 490                         @_ = ( $opts, "write_file '$file_name' - syswrite: $!");
 491                         goto &_error ;
 492                 }
 493
 494 # track how much left to write and where to write from in the buffer
 495
 496                 $size_left -= $write_cnt ;
 497                 $offset += $write_cnt ;
 498
 499         } while( $size_left > 0 ) ;
 500
 501 # we truncate regular files in case we overwrite a long file with a shorter file
 502 # so seek to the current position to get it (same as tell()).
 503
 504         truncate( $write_fh,
 505                   sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
 506
 507         close( $write_fh ) ;
 508
 509 # handle the atomic mode - move the temp file to the original filename.
 510
 511         if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
 512
 513                 @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
 514                 goto &_error ;
 515         }
 516
 517         return 1 ;
 518 }
 519
 520 # this is for backwards compatibility with the previous File::Slurp module.
 521 # write_file always overwrites an existing file
 522
 523 *overwrite_file = \&write_file ;
 524
 525 # the current write_file has an append mode so we use that. this
 526 # supports the same API with an optional second argument which is a
 527 # hash ref of options.
 528
 529 sub append_file {
 530
 531 # get the optional opts hash ref
 532         my $opts = $_[1] ;
 533         if ( ref $opts eq 'HASH' ) {
 534
 535 # we were passed an opts ref so just mark the append mode
 536
 537                 $opts->{append} = 1 ;
 538         }
 539         else {
 540
 541 # no opts hash so insert one with the append mode
 542
 543                 splice( @_, 1, 0, { append => 1 } ) ;
 544         }
 545
 546 # magic goto the main write_file sub. this overlays the sub without touching
 547 # the stack or @_
 548
 549         goto &write_file
 550 }
 551
 552 # prepend data to the beginning of a file
 553
 554 sub prepend_file {
 555
 556         my $file_name = shift ;
 557
 558 #print "FILE $file_name\n" ;
 559
 560         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 561
 562 # delete unsupported options
 563
 564         my @bad_opts =
 565                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 566
 567         delete @{$opts}{@bad_opts} ;
 568
 569         my $prepend_data = shift ;
 570         $prepend_data = '' unless defined $prepend_data ;
 571         $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
 572
 573 #print "PRE [$prepend_data]\n" ;
 574
 575         my $err_mode = delete $opts->{err_mode} ;
 576         $opts->{ err_mode } = 'croak' ;
 577         $opts->{ scalar_ref } = 1 ;
 578
 579         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 580
 581         if ( $@ ) {
 582
 583                 @_ = ( { err_mode => $err_mode },
 584                         "prepend_file '$file_name' - read_file: $!" ) ;
 585                 goto &_error ;
 586         }
 587
 588 #print "EXIST [$$existing_data]\n" ;
 589
 590         $opts->{atomic} = 1 ;
 591         my $write_result =
 592                 eval { write_file( $file_name, $opts,
 593                        $prepend_data, $$existing_data ) ;
 594         } ;
 595
 596         if ( $@ ) {
 597
 598                 @_ = ( { err_mode => $err_mode },
 599                         "prepend_file '$file_name' - write_file: $!" ) ;
 600                 goto &_error ;
 601         }
 602
 603         return $write_result ;
 604 }
 605
 606 # edit a file as a scalar in $_
 607
 608 sub edit_file(&$;$) {
 609
 610         my( $edit_code, $file_name, $opts ) = @_ ;
 611         $opts = {} unless ref $opts eq 'HASH' ;
 612
 613 #       my $edit_code = shift ;
 614 #       my $file_name = shift ;
 615 #       my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 616
 617 #print "FILE $file_name\n" ;
 618
 619 # delete unsupported options
 620
 621         my @bad_opts =
 622                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 623
 624         delete @{$opts}{@bad_opts} ;
 625
 626 # keep the user err_mode and force croaking on internal errors
 627
 628         my $err_mode = delete $opts->{err_mode} ;
 629         $opts->{ err_mode } = 'croak' ;
 630
 631 # get a scalar ref for speed and slurp the file into a scalar
 632
 633         $opts->{ scalar_ref } = 1 ;
 634         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 635
 636         if ( $@ ) {
 637
 638                 @_ = ( { err_mode => $err_mode },
 639                         "edit_file '$file_name' - read_file: $!" ) ;
 640                 goto &_error ;
 641         }
 642
 643 #print "EXIST [$$existing_data]\n" ;
 644
 645         my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ;
 646
 647         $opts->{atomic} = 1 ;
 648         my $write_result =
 649                 eval { write_file( $file_name, $opts, $edited_data ) } ;
 650
 651         if ( $@ ) {
 652
 653                 @_ = ( { err_mode => $err_mode },
 654                         "edit_file '$file_name' - write_file: $!" ) ;
 655                 goto &_error ;
 656         }
 657
 658         return $write_result ;
 659 }
 660
 661 sub edit_file_lines(&$;$) {
 662
 663         my( $edit_code, $file_name, $opts ) = @_ ;
 664         $opts = {} unless ref $opts eq 'HASH' ;
 665
 666 #       my $edit_code = shift ;
 667 #       my $file_name = shift ;
 668 #       my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 669
 670 #print "FILE $file_name\n" ;
 671
 672 # delete unsupported options
 673
 674         my @bad_opts =
 675                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 676
 677         delete @{$opts}{@bad_opts} ;
 678
 679 # keep the user err_mode and force croaking on internal errors
 680
 681         my $err_mode = delete $opts->{err_mode} ;
 682         $opts->{ err_mode } = 'croak' ;
 683
 684 # get an array ref for speed and slurp the file into lines
 685
 686         $opts->{ array_ref } = 1 ;
 687         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 688
 689         if ( $@ ) {
 690
 691                 @_ = ( { err_mode => $err_mode },
 692                         "edit_file_lines '$file_name' - read_file: $!" ) ;
 693                 goto &_error ;
 694         }
 695
 696 #print "EXIST [$$existing_data]\n" ;
 697
 698         my @edited_data = map { $edit_code->(); $_ } @$existing_data ;
 699
 700         $opts->{atomic} = 1 ;
 701         my $write_result =
 702                 eval { write_file( $file_name, $opts, @edited_data ) } ;
 703
 704         if ( $@ ) {
 705
 706                 @_ = ( { err_mode => $err_mode },
 707                         "edit_file_lines '$file_name' - write_file: $!" ) ;
 708                 goto &_error ;
 709         }
 710
 711         return $write_result ;
 712 }
 713
 714 # basic wrapper around opendir/readdir
 715
 716 sub read_dir {
 717
 718         my $dir = shift ;
 719         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
 720
 721 # this handle will be destroyed upon return
 722
 723         local(*DIRH);
 724
 725 # open the dir and handle any errors
 726
 727         unless ( opendir( DIRH, $dir ) ) {
 728
 729                 @_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
 730                 goto &_error ;
 731         }
 732
 733         my @dir_entries = readdir(DIRH) ;
 734
 735         @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
 736                 unless $opts->{'keep_dot_dot'} ;
 737
 738         if ( $opts->{'prefix'} ) {
 739
 740                 substr( $_, 0, 0, "$dir/" ) for @dir_entries ;
 741         }
 742
 743         return @dir_entries if wantarray ;
 744         return \@dir_entries ;
 745 }
 746
 747 # error handling section
 748 #
 749 # all the error handling uses magic goto so the caller will get the
 750 # error message as if from their code and not this module. if we just
 751 # did a call on the error code, the carp/croak would report it from
 752 # this module since the error sub is one level down on the call stack
 753 # from read_file/write_file/read_dir.
 754
 755
 756 my %err_func = (
 757         'carp'  => \&carp,
 758         'croak' => \&croak,
 759 ) ;
 760
 761 sub _error {
 762
 763         my( $opts, $err_msg ) = @_ ;
 764
 765 # get the error function to use
 766
 767         my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
 768
 769 # if we didn't find it in our error function hash, they must have set
 770 # it to quiet and we don't do anything.
 771
 772         return unless $func ;
 773
 774 # call the carp/croak function
 775
 776         $func->($err_msg) if $func ;
 777
 778 # return a hard undef (in list context this will be a single value of
 779 # undef which is not a legal in-band value)
 780
 781         return undef ;
 782 }
 783
 784 1;
 785 __END__
 786
 787 =head1 NAME
 788
 789 File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files
 790
 791 =head1 SYNOPSIS
 792
 793   use File::Slurp;
 794
 795 # read in a whole file into a scalar
 796   my $text = read_file( 'filename' ) ;
 797
 798 # read in a whole file into an array of lines
 799   my @lines = read_file( 'filename' ) ;
 800
 801 # write out a whole file from a scalar
 802   write_file( 'filename', $text ) ;
 803
 804 # write out a whole file from an array of lines
 805   write_file( 'filename', @lines ) ;
 806
 807 # Here is a simple and fast way to load and save a simple config file
 808 # made of key=value lines.
 809   my %conf = read_file( $file_name ) =~ /^(\w+)=(\.*)$/mg ;
 810   write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ;
 811
 812 # insert text at the beginning of a file
 813   prepend_file( 'filename', $text ) ;
 814
 815 # in-place edit to replace all 'foo' with 'bar' in file
 816   edit_file { s/foo/bar/g } 'filename' ;
 817
 818 # in-place edit to delete all lines with 'foo' from file
 819   edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
 820
 821 # read in a whole directory of file names (skipping . and ..)
 822   my @files = read_dir( '/path/to/dir' ) ;
 823
 824 =head1 DESCRIPTION
 825
 826 This module provides subs that allow you to read or write entire files
 827 with one simple call. They are designed to be simple to use, have
 828 flexible ways to pass in or get the file contents and to be very
 829 efficient.  There is also a sub to read in all the files in a
 830 directory other than C<.> and C<..>
 831
 832 These slurp/spew subs work for files, pipes and sockets, stdio,
 833 pseudo-files, and the DATA handle. Read more about why slurping files is
 834 a good thing in the file 'slurp_article.pod' in the extras/ directory.
 835
 836 If you are interested in how fast these calls work, check out the
 837 slurp_bench.pl program in the extras/ directory. It compares many
 838 different forms of slurping. You can select the I/O direction, context
 839 and file sizes. Use the --help option to see how to run it.
 840
 841 =head2 B<read_file>
 842
 843 This sub reads in an entire file and returns its contents to the
 844 caller.  In scalar context it returns the entire file as a single
 845 scalar. In list context it will return a list of lines (using the
 846 current value of $/ as the separator including support for paragraph
 847 mode when it is set to '').
 848
 849   my $text = read_file( 'filename' ) ;
 850   my $bin = read_file( 'filename' { binmode => ':raw' } ) ;
 851   my @lines = read_file( 'filename' ) ;
 852   my $lines = read_file( 'filename', array_ref => 1 ) ;
 853
 854 The first argument is the file to slurp in. If the next argument is a
 855 hash reference, then it is used as the options. Otherwise the rest of
 856 the argument list are is used as key/value options.
 857
 858 If the file argument is a handle (if it is a ref and is an IO or GLOB
 859 object), then that handle is slurped in. This mode is supported so you
 860 slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
 861 an example that does C<open( '-|' )> and the child process spews data
 862 to the parant which slurps it in.  All of the options that control how
 863 the data is returned to the caller still work in this case.
 864
 865 If the first argument is an overloaded object then its stringified value
 866 is used for the filename and that file is opened.  This is a new feature
 867 in 9999.14. See the stringify.t test for an example.
 868
 869 By default C<read_file> returns an undef in scalar contex or a single
 870 undef in list context if it encounters an error. Those are both
 871 impossible to get with a clean read_file call which means you can check
 872 the return value and always know if you had an error. You can change how
 873 errors are handled with the C<err_mode> option.
 874
 875 Speed Note: If you call read_file and just get a scalar return value
 876 it is now optimized to handle shorter files. This is only used if no
 877 options are used, the file is shorter then 100k bytes, the filename is
 878 a plain scalar and a scalar file is returned. If you want the fastest
 879 slurping, use the C<buf_ref> or C<scalar_ref> options (see below)
 880
 881 NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
 882 handle. It used to need a sysseek workaround but that is now handled
 883 when needed by the module itself.
 884
 885 You can optionally request that C<slurp()> is exported to your code. This
 886 is an alias for read_file and is meant to be forward compatible with
 887 Perl 6 (which will have slurp() built-in).
 888
 889 The options for C<read_file> are:
 890
 891 =head3 binmode
 892
 893 If you set the binmode option, then its value is passed to a call to
 894 binmode on the opened handle. You can use this to set the file to be
 895 read in binary mode, utf8, etc. See perldoc -f binmode for more.
 896
 897         my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
 898         my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
 899
 900 =head3 array_ref
 901
 902 If this boolean option is set, the return value (only in scalar
 903 context) will be an array reference which contains the lines of the
 904 slurped file. The following two calls are equivalent:
 905
 906         my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
 907         my $lines_ref = [ read_file( $bin_file ) ] ;
 908
 909 =head3 chomp
 910
 911 If this boolean option is set, the lines are chomped. This only
 912 happens if you are slurping in a list context or using the
 913 C<array_ref> option.
 914
 915 =head3 scalar_ref
 916
 917 If this boolean option is set, the return value (only in scalar
 918 context) will be an scalar reference to a string which is the contents
 919 of the slurped file. This will usually be faster than returning the
 920 plain scalar. It will also save memory as it will not make a copy of
 921 the file to return. Run the extras/slurp_bench.pl script to see speed
 922 comparisons.
 923
 924         my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
 925
 926 =head3 buf_ref
 927
 928 You can use this option to pass in a scalar reference and the slurped
 929 file contents will be stored in the scalar. This can be used in
 930 conjunction with any of the other options. This saves an extra copy of
 931 the slurped file and can lower ram usage vs returning the file. It is
 932 usually the fastest way to read a file into a scalar. Run the
 933 extras/slurp_bench.pl script to see speed comparisons.
 934
 935
 936         read_file( $bin_file, buf_ref => \$buffer ) ;
 937
 938 =head3 blk_size
 939
 940 You can use this option to set the block size used when slurping from
 941 an already open handle (like \*STDIN). It defaults to 1MB.
 942
 943         my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
 944                                              array_ref => 1 ) ;
 945
 946 =head3 err_mode
 947
 948 You can use this option to control how read_file behaves when an error
 949 occurs. This option defaults to 'croak'. You can set it to 'carp' or to
 950 'quiet to have no special error handling. This code wants to carp and
 951 then read another file if it fails.
 952
 953         my $text_ref = read_file( $file, err_mode => 'carp' ) ;
 954         unless ( $text_ref ) {
 955
 956                 # read a different file but croak if not found
 957                 $text_ref = read_file( $another_file ) ;
 958         }
 959
 960         # process ${$text_ref}
 961
 962 =head2 B<write_file>
 963
 964 This sub writes out an entire file in one call.
 965
 966   write_file( 'filename', @data ) ;
 967
 968 The first argument to C<write_file> is the filename. The next argument
 969 is an optional hash reference and it contains key/values that can
 970 modify the behavior of C<write_file>. The rest of the argument list is
 971 the data to be written to the file.
 972
 973   write_file( 'filename', {append => 1 }, @data ) ;
 974   write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
 975
 976 As a shortcut if the first data argument is a scalar or array reference,
 977 it is used as the only data to be written to the file. Any following
 978 arguments in @_ are ignored. This is a faster way to pass in the output
 979 to be written to the file and is equivalent to the C<buf_ref> option of
 980 C<read_file>. These following pairs are equivalent but the pass by
 981 reference call will be faster in most cases (especially with larger
 982 files).
 983
 984   write_file( 'filename', \$buffer ) ;
 985   write_file( 'filename', $buffer ) ;
 986
 987   write_file( 'filename', \@lines ) ;
 988   write_file( 'filename', @lines ) ;
 989
 990 If the first argument is a handle (if it is a ref and is an IO or GLOB
 991 object), then that handle is written to. This mode is supported so you
 992 spew to handles such as \*STDOUT. See the test handle.t for an example
 993 that does C<open( '-|' )> and child process spews data to the parent
 994 which slurps it in.  All of the options that control how the data are
 995 passed into C<write_file> still work in this case.
 996
 997 If the first argument is an overloaded object then its stringified value
 998 is used for the filename and that file is opened.  This is new feature
 999 in 9999.14. See the stringify.t test for an example.
1000
1001 By default C<write_file> returns 1 upon successfully writing the file or
1002 undef if it encountered an error. You can change how errors are handled
1003 with the C<err_mode> option.
1004
1005 The options are:
1006
1007 =head3 binmode
1008
1009 If you set the binmode option, then its value is passed to a call to
1010 binmode on the opened handle. You can use this to set the file to be
1011 read in binary mode, utf8, etc. See perldoc -f binmode for more.
1012
1013         write_file( $bin_file, {binmode => ':raw'}, @data ) ;
1014         write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
1015
1016 =head3 perms
1017
1018 The perms option sets the permissions of newly-created files. This value
1019 is modified by your process's umask and defaults to 0666 (same as
1020 sysopen).
1021
1022 NOTE: this option is new as of File::Slurp version 9999.14;
1023
1024 =head3 buf_ref
1025
1026 You can use this option to pass in a scalar reference which has the
1027 data to be written. If this is set then any data arguments (including
1028 the scalar reference shortcut) in @_ will be ignored. These are
1029 equivalent:
1030
1031         write_file( $bin_file, { buf_ref => \$buffer } ) ;
1032         write_file( $bin_file, \$buffer ) ;
1033         write_file( $bin_file, $buffer ) ;
1034
1035 =head3 atomic
1036
1037 If you set this boolean option, the file will be written to in an
1038 atomic fashion. A temporary file name is created by appending the pid
1039 ($$) to the file name argument and that file is spewed to. After the
1040 file is closed it is renamed to the original file name (and rename is
1041 an atomic operation on most OS's). If the program using this were to
1042 crash in the middle of this, then the file with the pid suffix could
1043 be left behind.
1044
1045 =head3 append
1046
1047 If you set this boolean option, the data will be written at the end of
1048 the current file. Internally this sets the sysopen mode flag O_APPEND.
1049
1050         write_file( $file, {append => 1}, @data ) ;
1051
1052  You
1053 can import append_file and it does the same thing.
1054
1055 =head3 no_clobber
1056
1057 If you set this boolean option, an existing file will not be overwritten.
1058
1059         write_file( $file, {no_clobber => 1}, @data ) ;
1060
1061 =head3 err_mode
1062
1063 You can use this option to control how C<write_file> behaves when an
1064 error occurs. This option defaults to 'croak'. You can set it to
1065 'carp' or to 'quiet' to have no error handling other than the return
1066 value. If the first call to C<write_file> fails it will carp and then
1067 write to another file. If the second call to C<write_file> fails, it
1068 will croak.
1069
1070         unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
1071
1072                 # write a different file but croak if not found
1073                 write_file( $other_file, \$data ) ;
1074         }
1075
1076 =head2 overwrite_file
1077
1078 This sub is just a typeglob alias to write_file since write_file
1079 always overwrites an existing file. This sub is supported for
1080 backwards compatibility with the original version of this module. See
1081 write_file for its API and behavior.
1082
1083 =head2 append_file
1084
1085 This sub will write its data to the end of the file. It is a wrapper
1086 around write_file and it has the same API so see that for the full
1087 documentation. These calls are equivalent:
1088
1089         append_file( $file, @data ) ;
1090         write_file( $file, {append => 1}, @data ) ;
1091
1092
1093 =head2 prepend_file
1094
1095 This sub writes data to the beginning of a file. The previously existing
1096 data is written after that so the effect is prepending data in front of
1097 a file. It is a counterpart to the append_file sub in this module. It
1098 works by first using C<read_file> to slurp in the file and then calling
1099 C<write_file> with the new data and the existing file data.
1100
1101 The first argument to C<prepend_file> is the filename. The next argument
1102 is an optional hash reference and it contains key/values that can modify
1103 the behavior of C<prepend_file>. The rest of the argument list is the
1104 data to be written to the file and that is passed to C<write_file> as is
1105 (see that for allowed data).
1106
1107 Only the C<binmode> and C<err_mode> options are supported. The
1108 C<write_file> call has the C<atomic> option set so you will always have
1109 a consistant file. See above for more about those options.
1110
1111 C<prepend_file> is not exported by default, you need to import it
1112 explicitly.
1113
1114         use File::Slurp qw( prepend_file ) ;
1115         prepend_file( $file, $header ) ;
1116         prepend_file( $file, \@lines ) ;
1117         prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
1118
1119
1120 =head2 edit_file, edit_file_lines
1121
1122 These subs read in a file into $_, execute a code block which should
1123 modify $_ and then write $_ back to the file. The difference between
1124 them is that C<edit_file> reads the whole file into $_ and calls the
1125 code block one time. With C<edit_file_lines> each line is read into $_
1126 and the code is called for each line. In both cases the code should
1127 modify $_ if desired and it will be written back out. These subs are
1128 the equivilent of the -pi command line options of Perl but you can
1129 call them from inside your program and not fork out a process.
1130
1131 The first argument to C<edit_file> and C<edit_file_lines> is a code
1132 block or a code reference. The code block is not followed by a comma
1133 (as with grep and map) but a code reference is followed by a
1134 comma. See the examples below for both styles. The next argument is
1135 the filename. The last argument is an optional hash reference and it
1136 contains key/values that can modify the behavior of
1137 C<prepend_file>.
1138
1139 Only the C<binmode> and C<err_mode> options are supported. The
1140 C<write_file> call has the C<atomic> option set so you will always
1141 have a consistant file. See above for more about those options.
1142
1143 Each group of calls below show a Perl command line instance and the
1144 equivilent calls to C<edit_file> and C<edit_file_lines>.
1145
1146         perl -0777 -pi -e 's/foo/bar/g' filename
1147         use File::Slurp ;
1148         edit_file { s/foo/bar/g } 'filename' ;
1149         edit_file sub { s/foo/bar/g }, 'filename' ;
1150         edit_file \&replace_foo, 'filename' ;
1151         sub replace_foo { s/foo/bar/g }
1152
1153         perl -pi -e '$_ = '' if /foo/' filename
1154         use File::Slurp ;
1155         edit_file_lines { $_ = '' if /foo/ } 'filename' ;
1156         edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
1157         edit_file \&delete_foo, 'filename' ;
1158         sub delete_foo { $_ = '' if /foo/ }
1159
1160 =head2 read_dir
1161
1162 This sub reads all the file names from directory and returns them to
1163 the caller but C<.> and C<..> are removed by default.
1164
1165         my @files = read_dir( '/path/to/dir' ) ;
1166
1167 The first argument is the path to the directory to read.  If the next
1168 argument is a hash reference, then it is used as the options.
1169 Otherwise the rest of the argument list are is used as key/value
1170 options.
1171
1172 In list context C<read_dir> returns a list of the entries in the
1173 directory. In a scalar context it returns an array reference which has
1174 the entries.
1175
1176 =head3 err_mode
1177
1178 If the C<err_mode> option is set, it selects how errors are handled (see
1179 C<err_mode> in C<read_file> or C<write_file>).
1180
1181 =head3 keep_dot_dot
1182
1183 If this boolean option is set, C<.> and C<..> are not removed from the
1184 list of files.
1185
1186         my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1187
1188 =head3 prefix
1189
1190 If this boolean option is set, the string "$dir/" is prefixed to each
1191 dir entry. This means you can directly use the results to open
1192 files. A common newbie mistake is not putting the directory in front
1193 of entries when opening themn.
1194
1195         my @paths = read_dir( '/path/to/dir', prefix => 1 ) ;
1196
1197 =head2 EXPORT
1198
1199   read_file write_file overwrite_file append_file read_dir
1200
1201 =head2 LICENSE
1202
1203   Same as Perl.
1204
1205 =head2 SEE ALSO
1206
1207 An article on file slurping in extras/slurp_article.pod. There is
1208 also a benchmarking script in extras/slurp_bench.pl.
1209
1210 =head2 BUGS
1211
1212 If run under Perl 5.004, slurping from the DATA handle will fail as
1213 that requires B.pm which didn't get into core until 5.005.
1214
1215 =head1 AUTHOR
1216
1217 Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
1218
1219 =cut