lib/File/Slurp.pm

   1 package File::Slurp;
   2
   3 use 5.006002 ;
   4
   5 use strict;
   6 use warnings ;
   7
   8 use Carp ;
   9 use Exporter ;
  10 use Fcntl qw( :DEFAULT ) ;
  11 use POSIX qw( :fcntl_h ) ;
  12 use Errno ;
  13 #use Symbol ;
  14
  15 use vars qw( @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS $VERSION ) ;
  16 @ISA = qw( Exporter ) ;
  17
  18 $VERSION = '9999.20';
  19
  20 my @std_export = qw(
  21         read_file
  22         write_file
  23         overwrite_file
  24         append_file
  25         read_dir
  26 ) ;
  27
  28 my @edit_export = qw(
  29         edit_file
  30         edit_file_lines
  31 ) ;
  32
  33 my @ok_export = qw(
  34 ) ;
  35
  36 my @abbrev_export = qw(
  37         rf
  38         wf
  39         ef
  40         efl
  41 ) ;
  42
  43 @EXPORT_OK = (
  44         @edit_export,
  45         @abbrev_export,
  46         qw(
  47                 slurp
  48                 prepend_file
  49         ),
  50 ) ;
  51
  52 %EXPORT_TAGS = (
  53         'all'   => [ @std_export, @edit_export, @abbrev_export, @EXPORT_OK ],
  54         'edit'  => [ @edit_export ],
  55         'std'   => [ @std_export ],
  56         'abr'   => [ @abbrev_export ],
  57 ) ;
  58
  59 @EXPORT = @std_export ;
  60
  61 my $max_fast_slurp_size = 1024 * 100 ;
  62
  63 my $is_win32 = $^O =~ /win32/i ;
  64
  65 # Install subs for various constants that aren't set in older perls
  66 # (< 5.005).  Fcntl on old perls uses Exporter to define subs without a
  67 # () prototype These can't be overridden with the constant pragma or
  68 # we get a prototype mismatch.  Hence this less than aesthetically
  69 # appealing BEGIN block:
  70
  71 BEGIN {
  72         unless( defined &SEEK_SET ) {
  73                 *SEEK_SET = sub { 0 };
  74                 *SEEK_CUR = sub { 1 };
  75                 *SEEK_END = sub { 2 };
  76         }
  77
  78         unless( defined &O_BINARY ) {
  79                 *O_BINARY = sub { 0 };
  80                 *O_RDONLY = sub { 0 };
  81                 *O_WRONLY = sub { 1 };
  82         }
  83
  84         unless ( defined &O_APPEND ) {
  85
  86                 if ( $^O =~ /olaris/ ) {
  87                         *O_APPEND = sub { 8 };
  88                         *O_CREAT = sub { 256 };
  89                         *O_EXCL = sub { 1024 };
  90                 }
  91                 elsif ( $^O =~ /inux/ ) {
  92                         *O_APPEND = sub { 1024 };
  93                         *O_CREAT = sub { 64 };
  94                         *O_EXCL = sub { 128 };
  95                 }
  96                 elsif ( $^O =~ /BSD/i ) {
  97                         *O_APPEND = sub { 8 };
  98                         *O_CREAT = sub { 512 };
  99                         *O_EXCL = sub { 2048 };
 100                 }
 101         }
 102 }
 103
 104 # print "OS [$^O]\n" ;
 105
 106 # print "O_BINARY = ", O_BINARY(), "\n" ;
 107 # print "O_RDONLY = ", O_RDONLY(), "\n" ;
 108 # print "O_WRONLY = ", O_WRONLY(), "\n" ;
 109 # print "O_APPEND = ", O_APPEND(), "\n" ;
 110 # print "O_CREAT   ", O_CREAT(), "\n" ;
 111 # print "O_EXCL   ", O_EXCL(), "\n" ;
 112
 113
 114 *slurp = \&read_file ;
 115 *rf = \&read_file ;
 116
 117 sub read_file {
 118
 119         my $file_name = shift ;
 120         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
 121
 122 # this is the optimized read_file for shorter files.
 123 # the test for -s > 0 is to allow pseudo files to be read with the
 124 # regular loop since they return a size of 0.
 125
 126         if ( !ref $file_name && -e $file_name && -s _ > 0 &&
 127              -s _ < $max_fast_slurp_size && !%{$opts} && !wantarray ) {
 128
 129
 130                 my $fh ;
 131                 unless( sysopen( $fh, $file_name, O_RDONLY ) ) {
 132
 133                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
 134                         goto &_error ;
 135                 }
 136
 137                 my $read_cnt = sysread( $fh, my $buf, -s _ ) ;
 138
 139                 unless ( defined $read_cnt ) {
 140
 141                         @_ = ( $opts,
 142                                 "read_file '$file_name' - small sysread: $!");
 143                         goto &_error ;
 144                 }
 145
 146                 $buf =~ s/\015\012/\n/g if $is_win32 ;
 147                 return $buf ;
 148         }
 149
 150 # set the buffer to either the passed in one or ours and init it to the null
 151 # string
 152
 153         my $buf ;
 154         my $buf_ref = $opts->{'buf_ref'} || \$buf ;
 155         ${$buf_ref} = '' ;
 156
 157         my( $read_fh, $size_left, $blk_size ) ;
 158
 159 # deal with ref for a file name
 160 # it could be an open handle or an overloaded object
 161
 162         if ( ref $file_name ) {
 163
 164                 my $ref_result = _check_ref( $file_name ) ;
 165
 166                 if ( ref $ref_result ) {
 167
 168 # we got an error, deal with it
 169
 170                         @_ = ( $opts, $ref_result ) ;
 171                         goto &_error ;
 172                 }
 173
 174                 if ( $ref_result ) {
 175
 176 # we got an overloaded object and the result is the stringified value
 177 # use it as the file name
 178
 179                         $file_name = $ref_result ;
 180                 }
 181                 else {
 182
 183 # here we have just an open handle. set $read_fh so we don't do a sysopen
 184
 185                         $read_fh = $file_name ;
 186                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 187                         $size_left = $blk_size ;
 188                 }
 189         }
 190
 191 # see if we have a path we need to open
 192
 193         unless ( $read_fh ) {
 194
 195 # a regular file. set the sysopen mode
 196
 197                 my $mode = O_RDONLY ;
 198
 199 #printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ;
 200
 201                 $read_fh = local( *FH ) ;
 202 #               $read_fh = gensym ;
 203                 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
 204                         @_ = ( $opts, "read_file '$file_name' - sysopen: $!");
 205                         goto &_error ;
 206                 }
 207
 208                 if ( my $binmode = $opts->{'binmode'} ) {
 209                         binmode( $read_fh, $binmode ) ;
 210                 }
 211
 212 # get the size of the file for use in the read loop
 213
 214                 $size_left = -s $read_fh ;
 215
 216 #print "SIZE $size_left\n" ;
 217
 218 # we need a blk_size if the size is 0 so we can handle pseudofiles like in
 219 # /proc. these show as 0 size but have data to be slurped.
 220
 221                 unless( $size_left ) {
 222
 223                         $blk_size = $opts->{'blk_size'} || 1024 * 1024 ;
 224                         $size_left = $blk_size ;
 225                 }
 226         }
 227
 228 # infinite read loop. we exit when we are done slurping
 229
 230         while( 1 ) {
 231
 232 # do the read and see how much we got
 233
 234                 my $read_cnt = sysread( $read_fh, ${$buf_ref},
 235                                 $size_left, length ${$buf_ref} ) ;
 236
 237 # since we're using sysread Perl won't automatically restart the call
 238 # when interrupted by a signal.
 239
 240                 next if $!{EINTR};
 241
 242                 unless ( defined $read_cnt ) {
 243
 244                         @_ = ( $opts, "read_file '$file_name' - loop sysread: $!");
 245                         goto &_error ;
 246                 }
 247
 248 # good read. see if we hit EOF (nothing left to read)
 249
 250                 last if $read_cnt == 0 ;
 251
 252 # loop if we are slurping a handle. we don't track $size_left then.
 253
 254                 next if $blk_size ;
 255
 256 # count down how much we read and loop if we have more to read.
 257
 258                 $size_left -= $read_cnt ;
 259                 last if $size_left <= 0 ;
 260         }
 261
 262 # fix up cr/lf to be a newline if this is a windows text file
 263
 264         ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$opts->{'binmode'} ;
 265
 266         my $sep = $/ ;
 267         $sep = '\n\n+' if defined $sep && $sep eq '' ;
 268
 269 # see if caller wants lines
 270
 271         if( wantarray || $opts->{'array_ref'} ) {
 272
 273                 use re 'taint' ;
 274
 275                 my @lines = length(${$buf_ref}) ?
 276                         ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ;
 277
 278                 chomp @lines if $opts->{'chomp'} ;
 279
 280 # caller wants an array ref
 281
 282                 return \@lines if $opts->{'array_ref'} ;
 283
 284 # caller wants list of lines
 285
 286                 return @lines ;
 287         }
 288
 289 # caller wants a scalar ref to the slurped text
 290
 291         return $buf_ref if $opts->{'scalar_ref'} ;
 292
 293 # caller wants a scalar with the slurped text (normal scalar context)
 294
 295         return ${$buf_ref} if defined wantarray ;
 296
 297 # caller passed in an i/o buffer by reference (normal void context)
 298
 299         return ;
 300 }
 301
 302 # errors in this sub are returned as scalar refs
 303 # a normal IO/GLOB handle is an empty return
 304 # an overloaded object returns its stringified as a scalarfilename
 305
 306 sub _check_ref {
 307
 308         my( $handle ) = @_ ;
 309
 310 # check if we are reading from a handle (GLOB or IO object)
 311
 312         if ( eval { $handle->isa( 'GLOB' ) || $handle->isa( 'IO' ) } ) {
 313
 314 # we have a handle. deal with seeking to it if it is DATA
 315
 316                 my $err = _seek_data_handle( $handle ) ;
 317
 318 # return the error string if any
 319
 320                 return \$err if $err ;
 321
 322 # we have good handle
 323                 return ;
 324         }
 325
 326         eval { require overload } ;
 327
 328 # return an error if we can't load the overload pragma
 329 # or if the object isn't overloaded
 330
 331         return \"Bad handle '$handle' is not a GLOB or IO object or overloaded"
 332                  if $@ || !overload::Overloaded( $handle ) ;
 333
 334 # must be overloaded so return its stringified value
 335
 336         return "$handle" ;
 337 }
 338
 339 sub _seek_data_handle {
 340
 341         my( $handle ) = @_ ;
 342
 343 # DEEP DARK MAGIC. this checks the UNTAINT IO flag of a
 344 # glob/handle. only the DATA handle is untainted (since it is from
 345 # trusted data in the source file). this allows us to test if this is
 346 # the DATA handle and then to do a sysseek to make sure it gets
 347 # slurped correctly. on some systems, the buffered i/o pointer is not
 348 # left at the same place as the fd pointer. this sysseek makes them
 349 # the same so slurping with sysread will work.
 350
 351         eval{ require B } ;
 352
 353         if ( $@ ) {
 354
 355                 return <<ERR ;
 356 Can't find B.pm with this Perl: $!.
 357 That module is needed to properly slurp the DATA handle.
 358 ERR
 359         }
 360
 361         if ( B::svref_2object( $handle )->IO->IoFLAGS & 16 ) {
 362
 363 # set the seek position to the current tell.
 364
 365                 unless( sysseek( $handle, tell( $handle ), SEEK_SET ) ) {
 366                         return "read_file '$handle' - sysseek: $!" ;
 367                 }
 368         }
 369
 370 # seek was successful, return no error string
 371
 372         return ;
 373 }
 374
 375
 376 *wf = \&write_file ;
 377
 378 sub write_file {
 379
 380         my $file_name = shift ;
 381
 382 # get the optional argument hash ref from @_ or an empty hash ref.
 383
 384         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 385
 386         my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ;
 387
 388 # get the buffer ref - it depends on how the data is passed into write_file
 389 # after this if/else $buf_ref will have a scalar ref to the data.
 390
 391         if ( ref $opts->{'buf_ref'} eq 'SCALAR' ) {
 392
 393 # a scalar ref passed in %opts has the data
 394 # note that the data was passed by ref
 395
 396                 $buf_ref = $opts->{'buf_ref'} ;
 397                 $data_is_ref = 1 ;
 398         }
 399         elsif ( ref $_[0] eq 'SCALAR' ) {
 400
 401 # the first value in @_ is the scalar ref to the data
 402 # note that the data was passed by ref
 403
 404                 $buf_ref = shift ;
 405                 $data_is_ref = 1 ;
 406         }
 407         elsif ( ref $_[0] eq 'ARRAY' ) {
 408
 409 # the first value in @_ is the array ref to the data so join it.
 410
 411                 ${$buf_ref} = join '', @{$_[0]} ;
 412         }
 413         else {
 414
 415 # good old @_ has all the data so join it.
 416
 417                 ${$buf_ref} = join '', @_ ;
 418         }
 419
 420 # deal with ref for a file name
 421
 422         if ( ref $file_name ) {
 423
 424                 my $ref_result = _check_ref( $file_name ) ;
 425
 426                 if ( ref $ref_result ) {
 427
 428 # we got an error, deal with it
 429
 430                         @_ = ( $opts, $ref_result ) ;
 431                         goto &_error ;
 432                 }
 433
 434                 if ( $ref_result ) {
 435
 436 # we got an overloaded object and the result is the stringified value
 437 # use it as the file name
 438
 439                         $file_name = $ref_result ;
 440                 }
 441                 else {
 442
 443 # we now have a proper handle ref.
 444 # make sure we don't call truncate on it.
 445
 446                         $write_fh = $file_name ;
 447                         $no_truncate = 1 ;
 448                 }
 449         }
 450
 451 # see if we have a path we need to open
 452
 453         unless( $write_fh ) {
 454
 455 # spew to regular file.
 456
 457                 if ( $opts->{'atomic'} ) {
 458
 459 # in atomic mode, we spew to a temp file so make one and save the original
 460 # file name.
 461                         $orig_file_name = $file_name ;
 462                         $file_name .= ".$$" ;
 463                 }
 464
 465 # set the mode for the sysopen
 466
 467                 my $mode = O_WRONLY | O_CREAT ;
 468                 $mode |= O_APPEND if $opts->{'append'} ;
 469                 $mode |= O_EXCL if $opts->{'no_clobber'} ;
 470
 471                 my $perms = $opts->{perms} ;
 472                 $perms = 0666 unless defined $perms ;
 473
 474 #printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ;
 475
 476 # open the file and handle any error.
 477
 478                 $write_fh = local( *FH ) ;
 479 #               $write_fh = gensym ;
 480                 unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) {
 481
 482                         @_ = ( $opts, "write_file '$file_name' - sysopen: $!");
 483                         goto &_error ;
 484                 }
 485         }
 486
 487         if ( my $binmode = $opts->{'binmode'} ) {
 488                 binmode( $write_fh, $binmode ) ;
 489         }
 490
 491         sysseek( $write_fh, 0, SEEK_END ) if $opts->{'append'} ;
 492
 493 #print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 494
 495 # fix up newline to write cr/lf if this is a windows text file
 496
 497         if ( $is_win32 && !$opts->{'binmode'} ) {
 498
 499 # copy the write data if it was passed by ref so we don't clobber the
 500 # caller's data
 501                 $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ;
 502                 ${$buf_ref} =~ s/\n/\015\012/g ;
 503         }
 504
 505 #print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ;
 506
 507 # get the size of how much we are writing and init the offset into that buffer
 508
 509         my $size_left = length( ${$buf_ref} ) ;
 510         my $offset = 0 ;
 511
 512 # loop until we have no more data left to write
 513
 514         do {
 515
 516 # do the write and track how much we just wrote
 517
 518                 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
 519                                 $size_left, $offset ) ;
 520
 521 # since we're using syswrite Perl won't automatically restart the call
 522 # when interrupted by a signal.
 523
 524                 next if $!{EINTR};
 525
 526                 unless ( defined $write_cnt ) {
 527
 528                         @_ = ( $opts, "write_file '$file_name' - syswrite: $!");
 529                         goto &_error ;
 530                 }
 531
 532 # track how much left to write and where to write from in the buffer
 533
 534                 $size_left -= $write_cnt ;
 535                 $offset += $write_cnt ;
 536
 537         } while( $size_left > 0 ) ;
 538
 539 # we truncate regular files in case we overwrite a long file with a shorter file
 540 # so seek to the current position to get it (same as tell()).
 541
 542         truncate( $write_fh,
 543                   sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
 544
 545         close( $write_fh ) ;
 546
 547 # handle the atomic mode - move the temp file to the original filename.
 548
 549         if ( $opts->{'atomic'} && !rename( $file_name, $orig_file_name ) ) {
 550
 551                 @_ = ( $opts, "write_file '$file_name' - rename: $!" ) ;
 552                 goto &_error ;
 553         }
 554
 555         return 1 ;
 556 }
 557
 558 # this is for backwards compatibility with the previous File::Slurp module.
 559 # write_file always overwrites an existing file
 560
 561 *overwrite_file = \&write_file ;
 562
 563 # the current write_file has an append mode so we use that. this
 564 # supports the same API with an optional second argument which is a
 565 # hash ref of options.
 566
 567 sub append_file {
 568
 569 # get the optional opts hash ref
 570         my $opts = $_[1] ;
 571         if ( ref $opts eq 'HASH' ) {
 572
 573 # we were passed an opts ref so just mark the append mode
 574
 575                 $opts->{append} = 1 ;
 576         }
 577         else {
 578
 579 # no opts hash so insert one with the append mode
 580
 581                 splice( @_, 1, 0, { append => 1 } ) ;
 582         }
 583
 584 # magic goto the main write_file sub. this overlays the sub without touching
 585 # the stack or @_
 586
 587         goto &write_file
 588 }
 589
 590 # prepend data to the beginning of a file
 591
 592 sub prepend_file {
 593
 594         my $file_name = shift ;
 595
 596 #print "FILE $file_name\n" ;
 597
 598         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 599
 600 # delete unsupported options
 601
 602         my @bad_opts =
 603                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 604
 605         delete @{$opts}{@bad_opts} ;
 606
 607         my $prepend_data = shift ;
 608         $prepend_data = '' unless defined $prepend_data ;
 609         $prepend_data = ${$prepend_data} if ref $prepend_data eq 'SCALAR' ;
 610
 611 #print "PRE [$prepend_data]\n" ;
 612
 613         my $err_mode = delete $opts->{err_mode} ;
 614         $opts->{ err_mode } = 'croak' ;
 615         $opts->{ scalar_ref } = 1 ;
 616
 617         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 618
 619         if ( $@ ) {
 620
 621                 @_ = ( { err_mode => $err_mode },
 622                         "prepend_file '$file_name' - read_file: $!" ) ;
 623                 goto &_error ;
 624         }
 625
 626 #print "EXIST [$$existing_data]\n" ;
 627
 628         $opts->{atomic} = 1 ;
 629         my $write_result =
 630                 eval { write_file( $file_name, $opts,
 631                        $prepend_data, $$existing_data ) ;
 632         } ;
 633
 634         if ( $@ ) {
 635
 636                 @_ = ( { err_mode => $err_mode },
 637                         "prepend_file '$file_name' - write_file: $!" ) ;
 638                 goto &_error ;
 639         }
 640
 641         return $write_result ;
 642 }
 643
 644 # edit a file as a scalar in $_
 645
 646 *ef = \&edit_file ;
 647
 648 sub edit_file(&$;$) {
 649
 650         my( $edit_code, $file_name, $opts ) = @_ ;
 651         $opts = {} unless ref $opts eq 'HASH' ;
 652
 653 #       my $edit_code = shift ;
 654 #       my $file_name = shift ;
 655 #       my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 656
 657 #print "FILE $file_name\n" ;
 658
 659 # delete unsupported options
 660
 661         my @bad_opts =
 662                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 663
 664         delete @{$opts}{@bad_opts} ;
 665
 666 # keep the user err_mode and force croaking on internal errors
 667
 668         my $err_mode = delete $opts->{err_mode} ;
 669         $opts->{ err_mode } = 'croak' ;
 670
 671 # get a scalar ref for speed and slurp the file into a scalar
 672
 673         $opts->{ scalar_ref } = 1 ;
 674         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 675
 676         if ( $@ ) {
 677
 678                 @_ = ( { err_mode => $err_mode },
 679                         "edit_file '$file_name' - read_file: $!" ) ;
 680                 goto &_error ;
 681         }
 682
 683 #print "EXIST [$$existing_data]\n" ;
 684
 685         my( $edited_data ) = map { $edit_code->(); $_ } $$existing_data ;
 686
 687         $opts->{atomic} = 1 ;
 688         my $write_result =
 689                 eval { write_file( $file_name, $opts, $edited_data ) } ;
 690
 691         if ( $@ ) {
 692
 693                 @_ = ( { err_mode => $err_mode },
 694                         "edit_file '$file_name' - write_file: $!" ) ;
 695                 goto &_error ;
 696         }
 697
 698         return $write_result ;
 699 }
 700
 701 *efl = \&edit_file_lines ;
 702
 703 sub edit_file_lines(&$;$) {
 704
 705         my( $edit_code, $file_name, $opts ) = @_ ;
 706         $opts = {} unless ref $opts eq 'HASH' ;
 707
 708 #       my $edit_code = shift ;
 709 #       my $file_name = shift ;
 710 #       my $opts = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 711
 712 #print "FILE $file_name\n" ;
 713
 714 # delete unsupported options
 715
 716         my @bad_opts =
 717                 grep $_ ne 'err_mode' && $_ ne 'binmode', keys %{$opts} ;
 718
 719         delete @{$opts}{@bad_opts} ;
 720
 721 # keep the user err_mode and force croaking on internal errors
 722
 723         my $err_mode = delete $opts->{err_mode} ;
 724         $opts->{ err_mode } = 'croak' ;
 725
 726 # get an array ref for speed and slurp the file into lines
 727
 728         $opts->{ array_ref } = 1 ;
 729         my $existing_data = eval { read_file( $file_name, $opts ) } ;
 730
 731         if ( $@ ) {
 732
 733                 @_ = ( { err_mode => $err_mode },
 734                         "edit_file_lines '$file_name' - read_file: $!" ) ;
 735                 goto &_error ;
 736         }
 737
 738 #print "EXIST [$$existing_data]\n" ;
 739
 740         my @edited_data = map { $edit_code->(); $_ } @$existing_data ;
 741
 742         $opts->{atomic} = 1 ;
 743         my $write_result =
 744                 eval { write_file( $file_name, $opts, @edited_data ) } ;
 745
 746         if ( $@ ) {
 747
 748                 @_ = ( { err_mode => $err_mode },
 749                         "edit_file_lines '$file_name' - write_file: $!" ) ;
 750                 goto &_error ;
 751         }
 752
 753         return $write_result ;
 754 }
 755
 756 # basic wrapper around opendir/readdir
 757
 758 sub read_dir {
 759
 760         my $dir = shift ;
 761         my $opts = ( ref $_[0] eq 'HASH' ) ? shift : { @_ } ;
 762
 763 # this handle will be destroyed upon return
 764
 765         local(*DIRH);
 766
 767 # open the dir and handle any errors
 768
 769         unless ( opendir( DIRH, $dir ) ) {
 770
 771                 @_ = ( $opts, "read_dir '$dir' - opendir: $!" ) ;
 772                 goto &_error ;
 773         }
 774
 775         my @dir_entries = readdir(DIRH) ;
 776
 777         @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries )
 778                 unless $opts->{'keep_dot_dot'} ;
 779
 780         if ( $opts->{'prefix'} ) {
 781
 782                 substr( $_, 0, 0, "$dir/" ) for @dir_entries ;
 783         }
 784
 785         return @dir_entries if wantarray ;
 786         return \@dir_entries ;
 787 }
 788
 789 # error handling section
 790 #
 791 # all the error handling uses magic goto so the caller will get the
 792 # error message as if from their code and not this module. if we just
 793 # did a call on the error code, the carp/croak would report it from
 794 # this module since the error sub is one level down on the call stack
 795 # from read_file/write_file/read_dir.
 796
 797
 798 my %err_func = (
 799         'carp'  => \&carp,
 800         'croak' => \&croak,
 801 ) ;
 802
 803 sub _error {
 804
 805         my( $opts, $err_msg ) = @_ ;
 806
 807 # get the error function to use
 808
 809         my $func = $err_func{ $opts->{'err_mode'} || 'croak' } ;
 810
 811 # if we didn't find it in our error function hash, they must have set
 812 # it to quiet and we don't do anything.
 813
 814         return unless $func ;
 815
 816 # call the carp/croak function
 817
 818         $func->($err_msg) if $func ;
 819
 820 # return a hard undef (in list context this will be a single value of
 821 # undef which is not a legal in-band value)
 822
 823         return undef ;
 824 }
 825
 826 1;
 827 __END__
 828
 829 =head1 NAME
 830
 831 File::Slurp - Simple and Efficient Reading/Writing/Modifying of Complete Files
 832
 833 =head1 SYNOPSIS
 834
 835   use File::Slurp;
 836
 837 # read in a whole file into a scalar
 838   my $text = read_file( 'filename' ) ;
 839
 840 # read in a whole file into an array of lines
 841   my @lines = read_file( 'filename' ) ;
 842
 843 # write out a whole file from a scalar
 844   write_file( 'filename', $text ) ;
 845
 846 # write out a whole file from an array of lines
 847   write_file( 'filename', @lines ) ;
 848
 849 # Here is a simple and fast way to load and save a simple config file
 850 # made of key=value lines.
 851   my %conf = read_file( $file_name ) =~ /^(\w+)=(.*)$/mg ;
 852   write_file( $file_name, {atomic => 1}, map "$_=$conf{$_}\n", keys %conf ) ;
 853
 854 # insert text at the beginning of a file
 855   prepend_file( 'filename', $text ) ;
 856
 857 # in-place edit to replace all 'foo' with 'bar' in file
 858   edit_file { s/foo/bar/g } 'filename' ;
 859
 860 # in-place edit to delete all lines with 'foo' from file
 861   edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
 862
 863 # read in a whole directory of file names (skipping . and ..)
 864   my @files = read_dir( '/path/to/dir' ) ;
 865
 866 =head1 DESCRIPTION
 867
 868 This module provides subs that allow you to read or write entire files
 869 with one simple call. They are designed to be simple to use, have
 870 flexible ways to pass in or get the file contents and to be very
 871 efficient.  There is also a sub to read in all the files in a
 872 directory other than C<.> and C<..>
 873
 874 These slurp/spew subs work for files, pipes and sockets, stdio,
 875 pseudo-files, and the DATA handle. Read more about why slurping files is
 876 a good thing in the file 'slurp_article.pod' in the extras/ directory.
 877
 878 If you are interested in how fast these calls work, check out the
 879 slurp_bench.pl program in the extras/ directory. It compares many
 880 different forms of slurping. You can select the I/O direction, context
 881 and file sizes. Use the --help option to see how to run it.
 882
 883 =head2 B<read_file>
 884
 885 This sub reads in an entire file and returns its contents to the
 886 caller.  In scalar context it returns the entire file as a single
 887 scalar. In list context it will return a list of lines (using the
 888 current value of $/ as the separator including support for paragraph
 889 mode when it is set to '').
 890
 891   my $text = read_file( 'filename' ) ;
 892   my $bin = read_file( 'filename' { binmode => ':raw' } ) ;
 893   my @lines = read_file( 'filename' ) ;
 894   my $lines = read_file( 'filename', array_ref => 1 ) ;
 895
 896 The first argument is the file to slurp in. If the next argument is a
 897 hash reference, then it is used as the options. Otherwise the rest of
 898 the argument list are is used as key/value options.
 899
 900 If the file argument is a handle (if it is a ref and is an IO or GLOB
 901 object), then that handle is slurped in. This mode is supported so you
 902 slurp handles such as C<DATA> and C<STDIN>. See the test handle.t for
 903 an example that does C<open( '-|' )> and the child process spews data
 904 to the parant which slurps it in.  All of the options that control how
 905 the data is returned to the caller still work in this case.
 906
 907 If the first argument is an overloaded object then its stringified value
 908 is used for the filename and that file is opened.  This is a new feature
 909 in 9999.14. See the stringify.t test for an example.
 910
 911 By default C<read_file> returns an undef in scalar contex or a single
 912 undef in list context if it encounters an error. Those are both
 913 impossible to get with a clean read_file call which means you can check
 914 the return value and always know if you had an error. You can change how
 915 errors are handled with the C<err_mode> option.
 916
 917 Speed Note: If you call read_file and just get a scalar return value
 918 it is now optimized to handle shorter files. This is only used if no
 919 options are used, the file is shorter then 100k bytes, the filename is
 920 a plain scalar and a scalar file is returned. If you want the fastest
 921 slurping, use the C<buf_ref> or C<scalar_ref> options (see below)
 922
 923 NOTE: as of version 9999.06, read_file works correctly on the C<DATA>
 924 handle. It used to need a sysseek workaround but that is now handled
 925 when needed by the module itself.
 926
 927 You can optionally request that C<slurp()> is exported to your code. This
 928 is an alias for read_file and is meant to be forward compatible with
 929 Perl 6 (which will have slurp() built-in).
 930
 931 The options for C<read_file> are:
 932
 933 =head3 binmode
 934
 935 If you set the binmode option, then its value is passed to a call to
 936 binmode on the opened handle. You can use this to set the file to be
 937 read in binary mode, utf8, etc. See perldoc -f binmode for more.
 938
 939         my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
 940         my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ;
 941
 942 =head3 array_ref
 943
 944 If this boolean option is set, the return value (only in scalar
 945 context) will be an array reference which contains the lines of the
 946 slurped file. The following two calls are equivalent:
 947
 948         my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
 949         my $lines_ref = [ read_file( $bin_file ) ] ;
 950
 951 =head3 chomp
 952
 953 If this boolean option is set, the lines are chomped. This only
 954 happens if you are slurping in a list context or using the
 955 C<array_ref> option.
 956
 957 =head3 scalar_ref
 958
 959 If this boolean option is set, the return value (only in scalar
 960 context) will be an scalar reference to a string which is the contents
 961 of the slurped file. This will usually be faster than returning the
 962 plain scalar. It will also save memory as it will not make a copy of
 963 the file to return. Run the extras/slurp_bench.pl script to see speed
 964 comparisons.
 965
 966         my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
 967
 968 =head3 buf_ref
 969
 970 You can use this option to pass in a scalar reference and the slurped
 971 file contents will be stored in the scalar. This can be used in
 972 conjunction with any of the other options. This saves an extra copy of
 973 the slurped file and can lower ram usage vs returning the file. It is
 974 usually the fastest way to read a file into a scalar. Run the
 975 extras/slurp_bench.pl script to see speed comparisons.
 976
 977
 978         read_file( $bin_file, buf_ref => \$buffer ) ;
 979
 980 =head3 blk_size
 981
 982 You can use this option to set the block size used when slurping from
 983 an already open handle (like \*STDIN). It defaults to 1MB.
 984
 985         my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
 986                                              array_ref => 1 ) ;
 987
 988 =head3 err_mode
 989
 990 You can use this option to control how read_file behaves when an error
 991 occurs. This option defaults to 'croak'. You can set it to 'carp' or to
 992 'quiet' to have no special error handling. This code wants to carp and
 993 then read another file if it fails.
 994
 995         my $text_ref = read_file( $file, err_mode => 'carp' ) ;
 996         unless ( $text_ref ) {
 997
 998                 # read a different file but croak if not found
 999                 $text_ref = read_file( $another_file ) ;
1000         }
1001
1002         # process ${$text_ref}
1003
1004 =head2 B<write_file>
1005
1006 This sub writes out an entire file in one call.
1007
1008   write_file( 'filename', @data ) ;
1009
1010 The first argument to C<write_file> is the filename. The next argument
1011 is an optional hash reference and it contains key/values that can
1012 modify the behavior of C<write_file>. The rest of the argument list is
1013 the data to be written to the file.
1014
1015   write_file( 'filename', {append => 1 }, @data ) ;
1016   write_file( 'filename', {binmode => ':raw'}, $buffer ) ;
1017
1018 As a shortcut if the first data argument is a scalar or array reference,
1019 it is used as the only data to be written to the file. Any following
1020 arguments in @_ are ignored. This is a faster way to pass in the output
1021 to be written to the file and is equivalent to the C<buf_ref> option of
1022 C<read_file>. These following pairs are equivalent but the pass by
1023 reference call will be faster in most cases (especially with larger
1024 files).
1025
1026   write_file( 'filename', \$buffer ) ;
1027   write_file( 'filename', $buffer ) ;
1028
1029   write_file( 'filename', \@lines ) ;
1030   write_file( 'filename', @lines ) ;
1031
1032 If the first argument is a handle (if it is a ref and is an IO or GLOB
1033 object), then that handle is written to. This mode is supported so you
1034 spew to handles such as \*STDOUT. See the test handle.t for an example
1035 that does C<open( '-|' )> and child process spews data to the parent
1036 which slurps it in.  All of the options that control how the data are
1037 passed into C<write_file> still work in this case.
1038
1039 If the first argument is an overloaded object then its stringified value
1040 is used for the filename and that file is opened.  This is new feature
1041 in 9999.14. See the stringify.t test for an example.
1042
1043 By default C<write_file> returns 1 upon successfully writing the file or
1044 undef if it encountered an error. You can change how errors are handled
1045 with the C<err_mode> option.
1046
1047 The options are:
1048
1049 =head3 binmode
1050
1051 If you set the binmode option, then its value is passed to a call to
1052 binmode on the opened handle. You can use this to set the file to be
1053 read in binary mode, utf8, etc. See perldoc -f binmode for more.
1054
1055         write_file( $bin_file, {binmode => ':raw'}, @data ) ;
1056         write_file( $bin_file, {binmode => ':utf8'}, $utf_text ) ;
1057
1058 =head3 perms
1059
1060 The perms option sets the permissions of newly-created files. This value
1061 is modified by your process's umask and defaults to 0666 (same as
1062 sysopen).
1063
1064 NOTE: this option is new as of File::Slurp version 9999.14;
1065
1066 =head3 buf_ref
1067
1068 You can use this option to pass in a scalar reference which has the
1069 data to be written. If this is set then any data arguments (including
1070 the scalar reference shortcut) in @_ will be ignored. These are
1071 equivalent:
1072
1073         write_file( $bin_file, { buf_ref => \$buffer } ) ;
1074         write_file( $bin_file, \$buffer ) ;
1075         write_file( $bin_file, $buffer ) ;
1076
1077 =head3 atomic
1078
1079 If you set this boolean option, the file will be written to in an
1080 atomic fashion. A temporary file name is created by appending the pid
1081 ($$) to the file name argument and that file is spewed to. After the
1082 file is closed it is renamed to the original file name (and rename is
1083 an atomic operation on most OS's). If the program using this were to
1084 crash in the middle of this, then the file with the pid suffix could
1085 be left behind.
1086
1087 =head3 append
1088
1089 If you set this boolean option, the data will be written at the end of
1090 the current file. Internally this sets the sysopen mode flag O_APPEND.
1091
1092         write_file( $file, {append => 1}, @data ) ;
1093
1094  You
1095 can import append_file and it does the same thing.
1096
1097 =head3 no_clobber
1098
1099 If you set this boolean option, an existing file will not be overwritten.
1100
1101         write_file( $file, {no_clobber => 1}, @data ) ;
1102
1103 =head3 err_mode
1104
1105 You can use this option to control how C<write_file> behaves when an
1106 error occurs. This option defaults to 'croak'. You can set it to
1107 'carp' or to 'quiet' to have no error handling other than the return
1108 value. If the first call to C<write_file> fails it will carp and then
1109 write to another file. If the second call to C<write_file> fails, it
1110 will croak.
1111
1112         unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
1113
1114                 # write a different file but croak if not found
1115                 write_file( $other_file, \$data ) ;
1116         }
1117
1118 =head2 overwrite_file
1119
1120 This sub is just a typeglob alias to write_file since write_file
1121 always overwrites an existing file. This sub is supported for
1122 backwards compatibility with the original version of this module. See
1123 write_file for its API and behavior.
1124
1125 =head2 append_file
1126
1127 This sub will write its data to the end of the file. It is a wrapper
1128 around write_file and it has the same API so see that for the full
1129 documentation. These calls are equivalent:
1130
1131         append_file( $file, @data ) ;
1132         write_file( $file, {append => 1}, @data ) ;
1133
1134
1135 =head2 prepend_file
1136
1137 This sub writes data to the beginning of a file. The previously existing
1138 data is written after that so the effect is prepending data in front of
1139 a file. It is a counterpart to the append_file sub in this module. It
1140 works by first using C<read_file> to slurp in the file and then calling
1141 C<write_file> with the new data and the existing file data.
1142
1143 The first argument to C<prepend_file> is the filename. The next argument
1144 is an optional hash reference and it contains key/values that can modify
1145 the behavior of C<prepend_file>. The rest of the argument list is the
1146 data to be written to the file and that is passed to C<write_file> as is
1147 (see that for allowed data).
1148
1149 Only the C<binmode> and C<err_mode> options are supported. The
1150 C<write_file> call has the C<atomic> option set so you will always have
1151 a consistant file. See above for more about those options.
1152
1153 C<prepend_file> is not exported by default, you need to import it
1154 explicitly.
1155
1156         use File::Slurp qw( prepend_file ) ;
1157         prepend_file( $file, $header ) ;
1158         prepend_file( $file, \@lines ) ;
1159         prepend_file( $file, { binmode => 'raw:'}, $bin_data ) ;
1160
1161
1162 =head2 edit_file, edit_file_lines
1163
1164 These subs read in a file into $_, execute a code block which should
1165 modify $_ and then write $_ back to the file. The difference between
1166 them is that C<edit_file> reads the whole file into $_ and calls the
1167 code block one time. With C<edit_file_lines> each line is read into $_
1168 and the code is called for each line. In both cases the code should
1169 modify $_ if desired and it will be written back out. These subs are
1170 the equivalent of the -pi command line options of Perl but you can
1171 call them from inside your program and not fork out a process. They
1172 are in @EXPORT_OK so you need to request them to be imported on the
1173 use line or you can import both of them with:
1174
1175         use File::Slurp qw( :edit ) ;
1176
1177 The first argument to C<edit_file> and C<edit_file_lines> is a code
1178 block or a code reference. The code block is not followed by a comma
1179 (as with grep and map) but a code reference is followed by a
1180 comma. See the examples below for both styles. The next argument is
1181 the filename. The last argument is an optional hash reference and it
1182 contains key/values that can modify the behavior of
1183 C<prepend_file>.
1184
1185 Only the C<binmode> and C<err_mode> options are supported. The
1186 C<write_file> call has the C<atomic> option set so you will always
1187 have a consistant file. See above for more about those options.
1188
1189 Each group of calls below show a Perl command line instance and the
1190 equivalent calls to C<edit_file> and C<edit_file_lines>.
1191
1192         perl -0777 -pi -e 's/foo/bar/g' filename
1193         use File::Slurp qw( edit_file ) ;
1194         edit_file { s/foo/bar/g } 'filename' ;
1195         edit_file sub { s/foo/bar/g }, 'filename' ;
1196         edit_file \&replace_foo, 'filename' ;
1197         sub replace_foo { s/foo/bar/g }
1198
1199         perl -pi -e '$_ = "" if /foo/' filename
1200         use File::Slurp qw( edit_file_lines ) ;
1201         use File::Slurp ;
1202         edit_file_lines { $_ = '' if /foo/ } 'filename' ;
1203         edit_file_lines sub { $_ = '' if /foo/ }, 'filename' ;
1204         edit_file \&delete_foo, 'filename' ;
1205         sub delete_foo { $_ = '' if /foo/ }
1206
1207 =head2 read_dir
1208
1209 This sub reads all the file names from directory and returns them to
1210 the caller but C<.> and C<..> are removed by default.
1211
1212         my @files = read_dir( '/path/to/dir' ) ;
1213
1214 The first argument is the path to the directory to read.  If the next
1215 argument is a hash reference, then it is used as the options.
1216 Otherwise the rest of the argument list are is used as key/value
1217 options.
1218
1219 In list context C<read_dir> returns a list of the entries in the
1220 directory. In a scalar context it returns an array reference which has
1221 the entries.
1222
1223 =head3 err_mode
1224
1225 If the C<err_mode> option is set, it selects how errors are handled (see
1226 C<err_mode> in C<read_file> or C<write_file>).
1227
1228 =head3 keep_dot_dot
1229
1230 If this boolean option is set, C<.> and C<..> are not removed from the
1231 list of files.
1232
1233         my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ;
1234
1235 =head3 prefix
1236
1237 If this boolean option is set, the string "$dir/" is prefixed to each
1238 dir entry. This means you can directly use the results to open
1239 files. A common newbie mistake is not putting the directory in front
1240 of entries when opening themn.
1241
1242         my @paths = read_dir( '/path/to/dir', prefix => 1 ) ;
1243
1244 =head2 EXPORT
1245
1246   These are exported by default or with
1247         use File::Slurp qw( :std ) ;
1248
1249   read_file write_file overwrite_file append_file read_dir
1250
1251   These are exported with
1252         use File::Slurp qw( :edit ) ;
1253
1254   edit_file edit_file_lines
1255
1256   You can get all subs in the module exported with
1257         use File::Slurp qw( :all ) ;
1258
1259 =head2 LICENSE
1260
1261   Same as Perl.
1262
1263 =head2 SEE ALSO
1264
1265 An article on file slurping in extras/slurp_article.pod. There is
1266 also a benchmarking script in extras/slurp_bench.pl.
1267
1268 =head2 BUGS
1269
1270 If run under Perl 5.004, slurping from the DATA handle will fail as
1271 that requires B.pm which didn't get into core until 5.005.
1272
1273 =head1 AUTHOR
1274
1275 Uri Guttman, E<lt>uri AT stemsystems DOT comE<gt>
1276
1277 =cut