slurp_talk/Slurp.pm

   1 package File::Slurp;
   2
   3 use strict;
   4
   5 use Carp ;
   6 use Fcntl qw( :DEFAULT :seek ) ;
   7 use Symbol ;
   8
   9 use base 'Exporter' ;
  10 use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION  @EXPORT) ;
  11
  12 %EXPORT_TAGS = ( 'all' => [
  13         qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;
  14
  15 #@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
  16 @EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
  17
  18 $VERSION = '9999.01';
  19
  20
  21 sub read_file {
  22
  23         my( $file_name, %args ) = @_ ;
  24
  25         my $buf ;
  26         my $buf_ref = $args{'buf_ref'} || \$buf ;
  27
  28         ${$buf_ref} = '' ;
  29
  30         my( $read_fh, $size_left, $blk_size ) ;
  31
  32         if ( defined( fileno( $file_name ) ) ) {
  33
  34                 $read_fh = $file_name ;
  35                 $blk_size = $args{'blk_size'} || 1024 * 1024 ;
  36                 $size_left = $blk_size ;
  37         }
  38         else {
  39
  40                 my $mode = O_RDONLY ;
  41                 $mode |= O_BINARY if $args{'binmode'} ;
  42
  43
  44                 $read_fh = gensym ;
  45                 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
  46                         @_ = ( \%args, "read_file '$file_name' - sysopen: $!");
  47                         goto &error ;
  48                 }
  49
  50                 $size_left = -s $read_fh ;
  51         }
  52
  53         while( 1 ) {
  54
  55                 my $read_cnt = sysread( $read_fh, ${$buf_ref},
  56                                 $size_left, length ${$buf_ref} ) ;
  57
  58                 if ( defined $read_cnt ) {
  59
  60                         last if $read_cnt == 0 ;
  61                         next if $blk_size ;
  62
  63                         $size_left -= $read_cnt ;
  64                         last if $size_left <= 0 ;
  65                         next ;
  66                 }
  67
  68 # handle the read error
  69
  70                 @_ = ( \%args, "read_file '$file_name' - sysread: $!");
  71                 goto &error ;
  72         }
  73
  74 # handle array ref
  75
  76         return [ split( m|(?<=$/)|, ${$buf_ref} ) ] if $args{'array_ref'}  ;
  77
  78 # handle list context
  79
  80         return split( m|(?<=$/)|, ${$buf_ref} ) if wantarray ;
  81
  82 # handle scalar ref
  83
  84         return $buf_ref if $args{'scalar_ref'} ;
  85
  86 # handle scalar context
  87
  88         return ${$buf_ref} if defined wantarray ;
  89
  90 # handle void context (return scalar by buffer reference)
  91
  92         return ;
  93 }
  94
  95 sub write_file {
  96
  97         my $file_name = shift ;
  98
  99         my $args = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
 100
 101         my( $buf_ref, $write_fh, $no_truncate ) ;
 102
 103 # get the buffer ref - either passed by name or first data arg or autovivified
 104 # ${$buf_ref} will have the data after this
 105
 106         if ( ref $args->{'buf_ref'} eq 'SCALAR' ) {
 107
 108                 $buf_ref = $args->{'buf_ref'} ;
 109         }
 110         elsif ( ref $_[0] eq 'SCALAR' ) {
 111
 112                 $buf_ref = shift ;
 113         }
 114         elsif ( ref $_[0] eq 'ARRAY' ) {
 115
 116                 ${$buf_ref} = join '', @{$_[0]} ;
 117         }
 118         else {
 119
 120                 ${$buf_ref} = join '', @_ ;
 121         }
 122
 123         if ( defined( fileno( $file_name ) ) ) {
 124
 125                 $write_fh = $file_name ;
 126                 $no_truncate = 1 ;
 127         }
 128         else {
 129
 130                 my $mode = O_WRONLY | O_CREAT ;
 131                 $mode |= O_BINARY if $args->{'binmode'} ;
 132                 $mode |= O_APPEND if $args->{'append'} ;
 133
 134                 $write_fh = gensym ;
 135                 unless ( sysopen( $write_fh, $file_name, $mode ) ) {
 136                         @_ = ( $args, "write_file '$file_name' - sysopen: $!");
 137                         goto &error ;
 138                 }
 139
 140         }
 141
 142         my $size_left = length( ${$buf_ref} ) ;
 143         my $offset = 0 ;
 144
 145         do {
 146                 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
 147                                 $size_left, $offset ) ;
 148
 149                 unless ( defined $write_cnt ) {
 150
 151                         @_ = ( $args, "write_file '$file_name' - syswrite: $!");
 152                         goto &error ;
 153                 }
 154
 155                 $size_left -= $write_cnt ;
 156                 $offset += $write_cnt ;
 157
 158         } while( $size_left > 0 ) ;
 159
 160         truncate( $write_fh,
 161                   sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
 162
 163         close( $write_fh ) ;
 164
 165         return 1 ;
 166 }
 167
 168 # this is for backwards compatibility with the previous File::Slurp module.
 169 # write_file always overwrites an existing file
 170
 171 *overwrite_file = \&write_file ;
 172
 173 # the current write_file has an append mode so we use that. this
 174 # supports the same API with an optional second argument which is a
 175 # hash ref of options.
 176
 177 sub append_file {
 178
 179         my $args = $_[1] ;
 180         if ( ref $args eq 'HASH' ) {
 181                 $args->{append} = 1 ;
 182         }
 183         else {
 184
 185                 splice( @_, 1, 0, { append => 1 } ) ;
 186         }
 187
 188         goto &write_file
 189 }
 190
 191 sub read_dir {
 192         my ($dir, %args ) = @_;
 193
 194         local(*DIRH);
 195
 196         if ( opendir( DIRH, $dir ) ) {
 197                 return grep( $_ ne "." && $_ ne "..", readdir(DIRH));
 198         }
 199
 200         @_ = ( \%args, "read_dir '$dir' - opendir: $!" ) ; goto &error ;
 201
 202         return undef ;
 203 }
 204
 205 my %err_func = (
 206         carp => \&carp,
 207         croak => \&croak,
 208 ) ;
 209
 210 sub error {
 211
 212         my( $args, $err_msg ) = @_ ;
 213
 214 #print $err_msg ;
 215
 216         my $func = $err_func{ $args->{'err_mode'} || 'croak' } ;
 217
 218         return unless $func ;
 219
 220         $func->($err_msg) ;
 221
 222         return undef ;
 223 }
 224
 225 1;
 226 __END__
 227
 228 =head1 NAME
 229
 230 File::Slurp - Efficient Reading/Writing of Complete Files
 231
 232 =head1 SYNOPSIS
 233
 234   use File::Slurp;
 235
 236   my $text = read_file( 'filename' ) ;
 237   my @lines = read_file( 'filename' ) ;
 238
 239   write_file( 'filename', @lines ) ;
 240
 241 =head1 DESCRIPTION
 242
 243 This module provides subs that allow you to read or write entire files
 244 with one simple call. They are designed to be simple to use, have
 245 flexible ways to pass in or get the file contents and to be very
 246 efficient.  There is also a sub to read in all the files in a
 247 directory other than C<.> and C<..>
 248
 249 Note that these slurp/spew subs work only for files and not for pipes
 250 or stdio. If you want to slurp the latter, use the standard techniques
 251 such as setting $/ to undef, reading <> in a list context, or printing
 252 all you want to STDOUT.
 253
 254 =head2 B<read_file>
 255
 256 This sub reads in an entire file and returns its contents to the
 257 caller. In list context it will return a list of lines (using the
 258 current value of $/ as the separator. In scalar context it returns the
 259 entire file as a single scalar.
 260
 261   my $text = read_file( 'filename' ) ;
 262   my @lines = read_file( 'filename' ) ;
 263
 264 The first argument to C<read_file> is the filename and the rest of the
 265 arguments are key/value pairs which are optional and which modify the
 266 behavior of the call. Other than binmode the options all control how
 267 the slurped file is returned to the caller.
 268
 269 If the first argument is a file handle reference or I/O object (if
 270 fileno returns a defined value), then that handle is slurped in. This
 271 mode is supported so you slurp handles such as <DATA>, \*STDIN. See
 272 the test handle.t for an example that does C<open( '-|' )> and child
 273 process spews data to the parant which slurps it in.  All of the
 274 options that control how the data is returned to the caller still work
 275 in this case.
 276
 277 The options are:
 278
 279 =head3 binmode
 280
 281 If you set the binmode option, then the file will be slurped in binary
 282 mode.
 283
 284         my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
 285
 286 NOTE: this actually sets the O_BINARY mode flag for sysopen. It
 287 probably should call binmode and pass its argument to support other
 288 file modes.
 289
 290 =head3 array_ref
 291
 292 If this boolean option is set, the return value (only in scalar
 293 context) will be an array reference which contains the lines of the
 294 slurped file. The following two calls are equivilent:
 295
 296         my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
 297         my $lines_ref = [ read_file( $bin_file ) ] ;
 298
 299 =head3 scalar_ref
 300
 301 If this boolean option is set, the return value (only in scalar
 302 context) will be an scalar reference to a string which is the contents
 303 of the slurped file. This will usually be faster than returning the
 304 plain scalar.
 305
 306         my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
 307
 308 =head3 buf_ref
 309
 310 You can use this option to pass in a scalar reference and the slurped
 311 file contents will be stored in the scalar. This can be used in
 312 conjunction with any of the other options.
 313
 314         my $text_ref = read_file( $bin_file, buf_ref => \$buffer,
 315                                              array_ref => 1 ) ;
 316         my @lines = read_file( $bin_file, buf_ref => \$buffer ) ;
 317
 318 =head3 blk_size
 319
 320 You can use this option to set the block size used when slurping from an already open handle (like \*STDIN). It defaults to 1MB.
 321
 322         my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
 323                                              array_ref => 1 ) ;
 324
 325 =head3 err_mode
 326
 327 You can use this option to control how read_file behaves when an error
 328 occurs. This option defaults to 'croak'. You can set it to 'carp' or
 329 to 'quiet to have no error handling. This code wants to carp and then
 330 read abother file if it fails.
 331
 332         my $text_ref = read_file( $file, err_mode => 'carp' ) ;
 333         unless ( $text_ref ) {
 334
 335                 # read a different file but croak if not found
 336                 $text_ref = read_file( $another_file ) ;
 337         }
 338
 339         # process ${$text_ref}
 340
 341 =head2 B<write_file>
 342
 343 This sub writes out an entire file in one call.
 344
 345   write_file( 'filename', @data ) ;
 346
 347 The first argument to C<write_file> is the filename. The next argument
 348 is an optional hash reference and it contains key/values that can
 349 modify the behavior of C<write_file>. The rest of the argument list is
 350 the data to be written to the file.
 351
 352   write_file( 'filename', {append => 1 }, @data ) ;
 353   write_file( 'filename', {binmode => ':raw' }, $buffer ) ;
 354
 355 As a shortcut if the first data argument is a scalar or array
 356 reference, it is used as the only data to be written to the file. Any
 357 following arguments in @_ are ignored. This is a faster way to pass in
 358 the output to be written to the file and is equivilent to the
 359 C<buf_ref> option. These following pairs are equivilent but the pass
 360 by reference call will be faster in most cases (especially with larger
 361 files).
 362
 363   write_file( 'filename', \$buffer ) ;
 364   write_file( 'filename', $buffer ) ;
 365
 366   write_file( 'filename', \@lines ) ;
 367   write_file( 'filename', @lines ) ;
 368
 369 If the first argument is a file handle reference or I/O object (if
 370 fileno returns a defined value), then that handle is slurped in. This
 371 mode is supported so you spew to handles such as \*STDOUT. See the
 372 test handle.t for an example that does C<open( '-|' )> and child
 373 process spews data to the parant which slurps it in.  All of the
 374 options that control how the data is passes into C<write_file> still
 375 work in this case.
 376
 377 The options are:
 378
 379 =head3 binmode
 380
 381 If you set the binmode option, then the file will be written in binary
 382 mode.
 383
 384         write_file( $bin_file, {binmode => ':raw'}, @data ) ;
 385
 386 NOTE: this actually sets the O_BINARY mode flag for sysopen. It
 387 probably should call binmode and pass its argument to support other
 388 file modes.
 389
 390 =head3 buf_ref
 391
 392 You can use this option to pass in a scalar reference which has the
 393 data to be written. If this is set then any data arguments (including
 394 the scalar reference shortcut) in @_ will be ignored. These are
 395 equivilent:
 396
 397         write_file( $bin_file, { buf_ref => \$buffer } ) ;
 398         write_file( $bin_file, \$buffer ) ;
 399         write_file( $bin_file, $buffer ) ;
 400
 401 =head3 append
 402
 403 If you set this boolean option, the data will be written at the end of
 404 the current file.
 405
 406         write_file( $file, {append => 1}, @data ) ;
 407
 408 C<write_file> croaks if it cannot open the file. It returns true if it
 409 succeeded in writing out the file and undef if there was an
 410 error. (Yes, I know if it croaks it can't return anything but that is
 411 for when I add the options to select the error handling mode).
 412
 413 =head3 err_mode
 414
 415 You can use this option to control how C<write_file> behaves when an
 416 error occurs. This option defaults to 'croak'. You can set it to
 417 'carp' or to 'quiet to have no error handling. If the first call to
 418 C<write_file> fails it will carp and then write to another file. If the
 419 second call to C<write_file> fails, it will croak.
 420
 421         unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
 422
 423                 # write a different file but croak if not found
 424                 write_file( $other_file, \$data ) ;
 425         }
 426
 427 =head2 overwrite_file
 428
 429 This sub is just a typeglob alias to write_file since write_file
 430 always overwrites an existing file. This sub is supported for
 431 backwards compatibility with the original version of this module. See
 432 write_file for its API and behavior.
 433
 434 =head2 append_file
 435
 436 This sub will write its data to the end of the file. It is a wrapper
 437 around write_file and it has the same API so see that for the full
 438 documentation. These calls are equivilent:
 439
 440         append_file( $file, @data ) ;
 441         write_file( $file, {append => 1}, @data ) ;
 442
 443 =head2 read_dir
 444
 445 This sub reads all the file names from directory and returns them to
 446 the caller but C<.> and C<..> are removed.
 447
 448         my @files = read_dir( '/path/to/dir' ) ;
 449
 450 It croaks if it cannot open the directory.
 451
 452 =head2 EXPORT
 453
 454   read_file write_file overwrite_file append_file read_dir
 455
 456 =head1 AUTHOR
 457
 458 Uri Guttman, E<lt>uri@stemsystems.comE<gt>
 459
 460 =cut