Commit | Line | Data |
635c7876 |
1 | package File::Slurp; |
2 | |
e2c51d31 |
3 | my $printed ; |
4 | |
635c7876 |
5 | use strict; |
6 | |
7 | use Carp ; |
635c7876 |
8 | use Fcntl qw( :DEFAULT ) ; |
e2c51d31 |
9 | use POSIX qw( :fcntl_h ) ; |
635c7876 |
10 | use Symbol ; |
11 | |
e2c51d31 |
12 | use base 'Exporter' ; |
13 | use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT ) ; |
14 | |
15 | %EXPORT_TAGS = ( 'all' => [ |
16 | qw( read_file write_file overwrite_file append_file read_dir ) ] ) ; |
17 | |
18 | @EXPORT = ( @{ $EXPORT_TAGS{'all'} } ); |
19 | @EXPORT_OK = qw( slurp ) ; |
20 | |
8ed110f9 |
21 | $VERSION = '9999.14'; |
e2c51d31 |
22 | |
9aab46ab |
23 | our $max_fast_slurp_size = 1024 * 100 ; |
24 | |
635c7876 |
25 | my $is_win32 = $^O =~ /win32/i ; |
26 | |
27 | # Install subs for various constants that aren't set in older perls |
28 | # (< 5.005). Fcntl on old perls uses Exporter to define subs without a |
29 | # () prototype These can't be overridden with the constant pragma or |
30 | # we get a prototype mismatch. Hence this less than aesthetically |
31 | # appealing BEGIN block: |
32 | |
33 | BEGIN { |
8ed110f9 |
34 | unless( defined &SEEK_SET ) { |
635c7876 |
35 | *SEEK_SET = sub { 0 }; |
36 | *SEEK_CUR = sub { 1 }; |
37 | *SEEK_END = sub { 2 }; |
38 | } |
39 | |
8ed110f9 |
40 | unless( defined &O_BINARY ) { |
635c7876 |
41 | *O_BINARY = sub { 0 }; |
42 | *O_RDONLY = sub { 0 }; |
43 | *O_WRONLY = sub { 1 }; |
44 | } |
45 | |
f02156f2 |
46 | unless ( defined &O_APPEND ) { |
635c7876 |
47 | |
48 | if ( $^O =~ /olaris/ ) { |
49 | *O_APPEND = sub { 8 }; |
50 | *O_CREAT = sub { 256 }; |
51 | *O_EXCL = sub { 1024 }; |
52 | } |
53 | elsif ( $^O =~ /inux/ ) { |
54 | *O_APPEND = sub { 1024 }; |
55 | *O_CREAT = sub { 64 }; |
56 | *O_EXCL = sub { 128 }; |
57 | } |
58 | elsif ( $^O =~ /BSD/i ) { |
59 | *O_APPEND = sub { 8 }; |
60 | *O_CREAT = sub { 512 }; |
61 | *O_EXCL = sub { 2048 }; |
62 | } |
63 | } |
64 | } |
65 | |
66 | # print "OS [$^O]\n" ; |
67 | |
68 | # print "O_BINARY = ", O_BINARY(), "\n" ; |
69 | # print "O_RDONLY = ", O_RDONLY(), "\n" ; |
70 | # print "O_WRONLY = ", O_WRONLY(), "\n" ; |
71 | # print "O_APPEND = ", O_APPEND(), "\n" ; |
72 | # print "O_CREAT ", O_CREAT(), "\n" ; |
73 | # print "O_EXCL ", O_EXCL(), "\n" ; |
74 | |
635c7876 |
75 | |
76 | *slurp = \&read_file ; |
77 | |
78 | sub read_file { |
79 | |
80 | my( $file_name, %args ) = @_ ; |
81 | |
8ed110f9 |
82 | if ( !ref $file_name && 0 && |
9aab46ab |
83 | -e $file_name && -s _ < $max_fast_slurp_size && ! %args && !wantarray ) { |
e2c51d31 |
84 | |
85 | local( *FH ) ; |
86 | |
e2c51d31 |
87 | unless( open( FH, $file_name ) ) { |
88 | |
89 | @_ = ( \%args, "read_file '$file_name' - sysopen: $!"); |
90 | goto &_error ; |
91 | } |
92 | |
e2c51d31 |
93 | my $read_cnt = sysread( FH, my $buf, -s _ ) ; |
94 | |
95 | unless ( defined $read_cnt ) { |
96 | |
97 | # handle the read error |
98 | |
8ed110f9 |
99 | @_ = ( \%args, |
100 | "read_file '$file_name' - small sysread: $!"); |
e2c51d31 |
101 | goto &_error ; |
102 | } |
103 | |
104 | return $buf ; |
105 | } |
106 | |
635c7876 |
107 | # set the buffer to either the passed in one or ours and init it to the null |
108 | # string |
109 | |
110 | my $buf ; |
111 | my $buf_ref = $args{'buf_ref'} || \$buf ; |
112 | ${$buf_ref} = '' ; |
113 | |
114 | my( $read_fh, $size_left, $blk_size ) ; |
115 | |
116 | # check if we are reading from a handle (glob ref or IO:: object) |
117 | |
118 | if ( ref $file_name ) { |
119 | |
120 | # slurping a handle so use it and don't open anything. |
121 | # set the block size so we know it is a handle and read that amount |
122 | |
123 | $read_fh = $file_name ; |
124 | $blk_size = $args{'blk_size'} || 1024 * 1024 ; |
125 | $size_left = $blk_size ; |
126 | |
127 | # DEEP DARK MAGIC. this checks the UNTAINT IO flag of a |
128 | # glob/handle. only the DATA handle is untainted (since it is from |
129 | # trusted data in the source file). this allows us to test if this is |
130 | # the DATA handle and then to do a sysseek to make sure it gets |
131 | # slurped correctly. on some systems, the buffered i/o pointer is not |
132 | # left at the same place as the fd pointer. this sysseek makes them |
133 | # the same so slurping with sysread will work. |
134 | |
135 | eval{ require B } ; |
136 | |
137 | if ( $@ ) { |
138 | |
139 | @_ = ( \%args, <<ERR ) ; |
140 | Can't find B.pm with this Perl: $!. |
141 | That module is needed to slurp the DATA handle. |
142 | ERR |
143 | goto &_error ; |
144 | } |
145 | |
146 | if ( B::svref_2object( $read_fh )->IO->IoFLAGS & 16 ) { |
147 | |
148 | # set the seek position to the current tell. |
149 | |
150 | sysseek( $read_fh, tell( $read_fh ), SEEK_SET ) || |
151 | croak "sysseek $!" ; |
152 | } |
153 | } |
154 | else { |
155 | |
156 | # a regular file. set the sysopen mode |
157 | |
158 | my $mode = O_RDONLY ; |
635c7876 |
159 | |
160 | #printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ; |
161 | |
162 | # open the file and handle any error |
163 | |
164 | $read_fh = gensym ; |
165 | unless ( sysopen( $read_fh, $file_name, $mode ) ) { |
166 | @_ = ( \%args, "read_file '$file_name' - sysopen: $!"); |
167 | goto &_error ; |
168 | } |
169 | |
cee624ab |
170 | if ( my $binmode = $args{'binmode'} ) { |
171 | binmode( $read_fh, $binmode ) ; |
172 | } |
173 | |
635c7876 |
174 | # get the size of the file for use in the read loop |
175 | |
176 | $size_left = -s $read_fh ; |
177 | |
f9940db7 |
178 | #print "SIZE $size_left\n" ; |
8ed110f9 |
179 | |
635c7876 |
180 | |
f9940db7 |
181 | # we need a blk_size if the size is 0 so we can handle pseudofiles like in |
182 | # /proc. these show as 0 size but have data to be slurped. |
183 | |
184 | unless( $size_left ) { |
185 | |
186 | $blk_size = $args{'blk_size'} || 1024 * 1024 ; |
187 | $size_left = $blk_size ; |
188 | } |
e2c51d31 |
189 | } |
190 | |
191 | |
8ed110f9 |
192 | # if ( $size_left < 10000 && keys %args == 0 && !wantarray ) { |
e2c51d31 |
193 | |
8ed110f9 |
194 | # #print "OPT\n" and $printed++ unless $printed ; |
e2c51d31 |
195 | |
8ed110f9 |
196 | # my $read_cnt = sysread( $read_fh, my $buf, $size_left ) ; |
e2c51d31 |
197 | |
8ed110f9 |
198 | # unless ( defined $read_cnt ) { |
e2c51d31 |
199 | |
8ed110f9 |
200 | # # handle the read error |
e2c51d31 |
201 | |
8ed110f9 |
202 | # @_ = ( \%args, "read_file '$file_name' - small2 sysread: $!"); |
203 | # goto &_error ; |
204 | # } |
e2c51d31 |
205 | |
8ed110f9 |
206 | # return $buf ; |
207 | # } |
635c7876 |
208 | |
209 | # infinite read loop. we exit when we are done slurping |
210 | |
211 | while( 1 ) { |
212 | |
213 | # do the read and see how much we got |
214 | |
215 | my $read_cnt = sysread( $read_fh, ${$buf_ref}, |
216 | $size_left, length ${$buf_ref} ) ; |
217 | |
e2c51d31 |
218 | unless ( defined $read_cnt ) { |
219 | |
220 | # handle the read error |
221 | |
8ed110f9 |
222 | @_ = ( \%args, "read_file '$file_name' - loop sysread: $!"); |
e2c51d31 |
223 | goto &_error ; |
224 | } |
635c7876 |
225 | |
226 | # good read. see if we hit EOF (nothing left to read) |
227 | |
e2c51d31 |
228 | last if $read_cnt == 0 ; |
635c7876 |
229 | |
230 | # loop if we are slurping a handle. we don't track $size_left then. |
231 | |
e2c51d31 |
232 | next if $blk_size ; |
635c7876 |
233 | |
234 | # count down how much we read and loop if we have more to read. |
635c7876 |
235 | |
e2c51d31 |
236 | $size_left -= $read_cnt ; |
237 | last if $size_left <= 0 ; |
635c7876 |
238 | } |
239 | |
240 | # fix up cr/lf to be a newline if this is a windows text file |
241 | |
242 | ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$args{'binmode'} ; |
243 | |
244 | # this is the 5 returns in a row. each handles one possible |
245 | # combination of caller context and requested return type |
246 | |
247 | my $sep = $/ ; |
248 | $sep = '\n\n+' if defined $sep && $sep eq '' ; |
249 | |
250 | # caller wants to get an array ref of lines |
251 | |
252 | # this split doesn't work since it tries to use variable length lookbehind |
253 | # the m// line works. |
254 | # return [ split( m|(?<=$sep)|, ${$buf_ref} ) ] if $args{'array_ref'} ; |
255 | return [ length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ] |
256 | if $args{'array_ref'} ; |
257 | |
258 | # caller wants a list of lines (normal list context) |
259 | |
260 | # same problem with this split as before. |
261 | # return split( m|(?<=$sep)|, ${$buf_ref} ) if wantarray ; |
262 | return length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () |
263 | if wantarray ; |
264 | |
265 | # caller wants a scalar ref to the slurped text |
266 | |
267 | return $buf_ref if $args{'scalar_ref'} ; |
268 | |
269 | # caller wants a scalar with the slurped text (normal scalar context) |
270 | |
271 | return ${$buf_ref} if defined wantarray ; |
272 | |
273 | # caller passed in an i/o buffer by reference (normal void context) |
274 | |
275 | return ; |
276 | } |
277 | |
278 | sub write_file { |
279 | |
280 | my $file_name = shift ; |
281 | |
282 | # get the optional argument hash ref from @_ or an empty hash ref. |
283 | |
284 | my $args = ( ref $_[0] eq 'HASH' ) ? shift : {} ; |
285 | |
286 | my( $buf_ref, $write_fh, $no_truncate, $orig_file_name, $data_is_ref ) ; |
287 | |
288 | # get the buffer ref - it depends on how the data is passed into write_file |
289 | # after this if/else $buf_ref will have a scalar ref to the data. |
290 | |
291 | if ( ref $args->{'buf_ref'} eq 'SCALAR' ) { |
292 | |
293 | # a scalar ref passed in %args has the data |
294 | # note that the data was passed by ref |
295 | |
296 | $buf_ref = $args->{'buf_ref'} ; |
297 | $data_is_ref = 1 ; |
298 | } |
299 | elsif ( ref $_[0] eq 'SCALAR' ) { |
300 | |
301 | # the first value in @_ is the scalar ref to the data |
302 | # note that the data was passed by ref |
303 | |
304 | $buf_ref = shift ; |
305 | $data_is_ref = 1 ; |
306 | } |
307 | elsif ( ref $_[0] eq 'ARRAY' ) { |
308 | |
309 | # the first value in @_ is the array ref to the data so join it. |
310 | |
311 | ${$buf_ref} = join '', @{$_[0]} ; |
312 | } |
313 | else { |
314 | |
315 | # good old @_ has all the data so join it. |
316 | |
317 | ${$buf_ref} = join '', @_ ; |
318 | } |
319 | |
320 | # see if we were passed a open handle to spew to. |
321 | |
322 | if ( ref $file_name ) { |
323 | |
324 | # we have a handle. make sure we don't call truncate on it. |
325 | |
326 | $write_fh = $file_name ; |
327 | $no_truncate = 1 ; |
328 | } |
329 | else { |
330 | |
331 | # spew to regular file. |
332 | |
333 | if ( $args->{'atomic'} ) { |
334 | |
335 | # in atomic mode, we spew to a temp file so make one and save the original |
336 | # file name. |
337 | $orig_file_name = $file_name ; |
338 | $file_name .= ".$$" ; |
339 | } |
340 | |
341 | # set the mode for the sysopen |
342 | |
343 | my $mode = O_WRONLY | O_CREAT ; |
635c7876 |
344 | $mode |= O_APPEND if $args->{'append'} ; |
345 | $mode |= O_EXCL if $args->{'no_clobber'} ; |
346 | |
f02156f2 |
347 | my $perms = $args->{perms} ; |
348 | $perms = 0666 unless defined $perms ; |
349 | |
635c7876 |
350 | #printf "WR: BINARY %x MODE %x\n", O_BINARY, $mode ; |
351 | |
352 | # open the file and handle any error. |
353 | |
354 | $write_fh = gensym ; |
f02156f2 |
355 | unless ( sysopen( $write_fh, $file_name, $mode, $perms ) ) { |
635c7876 |
356 | @_ = ( $args, "write_file '$file_name' - sysopen: $!"); |
357 | goto &_error ; |
358 | } |
359 | } |
360 | |
9aab46ab |
361 | if ( my $binmode = $args->{'binmode'} ) { |
cee624ab |
362 | binmode( $write_fh, $binmode ) ; |
363 | } |
364 | |
635c7876 |
365 | sysseek( $write_fh, 0, SEEK_END ) if $args->{'append'} ; |
366 | |
367 | |
368 | #print 'WR before data ', unpack( 'H*', ${$buf_ref}), "\n" ; |
369 | |
370 | # fix up newline to write cr/lf if this is a windows text file |
371 | |
372 | if ( $is_win32 && !$args->{'binmode'} ) { |
373 | |
374 | # copy the write data if it was passed by ref so we don't clobber the |
375 | # caller's data |
376 | $buf_ref = \do{ my $copy = ${$buf_ref}; } if $data_is_ref ; |
377 | ${$buf_ref} =~ s/\n/\015\012/g ; |
378 | } |
379 | |
380 | #print 'after data ', unpack( 'H*', ${$buf_ref}), "\n" ; |
381 | |
382 | # get the size of how much we are writing and init the offset into that buffer |
383 | |
384 | my $size_left = length( ${$buf_ref} ) ; |
385 | my $offset = 0 ; |
386 | |
387 | # loop until we have no more data left to write |
388 | |
389 | do { |
390 | |
391 | # do the write and track how much we just wrote |
392 | |
393 | my $write_cnt = syswrite( $write_fh, ${$buf_ref}, |
394 | $size_left, $offset ) ; |
395 | |
396 | unless ( defined $write_cnt ) { |
397 | |
398 | # the write failed |
399 | @_ = ( $args, "write_file '$file_name' - syswrite: $!"); |
400 | goto &_error ; |
401 | } |
402 | |
403 | # track much left to write and where to write from in the buffer |
404 | |
405 | $size_left -= $write_cnt ; |
406 | $offset += $write_cnt ; |
407 | |
408 | } while( $size_left > 0 ) ; |
409 | |
410 | # we truncate regular files in case we overwrite a long file with a shorter file |
411 | # so seek to the current position to get it (same as tell()). |
412 | |
413 | truncate( $write_fh, |
414 | sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ; |
415 | |
416 | close( $write_fh ) ; |
417 | |
418 | # handle the atomic mode - move the temp file to the original filename. |
419 | |
e2c51d31 |
420 | if ( $args->{'atomic'} && !rename( $file_name, $orig_file_name ) ) { |
421 | |
422 | |
423 | @_ = ( $args, "write_file '$file_name' - rename: $!" ) ; |
424 | goto &_error ; |
425 | } |
635c7876 |
426 | |
427 | return 1 ; |
428 | } |
429 | |
430 | # this is for backwards compatibility with the previous File::Slurp module. |
431 | # write_file always overwrites an existing file |
432 | |
433 | *overwrite_file = \&write_file ; |
434 | |
435 | # the current write_file has an append mode so we use that. this |
436 | # supports the same API with an optional second argument which is a |
437 | # hash ref of options. |
438 | |
439 | sub append_file { |
440 | |
441 | # get the optional args hash ref |
442 | my $args = $_[1] ; |
443 | if ( ref $args eq 'HASH' ) { |
444 | |
445 | # we were passed an args ref so just mark the append mode |
446 | |
447 | $args->{append} = 1 ; |
448 | } |
449 | else { |
450 | |
451 | # no args hash so insert one with the append mode |
452 | |
453 | splice( @_, 1, 0, { append => 1 } ) ; |
454 | } |
455 | |
456 | # magic goto the main write_file sub. this overlays the sub without touching |
457 | # the stack or @_ |
458 | |
459 | goto &write_file |
460 | } |
461 | |
462 | # basic wrapper around opendir/readdir |
463 | |
464 | sub read_dir { |
465 | |
466 | my ($dir, %args ) = @_; |
467 | |
468 | # this handle will be destroyed upon return |
469 | |
470 | local(*DIRH); |
471 | |
472 | # open the dir and handle any errors |
473 | |
474 | unless ( opendir( DIRH, $dir ) ) { |
475 | |
476 | @_ = ( \%args, "read_dir '$dir' - opendir: $!" ) ; |
477 | goto &_error ; |
478 | } |
479 | |
480 | my @dir_entries = readdir(DIRH) ; |
481 | |
482 | @dir_entries = grep( $_ ne "." && $_ ne "..", @dir_entries ) |
483 | unless $args{'keep_dot_dot'} ; |
484 | |
485 | return @dir_entries if wantarray ; |
486 | return \@dir_entries ; |
487 | } |
488 | |
489 | # error handling section |
490 | # |
491 | # all the error handling uses magic goto so the caller will get the |
492 | # error message as if from their code and not this module. if we just |
493 | # did a call on the error code, the carp/croak would report it from |
494 | # this module since the error sub is one level down on the call stack |
495 | # from read_file/write_file/read_dir. |
496 | |
497 | |
498 | my %err_func = ( |
499 | 'carp' => \&carp, |
500 | 'croak' => \&croak, |
501 | ) ; |
502 | |
503 | sub _error { |
504 | |
505 | my( $args, $err_msg ) = @_ ; |
506 | |
507 | # get the error function to use |
508 | |
509 | my $func = $err_func{ $args->{'err_mode'} || 'croak' } ; |
510 | |
511 | # if we didn't find it in our error function hash, they must have set |
512 | # it to quiet and we don't do anything. |
513 | |
514 | return unless $func ; |
515 | |
516 | # call the carp/croak function |
517 | |
f02156f2 |
518 | $func->($err_msg) if $func ; |
635c7876 |
519 | |
520 | # return a hard undef (in list context this will be a single value of |
521 | # undef which is not a legal in-band value) |
522 | |
523 | return undef ; |
524 | } |
525 | |
526 | 1; |
527 | __END__ |
528 | |
529 | =head1 NAME |
530 | |
531 | File::Slurp - Efficient Reading/Writing of Complete Files |
532 | |
533 | =head1 SYNOPSIS |
534 | |
535 | use File::Slurp; |
536 | |
537 | my $text = read_file( 'filename' ) ; |
538 | my @lines = read_file( 'filename' ) ; |
539 | |
540 | write_file( 'filename', @lines ) ; |
541 | |
542 | use File::Slurp qw( slurp ) ; |
543 | |
544 | my $text = slurp( 'filename' ) ; |
545 | |
546 | |
547 | =head1 DESCRIPTION |
548 | |
549 | This module provides subs that allow you to read or write entire files |
550 | with one simple call. They are designed to be simple to use, have |
551 | flexible ways to pass in or get the file contents and to be very |
552 | efficient. There is also a sub to read in all the files in a |
553 | directory other than C<.> and C<..> |
554 | |
555 | These slurp/spew subs work for files, pipes and |
556 | sockets, and stdio, pseudo-files, and DATA. |
557 | |
558 | =head2 B<read_file> |
559 | |
560 | This sub reads in an entire file and returns its contents to the |
561 | caller. In list context it will return a list of lines (using the |
562 | current value of $/ as the separator including support for paragraph |
563 | mode when it is set to ''). In scalar context it returns the entire |
564 | file as a single scalar. |
565 | |
566 | my $text = read_file( 'filename' ) ; |
567 | my @lines = read_file( 'filename' ) ; |
568 | |
569 | The first argument to C<read_file> is the filename and the rest of the |
570 | arguments are key/value pairs which are optional and which modify the |
571 | behavior of the call. Other than binmode the options all control how |
572 | the slurped file is returned to the caller. |
573 | |
574 | If the first argument is a file handle reference or I/O object (if ref |
575 | is true), then that handle is slurped in. This mode is supported so |
576 | you slurp handles such as C<DATA>, C<STDIN>. See the test handle.t |
577 | for an example that does C<open( '-|' )> and child process spews data |
578 | to the parant which slurps it in. All of the options that control how |
579 | the data is returned to the caller still work in this case. |
580 | |
581 | NOTE: as of version 9999.06, read_file works correctly on the C<DATA> |
582 | handle. It used to need a sysseek workaround but that is now handled |
583 | when needed by the module itself. |
584 | |
585 | You can optionally request that C<slurp()> is exported to your code. This |
586 | is an alias for read_file and is meant to be forward compatible with |
587 | Perl 6 (which will have slurp() built-in). |
588 | |
589 | The options are: |
590 | |
591 | =head3 binmode |
592 | |
9aab46ab |
593 | If you set the binmode option, then the option will be passed to a |
594 | binmode call on the opened filehandle. |
635c7876 |
595 | |
596 | my $bin_data = read_file( $bin_file, binmode => ':raw' ) ; |
9aab46ab |
597 | my $utf_text = read_file( $bin_file, binmode => ':utf8' ) ; |
635c7876 |
598 | |
599 | =head3 array_ref |
600 | |
601 | If this boolean option is set, the return value (only in scalar |
602 | context) will be an array reference which contains the lines of the |
603 | slurped file. The following two calls are equivalent: |
604 | |
605 | my $lines_ref = read_file( $bin_file, array_ref => 1 ) ; |
606 | my $lines_ref = [ read_file( $bin_file ) ] ; |
607 | |
608 | =head3 scalar_ref |
609 | |
f02156f2 |
610 | If this boolean option is set, the return value (only in scalar context) |
611 | will be an scalar reference to a string which is the contents of the |
612 | slurped file. This will usually be faster than returning the plain |
613 | scalar. It will also save memory as it will not make a copy of the file |
614 | to return. |
635c7876 |
615 | |
616 | my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ; |
617 | |
f02156f2 |
618 | =head3 perms |
619 | |
620 | The perms option sets the permissions of newly-created files. This value |
621 | is modified by your process's umask and defaults to 0666 (same as |
622 | sysopen). |
623 | |
624 | NOTE: this option is new as of File::Slurp version 9999.14; |
625 | |
626 | |
635c7876 |
627 | =head3 buf_ref |
628 | |
629 | You can use this option to pass in a scalar reference and the slurped |
630 | file contents will be stored in the scalar. This can be used in |
f02156f2 |
631 | conjunction with any of the other options. This saves an extra copy of |
632 | the slurped file and can lower ram usage vs returning the file. |
635c7876 |
633 | |
634 | my $text_ref = read_file( $bin_file, buf_ref => \$buffer, |
635 | array_ref => 1 ) ; |
636 | my @lines = read_file( $bin_file, buf_ref => \$buffer ) ; |
637 | |
638 | =head3 blk_size |
639 | |
640 | You can use this option to set the block size used when slurping from an already open handle (like \*STDIN). It defaults to 1MB. |
641 | |
642 | my $text_ref = read_file( $bin_file, blk_size => 10_000_000, |
643 | array_ref => 1 ) ; |
644 | |
645 | =head3 err_mode |
646 | |
647 | You can use this option to control how read_file behaves when an error |
648 | occurs. This option defaults to 'croak'. You can set it to 'carp' or |
649 | to 'quiet to have no error handling. This code wants to carp and then |
650 | read abother file if it fails. |
651 | |
652 | my $text_ref = read_file( $file, err_mode => 'carp' ) ; |
653 | unless ( $text_ref ) { |
654 | |
655 | # read a different file but croak if not found |
656 | $text_ref = read_file( $another_file ) ; |
657 | } |
658 | |
659 | # process ${$text_ref} |
660 | |
661 | =head2 B<write_file> |
662 | |
663 | This sub writes out an entire file in one call. |
664 | |
665 | write_file( 'filename', @data ) ; |
666 | |
667 | The first argument to C<write_file> is the filename. The next argument |
668 | is an optional hash reference and it contains key/values that can |
669 | modify the behavior of C<write_file>. The rest of the argument list is |
670 | the data to be written to the file. |
671 | |
672 | write_file( 'filename', {append => 1 }, @data ) ; |
673 | write_file( 'filename', {binmode => ':raw' }, $buffer ) ; |
674 | |
675 | As a shortcut if the first data argument is a scalar or array |
676 | reference, it is used as the only data to be written to the file. Any |
677 | following arguments in @_ are ignored. This is a faster way to pass in |
678 | the output to be written to the file and is equivilent to the |
679 | C<buf_ref> option. These following pairs are equivilent but the pass |
680 | by reference call will be faster in most cases (especially with larger |
681 | files). |
682 | |
683 | write_file( 'filename', \$buffer ) ; |
684 | write_file( 'filename', $buffer ) ; |
685 | |
686 | write_file( 'filename', \@lines ) ; |
687 | write_file( 'filename', @lines ) ; |
688 | |
689 | If the first argument is a file handle reference or I/O object (if ref |
690 | is true), then that handle is slurped in. This mode is supported so |
691 | you spew to handles such as \*STDOUT. See the test handle.t for an |
692 | example that does C<open( '-|' )> and child process spews data to the |
693 | parant which slurps it in. All of the options that control how the |
694 | data is passes into C<write_file> still work in this case. |
695 | |
696 | C<write_file> returns 1 upon successfully writing the file or undef if |
697 | it encountered an error. |
698 | |
699 | The options are: |
700 | |
701 | =head3 binmode |
702 | |
703 | If you set the binmode option, then the file will be written in binary |
704 | mode. |
705 | |
706 | write_file( $bin_file, {binmode => ':raw'}, @data ) ; |
707 | |
708 | NOTE: this actually sets the O_BINARY mode flag for sysopen. It |
709 | probably should call binmode and pass its argument to support other |
710 | file modes. |
711 | |
712 | =head3 buf_ref |
713 | |
714 | You can use this option to pass in a scalar reference which has the |
715 | data to be written. If this is set then any data arguments (including |
716 | the scalar reference shortcut) in @_ will be ignored. These are |
717 | equivilent: |
718 | |
719 | write_file( $bin_file, { buf_ref => \$buffer } ) ; |
720 | write_file( $bin_file, \$buffer ) ; |
721 | write_file( $bin_file, $buffer ) ; |
722 | |
723 | =head3 atomic |
724 | |
725 | If you set this boolean option, the file will be written to in an |
726 | atomic fashion. A temporary file name is created by appending the pid |
727 | ($$) to the file name argument and that file is spewed to. After the |
728 | file is closed it is renamed to the original file name (and rename is |
729 | an atomic operation on most OS's). If the program using this were to |
730 | crash in the middle of this, then the file with the pid suffix could |
731 | be left behind. |
732 | |
733 | =head3 append |
734 | |
735 | If you set this boolean option, the data will be written at the end of |
f02156f2 |
736 | the current file. Internally this sets the sysopen mode flag O_APPEND. |
635c7876 |
737 | |
738 | write_file( $file, {append => 1}, @data ) ; |
739 | |
740 | C<write_file> croaks if it cannot open the file. It returns true if it |
f02156f2 |
741 | succeeded in writing out the file and undef if there was an error. |
635c7876 |
742 | |
743 | =head3 no_clobber |
744 | |
745 | If you set this boolean option, an existing file will not be overwritten. |
746 | |
747 | write_file( $file, {no_clobber => 1}, @data ) ; |
748 | |
749 | =head3 err_mode |
750 | |
751 | You can use this option to control how C<write_file> behaves when an |
752 | error occurs. This option defaults to 'croak'. You can set it to |
753 | 'carp' or to 'quiet' to have no error handling other than the return |
754 | value. If the first call to C<write_file> fails it will carp and then |
755 | write to another file. If the second call to C<write_file> fails, it |
756 | will croak. |
757 | |
758 | unless ( write_file( $file, { err_mode => 'carp', \$data ) ; |
759 | |
760 | # write a different file but croak if not found |
761 | write_file( $other_file, \$data ) ; |
762 | } |
763 | |
764 | =head2 overwrite_file |
765 | |
766 | This sub is just a typeglob alias to write_file since write_file |
767 | always overwrites an existing file. This sub is supported for |
768 | backwards compatibility with the original version of this module. See |
769 | write_file for its API and behavior. |
770 | |
771 | =head2 append_file |
772 | |
773 | This sub will write its data to the end of the file. It is a wrapper |
774 | around write_file and it has the same API so see that for the full |
775 | documentation. These calls are equivilent: |
776 | |
777 | append_file( $file, @data ) ; |
778 | write_file( $file, {append => 1}, @data ) ; |
779 | |
780 | =head2 read_dir |
781 | |
782 | This sub reads all the file names from directory and returns them to |
783 | the caller but C<.> and C<..> are removed by default. |
784 | |
785 | my @files = read_dir( '/path/to/dir' ) ; |
786 | |
787 | It croaks if it cannot open the directory. |
788 | |
789 | In a list context C<read_dir> returns a list of the entries in the |
790 | directory. In a scalar context it returns an array reference which has |
791 | the entries. |
792 | |
793 | =head3 keep_dot_dot |
794 | |
795 | If this boolean option is set, C<.> and C<..> are not removed from the |
796 | list of files. |
797 | |
798 | my @all_files = read_dir( '/path/to/dir', keep_dot_dot => 1 ) ; |
799 | |
800 | =head2 EXPORT |
801 | |
802 | read_file write_file overwrite_file append_file read_dir |
803 | |
f02156f2 |
804 | =head2 LICENSE |
805 | |
806 | Same as Perl. |
807 | |
635c7876 |
808 | =head2 SEE ALSO |
809 | |
810 | An article on file slurping in extras/slurp_article.pod. There is |
811 | also a benchmarking script in extras/slurp_bench.pl. |
812 | |
813 | =head2 BUGS |
814 | |
815 | If run under Perl 5.004, slurping from the DATA handle will fail as |
816 | that requires B.pm which didn't get into core until 5.005. |
817 | |
818 | =head1 AUTHOR |
819 | |
820 | Uri Guttman, E<lt>uri@stemsystems.comE<gt> |
821 | |
822 | =cut |