Commit | Line | Data |
635c7876 |
1 | package File::Slurp; |
2 | |
3 | use strict; |
4 | |
5 | use Carp ; |
6 | use Fcntl qw( :DEFAULT :seek ) ; |
7 | use Symbol ; |
8 | |
9 | use base 'Exporter' ; |
10 | use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT) ; |
11 | |
12 | %EXPORT_TAGS = ( 'all' => [ |
13 | qw( read_file write_file overwrite_file append_file read_dir ) ] ) ; |
14 | |
15 | #@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } ); |
16 | @EXPORT = ( @{ $EXPORT_TAGS{'all'} } ); |
17 | |
18 | $VERSION = '9999.01'; |
19 | |
20 | |
21 | sub read_file { |
22 | |
23 | my( $file_name, %args ) = @_ ; |
24 | |
25 | my $buf ; |
26 | my $buf_ref = $args{'buf_ref'} || \$buf ; |
27 | |
28 | ${$buf_ref} = '' ; |
29 | |
30 | my( $read_fh, $size_left, $blk_size ) ; |
31 | |
32 | if ( defined( fileno( $file_name ) ) ) { |
33 | |
34 | $read_fh = $file_name ; |
35 | $blk_size = $args{'blk_size'} || 1024 * 1024 ; |
36 | $size_left = $blk_size ; |
37 | } |
38 | else { |
39 | |
40 | my $mode = O_RDONLY ; |
41 | $mode |= O_BINARY if $args{'binmode'} ; |
42 | |
43 | |
44 | $read_fh = gensym ; |
45 | unless ( sysopen( $read_fh, $file_name, $mode ) ) { |
46 | @_ = ( \%args, "read_file '$file_name' - sysopen: $!"); |
47 | goto &error ; |
48 | } |
49 | |
50 | $size_left = -s $read_fh ; |
51 | } |
52 | |
53 | while( 1 ) { |
54 | |
55 | my $read_cnt = sysread( $read_fh, ${$buf_ref}, |
56 | $size_left, length ${$buf_ref} ) ; |
57 | |
58 | if ( defined $read_cnt ) { |
59 | |
60 | last if $read_cnt == 0 ; |
61 | next if $blk_size ; |
62 | |
63 | $size_left -= $read_cnt ; |
64 | last if $size_left <= 0 ; |
65 | next ; |
66 | } |
67 | |
68 | # handle the read error |
69 | |
70 | @_ = ( \%args, "read_file '$file_name' - sysread: $!"); |
71 | goto &error ; |
72 | } |
73 | |
74 | # handle array ref |
75 | |
76 | return [ split( m|(?<=$/)|, ${$buf_ref} ) ] if $args{'array_ref'} ; |
77 | |
78 | # handle list context |
79 | |
80 | return split( m|(?<=$/)|, ${$buf_ref} ) if wantarray ; |
81 | |
82 | # handle scalar ref |
83 | |
84 | return $buf_ref if $args{'scalar_ref'} ; |
85 | |
86 | # handle scalar context |
87 | |
88 | return ${$buf_ref} if defined wantarray ; |
89 | |
90 | # handle void context (return scalar by buffer reference) |
91 | |
92 | return ; |
93 | } |
94 | |
95 | sub write_file { |
96 | |
97 | my $file_name = shift ; |
98 | |
99 | my $args = ( ref $_[0] eq 'HASH' ) ? shift : {} ; |
100 | |
101 | my( $buf_ref, $write_fh, $no_truncate ) ; |
102 | |
103 | # get the buffer ref - either passed by name or first data arg or autovivified |
104 | # ${$buf_ref} will have the data after this |
105 | |
106 | if ( ref $args->{'buf_ref'} eq 'SCALAR' ) { |
107 | |
108 | $buf_ref = $args->{'buf_ref'} ; |
109 | } |
110 | elsif ( ref $_[0] eq 'SCALAR' ) { |
111 | |
112 | $buf_ref = shift ; |
113 | } |
114 | elsif ( ref $_[0] eq 'ARRAY' ) { |
115 | |
116 | ${$buf_ref} = join '', @{$_[0]} ; |
117 | } |
118 | else { |
119 | |
120 | ${$buf_ref} = join '', @_ ; |
121 | } |
122 | |
123 | if ( defined( fileno( $file_name ) ) ) { |
124 | |
125 | $write_fh = $file_name ; |
126 | $no_truncate = 1 ; |
127 | } |
128 | else { |
129 | |
130 | my $mode = O_WRONLY | O_CREAT ; |
131 | $mode |= O_BINARY if $args->{'binmode'} ; |
132 | $mode |= O_APPEND if $args->{'append'} ; |
133 | |
134 | $write_fh = gensym ; |
135 | unless ( sysopen( $write_fh, $file_name, $mode ) ) { |
136 | @_ = ( $args, "write_file '$file_name' - sysopen: $!"); |
137 | goto &error ; |
138 | } |
139 | |
140 | } |
141 | |
142 | my $size_left = length( ${$buf_ref} ) ; |
143 | my $offset = 0 ; |
144 | |
145 | do { |
146 | my $write_cnt = syswrite( $write_fh, ${$buf_ref}, |
147 | $size_left, $offset ) ; |
148 | |
149 | unless ( defined $write_cnt ) { |
150 | |
151 | @_ = ( $args, "write_file '$file_name' - syswrite: $!"); |
152 | goto &error ; |
153 | } |
154 | |
155 | $size_left -= $write_cnt ; |
156 | $offset += $write_cnt ; |
157 | |
158 | } while( $size_left > 0 ) ; |
159 | |
160 | truncate( $write_fh, |
161 | sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ; |
162 | |
163 | close( $write_fh ) ; |
164 | |
165 | return 1 ; |
166 | } |
167 | |
168 | # this is for backwards compatibility with the previous File::Slurp module. |
169 | # write_file always overwrites an existing file |
170 | |
171 | *overwrite_file = \&write_file ; |
172 | |
173 | # the current write_file has an append mode so we use that. this |
174 | # supports the same API with an optional second argument which is a |
175 | # hash ref of options. |
176 | |
177 | sub append_file { |
178 | |
179 | my $args = $_[1] ; |
180 | if ( ref $args eq 'HASH' ) { |
181 | $args->{append} = 1 ; |
182 | } |
183 | else { |
184 | |
185 | splice( @_, 1, 0, { append => 1 } ) ; |
186 | } |
187 | |
188 | goto &write_file |
189 | } |
190 | |
191 | sub read_dir { |
192 | my ($dir, %args ) = @_; |
193 | |
194 | local(*DIRH); |
195 | |
196 | if ( opendir( DIRH, $dir ) ) { |
197 | return grep( $_ ne "." && $_ ne "..", readdir(DIRH)); |
198 | } |
199 | |
200 | @_ = ( \%args, "read_dir '$dir' - opendir: $!" ) ; goto &error ; |
201 | |
202 | return undef ; |
203 | } |
204 | |
205 | my %err_func = ( |
206 | carp => \&carp, |
207 | croak => \&croak, |
208 | ) ; |
209 | |
210 | sub error { |
211 | |
212 | my( $args, $err_msg ) = @_ ; |
213 | |
214 | #print $err_msg ; |
215 | |
216 | my $func = $err_func{ $args->{'err_mode'} || 'croak' } ; |
217 | |
218 | return unless $func ; |
219 | |
220 | $func->($err_msg) ; |
221 | |
222 | return undef ; |
223 | } |
224 | |
225 | 1; |
226 | __END__ |
227 | |
228 | =head1 NAME |
229 | |
230 | File::Slurp - Efficient Reading/Writing of Complete Files |
231 | |
232 | =head1 SYNOPSIS |
233 | |
234 | use File::Slurp; |
235 | |
236 | my $text = read_file( 'filename' ) ; |
237 | my @lines = read_file( 'filename' ) ; |
238 | |
239 | write_file( 'filename', @lines ) ; |
240 | |
241 | =head1 DESCRIPTION |
242 | |
243 | This module provides subs that allow you to read or write entire files |
244 | with one simple call. They are designed to be simple to use, have |
245 | flexible ways to pass in or get the file contents and to be very |
246 | efficient. There is also a sub to read in all the files in a |
247 | directory other than C<.> and C<..> |
248 | |
249 | Note that these slurp/spew subs work only for files and not for pipes |
250 | or stdio. If you want to slurp the latter, use the standard techniques |
251 | such as setting $/ to undef, reading <> in a list context, or printing |
252 | all you want to STDOUT. |
253 | |
254 | =head2 B<read_file> |
255 | |
256 | This sub reads in an entire file and returns its contents to the |
257 | caller. In list context it will return a list of lines (using the |
258 | current value of $/ as the separator. In scalar context it returns the |
259 | entire file as a single scalar. |
260 | |
261 | my $text = read_file( 'filename' ) ; |
262 | my @lines = read_file( 'filename' ) ; |
263 | |
264 | The first argument to C<read_file> is the filename and the rest of the |
265 | arguments are key/value pairs which are optional and which modify the |
266 | behavior of the call. Other than binmode the options all control how |
267 | the slurped file is returned to the caller. |
268 | |
269 | If the first argument is a file handle reference or I/O object (if |
270 | fileno returns a defined value), then that handle is slurped in. This |
271 | mode is supported so you slurp handles such as <DATA>, \*STDIN. See |
272 | the test handle.t for an example that does C<open( '-|' )> and child |
273 | process spews data to the parant which slurps it in. All of the |
274 | options that control how the data is returned to the caller still work |
275 | in this case. |
276 | |
277 | The options are: |
278 | |
279 | =head3 binmode |
280 | |
281 | If you set the binmode option, then the file will be slurped in binary |
282 | mode. |
283 | |
284 | my $bin_data = read_file( $bin_file, binmode => ':raw' ) ; |
285 | |
286 | NOTE: this actually sets the O_BINARY mode flag for sysopen. It |
287 | probably should call binmode and pass its argument to support other |
288 | file modes. |
289 | |
290 | =head3 array_ref |
291 | |
292 | If this boolean option is set, the return value (only in scalar |
293 | context) will be an array reference which contains the lines of the |
294 | slurped file. The following two calls are equivilent: |
295 | |
296 | my $lines_ref = read_file( $bin_file, array_ref => 1 ) ; |
297 | my $lines_ref = [ read_file( $bin_file ) ] ; |
298 | |
299 | =head3 scalar_ref |
300 | |
301 | If this boolean option is set, the return value (only in scalar |
302 | context) will be an scalar reference to a string which is the contents |
303 | of the slurped file. This will usually be faster than returning the |
304 | plain scalar. |
305 | |
306 | my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ; |
307 | |
308 | =head3 buf_ref |
309 | |
310 | You can use this option to pass in a scalar reference and the slurped |
311 | file contents will be stored in the scalar. This can be used in |
312 | conjunction with any of the other options. |
313 | |
314 | my $text_ref = read_file( $bin_file, buf_ref => \$buffer, |
315 | array_ref => 1 ) ; |
316 | my @lines = read_file( $bin_file, buf_ref => \$buffer ) ; |
317 | |
318 | =head3 blk_size |
319 | |
320 | You can use this option to set the block size used when slurping from an already open handle (like \*STDIN). It defaults to 1MB. |
321 | |
322 | my $text_ref = read_file( $bin_file, blk_size => 10_000_000, |
323 | array_ref => 1 ) ; |
324 | |
325 | =head3 err_mode |
326 | |
327 | You can use this option to control how read_file behaves when an error |
328 | occurs. This option defaults to 'croak'. You can set it to 'carp' or |
329 | to 'quiet to have no error handling. This code wants to carp and then |
330 | read abother file if it fails. |
331 | |
332 | my $text_ref = read_file( $file, err_mode => 'carp' ) ; |
333 | unless ( $text_ref ) { |
334 | |
335 | # read a different file but croak if not found |
336 | $text_ref = read_file( $another_file ) ; |
337 | } |
338 | |
339 | # process ${$text_ref} |
340 | |
341 | =head2 B<write_file> |
342 | |
343 | This sub writes out an entire file in one call. |
344 | |
345 | write_file( 'filename', @data ) ; |
346 | |
347 | The first argument to C<write_file> is the filename. The next argument |
348 | is an optional hash reference and it contains key/values that can |
349 | modify the behavior of C<write_file>. The rest of the argument list is |
350 | the data to be written to the file. |
351 | |
352 | write_file( 'filename', {append => 1 }, @data ) ; |
353 | write_file( 'filename', {binmode => ':raw' }, $buffer ) ; |
354 | |
355 | As a shortcut if the first data argument is a scalar or array |
356 | reference, it is used as the only data to be written to the file. Any |
357 | following arguments in @_ are ignored. This is a faster way to pass in |
358 | the output to be written to the file and is equivilent to the |
359 | C<buf_ref> option. These following pairs are equivilent but the pass |
360 | by reference call will be faster in most cases (especially with larger |
361 | files). |
362 | |
363 | write_file( 'filename', \$buffer ) ; |
364 | write_file( 'filename', $buffer ) ; |
365 | |
366 | write_file( 'filename', \@lines ) ; |
367 | write_file( 'filename', @lines ) ; |
368 | |
369 | If the first argument is a file handle reference or I/O object (if |
370 | fileno returns a defined value), then that handle is slurped in. This |
371 | mode is supported so you spew to handles such as \*STDOUT. See the |
372 | test handle.t for an example that does C<open( '-|' )> and child |
373 | process spews data to the parant which slurps it in. All of the |
374 | options that control how the data is passes into C<write_file> still |
375 | work in this case. |
376 | |
377 | The options are: |
378 | |
379 | =head3 binmode |
380 | |
381 | If you set the binmode option, then the file will be written in binary |
382 | mode. |
383 | |
384 | write_file( $bin_file, {binmode => ':raw'}, @data ) ; |
385 | |
386 | NOTE: this actually sets the O_BINARY mode flag for sysopen. It |
387 | probably should call binmode and pass its argument to support other |
388 | file modes. |
389 | |
390 | =head3 buf_ref |
391 | |
392 | You can use this option to pass in a scalar reference which has the |
393 | data to be written. If this is set then any data arguments (including |
394 | the scalar reference shortcut) in @_ will be ignored. These are |
395 | equivilent: |
396 | |
397 | write_file( $bin_file, { buf_ref => \$buffer } ) ; |
398 | write_file( $bin_file, \$buffer ) ; |
399 | write_file( $bin_file, $buffer ) ; |
400 | |
401 | =head3 append |
402 | |
403 | If you set this boolean option, the data will be written at the end of |
404 | the current file. |
405 | |
406 | write_file( $file, {append => 1}, @data ) ; |
407 | |
408 | C<write_file> croaks if it cannot open the file. It returns true if it |
409 | succeeded in writing out the file and undef if there was an |
410 | error. (Yes, I know if it croaks it can't return anything but that is |
411 | for when I add the options to select the error handling mode). |
412 | |
413 | =head3 err_mode |
414 | |
415 | You can use this option to control how C<write_file> behaves when an |
416 | error occurs. This option defaults to 'croak'. You can set it to |
417 | 'carp' or to 'quiet to have no error handling. If the first call to |
418 | C<write_file> fails it will carp and then write to another file. If the |
419 | second call to C<write_file> fails, it will croak. |
420 | |
421 | unless ( write_file( $file, { err_mode => 'carp', \$data ) ; |
422 | |
423 | # write a different file but croak if not found |
424 | write_file( $other_file, \$data ) ; |
425 | } |
426 | |
427 | =head2 overwrite_file |
428 | |
429 | This sub is just a typeglob alias to write_file since write_file |
430 | always overwrites an existing file. This sub is supported for |
431 | backwards compatibility with the original version of this module. See |
432 | write_file for its API and behavior. |
433 | |
434 | =head2 append_file |
435 | |
436 | This sub will write its data to the end of the file. It is a wrapper |
437 | around write_file and it has the same API so see that for the full |
438 | documentation. These calls are equivilent: |
439 | |
440 | append_file( $file, @data ) ; |
441 | write_file( $file, {append => 1}, @data ) ; |
442 | |
443 | =head2 read_dir |
444 | |
445 | This sub reads all the file names from directory and returns them to |
446 | the caller but C<.> and C<..> are removed. |
447 | |
448 | my @files = read_dir( '/path/to/dir' ) ; |
449 | |
450 | It croaks if it cannot open the directory. |
451 | |
452 | =head2 EXPORT |
453 | |
454 | read_file write_file overwrite_file append_file read_dir |
455 | |
456 | =head1 AUTHOR |
457 | |
458 | Uri Guttman, E<lt>uri@stemsystems.comE<gt> |
459 | |
460 | =cut |