updated changes file
[urisagit/Perl-Docs.git] / slurp_talk / Slurp.pm
CommitLineData
635c7876 1package File::Slurp;
2
3use strict;
4
5use Carp ;
6use Fcntl qw( :DEFAULT :seek ) ;
7use Symbol ;
8
9use base 'Exporter' ;
10use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT) ;
11
12%EXPORT_TAGS = ( 'all' => [
13 qw( read_file write_file overwrite_file append_file read_dir ) ] ) ;
14
15#@EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
16@EXPORT = ( @{ $EXPORT_TAGS{'all'} } );
17
18$VERSION = '9999.01';
19
20
21sub read_file {
22
23 my( $file_name, %args ) = @_ ;
24
25 my $buf ;
26 my $buf_ref = $args{'buf_ref'} || \$buf ;
27
28 ${$buf_ref} = '' ;
29
30 my( $read_fh, $size_left, $blk_size ) ;
31
32 if ( defined( fileno( $file_name ) ) ) {
33
34 $read_fh = $file_name ;
35 $blk_size = $args{'blk_size'} || 1024 * 1024 ;
36 $size_left = $blk_size ;
37 }
38 else {
39
40 my $mode = O_RDONLY ;
41 $mode |= O_BINARY if $args{'binmode'} ;
42
43
44 $read_fh = gensym ;
45 unless ( sysopen( $read_fh, $file_name, $mode ) ) {
46 @_ = ( \%args, "read_file '$file_name' - sysopen: $!");
47 goto &error ;
48 }
49
50 $size_left = -s $read_fh ;
51 }
52
53 while( 1 ) {
54
55 my $read_cnt = sysread( $read_fh, ${$buf_ref},
56 $size_left, length ${$buf_ref} ) ;
57
58 if ( defined $read_cnt ) {
59
60 last if $read_cnt == 0 ;
61 next if $blk_size ;
62
63 $size_left -= $read_cnt ;
64 last if $size_left <= 0 ;
65 next ;
66 }
67
68# handle the read error
69
70 @_ = ( \%args, "read_file '$file_name' - sysread: $!");
71 goto &error ;
72 }
73
74# handle array ref
75
76 return [ split( m|(?<=$/)|, ${$buf_ref} ) ] if $args{'array_ref'} ;
77
78# handle list context
79
80 return split( m|(?<=$/)|, ${$buf_ref} ) if wantarray ;
81
82# handle scalar ref
83
84 return $buf_ref if $args{'scalar_ref'} ;
85
86# handle scalar context
87
88 return ${$buf_ref} if defined wantarray ;
89
90# handle void context (return scalar by buffer reference)
91
92 return ;
93}
94
95sub write_file {
96
97 my $file_name = shift ;
98
99 my $args = ( ref $_[0] eq 'HASH' ) ? shift : {} ;
100
101 my( $buf_ref, $write_fh, $no_truncate ) ;
102
103# get the buffer ref - either passed by name or first data arg or autovivified
104# ${$buf_ref} will have the data after this
105
106 if ( ref $args->{'buf_ref'} eq 'SCALAR' ) {
107
108 $buf_ref = $args->{'buf_ref'} ;
109 }
110 elsif ( ref $_[0] eq 'SCALAR' ) {
111
112 $buf_ref = shift ;
113 }
114 elsif ( ref $_[0] eq 'ARRAY' ) {
115
116 ${$buf_ref} = join '', @{$_[0]} ;
117 }
118 else {
119
120 ${$buf_ref} = join '', @_ ;
121 }
122
123 if ( defined( fileno( $file_name ) ) ) {
124
125 $write_fh = $file_name ;
126 $no_truncate = 1 ;
127 }
128 else {
129
130 my $mode = O_WRONLY | O_CREAT ;
131 $mode |= O_BINARY if $args->{'binmode'} ;
132 $mode |= O_APPEND if $args->{'append'} ;
133
134 $write_fh = gensym ;
135 unless ( sysopen( $write_fh, $file_name, $mode ) ) {
136 @_ = ( $args, "write_file '$file_name' - sysopen: $!");
137 goto &error ;
138 }
139
140 }
141
142 my $size_left = length( ${$buf_ref} ) ;
143 my $offset = 0 ;
144
145 do {
146 my $write_cnt = syswrite( $write_fh, ${$buf_ref},
147 $size_left, $offset ) ;
148
149 unless ( defined $write_cnt ) {
150
151 @_ = ( $args, "write_file '$file_name' - syswrite: $!");
152 goto &error ;
153 }
154
155 $size_left -= $write_cnt ;
156 $offset += $write_cnt ;
157
158 } while( $size_left > 0 ) ;
159
160 truncate( $write_fh,
161 sysseek( $write_fh, 0, SEEK_CUR ) ) unless $no_truncate ;
162
163 close( $write_fh ) ;
164
165 return 1 ;
166}
167
168# this is for backwards compatibility with the previous File::Slurp module.
169# write_file always overwrites an existing file
170
171*overwrite_file = \&write_file ;
172
173# the current write_file has an append mode so we use that. this
174# supports the same API with an optional second argument which is a
175# hash ref of options.
176
177sub append_file {
178
179 my $args = $_[1] ;
180 if ( ref $args eq 'HASH' ) {
181 $args->{append} = 1 ;
182 }
183 else {
184
185 splice( @_, 1, 0, { append => 1 } ) ;
186 }
187
188 goto &write_file
189}
190
191sub read_dir {
192 my ($dir, %args ) = @_;
193
194 local(*DIRH);
195
196 if ( opendir( DIRH, $dir ) ) {
197 return grep( $_ ne "." && $_ ne "..", readdir(DIRH));
198 }
199
200 @_ = ( \%args, "read_dir '$dir' - opendir: $!" ) ; goto &error ;
201
202 return undef ;
203}
204
205my %err_func = (
206 carp => \&carp,
207 croak => \&croak,
208) ;
209
210sub error {
211
212 my( $args, $err_msg ) = @_ ;
213
214#print $err_msg ;
215
216 my $func = $err_func{ $args->{'err_mode'} || 'croak' } ;
217
218 return unless $func ;
219
220 $func->($err_msg) ;
221
222 return undef ;
223}
224
2251;
226__END__
227
228=head1 NAME
229
230File::Slurp - Efficient Reading/Writing of Complete Files
231
232=head1 SYNOPSIS
233
234 use File::Slurp;
235
236 my $text = read_file( 'filename' ) ;
237 my @lines = read_file( 'filename' ) ;
238
239 write_file( 'filename', @lines ) ;
240
241=head1 DESCRIPTION
242
243This module provides subs that allow you to read or write entire files
244with one simple call. They are designed to be simple to use, have
245flexible ways to pass in or get the file contents and to be very
246efficient. There is also a sub to read in all the files in a
247directory other than C<.> and C<..>
248
249Note that these slurp/spew subs work only for files and not for pipes
250or stdio. If you want to slurp the latter, use the standard techniques
251such as setting $/ to undef, reading <> in a list context, or printing
252all you want to STDOUT.
253
254=head2 B<read_file>
255
256This sub reads in an entire file and returns its contents to the
257caller. In list context it will return a list of lines (using the
258current value of $/ as the separator. In scalar context it returns the
259entire file as a single scalar.
260
261 my $text = read_file( 'filename' ) ;
262 my @lines = read_file( 'filename' ) ;
263
264The first argument to C<read_file> is the filename and the rest of the
265arguments are key/value pairs which are optional and which modify the
266behavior of the call. Other than binmode the options all control how
267the slurped file is returned to the caller.
268
269If the first argument is a file handle reference or I/O object (if
270fileno returns a defined value), then that handle is slurped in. This
271mode is supported so you slurp handles such as <DATA>, \*STDIN. See
272the test handle.t for an example that does C<open( '-|' )> and child
273process spews data to the parant which slurps it in. All of the
274options that control how the data is returned to the caller still work
275in this case.
276
277The options are:
278
279=head3 binmode
280
281If you set the binmode option, then the file will be slurped in binary
282mode.
283
284 my $bin_data = read_file( $bin_file, binmode => ':raw' ) ;
285
286NOTE: this actually sets the O_BINARY mode flag for sysopen. It
287probably should call binmode and pass its argument to support other
288file modes.
289
290=head3 array_ref
291
292If this boolean option is set, the return value (only in scalar
293context) will be an array reference which contains the lines of the
294slurped file. The following two calls are equivilent:
295
296 my $lines_ref = read_file( $bin_file, array_ref => 1 ) ;
297 my $lines_ref = [ read_file( $bin_file ) ] ;
298
299=head3 scalar_ref
300
301If this boolean option is set, the return value (only in scalar
302context) will be an scalar reference to a string which is the contents
303of the slurped file. This will usually be faster than returning the
304plain scalar.
305
306 my $text_ref = read_file( $bin_file, scalar_ref => 1 ) ;
307
308=head3 buf_ref
309
310You can use this option to pass in a scalar reference and the slurped
311file contents will be stored in the scalar. This can be used in
312conjunction with any of the other options.
313
314 my $text_ref = read_file( $bin_file, buf_ref => \$buffer,
315 array_ref => 1 ) ;
316 my @lines = read_file( $bin_file, buf_ref => \$buffer ) ;
317
318=head3 blk_size
319
320You can use this option to set the block size used when slurping from an already open handle (like \*STDIN). It defaults to 1MB.
321
322 my $text_ref = read_file( $bin_file, blk_size => 10_000_000,
323 array_ref => 1 ) ;
324
325=head3 err_mode
326
327You can use this option to control how read_file behaves when an error
328occurs. This option defaults to 'croak'. You can set it to 'carp' or
329to 'quiet to have no error handling. This code wants to carp and then
330read abother file if it fails.
331
332 my $text_ref = read_file( $file, err_mode => 'carp' ) ;
333 unless ( $text_ref ) {
334
335 # read a different file but croak if not found
336 $text_ref = read_file( $another_file ) ;
337 }
338
339 # process ${$text_ref}
340
341=head2 B<write_file>
342
343This sub writes out an entire file in one call.
344
345 write_file( 'filename', @data ) ;
346
347The first argument to C<write_file> is the filename. The next argument
348is an optional hash reference and it contains key/values that can
349modify the behavior of C<write_file>. The rest of the argument list is
350the data to be written to the file.
351
352 write_file( 'filename', {append => 1 }, @data ) ;
353 write_file( 'filename', {binmode => ':raw' }, $buffer ) ;
354
355As a shortcut if the first data argument is a scalar or array
356reference, it is used as the only data to be written to the file. Any
357following arguments in @_ are ignored. This is a faster way to pass in
358the output to be written to the file and is equivilent to the
359C<buf_ref> option. These following pairs are equivilent but the pass
360by reference call will be faster in most cases (especially with larger
361files).
362
363 write_file( 'filename', \$buffer ) ;
364 write_file( 'filename', $buffer ) ;
365
366 write_file( 'filename', \@lines ) ;
367 write_file( 'filename', @lines ) ;
368
369If the first argument is a file handle reference or I/O object (if
370fileno returns a defined value), then that handle is slurped in. This
371mode is supported so you spew to handles such as \*STDOUT. See the
372test handle.t for an example that does C<open( '-|' )> and child
373process spews data to the parant which slurps it in. All of the
374options that control how the data is passes into C<write_file> still
375work in this case.
376
377The options are:
378
379=head3 binmode
380
381If you set the binmode option, then the file will be written in binary
382mode.
383
384 write_file( $bin_file, {binmode => ':raw'}, @data ) ;
385
386NOTE: this actually sets the O_BINARY mode flag for sysopen. It
387probably should call binmode and pass its argument to support other
388file modes.
389
390=head3 buf_ref
391
392You can use this option to pass in a scalar reference which has the
393data to be written. If this is set then any data arguments (including
394the scalar reference shortcut) in @_ will be ignored. These are
395equivilent:
396
397 write_file( $bin_file, { buf_ref => \$buffer } ) ;
398 write_file( $bin_file, \$buffer ) ;
399 write_file( $bin_file, $buffer ) ;
400
401=head3 append
402
403If you set this boolean option, the data will be written at the end of
404the current file.
405
406 write_file( $file, {append => 1}, @data ) ;
407
408C<write_file> croaks if it cannot open the file. It returns true if it
409succeeded in writing out the file and undef if there was an
410error. (Yes, I know if it croaks it can't return anything but that is
411for when I add the options to select the error handling mode).
412
413=head3 err_mode
414
415You can use this option to control how C<write_file> behaves when an
416error occurs. This option defaults to 'croak'. You can set it to
417'carp' or to 'quiet to have no error handling. If the first call to
418C<write_file> fails it will carp and then write to another file. If the
419second call to C<write_file> fails, it will croak.
420
421 unless ( write_file( $file, { err_mode => 'carp', \$data ) ;
422
423 # write a different file but croak if not found
424 write_file( $other_file, \$data ) ;
425 }
426
427=head2 overwrite_file
428
429This sub is just a typeglob alias to write_file since write_file
430always overwrites an existing file. This sub is supported for
431backwards compatibility with the original version of this module. See
432write_file for its API and behavior.
433
434=head2 append_file
435
436This sub will write its data to the end of the file. It is a wrapper
437around write_file and it has the same API so see that for the full
438documentation. These calls are equivilent:
439
440 append_file( $file, @data ) ;
441 write_file( $file, {append => 1}, @data ) ;
442
443=head2 read_dir
444
445This sub reads all the file names from directory and returns them to
446the caller but C<.> and C<..> are removed.
447
448 my @files = read_dir( '/path/to/dir' ) ;
449
450It croaks if it cannot open the directory.
451
452=head2 EXPORT
453
454 read_file write_file overwrite_file append_file read_dir
455
456=head1 AUTHOR
457
458Uri Guttman, E<lt>uri@stemsystems.comE<gt>
459
460=cut