Commit | Line | Data |
8ed110f9 |
1 | package FileSlurp_12; |
2 | |
3 | use strict; |
4 | |
5 | use Carp ; |
6 | use Fcntl qw( :DEFAULT ) ; |
7 | use POSIX qw( :fcntl_h ) ; |
8 | use Symbol ; |
9 | |
10 | use base 'Exporter' ; |
11 | use vars qw( %EXPORT_TAGS @EXPORT_OK $VERSION @EXPORT ) ; |
12 | |
13 | %EXPORT_TAGS = ( 'all' => [ |
14 | qw( read_file write_file overwrite_file append_file read_dir ) ] ) ; |
15 | |
16 | @EXPORT = ( @{ $EXPORT_TAGS{'all'} } ); |
17 | @EXPORT_OK = qw( slurp ) ; |
18 | |
19 | $VERSION = '9999.13'; |
20 | |
21 | my $is_win32 = $^O =~ /win32/i ; |
22 | |
23 | # Install subs for various constants that aren't set in older perls |
24 | # (< 5.005). Fcntl on old perls uses Exporter to define subs without a |
25 | # () prototype These can't be overridden with the constant pragma or |
26 | # we get a prototype mismatch. Hence this less than aesthetically |
27 | # appealing BEGIN block: |
28 | |
29 | BEGIN { |
30 | unless( eval { defined SEEK_SET() } ) { |
31 | *SEEK_SET = sub { 0 }; |
32 | *SEEK_CUR = sub { 1 }; |
33 | *SEEK_END = sub { 2 }; |
34 | } |
35 | |
36 | unless( eval { defined O_BINARY() } ) { |
37 | *O_BINARY = sub { 0 }; |
38 | *O_RDONLY = sub { 0 }; |
39 | *O_WRONLY = sub { 1 }; |
40 | } |
41 | |
42 | unless ( eval { defined O_APPEND() } ) { |
43 | |
44 | if ( $^O =~ /olaris/ ) { |
45 | *O_APPEND = sub { 8 }; |
46 | *O_CREAT = sub { 256 }; |
47 | *O_EXCL = sub { 1024 }; |
48 | } |
49 | elsif ( $^O =~ /inux/ ) { |
50 | *O_APPEND = sub { 1024 }; |
51 | *O_CREAT = sub { 64 }; |
52 | *O_EXCL = sub { 128 }; |
53 | } |
54 | elsif ( $^O =~ /BSD/i ) { |
55 | *O_APPEND = sub { 8 }; |
56 | *O_CREAT = sub { 512 }; |
57 | *O_EXCL = sub { 2048 }; |
58 | } |
59 | } |
60 | } |
61 | |
62 | # print "OS [$^O]\n" ; |
63 | |
64 | # print "O_BINARY = ", O_BINARY(), "\n" ; |
65 | # print "O_RDONLY = ", O_RDONLY(), "\n" ; |
66 | # print "O_WRONLY = ", O_WRONLY(), "\n" ; |
67 | # print "O_APPEND = ", O_APPEND(), "\n" ; |
68 | # print "O_CREAT ", O_CREAT(), "\n" ; |
69 | # print "O_EXCL ", O_EXCL(), "\n" ; |
70 | |
71 | |
72 | *slurp = \&read_file ; |
73 | |
74 | sub read_file { |
75 | |
76 | my( $file_name, %args ) = @_ ; |
77 | |
78 | # set the buffer to either the passed in one or ours and init it to the null |
79 | # string |
80 | |
81 | my $buf ; |
82 | my $buf_ref = $args{'buf_ref'} || \$buf ; |
83 | ${$buf_ref} = '' ; |
84 | |
85 | my( $read_fh, $size_left, $blk_size ) ; |
86 | |
87 | # check if we are reading from a handle (glob ref or IO:: object) |
88 | |
89 | if ( ref $file_name ) { |
90 | |
91 | # slurping a handle so use it and don't open anything. |
92 | # set the block size so we know it is a handle and read that amount |
93 | |
94 | $read_fh = $file_name ; |
95 | $blk_size = $args{'blk_size'} || 1024 * 1024 ; |
96 | $size_left = $blk_size ; |
97 | |
98 | # DEEP DARK MAGIC. this checks the UNTAINT IO flag of a |
99 | # glob/handle. only the DATA handle is untainted (since it is from |
100 | # trusted data in the source file). this allows us to test if this is |
101 | # the DATA handle and then to do a sysseek to make sure it gets |
102 | # slurped correctly. on some systems, the buffered i/o pointer is not |
103 | # left at the same place as the fd pointer. this sysseek makes them |
104 | # the same so slurping with sysread will work. |
105 | |
106 | eval{ require B } ; |
107 | |
108 | if ( $@ ) { |
109 | |
110 | @_ = ( \%args, <<ERR ) ; |
111 | Can't find B.pm with this Perl: $!. |
112 | That module is needed to slurp the DATA handle. |
113 | ERR |
114 | goto &_error ; |
115 | } |
116 | |
117 | if ( B::svref_2object( $read_fh )->IO->IoFLAGS & 16 ) { |
118 | |
119 | # set the seek position to the current tell. |
120 | |
121 | sysseek( $read_fh, tell( $read_fh ), SEEK_SET ) || |
122 | croak "sysseek $!" ; |
123 | } |
124 | } |
125 | else { |
126 | |
127 | # a regular file. set the sysopen mode |
128 | |
129 | my $mode = O_RDONLY ; |
130 | $mode |= O_BINARY if $args{'binmode'} ; |
131 | |
132 | #printf "RD: BINARY %x MODE %x\n", O_BINARY, $mode ; |
133 | |
134 | # open the file and handle any error |
135 | |
136 | $read_fh = gensym ; |
137 | unless ( sysopen( $read_fh, $file_name, $mode ) ) { |
138 | @_ = ( \%args, "read_file '$file_name' - sysopen: $!"); |
139 | goto &_error ; |
140 | } |
141 | |
142 | # get the size of the file for use in the read loop |
143 | |
144 | $size_left = -s $read_fh ; |
145 | |
146 | unless( $size_left ) { |
147 | |
148 | $blk_size = $args{'blk_size'} || 1024 * 1024 ; |
149 | $size_left = $blk_size ; |
150 | } |
151 | } |
152 | |
153 | # infinite read loop. we exit when we are done slurping |
154 | |
155 | while( 1 ) { |
156 | |
157 | # do the read and see how much we got |
158 | |
159 | my $read_cnt = sysread( $read_fh, ${$buf_ref}, |
160 | $size_left, length ${$buf_ref} ) ; |
161 | |
162 | if ( defined $read_cnt ) { |
163 | |
164 | # good read. see if we hit EOF (nothing left to read) |
165 | |
166 | last if $read_cnt == 0 ; |
167 | |
168 | # loop if we are slurping a handle. we don't track $size_left then. |
169 | |
170 | next if $blk_size ; |
171 | |
172 | # count down how much we read and loop if we have more to read. |
173 | $size_left -= $read_cnt ; |
174 | last if $size_left <= 0 ; |
175 | next ; |
176 | } |
177 | |
178 | # handle the read error |
179 | |
180 | @_ = ( \%args, "read_file '$file_name' - sysread: $!"); |
181 | goto &_error ; |
182 | } |
183 | |
184 | # fix up cr/lf to be a newline if this is a windows text file |
185 | |
186 | ${$buf_ref} =~ s/\015\012/\n/g if $is_win32 && !$args{'binmode'} ; |
187 | |
188 | # this is the 5 returns in a row. each handles one possible |
189 | # combination of caller context and requested return type |
190 | |
191 | my $sep = $/ ; |
192 | $sep = '\n\n+' if defined $sep && $sep eq '' ; |
193 | |
194 | # caller wants to get an array ref of lines |
195 | |
196 | # this split doesn't work since it tries to use variable length lookbehind |
197 | # the m// line works. |
198 | # return [ split( m|(?<=$sep)|, ${$buf_ref} ) ] if $args{'array_ref'} ; |
199 | return [ length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () ] |
200 | if $args{'array_ref'} ; |
201 | |
202 | # caller wants a list of lines (normal list context) |
203 | |
204 | # same problem with this split as before. |
205 | # return split( m|(?<=$sep)|, ${$buf_ref} ) if wantarray ; |
206 | return length(${$buf_ref}) ? ${$buf_ref} =~ /(.*?$sep|.+)/sg : () |
207 | if wantarray ; |
208 | |
209 | # caller wants a scalar ref to the slurped text |
210 | |
211 | return $buf_ref if $args{'scalar_ref'} ; |
212 | |
213 | # caller wants a scalar with the slurped text (normal scalar context) |
214 | |
215 | return ${$buf_ref} if defined wantarray ; |
216 | |
217 | # caller passed in an i/o buffer by reference (normal void context) |
218 | |
219 | return ; |
220 | } |
221 | |
222 | |
223 | # error handling section |
224 | # |
225 | # all the error handling uses magic goto so the caller will get the |
226 | # error message as if from their code and not this module. if we just |
227 | # did a call on the error code, the carp/croak would report it from |
228 | # this module since the error sub is one level down on the call stack |
229 | # from read_file/write_file/read_dir. |
230 | |
231 | |
232 | my %err_func = ( |
233 | 'carp' => \&carp, |
234 | 'croak' => \&croak, |
235 | ) ; |
236 | |
237 | sub _error { |
238 | |
239 | my( $args, $err_msg ) = @_ ; |
240 | |
241 | # get the error function to use |
242 | |
243 | my $func = $err_func{ $args->{'err_mode'} || 'croak' } ; |
244 | |
245 | # if we didn't find it in our error function hash, they must have set |
246 | # it to quiet and we don't do anything. |
247 | |
248 | return unless $func ; |
249 | |
250 | # call the carp/croak function |
251 | |
252 | $func->($err_msg) ; |
253 | |
254 | # return a hard undef (in list context this will be a single value of |
255 | # undef which is not a legal in-band value) |
256 | |
257 | return undef ; |
258 | } |
259 | |
260 | 1; |