Commit | Line | Data |
3fea05b9 |
1 | package HTTP::Body; |
2 | |
3 | use strict; |
4 | |
5 | use Carp qw[ ]; |
6 | |
7 | our $VERSION = '1.05'; |
8 | |
9 | our $TYPES = { |
10 | 'application/octet-stream' => 'HTTP::Body::OctetStream', |
11 | 'application/x-www-form-urlencoded' => 'HTTP::Body::UrlEncoded', |
12 | 'multipart/form-data' => 'HTTP::Body::MultiPart', |
13 | 'multipart/related' => 'HTTP::Body::XFormsMultipart', |
14 | 'application/xml' => 'HTTP::Body::XForms' |
15 | }; |
16 | |
17 | require HTTP::Body::OctetStream; |
18 | require HTTP::Body::UrlEncoded; |
19 | require HTTP::Body::MultiPart; |
20 | require HTTP::Body::XFormsMultipart; |
21 | require HTTP::Body::XForms; |
22 | |
23 | use HTTP::Headers; |
24 | use HTTP::Message; |
25 | |
26 | =head1 NAME |
27 | |
28 | HTTP::Body - HTTP Body Parser |
29 | |
30 | =head1 SYNOPSIS |
31 | |
32 | use HTTP::Body; |
33 | |
34 | sub handler : method { |
35 | my ( $class, $r ) = @_; |
36 | |
37 | my $content_type = $r->headers_in->get('Content-Type'); |
38 | my $content_length = $r->headers_in->get('Content-Length'); |
39 | |
40 | my $body = HTTP::Body->new( $content_type, $content_length ); |
41 | my $length = $content_length; |
42 | |
43 | while ( $length ) { |
44 | |
45 | $r->read( my $buffer, ( $length < 8192 ) ? $length : 8192 ); |
46 | |
47 | $length -= length($buffer); |
48 | |
49 | $body->add($buffer); |
50 | } |
51 | |
52 | my $uploads = $body->upload; # hashref |
53 | my $params = $body->param; # hashref |
54 | my $body = $body->body; # IO::Handle |
55 | } |
56 | |
57 | =head1 DESCRIPTION |
58 | |
59 | HTTP::Body parses chunks of HTTP POST data and supports |
60 | application/octet-stream, application/x-www-form-urlencoded, and |
61 | multipart/form-data. |
62 | |
63 | Chunked bodies are supported by not passing a length value to new(). |
64 | |
65 | It is currently used by L<Catalyst> to parse POST bodies. |
66 | |
67 | =head1 NOTES |
68 | |
69 | When parsing multipart bodies, temporary files are created to store any |
70 | uploaded files. You must delete these temporary files yourself after |
71 | processing them. |
72 | |
73 | =head1 METHODS |
74 | |
75 | =over 4 |
76 | |
77 | =item new |
78 | |
79 | Constructor. Takes content type and content length as parameters, |
80 | returns a L<HTTP::Body> object. |
81 | |
82 | =cut |
83 | |
84 | sub new { |
85 | my ( $class, $content_type, $content_length ) = @_; |
86 | |
87 | unless ( @_ >= 2 ) { |
88 | Carp::croak( $class, '->new( $content_type, [ $content_length ] )' ); |
89 | } |
90 | |
91 | my $type; |
92 | foreach my $supported ( keys %{$TYPES} ) { |
93 | if ( index( lc($content_type), $supported ) >= 0 ) { |
94 | $type = $supported; |
95 | } |
96 | } |
97 | |
98 | my $body = $TYPES->{ $type || 'application/octet-stream' }; |
99 | |
100 | my $self = { |
101 | buffer => '', |
102 | chunk_buffer => '', |
103 | body => undef, |
104 | chunked => !defined $content_length, |
105 | content_length => defined $content_length ? $content_length : -1, |
106 | content_type => $content_type, |
107 | length => 0, |
108 | param => {}, |
109 | state => 'buffering', |
110 | upload => {}, |
111 | tmpdir => File::Spec->tmpdir(), |
112 | }; |
113 | |
114 | bless( $self, $body ); |
115 | |
116 | return $self->init; |
117 | } |
118 | |
119 | =item add |
120 | |
121 | Add string to internal buffer. Will call spin unless done. returns |
122 | length before adding self. |
123 | |
124 | =cut |
125 | |
126 | sub add { |
127 | my $self = shift; |
128 | |
129 | if ( $self->{chunked} ) { |
130 | $self->{chunk_buffer} .= $_[0]; |
131 | |
132 | while ( $self->{chunk_buffer} =~ m/^([\da-fA-F]+).*\x0D\x0A/ ) { |
133 | my $chunk_len = hex($1); |
134 | |
135 | if ( $chunk_len == 0 ) { |
136 | # Strip chunk len |
137 | $self->{chunk_buffer} =~ s/^([\da-fA-F]+).*\x0D\x0A//; |
138 | |
139 | # End of data, there may be trailing headers |
140 | if ( my ($headers) = $self->{chunk_buffer} =~ m/(.*)\x0D\x0A/s ) { |
141 | if ( my $message = HTTP::Message->parse( $headers ) ) { |
142 | $self->{trailing_headers} = $message->headers; |
143 | } |
144 | } |
145 | |
146 | $self->{chunk_buffer} = ''; |
147 | |
148 | # Set content_length equal to the amount of data we read, |
149 | # so the spin methods can finish up. |
150 | $self->{content_length} = $self->{length}; |
151 | } |
152 | else { |
153 | # Make sure we have the whole chunk in the buffer (+CRLF) |
154 | if ( length( $self->{chunk_buffer} ) >= $chunk_len ) { |
155 | # Strip chunk len |
156 | $self->{chunk_buffer} =~ s/^([\da-fA-F]+).*\x0D\x0A//; |
157 | |
158 | # Pull chunk data out of chunk buffer into real buffer |
159 | $self->{buffer} .= substr $self->{chunk_buffer}, 0, $chunk_len, ''; |
160 | |
161 | # Strip remaining CRLF |
162 | $self->{chunk_buffer} =~ s/^\x0D\x0A//; |
163 | |
164 | $self->{length} += $chunk_len; |
165 | } |
166 | else { |
167 | # Not enough data for this chunk, wait for more calls to add() |
168 | return; |
169 | } |
170 | } |
171 | |
172 | unless ( $self->{state} eq 'done' ) { |
173 | $self->spin; |
174 | } |
175 | } |
176 | |
177 | return; |
178 | } |
179 | |
180 | my $cl = $self->content_length; |
181 | |
182 | if ( defined $_[0] ) { |
183 | $self->{length} += length( $_[0] ); |
184 | |
185 | # Don't allow buffer data to exceed content-length |
186 | if ( $self->{length} > $cl ) { |
187 | $_[0] = substr $_[0], 0, $cl - $self->{length}; |
188 | $self->{length} = $cl; |
189 | } |
190 | |
191 | $self->{buffer} .= $_[0]; |
192 | } |
193 | |
194 | unless ( $self->state eq 'done' ) { |
195 | $self->spin; |
196 | } |
197 | |
198 | return ( $self->length - $cl ); |
199 | } |
200 | |
201 | =item body |
202 | |
203 | accessor for the body. |
204 | |
205 | =cut |
206 | |
207 | sub body { |
208 | my $self = shift; |
209 | $self->{body} = shift if @_; |
210 | return $self->{body}; |
211 | } |
212 | |
213 | =item chunked |
214 | |
215 | Returns 1 if the request is chunked. |
216 | |
217 | =cut |
218 | |
219 | sub chunked { |
220 | return shift->{chunked}; |
221 | } |
222 | |
223 | =item content_length |
224 | |
225 | Returns the content-length for the body data if known. |
226 | Returns -1 if the request is chunked. |
227 | |
228 | =cut |
229 | |
230 | sub content_length { |
231 | return shift->{content_length}; |
232 | } |
233 | |
234 | =item content_type |
235 | |
236 | Returns the content-type of the body data. |
237 | |
238 | =cut |
239 | |
240 | sub content_type { |
241 | return shift->{content_type}; |
242 | } |
243 | |
244 | =item init |
245 | |
246 | return self. |
247 | |
248 | =cut |
249 | |
250 | sub init { |
251 | return $_[0]; |
252 | } |
253 | |
254 | =item length |
255 | |
256 | Returns the total length of data we expect to read if known. |
257 | In the case of a chunked request, returns the amount of data |
258 | read so far. |
259 | |
260 | =cut |
261 | |
262 | sub length { |
263 | return shift->{length}; |
264 | } |
265 | |
266 | =item trailing_headers |
267 | |
268 | If a chunked request body had trailing headers, trailing_headers will |
269 | return an HTTP::Headers object populated with those headers. |
270 | |
271 | =cut |
272 | |
273 | sub trailing_headers { |
274 | return shift->{trailing_headers}; |
275 | } |
276 | |
277 | =item spin |
278 | |
279 | Abstract method to spin the io handle. |
280 | |
281 | =cut |
282 | |
283 | sub spin { |
284 | Carp::croak('Define abstract method spin() in implementation'); |
285 | } |
286 | |
287 | =item state |
288 | |
289 | Returns the current state of the parser. |
290 | |
291 | =cut |
292 | |
293 | sub state { |
294 | my $self = shift; |
295 | $self->{state} = shift if @_; |
296 | return $self->{state}; |
297 | } |
298 | |
299 | =item param |
300 | |
301 | Get/set body parameters. |
302 | |
303 | =cut |
304 | |
305 | sub param { |
306 | my $self = shift; |
307 | |
308 | if ( @_ == 2 ) { |
309 | |
310 | my ( $name, $value ) = @_; |
311 | |
312 | if ( exists $self->{param}->{$name} ) { |
313 | for ( $self->{param}->{$name} ) { |
314 | $_ = [$_] unless ref($_) eq "ARRAY"; |
315 | push( @$_, $value ); |
316 | } |
317 | } |
318 | else { |
319 | $self->{param}->{$name} = $value; |
320 | } |
321 | } |
322 | |
323 | return $self->{param}; |
324 | } |
325 | |
326 | =item upload |
327 | |
328 | Get/set file uploads. |
329 | |
330 | =cut |
331 | |
332 | sub upload { |
333 | my $self = shift; |
334 | |
335 | if ( @_ == 2 ) { |
336 | |
337 | my ( $name, $upload ) = @_; |
338 | |
339 | if ( exists $self->{upload}->{$name} ) { |
340 | for ( $self->{upload}->{$name} ) { |
341 | $_ = [$_] unless ref($_) eq "ARRAY"; |
342 | push( @$_, $upload ); |
343 | } |
344 | } |
345 | else { |
346 | $self->{upload}->{$name} = $upload; |
347 | } |
348 | } |
349 | |
350 | return $self->{upload}; |
351 | } |
352 | |
353 | =item tmpdir |
354 | |
355 | Specify a different path for temporary files. Defaults to the system temporary path. |
356 | |
357 | =cut |
358 | |
359 | sub tmpdir { |
360 | my $self = shift; |
361 | $self->{tmpdir} = shift if @_; |
362 | return $self->{tmpdir}; |
363 | } |
364 | |
365 | =back |
366 | |
367 | =head1 AUTHOR |
368 | |
369 | Christian Hansen, C<ch@ngmedia.com> |
370 | |
371 | Sebastian Riedel, C<sri@cpan.org> |
372 | |
373 | Andy Grundman, C<andy@hybridized.org> |
374 | |
375 | =head1 LICENSE |
376 | |
377 | This library is free software. You can redistribute it and/or modify |
378 | it under the same terms as perl itself. |
379 | |
380 | =cut |
381 | |
382 | 1; |