Commit | Line | Data |
32b29b79 |
1 | package HTTP::Body; |
2 | |
3 | use strict; |
4 | |
348fdd5a |
5 | use Carp qw[ ]; |
32b29b79 |
6 | |
7e2df1d9 |
7 | our $TYPES = { |
4f5db602 |
8 | 'application/octet-stream' => 'HTTP::Body::OctetStream', |
9 | 'application/x-www-form-urlencoded' => 'HTTP::Body::UrlEncoded', |
5940e4c7 |
10 | 'multipart/form-data' => 'HTTP::Body::MultiPart', |
11 | 'multipart/related' => 'HTTP::Body::XFormsMultipart', |
12 | 'application/xml' => 'HTTP::Body::XForms' |
32b29b79 |
13 | }; |
14 | |
b018320d |
15 | require HTTP::Body::OctetStream; |
16 | require HTTP::Body::UrlEncoded; |
17 | require HTTP::Body::MultiPart; |
5940e4c7 |
18 | require HTTP::Body::XFormsMultipart; |
19 | require HTTP::Body::XForms; |
b018320d |
20 | |
0a66fd23 |
21 | use HTTP::Headers; |
22 | use HTTP::Message; |
23 | |
aac7ca02 |
24 | =head1 NAME |
25 | |
26 | HTTP::Body - HTTP Body Parser |
27 | |
28 | =head1 SYNOPSIS |
29 | |
30 | use HTTP::Body; |
17c3e9b3 |
31 | |
32 | sub handler : method { |
33 | my ( $class, $r ) = @_; |
34 | |
35 | my $content_type = $r->headers_in->get('Content-Type'); |
36 | my $content_length = $r->headers_in->get('Content-Length'); |
37 | |
38 | my $body = HTTP::Body->new( $content_type, $content_length ); |
39 | my $length = $content_length; |
40 | |
41 | while ( $length ) { |
42 | |
43 | $r->read( my $buffer, ( $length < 8192 ) ? $length : 8192 ); |
44 | |
45 | $length -= length($buffer); |
46 | |
47 | $body->add($buffer); |
48 | } |
49 | |
08160cca |
50 | my $uploads = $body->upload; # hashref |
51 | my $params = $body->param; # hashref |
52 | my $param_order = $body->param_order # arrayref |
53 | my $body = $body->body; # IO::Handle |
17c3e9b3 |
54 | } |
aac7ca02 |
55 | |
56 | =head1 DESCRIPTION |
57 | |
6215b02b |
58 | HTTP::Body parses chunks of HTTP POST data and supports |
59 | application/octet-stream, application/x-www-form-urlencoded, and |
60 | multipart/form-data. |
61 | |
0a66fd23 |
62 | Chunked bodies are supported by not passing a length value to new(). |
63 | |
6215b02b |
64 | It is currently used by L<Catalyst> to parse POST bodies. |
aac7ca02 |
65 | |
1ced50e0 |
66 | =head1 NOTES |
67 | |
68 | When parsing multipart bodies, temporary files are created to store any |
69 | uploaded files. You must delete these temporary files yourself after |
b1da105b |
70 | processing them, or set $body->cleanup(1) to automatically delete them |
71 | at DESTROY-time. |
1ced50e0 |
72 | |
aac7ca02 |
73 | =head1 METHODS |
74 | |
6153c112 |
75 | =over 4 |
76 | |
77 | =item new |
78 | |
79 | Constructor. Takes content type and content length as parameters, |
80 | returns a L<HTTP::Body> object. |
aac7ca02 |
81 | |
82 | =cut |
83 | |
32b29b79 |
84 | sub new { |
85 | my ( $class, $content_type, $content_length ) = @_; |
86 | |
0a66fd23 |
87 | unless ( @_ >= 2 ) { |
88 | Carp::croak( $class, '->new( $content_type, [ $content_length ] )' ); |
32b29b79 |
89 | } |
7e2df1d9 |
90 | |
27ee4e94 |
91 | my $type; |
92 | foreach my $supported ( keys %{$TYPES} ) { |
93 | if ( index( lc($content_type), $supported ) >= 0 ) { |
94 | $type = $supported; |
95 | } |
96 | } |
97 | |
7e2df1d9 |
98 | my $body = $TYPES->{ $type || 'application/octet-stream' }; |
99 | |
32b29b79 |
100 | my $self = { |
b1da105b |
101 | cleanup => 0, |
32b29b79 |
102 | buffer => '', |
0a66fd23 |
103 | chunk_buffer => '', |
44761c00 |
104 | body => undef, |
0a66fd23 |
105 | chunked => !defined $content_length, |
106 | content_length => defined $content_length ? $content_length : -1, |
32b29b79 |
107 | content_type => $content_type, |
58050177 |
108 | length => 0, |
7e2df1d9 |
109 | param => {}, |
08160cca |
110 | param_order => [], |
7e2df1d9 |
111 | state => 'buffering', |
3debb7c0 |
112 | upload => {}, |
113 | tmpdir => File::Spec->tmpdir(), |
32b29b79 |
114 | }; |
115 | |
116 | bless( $self, $body ); |
7e2df1d9 |
117 | |
32b29b79 |
118 | return $self->init; |
119 | } |
120 | |
b1da105b |
121 | sub DESTROY { |
122 | my $self = shift; |
123 | |
124 | if ( $self->{cleanup} ) { |
125 | my @temps = (); |
126 | for my $upload ( values %{ $self->{upload} } ) { |
127 | push @temps, map { $_->{tempname} || () } |
128 | ( ref $upload eq 'ARRAY' ? @{$upload} : $upload ); |
129 | } |
130 | |
131 | unlink map { $_ } grep { -e $_ } @temps; |
132 | } |
133 | } |
134 | |
aac7ca02 |
135 | =item add |
136 | |
4deaf0f0 |
137 | Add string to internal buffer. Will call spin unless done. returns |
6153c112 |
138 | length before adding self. |
139 | |
aac7ca02 |
140 | =cut |
141 | |
32b29b79 |
142 | sub add { |
58050177 |
143 | my $self = shift; |
304dca13 |
144 | |
0a66fd23 |
145 | if ( $self->{chunked} ) { |
146 | $self->{chunk_buffer} .= $_[0]; |
147 | |
148 | while ( $self->{chunk_buffer} =~ m/^([\da-fA-F]+).*\x0D\x0A/ ) { |
149 | my $chunk_len = hex($1); |
150 | |
151 | if ( $chunk_len == 0 ) { |
152 | # Strip chunk len |
153 | $self->{chunk_buffer} =~ s/^([\da-fA-F]+).*\x0D\x0A//; |
154 | |
155 | # End of data, there may be trailing headers |
156 | if ( my ($headers) = $self->{chunk_buffer} =~ m/(.*)\x0D\x0A/s ) { |
157 | if ( my $message = HTTP::Message->parse( $headers ) ) { |
158 | $self->{trailing_headers} = $message->headers; |
159 | } |
160 | } |
161 | |
162 | $self->{chunk_buffer} = ''; |
163 | |
164 | # Set content_length equal to the amount of data we read, |
165 | # so the spin methods can finish up. |
166 | $self->{content_length} = $self->{length}; |
167 | } |
168 | else { |
169 | # Make sure we have the whole chunk in the buffer (+CRLF) |
170 | if ( length( $self->{chunk_buffer} ) >= $chunk_len ) { |
171 | # Strip chunk len |
172 | $self->{chunk_buffer} =~ s/^([\da-fA-F]+).*\x0D\x0A//; |
173 | |
174 | # Pull chunk data out of chunk buffer into real buffer |
175 | $self->{buffer} .= substr $self->{chunk_buffer}, 0, $chunk_len, ''; |
176 | |
177 | # Strip remaining CRLF |
178 | $self->{chunk_buffer} =~ s/^\x0D\x0A//; |
179 | |
180 | $self->{length} += $chunk_len; |
181 | } |
182 | else { |
183 | # Not enough data for this chunk, wait for more calls to add() |
184 | return; |
185 | } |
186 | } |
187 | |
188 | unless ( $self->{state} eq 'done' ) { |
189 | $self->spin; |
190 | } |
191 | } |
192 | |
193 | return; |
194 | } |
195 | |
304dca13 |
196 | my $cl = $self->content_length; |
7e2df1d9 |
197 | |
58050177 |
198 | if ( defined $_[0] ) { |
7e2df1d9 |
199 | $self->{length} += length( $_[0] ); |
304dca13 |
200 | |
201 | # Don't allow buffer data to exceed content-length |
202 | if ( $self->{length} > $cl ) { |
203 | $_[0] = substr $_[0], 0, $cl - $self->{length}; |
204 | $self->{length} = $cl; |
205 | } |
206 | |
207 | $self->{buffer} .= $_[0]; |
58050177 |
208 | } |
aac7ca02 |
209 | |
7e2df1d9 |
210 | unless ( $self->state eq 'done' ) { |
211 | $self->spin; |
212 | } |
213 | |
304dca13 |
214 | return ( $self->length - $cl ); |
32b29b79 |
215 | } |
216 | |
aac7ca02 |
217 | =item body |
218 | |
6153c112 |
219 | accessor for the body. |
220 | |
aac7ca02 |
221 | =cut |
222 | |
32b29b79 |
223 | sub body { |
224 | my $self = shift; |
225 | $self->{body} = shift if @_; |
226 | return $self->{body}; |
227 | } |
228 | |
0a66fd23 |
229 | =item chunked |
aac7ca02 |
230 | |
0a66fd23 |
231 | Returns 1 if the request is chunked. |
6153c112 |
232 | |
aac7ca02 |
233 | =cut |
234 | |
0a66fd23 |
235 | sub chunked { |
236 | return shift->{chunked}; |
58050177 |
237 | } |
238 | |
b1da105b |
239 | =item cleanup |
240 | |
241 | Set to 1 to enable automatic deletion of temporary files at DESTROY-time. |
242 | |
243 | =cut |
244 | |
245 | sub cleanup { |
246 | my $self = shift; |
247 | $self->{cleanup} = shift if @_; |
248 | return $self->{cleanup}; |
249 | } |
250 | |
aac7ca02 |
251 | =item content_length |
252 | |
0a66fd23 |
253 | Returns the content-length for the body data if known. |
254 | Returns -1 if the request is chunked. |
6153c112 |
255 | |
aac7ca02 |
256 | =cut |
257 | |
32b29b79 |
258 | sub content_length { |
259 | return shift->{content_length}; |
260 | } |
261 | |
aac7ca02 |
262 | =item content_type |
263 | |
0a66fd23 |
264 | Returns the content-type of the body data. |
6153c112 |
265 | |
aac7ca02 |
266 | =cut |
267 | |
32b29b79 |
268 | sub content_type { |
269 | return shift->{content_type}; |
270 | } |
271 | |
aac7ca02 |
272 | =item init |
273 | |
6153c112 |
274 | return self. |
275 | |
aac7ca02 |
276 | =cut |
277 | |
58050177 |
278 | sub init { |
279 | return $_[0]; |
280 | } |
281 | |
aac7ca02 |
282 | =item length |
283 | |
0a66fd23 |
284 | Returns the total length of data we expect to read if known. |
285 | In the case of a chunked request, returns the amount of data |
286 | read so far. |
6153c112 |
287 | |
aac7ca02 |
288 | =cut |
289 | |
58050177 |
290 | sub length { |
291 | return shift->{length}; |
292 | } |
293 | |
0a66fd23 |
294 | =item trailing_headers |
295 | |
296 | If a chunked request body had trailing headers, trailing_headers will |
297 | return an HTTP::Headers object populated with those headers. |
298 | |
299 | =cut |
300 | |
301 | sub trailing_headers { |
302 | return shift->{trailing_headers}; |
303 | } |
304 | |
aac7ca02 |
305 | =item spin |
306 | |
6153c112 |
307 | Abstract method to spin the io handle. |
308 | |
aac7ca02 |
309 | =cut |
310 | |
58050177 |
311 | sub spin { |
312 | Carp::croak('Define abstract method spin() in implementation'); |
313 | } |
314 | |
aac7ca02 |
315 | =item state |
316 | |
0a66fd23 |
317 | Returns the current state of the parser. |
6153c112 |
318 | |
aac7ca02 |
319 | =cut |
320 | |
7e2df1d9 |
321 | sub state { |
322 | my $self = shift; |
323 | $self->{state} = shift if @_; |
aac7ca02 |
324 | return $self->{state}; |
325 | } |
326 | |
aac7ca02 |
327 | =item param |
328 | |
0a66fd23 |
329 | Get/set body parameters. |
6153c112 |
330 | |
aac7ca02 |
331 | =cut |
332 | |
32b29b79 |
333 | sub param { |
334 | my $self = shift; |
335 | |
336 | if ( @_ == 2 ) { |
337 | |
338 | my ( $name, $value ) = @_; |
339 | |
340 | if ( exists $self->{param}->{$name} ) { |
341 | for ( $self->{param}->{$name} ) { |
342 | $_ = [$_] unless ref($_) eq "ARRAY"; |
343 | push( @$_, $value ); |
344 | } |
345 | } |
346 | else { |
347 | $self->{param}->{$name} = $value; |
348 | } |
08160cca |
349 | |
350 | push @{$self->{param_order}}, $name; |
32b29b79 |
351 | } |
352 | |
353 | return $self->{param}; |
354 | } |
355 | |
aac7ca02 |
356 | =item upload |
357 | |
0a66fd23 |
358 | Get/set file uploads. |
359 | |
aac7ca02 |
360 | =cut |
361 | |
32b29b79 |
362 | sub upload { |
363 | my $self = shift; |
364 | |
365 | if ( @_ == 2 ) { |
366 | |
367 | my ( $name, $upload ) = @_; |
368 | |
369 | if ( exists $self->{upload}->{$name} ) { |
370 | for ( $self->{upload}->{$name} ) { |
371 | $_ = [$_] unless ref($_) eq "ARRAY"; |
372 | push( @$_, $upload ); |
373 | } |
374 | } |
375 | else { |
376 | $self->{upload}->{$name} = $upload; |
377 | } |
378 | } |
379 | |
380 | return $self->{upload}; |
381 | } |
382 | |
3debb7c0 |
383 | =item tmpdir |
384 | |
385 | Specify a different path for temporary files. Defaults to the system temporary path. |
386 | |
387 | =cut |
388 | |
389 | sub tmpdir { |
390 | my $self = shift; |
391 | $self->{tmpdir} = shift if @_; |
392 | return $self->{tmpdir}; |
393 | } |
394 | |
08160cca |
395 | =item param_order |
396 | |
397 | Returns the array ref of the param keys in the order how they appeared on the body |
398 | |
399 | =cut |
400 | |
401 | sub param_order { |
402 | return shift->{param_order}; |
403 | } |
404 | |
aac7ca02 |
405 | =back |
406 | |
e0c37f8e |
407 | =head1 SUPPORT |
408 | |
409 | Since its original creation this module has been taken over by the Catalyst |
410 | development team. If you want to contribute patches, these will be your |
411 | primary contact points: |
412 | |
413 | IRC: |
414 | |
415 | Join #catalyst-dev on irc.perl.org. |
416 | |
417 | Mailing Lists: |
418 | |
419 | http://lists.scsys.co.uk/cgi-bin/mailman/listinfo/catalyst-dev |
420 | |
aac7ca02 |
421 | =head1 AUTHOR |
422 | |
f994d0c8 |
423 | Christian Hansen, C<chansen@cpan.org> |
17c3e9b3 |
424 | |
425 | Sebastian Riedel, C<sri@cpan.org> |
aac7ca02 |
426 | |
0a66fd23 |
427 | Andy Grundman, C<andy@hybridized.org> |
428 | |
e0c37f8e |
429 | =head1 CONTRIBUTORS |
430 | |
431 | Simon Elliott C<cpan@papercreatures.com> |
432 | |
433 | Kent Fredric <kentnl@cpan.org> |
434 | |
435 | Christian Walde |
436 | |
08160cca |
437 | Torsten Raudssus <torsten@raudssus.de> |
438 | |
aac7ca02 |
439 | =head1 LICENSE |
440 | |
17c3e9b3 |
441 | This library is free software. You can redistribute it and/or modify |
aac7ca02 |
442 | it under the same terms as perl itself. |
443 | |
444 | =cut |
445 | |
32b29b79 |
446 | 1; |