Commit | Line | Data |
32b29b79 |
1 | package HTTP::Body; |
2 | |
3 | use strict; |
4 | |
348fdd5a |
5 | use Carp qw[ ]; |
32b29b79 |
6 | |
7e2df1d9 |
7 | our $TYPES = { |
4f5db602 |
8 | 'application/octet-stream' => 'HTTP::Body::OctetStream', |
9 | 'application/x-www-form-urlencoded' => 'HTTP::Body::UrlEncoded', |
5940e4c7 |
10 | 'multipart/form-data' => 'HTTP::Body::MultiPart', |
11 | 'multipart/related' => 'HTTP::Body::XFormsMultipart', |
e4ea9403 |
12 | 'application/xml' => 'HTTP::Body::XForms', |
13 | 'application/json' => 'HTTP::Body::OctetStream', |
32b29b79 |
14 | }; |
15 | |
b018320d |
16 | require HTTP::Body::OctetStream; |
17 | require HTTP::Body::UrlEncoded; |
18 | require HTTP::Body::MultiPart; |
5940e4c7 |
19 | require HTTP::Body::XFormsMultipart; |
20 | require HTTP::Body::XForms; |
b018320d |
21 | |
0a66fd23 |
22 | use HTTP::Headers; |
23 | use HTTP::Message; |
24 | |
aac7ca02 |
25 | =head1 NAME |
26 | |
27 | HTTP::Body - HTTP Body Parser |
28 | |
29 | =head1 SYNOPSIS |
30 | |
31 | use HTTP::Body; |
17c3e9b3 |
32 | |
33 | sub handler : method { |
34 | my ( $class, $r ) = @_; |
35 | |
36 | my $content_type = $r->headers_in->get('Content-Type'); |
37 | my $content_length = $r->headers_in->get('Content-Length'); |
38 | |
39 | my $body = HTTP::Body->new( $content_type, $content_length ); |
40 | my $length = $content_length; |
41 | |
42 | while ( $length ) { |
43 | |
44 | $r->read( my $buffer, ( $length < 8192 ) ? $length : 8192 ); |
45 | |
46 | $length -= length($buffer); |
47 | |
48 | $body->add($buffer); |
49 | } |
50 | |
08160cca |
51 | my $uploads = $body->upload; # hashref |
52 | my $params = $body->param; # hashref |
53 | my $param_order = $body->param_order # arrayref |
54 | my $body = $body->body; # IO::Handle |
17c3e9b3 |
55 | } |
aac7ca02 |
56 | |
57 | =head1 DESCRIPTION |
58 | |
e4ea9403 |
59 | HTTP::Body parses chunks of HTTP POST data and supports |
60 | application/octet-stream, application/json, application/x-www-form-urlencoded, |
61 | and multipart/form-data. |
6215b02b |
62 | |
0a66fd23 |
63 | Chunked bodies are supported by not passing a length value to new(). |
64 | |
cc75c886 |
65 | It is currently used by L<Catalyst>, L<Dancer>, L<Maypole>, L<Web::Simple> and |
66 | L<Jedi>. |
aac7ca02 |
67 | |
1ced50e0 |
68 | =head1 NOTES |
69 | |
70 | When parsing multipart bodies, temporary files are created to store any |
71 | uploaded files. You must delete these temporary files yourself after |
cc75c886 |
72 | processing them, or set $body->cleanup(1) to automatically delete them at |
73 | DESTROY-time. |
74 | |
75 | With version 1.23, we have changed the basic behavior of how temporary files |
76 | are prepared for uploads. The extension of the file is no longer transferred |
77 | to the temporary file, the extension will always be C<.upload>. We have also |
78 | introduced variables that make it possible to set the behavior as required. |
79 | |
80 | =over 4 |
81 | |
82 | =item $HTTP::Body::MultiPart::file_temp_suffix |
83 | |
84 | This is the extension that is given to all multipart files. The default |
85 | setting here is C<.upload>. If you want the old behavior from before version |
86 | 1.23, simply undefine the value here. |
87 | |
88 | =item $HTTP::Body::MultiPart::basename_regexp |
89 | |
90 | This is the regexp used to determine out the file extension. This is of |
91 | course no longer necessary, unless you undefine |
92 | C<HTTP::Body::MultiPart::file_temp_suffix>. |
93 | |
94 | =item $HTTP::Body::MultiPart::file_temp_template |
95 | |
96 | This gets passed through to the L<File::Temp> TEMPLATE parameter. There is no |
97 | special default in our module. |
98 | |
99 | =item %HTTP::Body::MultiPart::file_temp_parameters |
100 | |
101 | In this hash you can add up custom settings for the L<File::Temp> invokation. |
102 | Those override every other setting. |
103 | |
104 | =back |
1ced50e0 |
105 | |
aac7ca02 |
106 | =head1 METHODS |
107 | |
6153c112 |
108 | =over 4 |
109 | |
110 | =item new |
111 | |
112 | Constructor. Takes content type and content length as parameters, |
113 | returns a L<HTTP::Body> object. |
aac7ca02 |
114 | |
115 | =cut |
116 | |
32b29b79 |
117 | sub new { |
118 | my ( $class, $content_type, $content_length ) = @_; |
119 | |
0a66fd23 |
120 | unless ( @_ >= 2 ) { |
121 | Carp::croak( $class, '->new( $content_type, [ $content_length ] )' ); |
32b29b79 |
122 | } |
7e2df1d9 |
123 | |
27ee4e94 |
124 | my $type; |
e4ea9403 |
125 | my $earliest_index; |
27ee4e94 |
126 | foreach my $supported ( keys %{$TYPES} ) { |
e4ea9403 |
127 | my $index = index( lc($content_type), $supported ); |
128 | if ($index >= 0 && (!defined $earliest_index || $index < $earliest_index)) { |
129 | $type = $supported; |
130 | $earliest_index = $index; |
27ee4e94 |
131 | } |
132 | } |
133 | |
7e2df1d9 |
134 | my $body = $TYPES->{ $type || 'application/octet-stream' }; |
135 | |
32b29b79 |
136 | my $self = { |
b1da105b |
137 | cleanup => 0, |
32b29b79 |
138 | buffer => '', |
0a66fd23 |
139 | chunk_buffer => '', |
44761c00 |
140 | body => undef, |
0a66fd23 |
141 | chunked => !defined $content_length, |
142 | content_length => defined $content_length ? $content_length : -1, |
32b29b79 |
143 | content_type => $content_type, |
58050177 |
144 | length => 0, |
7e2df1d9 |
145 | param => {}, |
08160cca |
146 | param_order => [], |
7e2df1d9 |
147 | state => 'buffering', |
3debb7c0 |
148 | upload => {}, |
2f14a496 |
149 | part_data => {}, |
3debb7c0 |
150 | tmpdir => File::Spec->tmpdir(), |
32b29b79 |
151 | }; |
152 | |
153 | bless( $self, $body ); |
7e2df1d9 |
154 | |
32b29b79 |
155 | return $self->init; |
156 | } |
157 | |
b1da105b |
158 | sub DESTROY { |
159 | my $self = shift; |
160 | |
161 | if ( $self->{cleanup} ) { |
162 | my @temps = (); |
163 | for my $upload ( values %{ $self->{upload} } ) { |
164 | push @temps, map { $_->{tempname} || () } |
165 | ( ref $upload eq 'ARRAY' ? @{$upload} : $upload ); |
166 | } |
167 | |
168 | unlink map { $_ } grep { -e $_ } @temps; |
169 | } |
170 | } |
171 | |
aac7ca02 |
172 | =item add |
173 | |
4deaf0f0 |
174 | Add string to internal buffer. Will call spin unless done. returns |
6153c112 |
175 | length before adding self. |
176 | |
aac7ca02 |
177 | =cut |
178 | |
32b29b79 |
179 | sub add { |
58050177 |
180 | my $self = shift; |
304dca13 |
181 | |
0a66fd23 |
182 | if ( $self->{chunked} ) { |
183 | $self->{chunk_buffer} .= $_[0]; |
184 | |
185 | while ( $self->{chunk_buffer} =~ m/^([\da-fA-F]+).*\x0D\x0A/ ) { |
186 | my $chunk_len = hex($1); |
187 | |
188 | if ( $chunk_len == 0 ) { |
189 | # Strip chunk len |
190 | $self->{chunk_buffer} =~ s/^([\da-fA-F]+).*\x0D\x0A//; |
191 | |
192 | # End of data, there may be trailing headers |
193 | if ( my ($headers) = $self->{chunk_buffer} =~ m/(.*)\x0D\x0A/s ) { |
194 | if ( my $message = HTTP::Message->parse( $headers ) ) { |
195 | $self->{trailing_headers} = $message->headers; |
196 | } |
197 | } |
198 | |
199 | $self->{chunk_buffer} = ''; |
200 | |
201 | # Set content_length equal to the amount of data we read, |
202 | # so the spin methods can finish up. |
203 | $self->{content_length} = $self->{length}; |
204 | } |
205 | else { |
206 | # Make sure we have the whole chunk in the buffer (+CRLF) |
207 | if ( length( $self->{chunk_buffer} ) >= $chunk_len ) { |
208 | # Strip chunk len |
209 | $self->{chunk_buffer} =~ s/^([\da-fA-F]+).*\x0D\x0A//; |
210 | |
211 | # Pull chunk data out of chunk buffer into real buffer |
212 | $self->{buffer} .= substr $self->{chunk_buffer}, 0, $chunk_len, ''; |
213 | |
214 | # Strip remaining CRLF |
215 | $self->{chunk_buffer} =~ s/^\x0D\x0A//; |
216 | |
217 | $self->{length} += $chunk_len; |
218 | } |
219 | else { |
220 | # Not enough data for this chunk, wait for more calls to add() |
221 | return; |
222 | } |
223 | } |
224 | |
225 | unless ( $self->{state} eq 'done' ) { |
226 | $self->spin; |
227 | } |
228 | } |
229 | |
230 | return; |
231 | } |
232 | |
304dca13 |
233 | my $cl = $self->content_length; |
7e2df1d9 |
234 | |
58050177 |
235 | if ( defined $_[0] ) { |
7e2df1d9 |
236 | $self->{length} += length( $_[0] ); |
304dca13 |
237 | |
238 | # Don't allow buffer data to exceed content-length |
239 | if ( $self->{length} > $cl ) { |
240 | $_[0] = substr $_[0], 0, $cl - $self->{length}; |
241 | $self->{length} = $cl; |
242 | } |
243 | |
244 | $self->{buffer} .= $_[0]; |
58050177 |
245 | } |
aac7ca02 |
246 | |
7e2df1d9 |
247 | unless ( $self->state eq 'done' ) { |
248 | $self->spin; |
249 | } |
250 | |
304dca13 |
251 | return ( $self->length - $cl ); |
32b29b79 |
252 | } |
253 | |
aac7ca02 |
254 | =item body |
255 | |
6153c112 |
256 | accessor for the body. |
257 | |
aac7ca02 |
258 | =cut |
259 | |
32b29b79 |
260 | sub body { |
261 | my $self = shift; |
262 | $self->{body} = shift if @_; |
263 | return $self->{body}; |
264 | } |
265 | |
0a66fd23 |
266 | =item chunked |
aac7ca02 |
267 | |
0a66fd23 |
268 | Returns 1 if the request is chunked. |
6153c112 |
269 | |
aac7ca02 |
270 | =cut |
271 | |
0a66fd23 |
272 | sub chunked { |
273 | return shift->{chunked}; |
58050177 |
274 | } |
275 | |
b1da105b |
276 | =item cleanup |
277 | |
278 | Set to 1 to enable automatic deletion of temporary files at DESTROY-time. |
279 | |
280 | =cut |
281 | |
282 | sub cleanup { |
283 | my $self = shift; |
284 | $self->{cleanup} = shift if @_; |
285 | return $self->{cleanup}; |
286 | } |
287 | |
aac7ca02 |
288 | =item content_length |
289 | |
0a66fd23 |
290 | Returns the content-length for the body data if known. |
291 | Returns -1 if the request is chunked. |
6153c112 |
292 | |
aac7ca02 |
293 | =cut |
294 | |
32b29b79 |
295 | sub content_length { |
296 | return shift->{content_length}; |
297 | } |
298 | |
aac7ca02 |
299 | =item content_type |
300 | |
0a66fd23 |
301 | Returns the content-type of the body data. |
6153c112 |
302 | |
aac7ca02 |
303 | =cut |
304 | |
32b29b79 |
305 | sub content_type { |
306 | return shift->{content_type}; |
307 | } |
308 | |
aac7ca02 |
309 | =item init |
310 | |
6153c112 |
311 | return self. |
312 | |
aac7ca02 |
313 | =cut |
314 | |
58050177 |
315 | sub init { |
316 | return $_[0]; |
317 | } |
318 | |
aac7ca02 |
319 | =item length |
320 | |
0a66fd23 |
321 | Returns the total length of data we expect to read if known. |
322 | In the case of a chunked request, returns the amount of data |
323 | read so far. |
6153c112 |
324 | |
aac7ca02 |
325 | =cut |
326 | |
58050177 |
327 | sub length { |
328 | return shift->{length}; |
329 | } |
330 | |
0a66fd23 |
331 | =item trailing_headers |
332 | |
333 | If a chunked request body had trailing headers, trailing_headers will |
334 | return an HTTP::Headers object populated with those headers. |
335 | |
336 | =cut |
337 | |
338 | sub trailing_headers { |
339 | return shift->{trailing_headers}; |
340 | } |
341 | |
aac7ca02 |
342 | =item spin |
343 | |
6153c112 |
344 | Abstract method to spin the io handle. |
345 | |
aac7ca02 |
346 | =cut |
347 | |
58050177 |
348 | sub spin { |
349 | Carp::croak('Define abstract method spin() in implementation'); |
350 | } |
351 | |
aac7ca02 |
352 | =item state |
353 | |
0a66fd23 |
354 | Returns the current state of the parser. |
6153c112 |
355 | |
aac7ca02 |
356 | =cut |
357 | |
7e2df1d9 |
358 | sub state { |
359 | my $self = shift; |
360 | $self->{state} = shift if @_; |
aac7ca02 |
361 | return $self->{state}; |
362 | } |
363 | |
aac7ca02 |
364 | =item param |
365 | |
0a66fd23 |
366 | Get/set body parameters. |
6153c112 |
367 | |
aac7ca02 |
368 | =cut |
369 | |
32b29b79 |
370 | sub param { |
371 | my $self = shift; |
372 | |
373 | if ( @_ == 2 ) { |
374 | |
375 | my ( $name, $value ) = @_; |
376 | |
377 | if ( exists $self->{param}->{$name} ) { |
378 | for ( $self->{param}->{$name} ) { |
379 | $_ = [$_] unless ref($_) eq "ARRAY"; |
380 | push( @$_, $value ); |
381 | } |
382 | } |
383 | else { |
384 | $self->{param}->{$name} = $value; |
385 | } |
08160cca |
386 | |
387 | push @{$self->{param_order}}, $name; |
32b29b79 |
388 | } |
389 | |
390 | return $self->{param}; |
391 | } |
392 | |
aac7ca02 |
393 | =item upload |
394 | |
0a66fd23 |
395 | Get/set file uploads. |
396 | |
aac7ca02 |
397 | =cut |
398 | |
32b29b79 |
399 | sub upload { |
400 | my $self = shift; |
401 | |
402 | if ( @_ == 2 ) { |
403 | |
404 | my ( $name, $upload ) = @_; |
405 | |
406 | if ( exists $self->{upload}->{$name} ) { |
407 | for ( $self->{upload}->{$name} ) { |
408 | $_ = [$_] unless ref($_) eq "ARRAY"; |
409 | push( @$_, $upload ); |
410 | } |
411 | } |
412 | else { |
413 | $self->{upload}->{$name} = $upload; |
414 | } |
415 | } |
416 | |
417 | return $self->{upload}; |
418 | } |
419 | |
2f14a496 |
420 | =item part_data |
421 | |
422 | Just like 'param' but gives you a hash of the full data associated with the |
423 | part in a multipart type POST/PUT. Example: |
424 | |
425 | { |
426 | data => "test", |
427 | done => 1, |
428 | headers => { |
429 | "Content-Disposition" => "form-data; name=\"arg2\"", |
430 | "Content-Type" => "text/plain" |
431 | }, |
432 | name => "arg2", |
433 | size => 4 |
434 | } |
435 | |
436 | =cut |
437 | |
438 | sub part_data { |
439 | my $self = shift; |
440 | |
441 | if ( @_ == 2 ) { |
442 | |
443 | my ( $name, $data ) = @_; |
444 | |
445 | if ( exists $self->{part_data}->{$name} ) { |
446 | for ( $self->{part_data}->{$name} ) { |
447 | $_ = [$_] unless ref($_) eq "ARRAY"; |
448 | push( @$_, $data ); |
449 | } |
450 | } |
451 | else { |
452 | $self->{part_data}->{$name} = $data; |
453 | } |
454 | } |
455 | |
456 | return $self->{part_data}; |
457 | } |
458 | |
3debb7c0 |
459 | =item tmpdir |
460 | |
461 | Specify a different path for temporary files. Defaults to the system temporary path. |
462 | |
463 | =cut |
464 | |
465 | sub tmpdir { |
466 | my $self = shift; |
467 | $self->{tmpdir} = shift if @_; |
468 | return $self->{tmpdir}; |
469 | } |
470 | |
08160cca |
471 | =item param_order |
472 | |
473 | Returns the array ref of the param keys in the order how they appeared on the body |
474 | |
475 | =cut |
476 | |
477 | sub param_order { |
478 | return shift->{param_order}; |
479 | } |
480 | |
aac7ca02 |
481 | =back |
482 | |
e0c37f8e |
483 | =head1 SUPPORT |
484 | |
485 | Since its original creation this module has been taken over by the Catalyst |
cc75c886 |
486 | development team. If you need general support using this module: |
487 | |
488 | IRC: |
489 | |
490 | Join #catalyst on irc.perl.org. |
491 | |
492 | Mailing Lists: |
493 | |
494 | http://lists.scsys.co.uk/cgi-bin/mailman/listinfo/catalyst |
495 | |
496 | If you want to contribute patches, these will be your |
e0c37f8e |
497 | primary contact points: |
498 | |
499 | IRC: |
500 | |
501 | Join #catalyst-dev on irc.perl.org. |
502 | |
503 | Mailing Lists: |
504 | |
505 | http://lists.scsys.co.uk/cgi-bin/mailman/listinfo/catalyst-dev |
506 | |
aac7ca02 |
507 | =head1 AUTHOR |
508 | |
f994d0c8 |
509 | Christian Hansen, C<chansen@cpan.org> |
17c3e9b3 |
510 | |
511 | Sebastian Riedel, C<sri@cpan.org> |
aac7ca02 |
512 | |
0a66fd23 |
513 | Andy Grundman, C<andy@hybridized.org> |
514 | |
e0c37f8e |
515 | =head1 CONTRIBUTORS |
516 | |
517 | Simon Elliott C<cpan@papercreatures.com> |
518 | |
cc75c886 |
519 | Kent Fredric C<kentnl@cpan.org> |
e0c37f8e |
520 | |
cc75c886 |
521 | Christian Walde C<walde.christian@gmail.com> |
e0c37f8e |
522 | |
cc75c886 |
523 | Torsten Raudssus C<torsten@raudssus.de> |
08160cca |
524 | |
aac7ca02 |
525 | =head1 LICENSE |
526 | |
17c3e9b3 |
527 | This library is free software. You can redistribute it and/or modify |
aac7ca02 |
528 | it under the same terms as perl itself. |
529 | |
530 | =cut |
531 | |
32b29b79 |
532 | 1; |