X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FCatalyst-Runtime.git;a=blobdiff_plain;f=lib%2FCatalyst%2FRequest.pm;h=e937fb193cf09894657850ea0a2dc51e14283289;hp=f75319b699f23a5e2f9cd327ec80136f217ca88a;hb=3b1178b01f1d71618f6a52357ec91ab40e2fc5e3;hpb=ade3da0a3602627512932eef9af511247f11634f diff --git a/lib/Catalyst/Request.pm b/lib/Catalyst/Request.pm index f75319b..e937fb1 100644 --- a/lib/Catalyst/Request.pm +++ b/lib/Catalyst/Request.pm @@ -1,20 +1,25 @@ package Catalyst::Request; -use IO::Socket qw[AF_INET inet_aton]; +use Socket qw( getaddrinfo getnameinfo AI_NUMERICHOST NI_NAMEREQD NIx_NOSERV ); use Carp; use utf8; use URI::http; use URI::https; use URI::QueryParam; use HTTP::Headers; - +use Stream::Buffered; +use Hash::MultiValue; +use Scalar::Util; +use HTTP::Body; +use Catalyst::Exception; +use Catalyst::Request::PartData; use Moose; use namespace::clean -except => 'meta'; with 'MooseX::Emulate::Class::Accessor::Fast'; -has env => (is => 'ro', writer => '_set_env'); +has env => (is => 'ro', writer => '_set_env', predicate => '_has_env'); # XXX Deprecated crap here - warn? has action => (is => 'rw'); # XXX: Deprecated in docs ages ago (2006), deprecated with warning in 5.8000 due @@ -57,7 +62,7 @@ has query_keywords => (is => 'rw'); has match => (is => 'rw'); has method => (is => 'rw'); has protocol => (is => 'rw'); -has query_parameters => (is => 'rw', default => sub { {} }); +has query_parameters => (is => 'rw', lazy=>1, default => sub { shift->_use_hash_multivalue ? Hash::MultiValue->new : +{} }); has secure => (is => 'rw', default => 0); has captures => (is => 'rw', default => sub { [] }); has uri => (is => 'rw', predicate => 'has_uri'); @@ -92,27 +97,53 @@ has _log => ( ); has io_fh => ( - is=>'ro', - predicate=>'has_io_fh', - lazy=>1, - builder=>'_build_io_fh'); + is=>'ro', + predicate=>'_has_io_fh', + lazy=>1, + builder=>'_build_io_fh'); sub _build_io_fh { my $self = shift; return $self->env->{'psgix.io'} + || ( + $self->env->{'net.async.http.server.req'} && + $self->env->{'net.async.http.server.req'}->stream) ## Until I can make ioasync cabal see the value of supportin psgix.io (jnap) || die "Your Server does not support psgix.io"; }; -has body_fh => ( - is=>'ro', - predicate=>'has_body_fh', - lazy=>1, - builder=>'_build_body_fh'); +has data_handlers => ( is=>'ro', isa=>'HashRef', default=>sub { +{} } ); -sub _build_body_fh { - (my $input_fh = shift->env->{'psgi.input'})->seek(0, 0); - return $input_fh; -}; +has body_data => ( + is=>'ro', + lazy=>1, + builder=>'_build_body_data'); + +sub _build_body_data { + my ($self) = @_; + + # Not sure if these returns should not be exceptions... + my $content_type = $self->content_type || return; + return unless ($self->method eq 'POST' || $self->method eq 'PUT' || $self->method eq 'PATCH'); + + my ($match) = grep { $content_type =~/$_/i } + keys(%{$self->data_handlers}); + + if($match) { + my $fh = $self->body; + local $_ = $fh; + return $self->data_handlers->{$match}->($fh, $self); + } else { + Catalyst::Exception->throw( + sprintf '%s does not have an available data handler. Valid data_handlers are %s.', + $content_type, join ', ', sort keys %{$self->data_handlers} + ); + } +} + +has _use_hash_multivalue => ( + is=>'ro', + required=>1, + default=> sub {0}); # Amount of data to read from input on each pass our $CHUNKSIZE = 64 * 1024; @@ -152,6 +183,7 @@ has body_parameters => ( is => 'rw', required => 1, lazy => 1, + predicate => 'has_body_parameters', builder => 'prepare_body_parameters', ); @@ -182,13 +214,16 @@ sub prepare_parameters { return $self->parameters; } - - sub _build_parameters { my ( $self ) = @_; my $parameters = {}; my $body_parameters = $self->body_parameters; my $query_parameters = $self->query_parameters; + + if($self->_use_hash_multivalue) { + return Hash::MultiValue->new($query_parameters->flatten, $body_parameters->flatten); + } + # We copy, no references foreach my $name (keys %$query_parameters) { my $param = $query_parameters->{$name}; @@ -215,30 +250,71 @@ has _uploadtmp => ( sub prepare_body { my ( $self ) = @_; - if ( my $length = $self->_read_length ) { - unless ( $self->_body ) { - my $type = $self->header('Content-Type'); - $self->_body(HTTP::Body->new( $type, $length )); - $self->_body->cleanup(1); # Make extra sure! - $self->_body->tmpdir( $self->_uploadtmp ) - if $self->_has_uploadtmp; - } + # If previously applied middleware created the HTTP::Body object, then we + # just use that one. - # Check for definedness as you could read '0' - while ( defined ( my $buffer = $self->read() ) ) { - $self->prepare_body_chunk($buffer); - } + if(my $plack_body = $self->_has_env ? $self->env->{'plack.request.http.body'} : undef) { + $self->_body($plack_body); + $self->_body->cleanup(1); + return; + } - # paranoia against wrong Content-Length header - my $remaining = $length - $self->_read_position; - if ( $remaining > 0 ) { - Catalyst::Exception->throw( - "Wrong Content-Length value: $length" ); - } + # If there is nothing to read, set body to naught and return. This + # will cause all body code to be skipped + + return $self->_body(0) unless my $length = $self->_read_length; + + # Unless the body has already been set, create it. Not sure about this + # code, how else might it be set, but this was existing logic. + + unless ($self->_body) { + my $type = $self->header('Content-Type'); + $self->_body(HTTP::Body->new( $type, $length )); + $self->_body->cleanup(1); + + # JNAP: I'm not sure this is doing what we expect, but it also doesn't + # seem to be hurting (seems ->_has_uploadtmp is true more than I would + # expect. + + $self->_body->tmpdir( $self->_uploadtmp ) + if $self->_has_uploadtmp; } - else { - # Defined but will cause all body code to be skipped - $self->_body(0); + + # Ok if we get this far, we have to read psgi.input into the new body + # object. Lets play nice with any plack app or other downstream, so + # we create a buffer unless one exists. + + my $stream_buffer; + if ($self->env->{'psgix.input.buffered'}) { + # Be paranoid about previous psgi middleware or apps that read the + # input but didn't return the buffer to the start. + $self->env->{'psgi.input'}->seek(0, 0); + } else { + $stream_buffer = Stream::Buffered->new($length); + } + + # Check for definedness as you could read '0' + while ( defined ( my $chunk = $self->read() ) ) { + $self->prepare_body_chunk($chunk); + next unless $stream_buffer; + + $stream_buffer->print($chunk) + || die sprintf "Failed to write %d bytes to psgi.input file: $!", length( $chunk ); + } + + # Ok, we read the body. Lets play nice for any PSGI app down the pipe + + if ($stream_buffer) { + $self->env->{'psgix.input.buffered'} = 1; + $self->env->{'psgi.input'} = $stream_buffer->rewind; + } else { + $self->env->{'psgi.input'}->seek(0, 0); # Reset the buffer for downstream middleware or apps + } + + # paranoia against wrong Content-Length header + my $remaining = $length - $self->_read_position; + if ( $remaining > 0 ) { + Catalyst::Exception->throw("Wrong Content-Length value: $length" ); } } @@ -249,12 +325,60 @@ sub prepare_body_chunk { } sub prepare_body_parameters { - my ( $self ) = @_; - + my ( $self, $c ) = @_; + return $self->body_parameters if $self->has_body_parameters; $self->prepare_body if ! $self->_has_body; - return {} unless $self->_body; - return $self->_body->param; + unless($self->_body) { + my $return = $self->_use_hash_multivalue ? Hash::MultiValue->new : {}; + $self->body_parameters($return); + return $return; + } + + my $params; + my %part_data = %{$self->_body->part_data}; + if(scalar %part_data && !$c->config->{skip_complex_post_part_handling}) { + foreach my $key (keys %part_data) { + my $proto_value = $part_data{$key}; + my ($val, @extra) = (ref($proto_value)||'') eq 'ARRAY' ? @$proto_value : ($proto_value); + + $key = $c->_handle_param_unicode_decoding($key) + if ($c and $c->encoding and !$c->config->{skip_body_param_unicode_decoding}); + + if(@extra) { + $params->{$key} = [map { Catalyst::Request::PartData->build_from_part_data($c, $_) } ($val,@extra)]; + } else { + $params->{$key} = Catalyst::Request::PartData->build_from_part_data($c, $val); + } + } + } else { + $params = $self->_body->param; + + # If we have an encoding configured (like UTF-8) in general we expect a client + # to POST with the encoding we fufilled the request in. Otherwise don't do any + # encoding (good change wide chars could be in HTML entity style llike the old + # days -JNAP + + # so, now that HTTP::Body prepared the body params, we gotta 'walk' the structure + # and do any needed decoding. + + # This only does something if the encoding is set via the encoding param. Remember + # this is assuming the client is not bad and responds with what you provided. In + # general you can just use utf8 and get away with it. + # + # I need to see if $c is here since this also doubles as a builder for the object :( + + if($c and $c->encoding and !$c->config->{skip_body_param_unicode_decoding}) { + $params = $c->_handle_unicode_decoding($params); + } + } + + my $return = $self->_use_hash_multivalue ? + Hash::MultiValue->from_mixed($params) : + $params; + + $self->body_parameters($return) unless $self->has_body_parameters; + return $return; } sub prepare_connection { @@ -304,7 +428,7 @@ has _body => ( # and provide a custom reader.. sub body { my $self = shift; - $self->prepare_body unless ! $self->_has_body; + $self->prepare_body unless $self->_has_body; croak 'body is a reader' if scalar @_; return blessed $self->_body ? $self->_body->body : $self->_body; } @@ -315,7 +439,21 @@ has hostname => ( lazy => 1, default => sub { my ($self) = @_; - gethostbyaddr( inet_aton( $self->address ), AF_INET ) || $self->address + my ( $err, $sockaddr ) = getaddrinfo( + $self->address, + # no service + '', + { flags => AI_NUMERICHOST } + ); + return $self->address + if $err; + ( $err, my $hostname ) = getnameinfo( + $sockaddr->{addr}, + NI_NAMEREQD, + # we are only interested in the hostname, not the servicename + NIx_NOSERV + ); + return $err ? $self->address : $hostname; }, ); @@ -342,6 +480,7 @@ Catalyst::Request - provides information about the current client request $req->args; $req->base; $req->body; + $req->body_data; $req->body_parameters; $req->content_encoding; $req->content_length; @@ -370,6 +509,7 @@ Catalyst::Request - provides information about the current client request $req->uri; $req->user; $req->user_agent; + $req->env; See also L, L. @@ -424,6 +564,21 @@ Returns the message body of the request, as returned by L: a string, unless Content-Type is C, C, or C, in which case a L object is returned. +=head2 $req->body_data + +Returns a Perl representation of POST/PUT body data that is not classic HTML +form data, such as JSON, XML, etc. By default, Catalyst will parse incoming +data of the type 'application/json' and return access to that data via this +method. You may define addition data_handlers via a global configuration +setting. See L for more information. + +If the POST is malformed in some way (such as undefined or not content that +matches the content-type) we raise a L with the error +text as the message. + +If the POSTed content type does not match an available data handler, this +will also raise an exception. + =head2 $req->body_parameters Returns a reference to a hash containing body (POST) parameters. Values can @@ -434,6 +589,16 @@ be either a scalar or an arrayref containing scalars. These are the parameters from the POST part of the request, if any. +B If your POST is multipart, but contains non file upload parts (such +as an line part with an alternative encoding or content type) we do our best to +try and figure out how the value should be presented. If there's a specified character +set we will use that to decode rather than the default encoding set by the application. +However if there are complex headers and we cannot determine +the correct way to extra a meaningful value from the upload, in this case any +part like this will be represented as an instance of L. + +Patches and review of this part of the code welcomed. + =head2 $req->body_params Shortcut for body_parameters. @@ -558,6 +723,60 @@ If multiple C parameters are provided this code might corrupt data or cause a hash initialization error. For a more straightforward interface see C<< $c->req->parameters >>. +B Interfaces like this, which are based on L and the C method +are known to cause demonstrated exploits. It is highly recommended that you +avoid using this method, and migrate existing code away from it. Here's a +whitepaper of the exploit: + +L + +B Further discussion on IRC indicate that the L core team from 'back then' +were well aware of this hack and this is the main reason we added the new approach to +getting parameters in the first place. + +Basically this is an exploit that takes advantage of how L<\param> will do one thing +in scalar context and another thing in list context. This is combined with how Perl +chooses to deal with duplicate keys in a hash definition by overwriting the value of +existing keys with a new value if the same key shows up again. Generally you will be +vulnerable to this exploit if you are using this method in a direct assignment in a +hash, such as with a L create statement. For example, if you have +parameters like: + + user?user=123&foo=a&foo=user&foo=456 + +You could end up with extra parameters injected into your method calls: + + $c->model('User')->create({ + user => $c->req->param('user'), + foo => $c->req->param('foo'), + }); + +Which would look like: + + $c->model('User')->create({ + user => 123, + foo => qw(a user 456), + }); + +(or to be absolutely clear if you are not seeing it): + + $c->model('User')->create({ + user => 456, + foo => 'a', + }); + +Possible remediations include scrubbing your parameters with a form validator like +L or being careful to force scalar context using the scalar +keyword: + + $c->model('User')->create({ + user => scalar($c->req->param('user')), + foo => scalar($c->req->param('foo')), + }); + +Upcoming versions of L will disable this interface by default and require +you to positively enable it should you require it for backwards compatibility reasons. + =cut sub param { @@ -567,9 +786,15 @@ sub param { return keys %{ $self->parameters }; } - if ( @_ == 1 ) { + # If anything in @_ is undef, carp about that, and remove it from + # the list; + + my @params = grep { defined($_) ? 1 : do {carp "You called ->params with an undefined value"; 0} } @_; - my $param = shift; + if ( @params == 1 ) { + + defined(my $param = shift @params) || + carp "You called ->params with an undefined value 2"; unless ( exists $self->parameters->{$param} ) { return wantarray ? () : undef; @@ -586,9 +811,9 @@ sub param { : $self->parameters->{$param}; } } - elsif ( @_ > 1 ) { - my $field = shift; - $self->parameters->{$field} = [@_]; + elsif ( @params > 1 ) { + my $field = shift @params; + $self->parameters->{$field} = [@params]; } } @@ -793,7 +1018,7 @@ sub mangle_params { next unless defined $value; for ( ref $value eq 'ARRAY' ? @$value : $value ) { $_ = "$_"; - utf8::encode( $_ ) if utf8::is_utf8($_); + # utf8::encode($_); } }; @@ -912,6 +1137,9 @@ combined from those in the request and those in the body. If parameters have already been set will clear the parameters and build them again. +=head2 $self->env + +Access to the raw PSGI env. =head2 meta