From: John Napiorkowski Date: Mon, 23 Feb 2015 20:46:17 +0000 (-0600) Subject: when a POST supplies content encoding, dont assume UTF8 X-Git-Tag: 5.90084~4 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FCatalyst-Runtime.git;a=commitdiff_plain;h=0d94e986178610515926806b405beeab19457a36;hp=ddc88fbd6b6db4088181ad8f59b4f74fea58a9ef when a POST supplies content encoding, dont assume UTF8 --- diff --git a/Changes b/Changes index 5f6f06f..168a0fb 100644 --- a/Changes +++ b/Changes @@ -1,8 +1,16 @@ # This file documents the revision history for Perl extension Catalyst. 5.90084 - 2015-02-XX + - Small change to the way body parameters are created in order to prevent + trying to create parameters twice. - Use new HTTP::Body and code updates to fix issue when POSTed params have - non UTF-8 charset encodings. Documentation about this. + non UTF-8 charset encodings or otherwise complex upload parts that are not + file uploads. Documentation about this. + - Two new application configuration parameters 'skip_body_param_unicode_decoding' + and 'skip_complex_post_part_handling' to assist you with any backward + compatibility issues with all the new UTF8 work in the most recent stable + Catalyst. You may use these settings to TEMPORARILY disable certain new + features while you are seeking a long term fix. 5.90083 - 2015-02-16 - Fixed typo in support for OPTIONS method matching (andre++) diff --git a/Makefile.PL b/Makefile.PL index a5ad60c..a0aeb41 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -42,7 +42,7 @@ requires 'Data::Dump'; requires 'Data::OptList'; requires 'HTML::Entities'; requires 'HTML::HeadParser'; -requires 'HTTP::Body' => '1.20'; +requires 'HTTP::Body' => '1.22'; requires 'HTTP::Headers' => '1.64'; requires 'HTTP::Request' => '5.814'; requires 'HTTP::Response' => '5.813'; diff --git a/lib/Catalyst.pm b/lib/Catalyst.pm index b9c6d76..e384bbb 100644 --- a/lib/Catalyst.pm +++ b/lib/Catalyst.pm @@ -3233,6 +3233,7 @@ sub _handle_unicode_decoding { sub _handle_param_unicode_decoding { my ( $self, $value ) = @_; return unless defined $value; # not in love with just ignoring undefs - jnap + return $value if blessed($value); #don't decode when the value is an object. my $enc = $self->encoding; return try { @@ -3882,6 +3883,27 @@ backwardly compatible). =item * +C + +When creating body parameters from a POST, if we run into a multpart POST +that does not contain uploads, but instead contains inlined complex data +(very uncommon) we cannot reliably convert that into field => value pairs. So +instead we create an instance of L. If this causes +issue for you, you can disable this by setting C +to true (default is false). + +=item * + +C + +Generally we decode incoming POST params based on your declared encoding (the +default for this is to decode UTF-8). If this is causing you trouble and you +do not wish to turn all encoding support off (with the C configuration +parameter) you may disable this step atomically by setting this configuration +parameter to true. + +=item * + C - See L. =item * diff --git a/lib/Catalyst/Request.pm b/lib/Catalyst/Request.pm index 0cfcbae..53f9337 100644 --- a/lib/Catalyst/Request.pm +++ b/lib/Catalyst/Request.pm @@ -12,6 +12,7 @@ use Hash::MultiValue; use Scalar::Util; use HTTP::Body; use Catalyst::Exception; +use Catalyst::Request::PartData; use Moose; use namespace::clean -except => 'meta'; @@ -179,6 +180,7 @@ has body_parameters => ( is => 'rw', required => 1, lazy => 1, + predicate => 'has_body_parameters', builder => 'prepare_body_parameters', ); @@ -318,14 +320,31 @@ sub prepare_body_chunk { sub prepare_body_parameters { my ( $self, $c ) = @_; - + return $self->body_parameters if $self->has_body_parameters; $self->prepare_body if ! $self->_has_body; unless($self->_body) { - return $self->_use_hash_multivalue ? Hash::MultiValue->new : {}; + my $return = $self->_use_hash_multivalue ? Hash::MultiValue->new : {}; + $self->body_parameters($return); + return $return; } - my $params = $self->_body->param; + my $params; + my %part_data = %{$self->_body->part_data}; + if(scalar %part_data && !$c->config->{skip_complex_post_part_handling}) { + foreach my $key (keys %part_data) { + my $proto_value = $part_data{$key}; + my ($val, @extra) = (ref($proto_value)||'') eq 'ARRAY' ? @$proto_value : ($proto_value); + + if(@extra) { + $params->{$key} = [map { Catalyst::Request::PartData->build_from_part_data($_) } ($val,@extra)]; + } else { + $params->{$key} = Catalyst::Request::PartData->build_from_part_data($val); + } + } + } else { + $params = $self->_body->param; + } # If we have an encoding configured (like UTF-8) in general we expect a client # to POST with the encoding we fufilled the request in. Otherwise don't do any @@ -341,13 +360,16 @@ sub prepare_body_parameters { # # I need to see if $c is here since this also doubles as a builder for the object :( - if($c and $c->encoding) { + if($c and $c->encoding and !$c->config->{skip_body_param_unicode_decoding}) { $params = $c->_handle_unicode_decoding($params); } - return $self->_use_hash_multivalue ? + my $return = $self->_use_hash_multivalue ? Hash::MultiValue->from_mixed($params) : $params; + + $self->body_parameters($return) unless $self->has_body_parameters; + return $return; } sub prepare_connection { @@ -544,6 +566,11 @@ be either a scalar or an arrayref containing scalars. These are the parameters from the POST part of the request, if any. +B If your POST is multipart, but contains non file upload parts (such +as an line part with an alternative encoding or content type) we cannot determine +the correct way to extra a meaningful value from the upload. In this case any +part like this will be represented as an instance of L. + =head2 $req->body_params Shortcut for body_parameters. diff --git a/lib/Catalyst/Request/PartData.pm b/lib/Catalyst/Request/PartData.pm new file mode 100644 index 0000000..7089373 --- /dev/null +++ b/lib/Catalyst/Request/PartData.pm @@ -0,0 +1,97 @@ +package Catalyst::Request::PartData; + +use Moose; +use HTTP::Headers; + +has [qw/raw_data name size/] => (is=>'ro', required=>1); + +has headers => ( + is=>'ro', + required=>1, + handles=>[qw/content_type content_encoding content_type_charset/]); + +sub build_from_part_data { + my ($class, $part_data) = @_; + return $part_data->{data} unless $class->part_data_has_complex_headers($part_data); + return $class->new( + raw_data => $part_data->{data}, + name => $part_data->{name}, + size => $part_data->{size}, + headers => HTTP::Headers->new(%{ $part_data->{headers} })); +} + +sub part_data_has_complex_headers { + my ($class, $part_data) = @_; + return scalar keys %{$part_data->{headers}} > 1 ? 1:0; +} + +__PACKAGE__->meta->make_immutable; + +=head1 NAME + +Catalyst::Request::Upload - handles file upload requests + +=head1 SYNOPSIS + + my $data_part = + +To specify where Catalyst should put the temporary files, set the 'uploadtmp' +option in the Catalyst config. If unset, Catalyst will use the system temp dir. + + __PACKAGE__->config( uploadtmp => '/path/to/tmpdir' ); + +See also L. + +=head1 DESCRIPTION + +=head1 ATTRIBUTES + +This class defines the following immutable attributes + +=head2 raw_data + +The raw data as returned via L. + +=head2 name + +The part name that gets extracted from the content-disposition header. + +=head2 size + +The raw byte count (over http) of the data. This is not the same as the character +length + +=head2 headers + +An L object that represents the submitted headers of the POST. This +object will handle the following methods: + +=head3 content_type + +=head3 content_encoding + +=head3 content_type_charset + +These three methods are the same as methods described in L. + +=head1 METHODS + +=head2 build_from_part_data + +Factory method to build an object from part data returned by L + +=head2 part_data_has_complex_headers + +Returns true if there more than one header (indicates the part data is complex and +contains content type and encoding information.). + +=head1 AUTHORS + +Catalyst Contributors, see Catalyst.pm + +=head1 COPYRIGHT + +This library is free software. You can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut diff --git a/lib/Catalyst/Request/Upload.pm b/lib/Catalyst/Request/Upload.pm index 6df2dff..39bc4c0 100644 --- a/lib/Catalyst/Request/Upload.pm +++ b/lib/Catalyst/Request/Upload.pm @@ -135,8 +135,8 @@ is found. This also accepts an override encoding value that you can use to force a particular L layer. If neither are found the filehandle is set to :raw. -This is useful if you are pulling the file into code and inspecting bit and -maybe then sending those bits back as the response. (Please not this is not +This is useful if you are pulling the file into code and inspecting bits and +maybe then sending those bits back as the response. (Please note this is not a suitable filehandle to set in the body; use C if you are doing that). Please note that using this method sets the underlying filehandle IO layer diff --git a/lib/Catalyst/Upgrading.pod b/lib/Catalyst/Upgrading.pod index e6a16ab..ebfa2a3 100644 --- a/lib/Catalyst/Upgrading.pod +++ b/lib/Catalyst/Upgrading.pod @@ -15,6 +15,12 @@ UTF8 is enabled going forwards and the expectation is that other ecosystem projects will assume this as well. At some point you application will not correctly function without this setting. +As of 5.90084 we've added two additional configuration flags for more selective +control over some encoding changes: 'skip_body_param_unicode_decoding' and +'skip_complex_post_part_handling'. You may use these to more selectively +disable new features while you are seeking a long term fix. Please review +CONFIGURATION in L. + For further information, please see L A number of projects in the wider ecosystem required minor updates to be able diff --git a/t/utf_incoming.t b/t/utf_incoming.t index dbed329..332ff76 100644 --- a/t/utf_incoming.t +++ b/t/utf_incoming.t @@ -7,6 +7,7 @@ use HTTP::Message::PSGI (); use Encode 2.21 'decode_utf8', 'encode_utf8', 'encode'; use File::Spec; use JSON::MaybeXS; +use Scalar::Util (); # Test cases for incoming utf8 @@ -441,6 +442,7 @@ SKIP: { Content_Type => 'form-data', Content => [ arg0 => 'helloworld', + Encode::encode('UTF-8','♥') => Encode::encode('UTF-8','♥♥'), # Long form POST simple does not auto encode... arg1 => [ undef, '', 'Content-Type' =>'text/plain; charset=UTF-8', @@ -457,11 +459,20 @@ SKIP: { my ($res, $c) = ctx_request $req; + use Devel::Dwarn; + #Dwarn $c->req->body_parameters; + is $c->req->body_parameters->{'arg0'}, 'helloworld', 'got helloworld value'; + is $c->req->body_parameters->{'♥'}, '♥♥'; + + ok Scalar::Util::blessed($c->req->body_parameters->{'arg1'}); + ok Scalar::Util::blessed($c->req->body_parameters->{'arg2'}[0]); + ok Scalar::Util::blessed($c->req->body_parameters->{'arg2'}[1]); - # We expect catalyst to have decoded this - is $c->req->body_parameters->{'arg1'}, $utf8, 'decoded utf8 param'; - is $c->req->body_parameters->{'arg2'}[0], $shiftjs, 'decoded shiftjis param'; + # Since the form post is COMPLEX you are expected to decode it yourself. + is Encode::decode('UTF-8', $c->req->body_parameters->{'arg1'}->raw_data), $utf8, 'decoded utf8 param'; + is Encode::decode('SHIFT_JIS', $c->req->body_parameters->{'arg2'}[0]->raw_data), $shiftjs, 'decoded shiftjis param'; + is Encode::decode('SHIFT_JIS', $c->req->body_parameters->{'arg2'}[1]->raw_data), $shiftjs, 'decoded shiftjis param'; }