From: John Napiorkowski Date: Wed, 25 Mar 2015 18:33:20 +0000 (-0500) Subject: fix for bug around evil query params and docs X-Git-Tag: 5.90085~2 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FCatalyst-Runtime.git;a=commitdiff_plain;h=6cf77e11ef210219fbbe19df5f5b7cd7c84f501c;hp=9f3e69eeed571171a639f0af2a627da4baf9fc93 fix for bug around evil query params and docs --- diff --git a/Changes b/Changes index 6a971df..595d249 100644 --- a/Changes +++ b/Changes @@ -10,6 +10,9 @@ You may use the application configuration setting "use_chained_args_0_special_case" to disable this new behavior, if you must for back-compat reasons. - Added PATCH HTTP Method action attribute shortcut. + - Several new configuration options aimed to give improved backwards compatibility + for when your URL query parameters or keywords have non UTF-8 encodings. + See Catalyst::Upgrading. 5.90084 - 2015-02-23 - Small change to the way body parameters are created in order to prevent diff --git a/lib/Catalyst.pm b/lib/Catalyst.pm index 4d28b83..03b49c6 100644 --- a/lib/Catalyst.pm +++ b/lib/Catalyst.pm @@ -3904,6 +3904,39 @@ parameter to true. =item * +C + +If true, then do not try to character decode any wide characters in your +request URL query or keywords. Most readings of the relevent specifications +suggest these should be UTF-* encoded, which is the default that L +will use, hwoever if you are creating a lot of URLs manually or have external +evil clients, this might cause you trouble. If you find the changes introduced +in Catalyst version 5.90080+ break some of your query code, you may disable +the UTF-8 decoding globally using this configuration. + +This setting takes precedence over C and +C + +=item * + +C + +By default we decode query and keywords in your request URL using UTF-8, which +is our reading of the relevent specifications. This setting allows one to +specify a fixed value for how to decode your query. You might need this if +you are doing a lot of custom encoding of your URLs and not using UTF-8. + +This setting take precedence over C. + +=item * + +C + +Setting this to true will default your query decoding to whatever your +general global encoding is (the default is UTF-8). + +=item * + C In older versions of Catalyst, when more than one action matched the same path diff --git a/lib/Catalyst/Engine.pm b/lib/Catalyst/Engine.pm index 44c9c12..e4deb9e 100644 --- a/lib/Catalyst/Engine.pm +++ b/lib/Catalyst/Engine.pm @@ -10,7 +10,7 @@ use HTML::Entities; use HTTP::Headers; use Plack::Loader; use Catalyst::EngineLoader; -use Encode 2.21 'decode_utf8'; +use Encode 2.21 'decode_utf8', 'encode', 'decode'; use Plack::Request::Upload; use Hash::MultiValue; use namespace::clean -except => 'meta'; @@ -573,6 +573,17 @@ process the query string and extract query parameters. sub prepare_query_parameters { my ($self, $c) = @_; my $env = $c->request->env; + my $do_not_decode_query = $c->config->{do_not_decode_query}; + my $default_query_encoding = $c->config->{default_query_encoding} || + ($c->config->{decode_query_using_global_encoding} ? + $c->encoding : 'UTF-8'); + + my $decoder = sub { + my $str = shift; + return $str if $do_not_decode_query; + return $str unless $default_query_encoding; + return decode( $default_query_encoding, $str); + }; my $query_string = exists $env->{QUERY_STRING} ? $env->{QUERY_STRING} @@ -582,7 +593,7 @@ sub prepare_query_parameters { # (yes, index() is faster than a regex :)) if ( index( $query_string, '=' ) < 0 ) { my $keywords = $self->unescape_uri($query_string); - $keywords = decode_utf8 $keywords; + $keywords = $decoder->($keywords); $c->request->query_keywords($keywords); return; } @@ -590,7 +601,7 @@ sub prepare_query_parameters { $query_string =~ s/\A[&;]+//; my $p = Hash::MultiValue->new( - map { defined $_ ? decode_utf8($self->unescape_uri($_)) : $_ } + map { defined $_ ? $decoder->($self->unescape_uri($_)) : $_ } map { ( split /=/, $_, 2 )[0,1] } # slice forces two elements split /[&;]+/, $query_string ); diff --git a/lib/Catalyst/Upgrading.pod b/lib/Catalyst/Upgrading.pod index 79a90ec..e210bbb 100644 --- a/lib/Catalyst/Upgrading.pod +++ b/lib/Catalyst/Upgrading.pod @@ -66,6 +66,40 @@ If this causes you trouble and you can't fix your code to conform, you may set t application configuration setting "use_chained_args_0_special_case" to true and that will revert you code to the previous behavior. +=head2 More backwards compatibility options with UTF-8 changes + +In order to give better backwards compatiblity with the 5.90080+ UTF-8 changes +we've added several configuration options around control of how we try to decode +your URL keywords / query parameters. + +C + +If true, then do not try to character decode any wide characters in your +request URL query or keywords. Most readings of the relevent specifications +suggest these should be UTF-* encoded, which is the default that L +will use, hwoever if you are creating a lot of URLs manually or have external +evil clients, this might cause you trouble. If you find the changes introduced +in Catalyst version 5.90080+ break some of your query code, you may disable +the UTF-8 decoding globally using this configuration. + +This setting takes precedence over C and +C + +C + +By default we decode query and keywords in your request URL using UTF-8, which +is our reading of the relevent specifications. This setting allows one to +specify a fixed value for how to decode your query. You might need this if +you are doing a lot of custom encoding of your URLs and not using UTF-8. + +This setting take precedence over C. + +C + +Setting this to true will default your query decoding to whatever your +general global encoding is (the default is UTF-8). + + =head1 Upgrading to Catalyst 5.90080 UTF8 encoding is now default. For temporary backwards compatibility, if this diff --git a/t/not_utf8_query_bug.t b/t/not_utf8_query_bug.t new file mode 100644 index 0000000..52578bc --- /dev/null +++ b/t/not_utf8_query_bug.t @@ -0,0 +1,45 @@ +use utf8; +use warnings; +use strict; + +# For reported: https://rt.cpan.org/Ticket/Display.html?id=103063 + +{ + package MyApp::Controller::Root; + $INC{'MyApp/Controller/Root.pm'} = __FILE__; + + use base 'Catalyst::Controller'; + + sub example :Local Args(0) { + pop->stash->{testing1} = 'testing2'; + } + + package MyApp; + use Catalyst; + + #MyApp->config(decode_query_using_global_encoding=>1, encoding => 'SHIFT_JIS'); + #MyApp->config(do_not_decode_query=>1); + #MyApp->config(decode_query_using_global_encoding=>1, encoding => undef); + MyApp->config(default_query_encoding=>'SHIFT_JIS'); + + MyApp->setup; +} + +use Test::More; +use Catalyst::Test 'MyApp'; +use Encode; +use HTTP::Request::Common; + +{ + my $shiftjs = 'test テスト'; + my $encoded = Encode::encode('SHIFT_JIS', $shiftjs); + + ok my $req = GET "/root/example?a=$encoded"; + my ($res, $c) = ctx_request $req; + + is $c->req->query_parameters->{'a'}, $shiftjs, 'got expected value'; +} + + +done_testing; + diff --git a/t/utf_incoming.t b/t/utf_incoming.t index c144a44..6342025 100644 --- a/t/utf_incoming.t +++ b/t/utf_incoming.t @@ -479,6 +479,16 @@ SKIP: { } +{ + my $shiftjs = 'test テスト'; + my $encoded = Encode::encode('UTF-8', $shiftjs); + + ok my $req = GET "/root/echo_arg?a=$encoded"; + my ($res, $c) = ctx_request $req; + + is $c->req->query_parameters->{'a'}, $shiftjs, 'got expected value'; +} + ## should we use binmode on filehandles to force the encoding...? ## Not sure what else to do with multipart here, if docs are enough...