X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FCatalyst-Runtime.git;a=blobdiff_plain;f=t%2Futf_incoming.t;h=8b88a0ef8e635be1ba429db48041afad03668a3d;hp=f556bf77ec54b55f6e1e7c8ab007bfe67a833a30;hb=9206d78d3c3c9439fcbc0b9dd79c552beebe5ff5;hpb=fe1dfeafc510d1778f8af6ebfb02cad570ccec6d diff --git a/t/utf_incoming.t b/t/utf_incoming.t index f556bf7..8b88a0e 100644 --- a/t/utf_incoming.t +++ b/t/utf_incoming.t @@ -3,6 +3,12 @@ use warnings; use strict; use Test::More; use HTTP::Request::Common; +use HTTP::Message::PSGI (); +use Encode 2.21 'decode_utf8', 'encode_utf8', 'encode'; +use File::Spec; +use JSON::MaybeXS; +use Data::Dumper; +use Scalar::Util (); # Test cases for incoming utf8 @@ -28,7 +34,7 @@ use HTTP::Request::Common; sub uri_for :Path('uri_for') { my ($self, $c) = @_; $c->response->content_type('text/html'); - $c->response->body("${\$c->uri_for($c->controller('Root')->action_for('argend'), ['♥'], '♥', {'♥'=>'♥♥'})}"); + $c->response->body("${\$c->uri_for($c->controller('Root')->action_for('argend'), ['♥'], '♥#X♥X', {'♥'=>'♥♥'})}"); } sub heart_with_arg :Path('a♥') Args(1) { @@ -59,6 +65,8 @@ use HTTP::Request::Common; Test::More::is $c->req->args->[0], '♥'; Test::More::is $c->req->captures->[0], '♥'; + Test::More::is $arg, '♥'; + Test::More::is length($arg), 1, "got length of one"; $c->response->body("

This is base-link action ♥ ${\$c->req->args->[0]}

"); @@ -79,23 +87,159 @@ use HTTP::Request::Common; $c->response->content_type('text/html'); my $writer = $c->res->write_fh; + $writer->write_encoded('

This is stream_write_fh action ♥

'); + $writer->close; + } + + # Stream a file with utf8 chars directly, you don't need to decode + sub stream_body_fh :Local { + my ($self, $c) = @_; + my $path = File::Spec->catfile('t', 'utf8.txt'); + open(my $fh, '<', $path) || die "trouble: $!"; + $c->response->content_type('text/html'); + $c->response->body($fh); + } + + # If you pull the file contents into a var, NOW you need to specify the + # IO encoding on the FH. Ultimately Plack at the end wants bytes... + sub stream_body_fh2 :Local { + my ($self, $c) = @_; + my $path = File::Spec->catfile('t', 'utf8.txt'); + open(my $fh, '<:encoding(UTF-8)', $path) || die "trouble: $!"; + my $contents = do { local $/; <$fh> }; + + $c->response->content_type('text/html'); + $c->response->body($contents); + } + + sub write_then_body :Local { + my ($self, $c) = @_; + + $c->res->content_type('text/html'); + $c->res->write("

This is early_write action ♥

"); + $c->res->body("

This is body_write action ♥

"); + } + + sub file_upload :POST Consumes(Multipart) Local { + my ($self, $c) = @_; + + Test::More::is $c->req->body_parameters->{'♥'}, '♥♥'; + Test::More::ok my $upload = $c->req->uploads->{file}; + Test::More::is $upload->charset, 'UTF-8'; + + my $text = $upload->slurp; + Test::More::is Encode::decode_utf8($text), "

This is stream_body_fh action ♥

\n"; - $writer->write("

This is stream_write_fh action ♥

"); - $writer->close("

This is stream_write_fh action ♥

"); + my $decoded_text = $upload->decoded_slurp; + Test::More::is $decoded_text, "

This is stream_body_fh action ♥

\n"; + + Test::More::is $upload->filename, '♥ttachment.txt'; + Test::More::is $upload->raw_basename, '♥ttachment.txt'; + + $c->response->content_type('text/html'); + $c->response->body($decoded_text); + } + + sub file_upload_utf8_param :POST Consumes(Multipart) Local { + my ($self, $c) = @_; + + Test::More::is $c->req->body_parameters->{'♥'}, '♥♥'; + Test::More::ok my $upload = $c->req->uploads->{'♥'}; + Test::More::is $upload->charset, 'UTF-8'; + + my $text = $upload->slurp; + Test::More::is Encode::decode_utf8($text), "

This is stream_body_fh action ♥

\n"; + + my $decoded_text = $upload->decoded_slurp; + Test::More::is $decoded_text, "

This is stream_body_fh action ♥

\n"; + + Test::More::is $upload->filename, '♥ttachment.txt'; + Test::More::is $upload->raw_basename, '♥ttachment.txt'; + + $c->response->content_type('text/html'); + $c->response->body($decoded_text); } + sub json :POST Consumes(JSON) Local { + my ($self, $c) = @_; + my $post = $c->req->body_data; + + Test::More::is $post->{'♥'}, '♥♥'; + Test::More::is length($post->{'♥'}), 2; + $c->response->content_type('application/json'); + + # Encode JSON also encodes to a UTF-8 encoded, binary string. This is why we don't + # have application/json as one of the things we match, otherwise we get double + # encoding. + $c->response->body(JSON::MaybeXS::encode_json($post)); + } + + ## If someone clears encoding, they can do as they wish + sub manual_1 :Local { + my ($self, $c) = @_; + $c->clear_encoding; + $c->res->content_type('text/plain'); + $c->res->content_type_charset('UTF-8'); + $c->response->body( Encode::encode_utf8("manual_1 ♥")); + } + + ## If you do like gzip, well handle that yourself! Basically if you do some sort + ## of content encoding like gzip, you must do on top of the encoding. We will fix + ## the encoding plugins (Catalyst::Plugin::Compress) to do this properly for you. + # + sub gzipped :Local { + require Compress::Zlib; + my ($self, $c) = @_; + $c->res->content_type('text/plain'); + $c->res->content_type_charset('UTF-8'); + $c->res->content_encoding('gzip'); + $c->response->body(Compress::Zlib::memGzip(Encode::encode_utf8("manual_1 ♥"))); + } + + sub override_encoding :Local { + my ($self, $c) = @_; + $c->res->content_type('text/plain'); + $c->encoding(Encode::find_encoding('UTF-8')); + $c->encoding(Encode::find_encoding('Shift_JIS')); + $c->response->body("テスト"); + } + + sub stream_write_error :Local { + my ($self, $c) = @_; + $c->response->content_type('text/html'); + $c->response->write("

This is stream_write action ♥

"); + $c->encoding(Encode::find_encoding('Shift_JIS')); + $c->response->write("

This is stream_write action ♥

"); + } + + sub from_external_psgi :Local { + my ($self, $c) = @_; + my $env = HTTP::Message::PSGI::req_to_psgi( HTTP::Request::Common::GET '/root/♥'); + $c->res->from_psgi_response( ref($c)->to_app->($env)); + } + + sub echo_arg :Local { + my ($self, $c) = @_; + $c->response->content_type('text/plain'); + $c->response->body($c->req->body_parameters->{arg}); + } + + sub echo_param :Local { + my ($self, $c) = @_; + $c->response->content_type('text/plain'); + $c->response->body($c->req->query_parameters->{arg}); + } + + package MyApp; use Catalyst; - MyApp->config(encoding=>'UTF-8'); - Test::More::ok(MyApp->setup, 'setup app'); } ok my $psgi = MyApp->psgi_app, 'build psgi app'; use Catalyst::Test 'MyApp'; -use Encode 2.21 'decode_utf8', 'encode_utf8'; { my $res = request "/root/♥"; @@ -103,6 +247,7 @@ use Encode 2.21 'decode_utf8', 'encode_utf8'; is $res->code, 200, 'OK'; is decode_utf8($res->content), '

This is path-heart action ♥

', 'correct body'; is $res->content_length, 36, 'correct length'; + is $res->content_charset, 'UTF-8'; } { @@ -111,6 +256,7 @@ use Encode 2.21 'decode_utf8', 'encode_utf8'; is $res->code, 200, 'OK'; is decode_utf8($res->content), '

This is path-heart-arg action ♥

', 'correct body'; is $res->content_length, 40, 'correct length'; + is $res->content_charset, 'UTF-8'; } { @@ -119,6 +265,7 @@ use Encode 2.21 'decode_utf8', 'encode_utf8'; is $res->code, 200, 'OK'; is decode_utf8($res->content), '

This is path-hat action ^

', 'correct body'; is $res->content_length, 32, 'correct length'; + is $res->content_charset, 'UTF-8'; } { @@ -127,6 +274,7 @@ use Encode 2.21 'decode_utf8', 'encode_utf8'; is $res->code, 200, 'OK'; is decode_utf8($res->content), '

This is base-link action ♥

', 'correct body'; is $res->content_length, 35, 'correct length'; + is $res->content_charset, 'UTF-8'; } { @@ -141,6 +289,7 @@ use Encode 2.21 'decode_utf8', 'encode_utf8'; is $c->req->parameters->{'♥'}[0], '♥'; is $c->req->parameters->{a}, 1; is $c->req->body_parameters->{a}, 1; + is $res->content_charset, 'UTF-8'; } { @@ -150,6 +299,7 @@ use Encode 2.21 'decode_utf8', 'encode_utf8'; is decode_utf8($res->content), '

This is base-link action ♥

', 'correct body'; is $res->content_length, 35, 'correct length'; is $c->req->query_keywords, '♥♥♥'; + is $res->content_charset, 'UTF-8'; } { @@ -158,6 +308,7 @@ use Encode 2.21 'decode_utf8', 'encode_utf8'; is $res->code, 200, 'OK'; is decode_utf8($res->content), '

This is base-link action ♥ ♥

', 'correct body'; is $res->content_length, 39, 'correct length'; + is $res->content_charset, 'UTF-8'; } { @@ -165,40 +316,71 @@ use Encode 2.21 'decode_utf8', 'encode_utf8'; is decode_utf8($res->content), '

This is base-link action ♥ ♥

', 'correct body'; is $res->content_length, 39, 'correct length'; + is $res->content_charset, 'UTF-8'; } { my ($res, $c) = ctx_request POST "/base/♥/♥/♥/♥?♥=♥♥", [a=>1, b=>'2', '♥'=>'♥♥']; ## Make sure that the urls we generate work the same - my $uri_for = $c->uri_for($c->controller('Root')->action_for('argend'), ['♥'], '♥', {'♥'=>'♥♥'}); + my $uri_for1 = $c->uri_for($c->controller('Root')->action_for('argend'), ['♥'], '♥', {'♥'=>'♥♥'}); + my $uri_for2 = $c->uri_for($c->controller('Root')->action_for('argend'), ['♥', '♥'], {'♥'=>'♥♥'}); my $uri = $c->req->uri; - is "$uri", "$uri_for"; + is "$uri_for1", "$uri_for2"; + is "$uri", "$uri_for1"; { - my ($res, $c) = ctx_request POST "$uri_for", [a=>1, b=>'2', '♥'=>'♥♥']; + my ($res, $c) = ctx_request POST "$uri_for1", [a=>1, b=>'2', '♥'=>'♥♥']; is $c->req->query_parameters->{'♥'}, '♥♥'; is $c->req->body_parameters->{'♥'}, '♥♥'; is $c->req->parameters->{'♥'}[0], '♥♥'; #combined with query and body + is $c->req->args->[0], '♥'; + is length($c->req->parameters->{'♥'}[0]), 2; + is length($c->req->query_parameters->{'♥'}), 2; + is length($c->req->body_parameters->{'♥'}), 2; + is length($c->req->args->[0]), 1; + is $res->content_charset, 'UTF-8'; } } { my ($res, $c) = ctx_request "/root/uri_for"; - my $url = $c->uri_for($c->controller('Root')->action_for('argend'), ['♥'], '♥', {'♥'=>'♥♥'}); + my $url = $c->uri_for($c->controller('Root')->action_for('argend'), ['♥'], '♥#X♥X', {'♥'=>'♥♥'}); is $res->code, 200, 'OK'; is decode_utf8($res->content), "$url", 'correct body'; #should do nothing is $res->content, "$url", 'correct body'; - is $res->content_length, 90, 'correct length'; + is $res->content_length, 104, 'correct length'; + is $res->content_charset, 'UTF-8'; + + { + my $url = $c->uri_for($c->controller->action_for('heart_with_arg'), '♥'); + is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5', "correct $url"; + } + + { + my $url = $c->uri_for($c->controller->action_for('heart_with_arg'), ['♥']); + is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5', "correct $url"; + } } { my $res = request "/root/stream_write"; - is $res->code, 200, 'OK'; + is $res->code, 200, 'OK GET /root/stream_write'; is decode_utf8($res->content), '

This is stream_write action ♥

', 'correct body'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/root/stream_body_fh"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), "

This is stream_body_fh action ♥

\n", 'correct body'; + is $res->content_charset, 'UTF-8'; + # Not sure why there is a trailing newline above... its not in catalyst code I can see. Not sure + # if is a problem or just an artifact of the why the test stuff works - JNAP } { @@ -206,6 +388,181 @@ use Encode 2.21 'decode_utf8', 'encode_utf8'; is $res->code, 200, 'OK'; is decode_utf8($res->content), '

This is stream_write_fh action ♥

', 'correct body'; + #is $res->content_length, 41, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/root/stream_body_fh2"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), "

This is stream_body_fh action ♥

\n", 'correct body'; + is $res->content_length, 41, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/root/write_then_body"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), "

This is early_write action ♥

This is body_write action ♥

"; + is $res->content_charset, 'UTF-8'; +} + +{ + ok my $path = File::Spec->catfile('t', 'utf8.txt'); + ok my $req = POST '/root/file_upload', + Content_Type => 'form-data', + Content => [encode_utf8('♥')=>encode_utf8('♥♥'), file=>["$path", encode_utf8('♥ttachment.txt'), 'Content-Type' =>'text/html; charset=UTF-8', ]]; + + ok my $res = request $req; + is decode_utf8($res->content), "

This is stream_body_fh action ♥

\n"; +} + +{ + ok my $path = File::Spec->catfile('t', 'utf8.txt'); + ok my $req = POST '/root/file_upload_utf8_param', + Content_Type => 'form-data', + Content => [encode_utf8('♥')=>encode_utf8('♥♥'), encode_utf8('♥')=>["$path", encode_utf8('♥ttachment.txt'), 'Content-Type' =>'text/html; charset=UTF-8', ]]; + + ok my $res = request $req; + is decode_utf8($res->content), "

This is stream_body_fh action ♥

\n"; +} + +{ + ok my $req = POST '/root/json', + Content_Type => 'application/json', + Content => encode_json +{'♥'=>'♥♥'}; # Note: JSON does the UTF* encoding for us + + ok my $res = request $req; + + ## decode_json expect the binary utf8 string and does the decoded bit for us. + is_deeply decode_json(($res->content)), +{'♥'=>'♥♥'}, 'JSON was decoded correctly'; +} + +{ + ok my $res = request "/root/override_encoding"; + ok my $enc = Encode::find_encoding('SHIFT_JIS'); + + is $res->code, 200, 'OK'; + is $enc->decode($res->content), "テスト", 'correct body'; + is $res->content_length, 6, 'correct length'; # Bytes over the wire + is length($enc->decode($res->content)), 3; + is $res->content_charset, 'SHIFT_JIS', 'content charset is SHIFT_JIS as expected'; +} + +{ + my $res = request "/root/manual_1"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), "manual_1 ♥", 'correct body'; + is $res->content_length, 12, 'correct length'; + is $res->content_charset, 'UTF-8'; } +SKIP: { + eval { require Compress::Zlib; 1} || do { + skip "Compress::Zlib needed to test gzip encoding", 5 }; + + my $res = request "/root/gzipped"; + ok my $raw_content = $res->content; + ok my $content = Compress::Zlib::memGunzip($raw_content), 'no gunzip error'; + + is $res->code, 200, 'OK'; + is decode_utf8($content), "manual_1 ♥", 'correct body'; + is $res->content_charset, 'UTF-8', 'zlib charset is set correctly'; +} + +{ + my $res = request "/root/stream_write_error"; + + is $res->code, 200, 'OK'; + like decode_utf8($res->content), qr[

This is stream_write action ♥

code, 200, 'OK'; + is decode_utf8($res->content), '

This is path-heart action ♥

', 'correct body'; + is $res->content_length, 36, 'correct length'; + is $res->content_charset, 'UTF-8', 'external PSGI app has expected charset'; +} + +{ + my $utf8 = 'test ♥'; + my $shiftjs = 'test テスト'; + + ok my $req = POST '/root/echo_arg', + Content_Type => 'form-data', + Content => [ + arg0 => 'helloworld', + Encode::encode('UTF-8','♥') => Encode::encode('UTF-8','♥♥'), # Long form POST simple does not auto encode... + Encode::encode('UTF-8','♥♥♥') => [ + undef, '', + 'Content-Type' =>'text/plain; charset=SHIFT_JIS', + 'Content' => Encode::encode('SHIFT_JIS', $shiftjs)], + arg1 => [ + undef, '', + 'Content-Type' =>'text/plain; charset=UTF-8', + 'Content' => Encode::encode('UTF-8', $utf8)], + arg2 => [ + undef, '', + 'Content-Type' =>'text/plain; charset=SHIFT_JIS', + 'Content' => Encode::encode('SHIFT_JIS', $shiftjs)], + arg2 => [ + undef, '', + 'Content-Type' =>'text/plain; charset=SHIFT_JIS', + 'Content' => Encode::encode('SHIFT_JIS', $shiftjs)], + ]; + + my ($res, $c) = ctx_request $req; + + is $c->req->body_parameters->{'arg0'}, 'helloworld', 'got helloworld value'; + is $c->req->body_parameters->{'♥'}, '♥♥'; + is $c->req->body_parameters->{'arg1'}, $utf8, 'decoded utf8 param'; + is $c->req->body_parameters->{'arg2'}[0], $shiftjs, 'decoded shiftjs param'; + is $c->req->body_parameters->{'arg2'}[1], $shiftjs, 'decoded shiftjs param'; + is $c->req->body_parameters->{'♥♥♥'}, $shiftjs, 'decoded shiftjs param'; + +} + +{ + my $shiftjs = 'test テスト'; + my $encoded = Encode::encode('UTF-8', $shiftjs); + + ok my $req = GET "/root/echo_arg?a=$encoded"; + my ($res, $c) = ctx_request $req; + + is $c->req->query_parameters->{'a'}, $shiftjs, 'got expected value'; +} + +{ + my $invalid = '%e2'; + # in url + { + my $req = GET "/$invalid"; + my $res = request $req; + is ($res->code, '400', "Invalid url param is 400"); + } + # in body + { + my $req = POST "/root/echo_arg", Content => "arg0=$invalid"; + my $res = request $req; + is ($res->code, '400', "Invalid post param is 400"); + } + # in query + { + # failing since 5.90080 + my $req = GET "/root/echo_param?arg=$invalid"; + my $res = request $req; + is ($res->code, '400', "Invalid get param is 400") or diag Dumper($res->decoded_content); + } + +} + + +## should we use binmode on filehandles to force the encoding...? +## Not sure what else to do with multipart here, if docs are enough... + done_testing;