From: John Napiorkowski Date: Thu, 18 Dec 2014 14:48:29 +0000 (-0600) Subject: more tests for utf8 and docs X-Git-Tag: 5.90079_008~25 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=12982f8623b4a3520146d4f52c5705d2b8a3b5ab;hp=b063a165c4247e0312f19ed90829a0e7161fa449;p=catagits%2FCatalyst-Runtime.git more tests for utf8 and docs --- diff --git a/Changes b/Changes index 9b8b4dd..7b5b2bf 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,11 @@ # This file documents the revision history for Perl extension Catalyst. +5.90079_004 - TBD + - Starting adding some docs around the new encoding stuff + - Exposed the reqexp we use to match content types that need encoding via a + global variable. + - Added some test cases for JSON utf8 and tested file uploads with utf8. + 5.90079_003 - 2014-12-03 - Make sure all tests run even if debug mode is enabled. - Fixed issue with middleware stash test case that failed on older Perls diff --git a/lib/Catalyst.pm b/lib/Catalyst.pm index b595318..9592b81 100644 --- a/lib/Catalyst.pm +++ b/lib/Catalyst.pm @@ -113,6 +113,7 @@ our $START = time; our $RECURSION = 1000; our $DETACH = Catalyst::Exception::Detach->new; our $GO = Catalyst::Exception::Go->new; +our $DEFAULT_ENCODE_CONTENT_TYPE_MATCH = qr{text|xml$|javascript$}; #I imagine that very few of these really need to be class variables. if any. #maybe we should just make them attributes with a default? @@ -2055,7 +2056,7 @@ sub finalize_headers { my ($ct, $ct_enc) = $c->response->content_type; # Only touch 'text-like' contents - if($c->response->content_type =~ /^text|xml$|javascript$/) { + if($c->response->content_type =~ /$DEFAULT_ENCODE_CONTENT_TYPE_MATCH/) { if ($ct_enc && $ct_enc =~ /charset=([^;]*)/) { if (uc($1) ne uc($enc->mime_name)) { $c->log->debug("Catalyst encoding config is set to encode in '" . @@ -2094,7 +2095,7 @@ sub finalize_encoding { return unless $enc; # Only touch 'text-like' contents - if($c->response->content_type =~ /^text|xml$|javascript$/) { + if($c->response->content_type =~ /$DEFAULT_ENCODE_CONTENT_TYPE_MATCH/) { if (ref(\$body) eq 'SCALAR') { $c->response->body( $c->encoding->encode( $body, $c->_encode_check ) ); } @@ -4000,6 +4001,26 @@ logical characters. On response, encodes body into encoding. By default encoding is now 'UTF-8'. You may turn it off by setting the encoding configuration to undef. +Encoding is automatically applied when the content-type is set to +a type that can be encoded. Currently we encode when the content type +matches the following regular expression: + + $content_type =~ /^text|xml$|javascript$/ + +The value of this regex is contained in the global variable + + $Catalyst::DEFAULT_ENCODE_CONTENT_TYPE_MATCH + +This may change in the future. Be default we don't automatically +encode 'application/json' since the most popular JSON encoders (such +as L which is the library that L can make use +of) will do the UTF8 encoding and decoding automatically. Having it on +in Catalyst could result in double encoding. + +If you are producing JSON response in an unconventional manner (such +as via a template or manual strings) you should perform the UTF8 encoding +manually as well such as to conform to the JSON specification. + =head2 Methods =over 4 diff --git a/lib/Catalyst/Response.pm b/lib/Catalyst/Response.pm index 82677a7..0ef4c52 100644 --- a/lib/Catalyst/Response.pm +++ b/lib/Catalyst/Response.pm @@ -55,7 +55,7 @@ has write_fh => ( sub _build_write_fh { my $writer = $_[0]->_writer; # We need to get the finalize headers side effect... - my $requires_encoding = $_[0]->content_type =~ m/^text|xml$|javascript$/; + my $requires_encoding = $_[0]->content_type =~ m/$Catalyst::DEFAULT_ENCODE_CONTENT_TYPE_MATCH/; my %fields = ( _writer => $writer, _encoding => $_[0]->encoding, @@ -118,7 +118,7 @@ sub write { $buffer = q[] unless defined $buffer; $buffer = $self->_context->encoding->encode( $buffer, $self->_context->_encode_check ) - if $self->_context->encoding && $self->content_type =~ /^text|xml$|javascript$/; + if $self->_context->encoding && $self->content_type =~ /$Catalyst::DEFAULT_ENCODE_CONTENT_TYPE_MATCH/; my $len = length($buffer); $self->_writer->write($buffer); diff --git a/t/utf_incoming.t b/t/utf_incoming.t index 04f724f..516e9d5 100644 --- a/t/utf_incoming.t +++ b/t/utf_incoming.t @@ -5,6 +5,7 @@ use Test::More; use HTTP::Request::Common; use Encode 2.21 'decode_utf8', 'encode_utf8'; use File::Spec; +use JSON::MaybeXS; # Test cases for incoming utf8 @@ -106,6 +107,30 @@ use File::Spec; $c->response->body($contents); } + sub file_upload :POST Consumes(Multipart) Local { + my ($self, $c) = @_; + Test::More::is $c->req->body_parameters->{'♥'}, '♥♥'; + Test::More::ok my $upload = $c->req->uploads->{file}; + + my $text = $upload->slurp; + Test::More::is Encode::decode_utf8($text), "

This is stream_body_fh action ♥

\n"; + + $c->response->content_type('text/html'); + $c->response->body($upload->fh); + } + + sub json :POST Consumes(JSON) Local { + my ($self, $c) = @_; + my $post = $c->req->body_data; + + Test::More::is $post->{'♥'}, '♥♥'; + $c->response->content_type('application/json'); + + # Encode JSON also encodes to a UTF-8 encoded, binary string. This is why we don't + # have application/json as one of the things we match, otherwise we get double + # encoding. + $c->response->body(JSON::MaybeXS::encode_json($post)); + } package MyApp; use Catalyst; @@ -275,4 +300,28 @@ use Catalyst::Test 'MyApp'; is $res->content_charset, 'UTF-8'; } +{ + ok my $path = File::Spec->catfile('t', 'utf8.txt'); + ok my $req = POST '/root/file_upload', + Content_Type => 'form-data', + Content => [encode_utf8('♥')=>encode_utf8('♥♥'), file=>["$path", 'attachment.txt', 'Content-Type' =>'text/html; charset=UTF-8', ]]; + + ok my $res = request $req; + is decode_utf8($res->content), "

This is stream_body_fh action ♥

\n"; +} + +{ + ok my $req = POST '/root/json', + Content_Type => 'application/json', + Content => encode_json +{'♥'=>'♥♥'}; # Note: JSON does the UTF* encoding for us + + ok my $res = request $req; + + ## decode_json expect the binary utf8 string and does the decoded bit for us. + is_deeply decode_json(($res->content)), +{'♥'=>'♥♥'}; +} + +## should we use binmode on filehandles to force the encoding...? +## Not sure what else to do with multipart here, if docs are enough... + done_testing;