changes so that we skip encoding under programmatic situations

diff --git a/Changes b/Changes

index 7b5b2bf..64897ae 100644 (file)
--- a/Changes
+++ b/Changes
@@ -5,6 +5,16 @@
   - Exposed the reqexp we use to match content types that need encoding via a
     global variable.
   - Added some test cases for JSON utf8 and tested file uploads with utf8.
+  - Fixes to decoding on file upload filenames and related methods
+  - new methods on upload object that tries to do the right thing if we find
+    a character set on the upload and its UTF8.
+  - new additional helper methods on the file upload object.
+  - new helper methods has_encoding and clear_encoding on context.
+  - Method on Catalyst::Response to determine if the reponse should be encoded.
+  - Warn if changing headers only if headers are finalized AND the response callback
+    has allready been called (and headers already sent).
+  - Centralized rules about detecting if we need to automatically encode or not and
+    added tests around cases when you choose to skip auto encoding.
 
 5.90079_003 - 2014-12-03
   - Make sure all tests run even if debug mode is enabled.
diff --git a/lib/Catalyst.pm b/lib/Catalyst.pm

index 9592b81..95ddb97 100644 (file)
--- a/lib/Catalyst.pm
+++ b/lib/Catalyst.pm
@@ -113,7 +113,6 @@ our $START     = time;
 our $RECURSION = 1000;
 our $DETACH    = Catalyst::Exception::Detach->new;
 our $GO        = Catalyst::Exception::Go->new;
-our $DEFAULT_ENCODE_CONTENT_TYPE_MATCH = qr{text|xml$|javascript$};
 
 #I imagine that very few of these really need to be class variables. if any.
 #maybe we should just make them attributes with a default?
@@ -1014,12 +1013,31 @@ And later:
 Your log class should implement the methods described in
 L<Catalyst::Log>.
 
+=head2 has_encoding
+
+Returned True if there's a valid encoding
+
+=head2 clear_encoding
+
+Clears the encoding for the current context
+
 =head2 encoding
 
 Sets or gets the application encoding.
 
 =cut
 
+sub has_encoding { shift->encoding ? 1:0 }
+
+sub clear_encoding {
+    my $c = shift;
+    if(blessed $c) {
+        $c->encoding(undef);
+    } else {
+        $c->debug->error("You can't clear encoding on the application");
+    }
+}
+
 sub encoding {
     my $c = shift;
     my $encoding;
@@ -2050,25 +2068,10 @@ sub finalize_headers {
 
     $c->finalize_cookies;
 
+    # This currently is a NOOP but I don't want to remove it since I guess people
+    # might have Response subclasses that use it for something... (JNAP)
     $c->response->finalize_headers();
 
-    if(my $enc = $c->encoding) {
-       my ($ct, $ct_enc) = $c->response->content_type;
-
-        # Only touch 'text-like' contents
-        if($c->response->content_type =~ /$DEFAULT_ENCODE_CONTENT_TYPE_MATCH/) {
-          if ($ct_enc && $ct_enc =~ /charset=([^;]*)/) {
-            if (uc($1) ne uc($enc->mime_name)) {
-              $c->log->debug("Catalyst encoding config is set to encode in '" .
-                           $enc->mime_name .
-                           "', content type is '$1', not encoding ");
-            }
-          } else {
-            $c->res->content_type($c->res->content_type . "; charset=" . $enc->mime_name);
-          }
-        }
-    }
-
     # Done
     $response->finalized_headers(1);
 }
@@ -2079,26 +2082,44 @@ Make sure your body is encoded properly IF you set an encoding.  By
 default the encoding is UTF-8 but you can disable it by explictly setting the
 encoding configuration value to undef.
 
+We can only encode when the body is a scalar.  Methods for encoding via the
+streaming interfaces (such as C<write> and C<write_fh> on L<Catalyst::Response>
+are available).
+
 See L</ENCODING>.
 
 =cut
 
 sub finalize_encoding {
     my $c = shift;
+    my $res = $c->res || return;
+
+    # Warn if the set charset is different from the one you put into encoding.  We need
+    # to do this early since encodable_response is false for this condition and we need
+    # to match the debug output for backcompat (there's a test for this...) -JNAP
+    if(
+      $res->content_type_charset and $c->encoding and 
+      (uc($c->encoding->mime_name) ne uc($res->content_type_charset))
+    ) {
+        my $ct = lc($res->content_type_charset);
+        $c->log->debug("Catalyst encoding config is set to encode in '" .
+            $c->encoding->mime_name .
+            "', content type is '$ct', not encoding ");
+    }
 
-    my $body = $c->response->body;
-
-    return unless defined($body);
-
-    my $enc = $c->encoding;
-
-    return unless $enc;
-
-    # Only touch 'text-like' contents
-    if($c->response->content_type =~ /$DEFAULT_ENCODE_CONTENT_TYPE_MATCH/) {
-      if (ref(\$body) eq 'SCALAR') {
-        $c->response->body( $c->encoding->encode( $body, $c->_encode_check ) );
-      }
+    if(
+      ($res->encodable_response) and
+      (defined($res->body)) and
+      (ref(\$res->body) eq 'SCALAR')
+    ) {
+        $c->res->body( $c->encoding->encode( $c->res->body, $c->_encode_check ) );
+
+        # Set the charset if necessary.  This might be a bit bonkers since encodable response
+        # is false when the set charset is not the same as the encoding mimetype (maybe 
+        # confusing action at a distance here..
+        # Don't try to set the charset if one already exists
+        $c->res->content_type($c->res->content_type . "; charset=" . $c->encoding->mime_name)
+          unless($c->res->content_type_charset);
     }
 }
 
@@ -4007,20 +4028,27 @@ matches the following regular expression:
 
     $content_type =~ /^text|xml$|javascript$/
 
-The value of this regex is contained in the global variable
-
-    $Catalyst::DEFAULT_ENCODE_CONTENT_TYPE_MATCH
+Encoding is set on the application, but it is copied to the response object
+so you can override encoding rules per request (See L<Catalyst::Response>
+for more information).
 
-This may change in the future.  Be default we don't automatically
-encode 'application/json' since the most popular JSON encoders (such
-as L<JSON::MaybeXS> which is the library that L<Catalyst> can make use
-of) will do the UTF8 encoding and decoding automatically.  Having it on
-in Catalyst could result in double encoding.
+Be default we don't automatically encode 'application/json' since the most
+popular JSON encoders (such as L<JSON::MaybeXS> which is the library that
+L<Catalyst> can make use of) will do the UTF8 encoding and decoding automatically.
+Having it on in Catalyst could result in double encoding.
 
 If you are producing JSON response in an unconventional manner (such
 as via a template or manual strings) you should perform the UTF8 encoding
 manually as well such as to conform to the JSON specification.
 
+NOTE: We also examine the value of $c->response->content_encoding.  If
+you set this (like for example 'gzip', and manually gzipping the body)
+we assume that you have done all the neccessary encoding yourself, since
+we cannot encode the gzipped contents.  If you use a plugin like
+L<Catalyst::Plugin::Compress> we will be updating that plugin to work 
+with the new UTF8 encoding code, or you can use L<Plack::Middleware::Deflater>
+or (probably best) do your compression on a front end proxy.
+
 =head2 Methods
 
 =over 4
diff --git a/lib/Catalyst/Engine.pm b/lib/Catalyst/Engine.pm

index 194d254..948f28f 100644 (file)
--- a/lib/Catalyst/Engine.pm
+++ b/lib/Catalyst/Engine.pm
@@ -685,6 +685,7 @@ sub prepare_uploads {
               (
                size => $upload->{size},
                type => scalar $headers->content_type,
+               charset => scalar $headers->content_type_charset,
                headers => $headers,
                tempname => $upload->{tempname},
                filename => $filename,
diff --git a/lib/Catalyst/Request/Upload.pm b/lib/Catalyst/Request/Upload.pm

index d8e58be..6df2dff 100644 (file)
--- a/lib/Catalyst/Request/Upload.pm
+++ b/lib/Catalyst/Request/Upload.pm
@@ -15,6 +15,8 @@ has size => (is => 'rw');
 has tempname => (is => 'rw');
 has type => (is => 'rw');
 has basename => (is => 'ro', lazy_build => 1);
+has raw_basename => (is => 'ro', lazy_build => 1);
+has charset => (is=>'ro', predicate=>'has_charset');
 
 has fh => (
   is => 'rw',
@@ -29,17 +31,21 @@ has fh => (
       Catalyst::Exception->throw(
           message => qq/Can't open '$filename': '$!'/ );
     }
-
     return $fh;
   },
 );
 
 sub _build_basename {
+    my $basename = shift->raw_basename;
+    $basename =~ s|[^\w\.-]+|_|g;
+    return $basename;
+}
+
+sub _build_raw_basename {
     my $self = shift;
     my $basename = $self->filename;
     $basename =~ s|\\|/|g;
     $basename = ( File::Spec::Unix->splitpath($basename) )[2];
-    $basename =~ s|[^\w\.-]+|_|g;
     return $basename;
 }
 
@@ -58,13 +64,16 @@ Catalyst::Request::Upload - handles file upload requests
     $upload->basename;
     $upload->copy_to;
     $upload->fh;
+    $upload->decoded_fh
     $upload->filename;
     $upload->headers;
     $upload->link_to;
     $upload->size;
     $upload->slurp;
+    $upload->decoded_slurp;
     $upload->tempname;
     $upload->type;
+    $upload->charset;
 
 To specify where Catalyst should put the temporary files, set the 'uploadtmp'
 option in the Catalyst config. If unset, Catalyst will use the system temp dir.
@@ -97,10 +106,56 @@ sub copy_to {
     return File::Copy::copy( $self->tempname, @_ );
 }
 
+=head2 $upload->is_utf8_encoded
+
+Returns true of the upload defines a character set at that value is 'UTF-8'.
+This does not try to inspect your upload and make any guesses if the Content
+Type charset is undefined.
+
+=cut
+
+sub is_utf8_encoded {
+    my $self = shift;
+    if(my $charset = $self->charset) {
+      return $charset eq 'UTF-8' ? 1 : 0;
+    }
+    return 0;
+}
+
 =head2 $upload->fh
 
 Opens a temporary file (see tempname below) and returns an L<IO::File> handle.
 
+This is a filehandle that is opened with no additional IO Layers.
+
+=head2 $upload->decoded_fh(?$encoding)
+
+Returns a filehandle that has binmode set to UTF-8 if a UTF-8 character set
+is found. This also accepts an override encoding value that you can use to
+force a particular L<PerlIO> layer.  If neither are found the filehandle is
+set to :raw.
+
+This is useful if you are pulling the file into code and inspecting bit and
+maybe then sending those bits back as the response.  (Please not this is not
+a suitable filehandle to set in the body; use C<fh> if you are doing that).
+
+Please note that using this method sets the underlying filehandle IO layer
+so once you use this method if you go back and use the C<fh> method you
+still get the IO layer applied.
+
+=cut
+
+sub decoded_fh {
+    my ($self, $layer) = @_;
+    my $fh  = $self->fh;
+
+    $layer = ":encoding(UTF-8)" if !$layer && $self->is_utf8_encoded;
+    $layer = ':raw' unless $layer;
+
+    binmode($fh, $layer);
+    return $fh;
+}
+
 =head2 $upload->filename
 
 Returns the client-supplied filename.
@@ -127,13 +182,17 @@ sub link_to {
 
 Returns the size of the uploaded file in bytes.
 
-=head2 $upload->slurp
+=head2 $upload->slurp(?$encoding)
+
+Optionally accepts an argument to define an IO Layer (which is applied to
+the filehandle via binmode; if no layer is defined the default is set to
+":raw".
 
 Returns a scalar containing the contents of the temporary file.
 
 Note that this will cause the filehandle pointed to by C<< $upload->fh >> to
 be reset to the start of the file using seek and the file handle to be put
-into binary mode.
+into whatever encoding mode is applied.
 
 =cut
 
@@ -158,9 +217,39 @@ sub slurp {
     return $content;
 }
 
+=head2 $upload->decoded_slurp(?$encoding)
+
+Works just like C<slurp> except we use C<decoded_fh> instead of C<fh> to
+open a filehandle to slurp.  This means if your upload charset is UTF8
+we binmode the filehandle to that encoding.
+
+=cut
+
+sub decoded_slurp {
+    my ( $self, $layer ) = @_;
+    my $handle = $self->decoded_fh($layer);
+
+    my $content = undef;
+    $handle->seek(0, IO::File::SEEK_SET);
+    while ( $handle->sysread( my $buffer, 8192 ) ) {
+        $content .= $buffer;
+    }
+
+    $handle->seek(0, IO::File::SEEK_SET);
+    return $content;
+}
+
 =head2 $upload->basename
 
-Returns basename for C<filename>.
+Returns basename for C<filename>.  This filters the name through a regexp
+C<basename =~ s|[^\w\.-]+|_|g> to make it safe for filesystems that don't
+like advanced characters.  This will of course filter UTF8 characters.
+If you need the exact basename unfiltered use C<raw_basename>.
+
+=head2 $upload->raw_basename
+
+Just like C<basename> but without filtering the filename for characters that
+don't always write to a filesystem.
 
 =head2 $upload->tempname
 
@@ -170,6 +259,11 @@ Returns the path to the temporary file.
 
 Returns the client-supplied Content-Type.
 
+=head2 $upload->charset
+
+The character set information part of the content type, if any.  Useful if you
+need to figure out any encodings on the file upload.
+
 =head2 meta
 
 Provided by Moose
diff --git a/lib/Catalyst/Response.pm b/lib/Catalyst/Response.pm

index 0ef4c52..28cf7b7 100644 (file)
--- a/lib/Catalyst/Response.pm
+++ b/lib/Catalyst/Response.pm
@@ -6,9 +6,18 @@ use Moose::Util::TypeConstraints;
 use namespace::autoclean;
 use Scalar::Util 'blessed';
 use Catalyst::Response::Writer;
+use Catalyst::Utils ();
 
 with 'MooseX::Emulate::Class::Accessor::Fast';
 
+our $DEFAULT_ENCODE_CONTENT_TYPE_MATCH = qr{text|xml$|javascript$};
+
+has encodable_content_type => (
+    is => 'rw',
+    required => 1,
+    default => sub { $DEFAULT_ENCODE_CONTENT_TYPE_MATCH }
+);
+
 has _response_cb => (
     is      => 'ro',
     isa     => 'CodeRef', 
@@ -55,10 +64,10 @@ has write_fh => (
 
 sub _build_write_fh {
   my $writer = $_[0]->_writer; # We need to get the finalize headers side effect...
-  my $requires_encoding = $_[0]->content_type =~ m/$Catalyst::DEFAULT_ENCODE_CONTENT_TYPE_MATCH/;
+  my $requires_encoding = $_[0]->encodable_response;
   my %fields = (
     _writer => $writer,
-    _encoding => $_[0]->encoding,
+    _encoding => $_[0]->_context->encoding,
     _requires_encoding => $requires_encoding,
   );
 
@@ -83,7 +92,7 @@ has finalized_headers => (is => 'rw', default => 0);
 has headers   => (
   is      => 'rw',
   isa => 'HTTP::Headers',
-  handles => [qw(content_encoding content_length content_type header)],
+  handles => [qw(content_encoding content_length content_type content_type_charset header)],
   default => sub { HTTP::Headers->new() },
   required => 1,
   lazy => 1,
@@ -94,15 +103,13 @@ has _context => (
   clearer => '_clear_context',
 );
 
-has encoding => (is=>'ro');
-
 before [qw(status headers content_encoding content_length content_type header)] => sub {
   my $self = shift;
 
   $self->_context->log->warn( 
-    "Useless setting a header value after finalize_headers called." .
+    "Useless setting a header value after finalize_headers and the response callback has been called." .
     " Not what you want." )
-      if ( $self->finalized_headers && @_ );
+      if ( $self->finalized_headers && !$self->_has_response_cb && @_ );
 };
 
 sub output { shift->body(@_) }
@@ -117,8 +124,9 @@ sub write {
 
     $buffer = q[] unless defined $buffer;
 
-    $buffer = $self->_context->encoding->encode( $buffer, $self->_context->_encode_check )
-      if $self->_context->encoding && $self->content_type =~ /$Catalyst::DEFAULT_ENCODE_CONTENT_TYPE_MATCH/;
+    if($self->encodable_response) {
+      $buffer = $self->_context->encoding->encode( $buffer, $self->_context->_encode_check )
+    }
 
     my $len = length($buffer);
     $self->_writer->write($buffer);
@@ -195,6 +203,22 @@ you might want to use a L<IO::Handle> type of object (Something that implements
 in the same fashion), or a filehandle GLOB. Catalyst
 will write it piece by piece into the response.
 
+If you are using a filehandle as the body response you are responsible for
+making sure it comforms to the L<PSGI> specification with regards to content
+encoding.  Unlike with scalar body values or when using the streaming interfaces
+we currently do not attempt to normalize and encode your filehandle.  In general
+this means you should be sure to be sending bytes not UTF8 decoded multibyte
+characters.
+
+Most of the time when you do:
+
+    open(my $fh, '<:raw', $path);
+
+You should be fine.  If you open a filehandle with a L<PerlIO> layer you probably
+are not fine.  You can usually fix this by explicitly using binmode to set
+the IOLayer to :raw.  Its possible future versions of L<Catalyst> will try to
+'do the right thing'.
+
 When using a L<IO::Handle> type of object and no content length has been
 already set in the response headers Catalyst will make a reasonable attempt
 to determine the size of the Handle. Depending on the implementation of your
@@ -306,6 +330,10 @@ This value is typically set by your view or plugin. For example,
 L<Catalyst::Plugin::Static::Simple> will guess the mime type based on the file
 it found, while L<Catalyst::View::TT> defaults to C<text/html>.
 
+=head2 $res->content_type_charset
+
+Shortcut for $res->headers->content_type_charset;
+
 =head2 $res->cookies
 
 Returns a reference to a hash containing cookies to be set. The keys of the
@@ -511,6 +539,67 @@ Example:
 Please note this does not attempt to map or nest your PSGI application under
 the Controller and Action namespace or path.  
 
+=head2 encodable_content_type
+
+This is a regular expression used to determine of the current content type
+should be considered encodable.  Currently we apply default encoding (usually
+UTF8) to text type contents.  Here's the default regular expression:
+
+This would match content types like:
+
+    text/plain
+    text/html
+    text/xml
+    application/javascript
+    application/xml
+    application/vnd.user+xml
+
+B<NOTE>: We don't encode JSON content type responses by default since most
+of the JSON serializers that are commonly used for this task will do so
+automatically and we don't want to double encode.  If you are not using a
+tool like L<JSON> to produce JSON type content, (for example you are using
+a template system, or creating the strings manually) you will need to either
+encoding the body yourself:
+
+    $c->response->body( $c->encoding->encode( $body, $c->_encode_check ) );
+
+Or you can alter the regular expression using this attribute.
+
+=head2 encodable_response
+
+Given a L<Catalyst::Response> return true if its one that can be encoded.  
+
+     make sure there is an encoding set on the response
+     make sure the content type is encodable
+     make sure no content type charset has been already set to something different from the global encoding
+     make sure no content encoding is present.
+
+Note this does not inspect a body since we do allow automatic encoding on streaming
+type responses.
+
+=cut
+
+sub encodable_response {
+  my ($self) = @_;
+  return 0 unless $self->_context; # Cases like returning a HTTP Exception response you don't have a context here...
+  return 0 unless $self->_context->encoding;
+
+  my $has_manual_charset = 0;
+  if(my $charset = $self->content_type_charset) {
+    $has_manual_charset = (uc($charset) ne uc($self->_context->encoding->mime_name)) ? 1:0;
+  }
+
+  if(
+      ($self->content_type =~ m/${\$self->encodable_content_type}/) and
+      (!$has_manual_charset) and
+      (!$self->content_encoding || $self->content_encoding eq 'identity' )
+  ) { 
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
 =head2 DEMOLISH
 
 Ensures that the response is flushed and closed at the end of the
diff --git a/lib/Catalyst/Utils.pm b/lib/Catalyst/Utils.pm

index 5ee7451..1bccecb 100644 (file)
--- a/lib/Catalyst/Utils.pm
+++ b/lib/Catalyst/Utils.pm
@@ -10,7 +10,6 @@ use Cwd;
 use Class::Load 'is_class_loaded';
 use String::RewritePrefix;
 use Class::Load ();
-
 use namespace::clean;
 
 =head1 NAME
@@ -503,6 +502,8 @@ sub apply_registered_middleware {
     return $new_psgi;
 }
 
+
+
 =head1 PSGI Helpers
 
 Utility functions to make it easier to work with PSGI applications under Catalyst
diff --git a/t/lib/TestAppUnicode.pm b/t/lib/TestAppUnicode.pm

index 55359f7..8338f3d 100644 (file)
--- a/t/lib/TestAppUnicode.pm
+++ b/t/lib/TestAppUnicode.pm
@@ -3,7 +3,7 @@ use strict;
 use warnings;
 use TestLogger;
 use base qw/Catalyst/;
-use Catalyst qw/Unicode::Encoding/;
+use Catalyst;
 
 __PACKAGE__->config(
   'name' => 'TestAppUnicode',
diff --git a/t/unicode_plugin_charset_utf8.t b/t/unicode_plugin_charset_utf8.t

index 81ba9f7..4c7c0c6 100644 (file)
--- a/t/unicode_plugin_charset_utf8.t
+++ b/t/unicode_plugin_charset_utf8.t
@@ -6,7 +6,7 @@ use lib "$Bin/lib";
 use Data::Dumper;
 
 BEGIN {
-    $ENV{TESTAPP_ENCODING} = 'UTF-8';
+  # $ENV{TESTAPP_ENCODING} = 'UTF-8'; # This is now default
     $ENV{TESTAPP_DEBUG} = 0;
     $ENV{CATALYST_DEBUG} = 0;
 }
@@ -27,6 +27,6 @@ is scalar(@TestLogger::LOGS), 1
     or diag Dumper(\@TestLogger::LOGS);
 like $TestLogger::LOGS[0], qr/content type is 'iso-8859-1'/;
 
-like $TestLogger::ELOGS[0], qr/Unicode::Encoding plugin/;
+#like $TestLogger::ELOGS[0], qr/Unicode::Encoding plugin/; #no longer a plugin
 
 done_testing;
diff --git a/t/utf_incoming.t b/t/utf_incoming.t

index 516e9d5..f04dd19 100644 (file)
--- a/t/utf_incoming.t
+++ b/t/utf_incoming.t
@@ -111,12 +111,19 @@ use JSON::MaybeXS;
     my ($self, $c) = @_;
     Test::More::is $c->req->body_parameters->{'♥'}, '♥♥';
     Test::More::ok my $upload = $c->req->uploads->{file};
+    Test::More::is $upload->charset, 'UTF-8';
 
     my $text = $upload->slurp;
     Test::More::is Encode::decode_utf8($text), "<p>This is stream_body_fh action ♥</p>\n";
 
+    my $decoded_text = $upload->decoded_slurp;
+    Test::More::is $decoded_text, "<p>This is stream_body_fh action ♥</p>\n";
+
+    Test::More::is $upload->filename, '♥ttachment.txt';
+    Test::More::is $upload->raw_basename, '♥ttachment.txt';
+
     $c->response->content_type('text/html');
-    $c->response->body($upload->fh);
+    $c->response->body($decoded_text);
   }
 
   sub json :POST Consumes(JSON) Local {
@@ -132,6 +139,28 @@ use JSON::MaybeXS;
     $c->response->body(JSON::MaybeXS::encode_json($post));
   }
 
+  ## If someone clears encoding, they can do as they wish
+  sub manual_1 :Local {
+    my ($self, $c) = @_;
+    $c->encoding(undef);
+    $c->res->content_type('text/plain');
+    $c->res->content_type_charset('UTF-8');
+    $c->response->body( Encode::encode_utf8("manual_1 ♥"));
+  }
+
+  ## If you do like gzip, well handle that yourself!  Basically if you do some sort
+  ## of content encoding like gzip, you must do on top of the encoding.  We will fix
+  ## the encoding plugins (Catalyst::Plugin::Compress) to do this properly for you.
+  #
+  sub gzipped :Local {
+    require Compress::Zlib;
+    my ($self, $c) = @_;
+    $c->res->content_type('text/plain');
+    $c->res->content_type_charset('UTF-8');
+    $c->res->content_encoding('gzip');
+    $c->response->body(Compress::Zlib::memGzip(Encode::encode_utf8("manual_1 ♥")));
+  }
+
   package MyApp;
   use Catalyst;
 
@@ -255,19 +284,19 @@ use Catalyst::Test 'MyApp';
 
   {
     my $url = $c->uri_for($c->controller->action_for('heart_with_arg'), '♥');
-    is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5';
+    is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5', "correct $url";
   }
 
   {
     my $url = $c->uri_for($c->controller->action_for('heart_with_arg'), ['♥']);
-    is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5';
+    is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5', "correct $url";
   }
 }
 
 {
   my $res = request "/root/stream_write";
 
-  is $res->code, 200, 'OK';
+  is $res->code, 200, 'OK GET /root/stream_write';
   is decode_utf8($res->content), '<p>This is stream_write action ♥</p>', 'correct body';
   is $res->content_charset, 'UTF-8';
 }
@@ -304,7 +333,7 @@ use Catalyst::Test 'MyApp';
   ok my $path = File::Spec->catfile('t', 'utf8.txt');
   ok my $req = POST '/root/file_upload',
     Content_Type => 'form-data',
-    Content =>  [encode_utf8('♥')=>encode_utf8('♥♥'), file=>["$path", 'attachment.txt', 'Content-Type' =>'text/html; charset=UTF-8', ]];
+    Content =>  [encode_utf8('♥')=>encode_utf8('♥♥'), file=>["$path", encode_utf8('♥ttachment.txt'), 'Content-Type' =>'text/html; charset=UTF-8', ]];
 
   ok my $res = request $req;
   is decode_utf8($res->content), "<p>This is stream_body_fh action ♥</p>\n";
@@ -321,6 +350,28 @@ use Catalyst::Test 'MyApp';
   is_deeply decode_json(($res->content)), +{'♥'=>'♥♥'};
 }
 
+{
+  my $res = request "/root/manual_1";
+
+  is $res->code, 200, 'OK';
+  is decode_utf8($res->content), "manual_1 ♥", 'correct body';
+  is $res->content_length, 12, 'correct length';
+  is $res->content_charset, 'UTF-8';
+}
+
+SKIP: {
+  eval { require Compress::Zlib; 1} || do {
+    skip "Compress::Zlib needed to test gzip encoding", 5 };
+
+  my $res = request "/root/gzipped";
+  ok my $raw_content = $res->content;
+  ok my $content = Compress::Zlib::memGunzip($raw_content), 'no gunzip error';
+
+  is $res->code, 200, 'OK';
+  is decode_utf8($content), "manual_1 ♥", 'correct body';
+  is $res->content_charset, 'UTF-8';
+}
+
 ## should we use binmode on filehandles to force the encoding...?
 ## Not sure what else to do with multipart here, if docs are enough...
Changes		patch \| blob \| blame \| history
lib/Catalyst.pm		patch \| blob \| blame \| history
lib/Catalyst/Engine.pm		patch \| blob \| blame \| history
lib/Catalyst/Request/Upload.pm		patch \| blob \| blame \| history
lib/Catalyst/Response.pm		patch \| blob \| blame \| history
lib/Catalyst/Utils.pm		patch \| blob \| blame \| history
t/lib/TestAppUnicode.pm		patch \| blob \| blame \| history
t/unicode_plugin_charset_utf8.t		patch \| blob \| blame \| history
t/utf_incoming.t		patch \| blob \| blame \| history