- Exposed the reqexp we use to match content types that need encoding via a
global variable.
- Added some test cases for JSON utf8 and tested file uploads with utf8.
+ - Fixes to decoding on file upload filenames and related methods
+ - new methods on upload object that tries to do the right thing if we find
+ a character set on the upload and its UTF8.
+ - new additional helper methods on the file upload object.
+ - new helper methods has_encoding and clear_encoding on context.
+ - Method on Catalyst::Response to determine if the reponse should be encoded.
+ - Warn if changing headers only if headers are finalized AND the response callback
+ has allready been called (and headers already sent).
+ - Centralized rules about detecting if we need to automatically encode or not and
+ added tests around cases when you choose to skip auto encoding.
5.90079_003 - 2014-12-03
- Make sure all tests run even if debug mode is enabled.
our $RECURSION = 1000;
our $DETACH = Catalyst::Exception::Detach->new;
our $GO = Catalyst::Exception::Go->new;
-our $DEFAULT_ENCODE_CONTENT_TYPE_MATCH = qr{text|xml$|javascript$};
#I imagine that very few of these really need to be class variables. if any.
#maybe we should just make them attributes with a default?
Your log class should implement the methods described in
L<Catalyst::Log>.
+=head2 has_encoding
+
+Returned True if there's a valid encoding
+
+=head2 clear_encoding
+
+Clears the encoding for the current context
+
=head2 encoding
Sets or gets the application encoding.
=cut
+sub has_encoding { shift->encoding ? 1:0 }
+
+sub clear_encoding {
+ my $c = shift;
+ if(blessed $c) {
+ $c->encoding(undef);
+ } else {
+ $c->debug->error("You can't clear encoding on the application");
+ }
+}
+
sub encoding {
my $c = shift;
my $encoding;
$c->finalize_cookies;
+ # This currently is a NOOP but I don't want to remove it since I guess people
+ # might have Response subclasses that use it for something... (JNAP)
$c->response->finalize_headers();
- if(my $enc = $c->encoding) {
- my ($ct, $ct_enc) = $c->response->content_type;
-
- # Only touch 'text-like' contents
- if($c->response->content_type =~ /$DEFAULT_ENCODE_CONTENT_TYPE_MATCH/) {
- if ($ct_enc && $ct_enc =~ /charset=([^;]*)/) {
- if (uc($1) ne uc($enc->mime_name)) {
- $c->log->debug("Catalyst encoding config is set to encode in '" .
- $enc->mime_name .
- "', content type is '$1', not encoding ");
- }
- } else {
- $c->res->content_type($c->res->content_type . "; charset=" . $enc->mime_name);
- }
- }
- }
-
# Done
$response->finalized_headers(1);
}
default the encoding is UTF-8 but you can disable it by explictly setting the
encoding configuration value to undef.
+We can only encode when the body is a scalar. Methods for encoding via the
+streaming interfaces (such as C<write> and C<write_fh> on L<Catalyst::Response>
+are available).
+
See L</ENCODING>.
=cut
sub finalize_encoding {
my $c = shift;
+ my $res = $c->res || return;
+
+ # Warn if the set charset is different from the one you put into encoding. We need
+ # to do this early since encodable_response is false for this condition and we need
+ # to match the debug output for backcompat (there's a test for this...) -JNAP
+ if(
+ $res->content_type_charset and $c->encoding and
+ (uc($c->encoding->mime_name) ne uc($res->content_type_charset))
+ ) {
+ my $ct = lc($res->content_type_charset);
+ $c->log->debug("Catalyst encoding config is set to encode in '" .
+ $c->encoding->mime_name .
+ "', content type is '$ct', not encoding ");
+ }
- my $body = $c->response->body;
-
- return unless defined($body);
-
- my $enc = $c->encoding;
-
- return unless $enc;
-
- # Only touch 'text-like' contents
- if($c->response->content_type =~ /$DEFAULT_ENCODE_CONTENT_TYPE_MATCH/) {
- if (ref(\$body) eq 'SCALAR') {
- $c->response->body( $c->encoding->encode( $body, $c->_encode_check ) );
- }
+ if(
+ ($res->encodable_response) and
+ (defined($res->body)) and
+ (ref(\$res->body) eq 'SCALAR')
+ ) {
+ $c->res->body( $c->encoding->encode( $c->res->body, $c->_encode_check ) );
+
+ # Set the charset if necessary. This might be a bit bonkers since encodable response
+ # is false when the set charset is not the same as the encoding mimetype (maybe
+ # confusing action at a distance here..
+ # Don't try to set the charset if one already exists
+ $c->res->content_type($c->res->content_type . "; charset=" . $c->encoding->mime_name)
+ unless($c->res->content_type_charset);
}
}
$content_type =~ /^text|xml$|javascript$/
-The value of this regex is contained in the global variable
-
- $Catalyst::DEFAULT_ENCODE_CONTENT_TYPE_MATCH
+Encoding is set on the application, but it is copied to the response object
+so you can override encoding rules per request (See L<Catalyst::Response>
+for more information).
-This may change in the future. Be default we don't automatically
-encode 'application/json' since the most popular JSON encoders (such
-as L<JSON::MaybeXS> which is the library that L<Catalyst> can make use
-of) will do the UTF8 encoding and decoding automatically. Having it on
-in Catalyst could result in double encoding.
+Be default we don't automatically encode 'application/json' since the most
+popular JSON encoders (such as L<JSON::MaybeXS> which is the library that
+L<Catalyst> can make use of) will do the UTF8 encoding and decoding automatically.
+Having it on in Catalyst could result in double encoding.
If you are producing JSON response in an unconventional manner (such
as via a template or manual strings) you should perform the UTF8 encoding
manually as well such as to conform to the JSON specification.
+NOTE: We also examine the value of $c->response->content_encoding. If
+you set this (like for example 'gzip', and manually gzipping the body)
+we assume that you have done all the neccessary encoding yourself, since
+we cannot encode the gzipped contents. If you use a plugin like
+L<Catalyst::Plugin::Compress> we will be updating that plugin to work
+with the new UTF8 encoding code, or you can use L<Plack::Middleware::Deflater>
+or (probably best) do your compression on a front end proxy.
+
=head2 Methods
=over 4
(
size => $upload->{size},
type => scalar $headers->content_type,
+ charset => scalar $headers->content_type_charset,
headers => $headers,
tempname => $upload->{tempname},
filename => $filename,
has tempname => (is => 'rw');
has type => (is => 'rw');
has basename => (is => 'ro', lazy_build => 1);
+has raw_basename => (is => 'ro', lazy_build => 1);
+has charset => (is=>'ro', predicate=>'has_charset');
has fh => (
is => 'rw',
Catalyst::Exception->throw(
message => qq/Can't open '$filename': '$!'/ );
}
-
return $fh;
},
);
sub _build_basename {
+ my $basename = shift->raw_basename;
+ $basename =~ s|[^\w\.-]+|_|g;
+ return $basename;
+}
+
+sub _build_raw_basename {
my $self = shift;
my $basename = $self->filename;
$basename =~ s|\\|/|g;
$basename = ( File::Spec::Unix->splitpath($basename) )[2];
- $basename =~ s|[^\w\.-]+|_|g;
return $basename;
}
$upload->basename;
$upload->copy_to;
$upload->fh;
+ $upload->decoded_fh
$upload->filename;
$upload->headers;
$upload->link_to;
$upload->size;
$upload->slurp;
+ $upload->decoded_slurp;
$upload->tempname;
$upload->type;
+ $upload->charset;
To specify where Catalyst should put the temporary files, set the 'uploadtmp'
option in the Catalyst config. If unset, Catalyst will use the system temp dir.
return File::Copy::copy( $self->tempname, @_ );
}
+=head2 $upload->is_utf8_encoded
+
+Returns true of the upload defines a character set at that value is 'UTF-8'.
+This does not try to inspect your upload and make any guesses if the Content
+Type charset is undefined.
+
+=cut
+
+sub is_utf8_encoded {
+ my $self = shift;
+ if(my $charset = $self->charset) {
+ return $charset eq 'UTF-8' ? 1 : 0;
+ }
+ return 0;
+}
+
=head2 $upload->fh
Opens a temporary file (see tempname below) and returns an L<IO::File> handle.
+This is a filehandle that is opened with no additional IO Layers.
+
+=head2 $upload->decoded_fh(?$encoding)
+
+Returns a filehandle that has binmode set to UTF-8 if a UTF-8 character set
+is found. This also accepts an override encoding value that you can use to
+force a particular L<PerlIO> layer. If neither are found the filehandle is
+set to :raw.
+
+This is useful if you are pulling the file into code and inspecting bit and
+maybe then sending those bits back as the response. (Please not this is not
+a suitable filehandle to set in the body; use C<fh> if you are doing that).
+
+Please note that using this method sets the underlying filehandle IO layer
+so once you use this method if you go back and use the C<fh> method you
+still get the IO layer applied.
+
+=cut
+
+sub decoded_fh {
+ my ($self, $layer) = @_;
+ my $fh = $self->fh;
+
+ $layer = ":encoding(UTF-8)" if !$layer && $self->is_utf8_encoded;
+ $layer = ':raw' unless $layer;
+
+ binmode($fh, $layer);
+ return $fh;
+}
+
=head2 $upload->filename
Returns the client-supplied filename.
Returns the size of the uploaded file in bytes.
-=head2 $upload->slurp
+=head2 $upload->slurp(?$encoding)
+
+Optionally accepts an argument to define an IO Layer (which is applied to
+the filehandle via binmode; if no layer is defined the default is set to
+":raw".
Returns a scalar containing the contents of the temporary file.
Note that this will cause the filehandle pointed to by C<< $upload->fh >> to
be reset to the start of the file using seek and the file handle to be put
-into binary mode.
+into whatever encoding mode is applied.
=cut
return $content;
}
+=head2 $upload->decoded_slurp(?$encoding)
+
+Works just like C<slurp> except we use C<decoded_fh> instead of C<fh> to
+open a filehandle to slurp. This means if your upload charset is UTF8
+we binmode the filehandle to that encoding.
+
+=cut
+
+sub decoded_slurp {
+ my ( $self, $layer ) = @_;
+ my $handle = $self->decoded_fh($layer);
+
+ my $content = undef;
+ $handle->seek(0, IO::File::SEEK_SET);
+ while ( $handle->sysread( my $buffer, 8192 ) ) {
+ $content .= $buffer;
+ }
+
+ $handle->seek(0, IO::File::SEEK_SET);
+ return $content;
+}
+
=head2 $upload->basename
-Returns basename for C<filename>.
+Returns basename for C<filename>. This filters the name through a regexp
+C<basename =~ s|[^\w\.-]+|_|g> to make it safe for filesystems that don't
+like advanced characters. This will of course filter UTF8 characters.
+If you need the exact basename unfiltered use C<raw_basename>.
+
+=head2 $upload->raw_basename
+
+Just like C<basename> but without filtering the filename for characters that
+don't always write to a filesystem.
=head2 $upload->tempname
Returns the client-supplied Content-Type.
+=head2 $upload->charset
+
+The character set information part of the content type, if any. Useful if you
+need to figure out any encodings on the file upload.
+
=head2 meta
Provided by Moose
use namespace::autoclean;
use Scalar::Util 'blessed';
use Catalyst::Response::Writer;
+use Catalyst::Utils ();
with 'MooseX::Emulate::Class::Accessor::Fast';
+our $DEFAULT_ENCODE_CONTENT_TYPE_MATCH = qr{text|xml$|javascript$};
+
+has encodable_content_type => (
+ is => 'rw',
+ required => 1,
+ default => sub { $DEFAULT_ENCODE_CONTENT_TYPE_MATCH }
+);
+
has _response_cb => (
is => 'ro',
isa => 'CodeRef',
sub _build_write_fh {
my $writer = $_[0]->_writer; # We need to get the finalize headers side effect...
- my $requires_encoding = $_[0]->content_type =~ m/$Catalyst::DEFAULT_ENCODE_CONTENT_TYPE_MATCH/;
+ my $requires_encoding = $_[0]->encodable_response;
my %fields = (
_writer => $writer,
- _encoding => $_[0]->encoding,
+ _encoding => $_[0]->_context->encoding,
_requires_encoding => $requires_encoding,
);
has headers => (
is => 'rw',
isa => 'HTTP::Headers',
- handles => [qw(content_encoding content_length content_type header)],
+ handles => [qw(content_encoding content_length content_type content_type_charset header)],
default => sub { HTTP::Headers->new() },
required => 1,
lazy => 1,
clearer => '_clear_context',
);
-has encoding => (is=>'ro');
-
before [qw(status headers content_encoding content_length content_type header)] => sub {
my $self = shift;
$self->_context->log->warn(
- "Useless setting a header value after finalize_headers called." .
+ "Useless setting a header value after finalize_headers and the response callback has been called." .
" Not what you want." )
- if ( $self->finalized_headers && @_ );
+ if ( $self->finalized_headers && !$self->_has_response_cb && @_ );
};
sub output { shift->body(@_) }
$buffer = q[] unless defined $buffer;
- $buffer = $self->_context->encoding->encode( $buffer, $self->_context->_encode_check )
- if $self->_context->encoding && $self->content_type =~ /$Catalyst::DEFAULT_ENCODE_CONTENT_TYPE_MATCH/;
+ if($self->encodable_response) {
+ $buffer = $self->_context->encoding->encode( $buffer, $self->_context->_encode_check )
+ }
my $len = length($buffer);
$self->_writer->write($buffer);
in the same fashion), or a filehandle GLOB. Catalyst
will write it piece by piece into the response.
+If you are using a filehandle as the body response you are responsible for
+making sure it comforms to the L<PSGI> specification with regards to content
+encoding. Unlike with scalar body values or when using the streaming interfaces
+we currently do not attempt to normalize and encode your filehandle. In general
+this means you should be sure to be sending bytes not UTF8 decoded multibyte
+characters.
+
+Most of the time when you do:
+
+ open(my $fh, '<:raw', $path);
+
+You should be fine. If you open a filehandle with a L<PerlIO> layer you probably
+are not fine. You can usually fix this by explicitly using binmode to set
+the IOLayer to :raw. Its possible future versions of L<Catalyst> will try to
+'do the right thing'.
+
When using a L<IO::Handle> type of object and no content length has been
already set in the response headers Catalyst will make a reasonable attempt
to determine the size of the Handle. Depending on the implementation of your
L<Catalyst::Plugin::Static::Simple> will guess the mime type based on the file
it found, while L<Catalyst::View::TT> defaults to C<text/html>.
+=head2 $res->content_type_charset
+
+Shortcut for $res->headers->content_type_charset;
+
=head2 $res->cookies
Returns a reference to a hash containing cookies to be set. The keys of the
Please note this does not attempt to map or nest your PSGI application under
the Controller and Action namespace or path.
+=head2 encodable_content_type
+
+This is a regular expression used to determine of the current content type
+should be considered encodable. Currently we apply default encoding (usually
+UTF8) to text type contents. Here's the default regular expression:
+
+This would match content types like:
+
+ text/plain
+ text/html
+ text/xml
+ application/javascript
+ application/xml
+ application/vnd.user+xml
+
+B<NOTE>: We don't encode JSON content type responses by default since most
+of the JSON serializers that are commonly used for this task will do so
+automatically and we don't want to double encode. If you are not using a
+tool like L<JSON> to produce JSON type content, (for example you are using
+a template system, or creating the strings manually) you will need to either
+encoding the body yourself:
+
+ $c->response->body( $c->encoding->encode( $body, $c->_encode_check ) );
+
+Or you can alter the regular expression using this attribute.
+
+=head2 encodable_response
+
+Given a L<Catalyst::Response> return true if its one that can be encoded.
+
+ make sure there is an encoding set on the response
+ make sure the content type is encodable
+ make sure no content type charset has been already set to something different from the global encoding
+ make sure no content encoding is present.
+
+Note this does not inspect a body since we do allow automatic encoding on streaming
+type responses.
+
+=cut
+
+sub encodable_response {
+ my ($self) = @_;
+ return 0 unless $self->_context; # Cases like returning a HTTP Exception response you don't have a context here...
+ return 0 unless $self->_context->encoding;
+
+ my $has_manual_charset = 0;
+ if(my $charset = $self->content_type_charset) {
+ $has_manual_charset = (uc($charset) ne uc($self->_context->encoding->mime_name)) ? 1:0;
+ }
+
+ if(
+ ($self->content_type =~ m/${\$self->encodable_content_type}/) and
+ (!$has_manual_charset) and
+ (!$self->content_encoding || $self->content_encoding eq 'identity' )
+ ) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
=head2 DEMOLISH
Ensures that the response is flushed and closed at the end of the
use Class::Load 'is_class_loaded';
use String::RewritePrefix;
use Class::Load ();
-
use namespace::clean;
=head1 NAME
return $new_psgi;
}
+
+
=head1 PSGI Helpers
Utility functions to make it easier to work with PSGI applications under Catalyst
use warnings;
use TestLogger;
use base qw/Catalyst/;
-use Catalyst qw/Unicode::Encoding/;
+use Catalyst;
__PACKAGE__->config(
'name' => 'TestAppUnicode',
use Data::Dumper;
BEGIN {
- $ENV{TESTAPP_ENCODING} = 'UTF-8';
+ # $ENV{TESTAPP_ENCODING} = 'UTF-8'; # This is now default
$ENV{TESTAPP_DEBUG} = 0;
$ENV{CATALYST_DEBUG} = 0;
}
or diag Dumper(\@TestLogger::LOGS);
like $TestLogger::LOGS[0], qr/content type is 'iso-8859-1'/;
-like $TestLogger::ELOGS[0], qr/Unicode::Encoding plugin/;
+#like $TestLogger::ELOGS[0], qr/Unicode::Encoding plugin/; #no longer a plugin
done_testing;
my ($self, $c) = @_;
Test::More::is $c->req->body_parameters->{'♥'}, '♥♥';
Test::More::ok my $upload = $c->req->uploads->{file};
+ Test::More::is $upload->charset, 'UTF-8';
my $text = $upload->slurp;
Test::More::is Encode::decode_utf8($text), "<p>This is stream_body_fh action ♥</p>\n";
+ my $decoded_text = $upload->decoded_slurp;
+ Test::More::is $decoded_text, "<p>This is stream_body_fh action ♥</p>\n";
+
+ Test::More::is $upload->filename, '♥ttachment.txt';
+ Test::More::is $upload->raw_basename, '♥ttachment.txt';
+
$c->response->content_type('text/html');
- $c->response->body($upload->fh);
+ $c->response->body($decoded_text);
}
sub json :POST Consumes(JSON) Local {
$c->response->body(JSON::MaybeXS::encode_json($post));
}
+ ## If someone clears encoding, they can do as they wish
+ sub manual_1 :Local {
+ my ($self, $c) = @_;
+ $c->encoding(undef);
+ $c->res->content_type('text/plain');
+ $c->res->content_type_charset('UTF-8');
+ $c->response->body( Encode::encode_utf8("manual_1 ♥"));
+ }
+
+ ## If you do like gzip, well handle that yourself! Basically if you do some sort
+ ## of content encoding like gzip, you must do on top of the encoding. We will fix
+ ## the encoding plugins (Catalyst::Plugin::Compress) to do this properly for you.
+ #
+ sub gzipped :Local {
+ require Compress::Zlib;
+ my ($self, $c) = @_;
+ $c->res->content_type('text/plain');
+ $c->res->content_type_charset('UTF-8');
+ $c->res->content_encoding('gzip');
+ $c->response->body(Compress::Zlib::memGzip(Encode::encode_utf8("manual_1 ♥")));
+ }
+
package MyApp;
use Catalyst;
{
my $url = $c->uri_for($c->controller->action_for('heart_with_arg'), '♥');
- is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5';
+ is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5', "correct $url";
}
{
my $url = $c->uri_for($c->controller->action_for('heart_with_arg'), ['♥']);
- is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5';
+ is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5', "correct $url";
}
}
{
my $res = request "/root/stream_write";
- is $res->code, 200, 'OK';
+ is $res->code, 200, 'OK GET /root/stream_write';
is decode_utf8($res->content), '<p>This is stream_write action ♥</p>', 'correct body';
is $res->content_charset, 'UTF-8';
}
ok my $path = File::Spec->catfile('t', 'utf8.txt');
ok my $req = POST '/root/file_upload',
Content_Type => 'form-data',
- Content => [encode_utf8('♥')=>encode_utf8('♥♥'), file=>["$path", 'attachment.txt', 'Content-Type' =>'text/html; charset=UTF-8', ]];
+ Content => [encode_utf8('♥')=>encode_utf8('♥♥'), file=>["$path", encode_utf8('♥ttachment.txt'), 'Content-Type' =>'text/html; charset=UTF-8', ]];
ok my $res = request $req;
is decode_utf8($res->content), "<p>This is stream_body_fh action ♥</p>\n";
is_deeply decode_json(($res->content)), +{'♥'=>'♥♥'};
}
+{
+ my $res = request "/root/manual_1";
+
+ is $res->code, 200, 'OK';
+ is decode_utf8($res->content), "manual_1 ♥", 'correct body';
+ is $res->content_length, 12, 'correct length';
+ is $res->content_charset, 'UTF-8';
+}
+
+SKIP: {
+ eval { require Compress::Zlib; 1} || do {
+ skip "Compress::Zlib needed to test gzip encoding", 5 };
+
+ my $res = request "/root/gzipped";
+ ok my $raw_content = $res->content;
+ ok my $content = Compress::Zlib::memGunzip($raw_content), 'no gunzip error';
+
+ is $res->code, 200, 'OK';
+ is decode_utf8($content), "manual_1 ♥", 'correct body';
+ is $res->content_charset, 'UTF-8';
+}
+
## should we use binmode on filehandles to force the encoding...?
## Not sure what else to do with multipart here, if docs are enough...