From: John Napiorkowski Date: Wed, 31 Dec 2014 16:36:02 +0000 (-0600) Subject: merged and resolved conflicts from stable X-Git-Tag: 5.90079_008~21 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FCatalyst-Runtime.git;a=commitdiff_plain;h=6dcc530761473f574ccde956e3a321b1dfb3d27e;hp=e4f3fb2485c9181600ca8fe759016f9e69a53f2d merged and resolved conflicts from stable --- diff --git a/Changes b/Changes index 928442f..857ab36 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,66 @@ # This file documents the revision history for Perl extension Catalyst. +5.90079_005 - 2014-12-31 + - Merged changes from 5.90078 + +5.90079_004 - 2014-12-26 + - Starting adding some docs around the new encoding stuff + - Exposed the reqexp we use to match content types that need encoding via a + global variable. + - Added some test cases for JSON utf8 and tested file uploads with utf8. + - Fixes to decoding on file upload filenames and related methods + - new methods on upload object that tries to do the right thing if we find + a character set on the upload and its UTF8. + - new additional helper methods on the file upload object. + - new helper methods has_encoding and clear_encoding on context. + - Method on Catalyst::Response to determine if the reponse should be encoded. + - Warn if changing headers only if headers are finalized AND the response callback + has allready been called (and headers already sent). + - Centralized rules about detecting if we need to automatically encode or not and + added tests around cases when you choose to skip auto encoding. + +5.90079_003 - 2014-12-03 + - Make sure all tests run even if debug mode is enabled. + - Fixed issue with middleware stash test case that failed on older Perls + +5.90079_002 - 2014-12-02 + - Fixed typo in Makefile.PL which borked the previous distribution. No other + changes. + +5.90079_001 - 2014-12-02 + - MyApp->to_app is now an alias for MyApp->psgi_app in order to better support + existing Plack conventions. + - Modify Catayst::Response->from_psgi_response to allow the first argument to + be an object that does ->as_psgi. + - Modified Catayst::Middleware::Stash to be a shallow copy in $env. Added some + docs. Added a test case to make sure stash keys added in a child application + don't bubble back up to the main application. + - We no longer use Encode::is_utf8 since it doesn't work the way we think it + does... This required some UTF-8 changes. If your application is UTF-8 aware + I highly suggest you test this release. + - We alway do utf8 decoding on incoming URLs (before we only did so if the server + encoding was utf8. I believe this is correct as per the w3c spec, but please + correct if incorrect :) + - Debug output now shows utf8 characters if those are incoming via Args or as + path or pathparts in your actions. query and body parameter keys are now also + subject to utf8 decoding (or as specificed via the encoding configuration value). + - lots of UTF8 changes. Again we think this is now more correct but please test. + - Allow $c->res->redirect($url) to accept $url as an object that does ->as_string + which I think will ease a common case (and common bug) and added documentation. + - !!! UTF-8 is now the default encoding (there used to be none...). You can disable + this if you need to with MyApp->config(encoding => undef) if it causes you trouble. + - Calling $c->res->write($data) now encodes $data based on the configured encoding + (UTF-8 is default). + - $c->res->writer_fh now returns Catalyst::Response::Writer which is a decorator + over the PSGI writer and provides an additional methd 'write_encoded' that just + does the right thing for encoding your responses. This is probably the method + you want to use. + - New dispatch matching attribute: Scheme. This lets you match a route based on + the incoming URI scheme (http, https, ws, wss). + - If $c->uri_for targets an action or action chain that defines Scheme, use that + scheme for the generated URI object instead of just using whatever the incoming + request uses. + 5.90078 - 2014-12-30 - POD corrections (sergey++) - New configuration option to disable the HTTP Exception passthru feature diff --git a/MANIFEST.SKIP b/MANIFEST.SKIP index 31fd9fc..90b0676 100644 --- a/MANIFEST.SKIP +++ b/MANIFEST.SKIP @@ -1,2 +1,2 @@ -^(?!script/\w+\.pl$|TODO$|lib/.+(? '2.49'; requires 'LWP' => '5.837'; # LWP had unicode fail in 5.8.26 -requires 'URI' => '1.36'; +requires 'URI' => '1.65'; +requires 'URI::ws' => '0.03'; requires 'JSON::MaybeXS' => '1.000000'; requires 'Stream::Buffered'; requires 'Hash::MultiValue'; diff --git a/lib/Catalyst.pm b/lib/Catalyst.pm index 9686617..582ebb3 100644 --- a/lib/Catalyst.pm +++ b/lib/Catalyst.pm @@ -50,7 +50,7 @@ use Plack::Middleware::RemoveRedundantBody; use Catalyst::Middleware::Stash; use Plack::Util; use Class::Load 'load_class'; -use Encode 2.21 (); +use Encode 2.21 'decode_utf8', 'encode_utf8'; BEGIN { require 5.008003; } @@ -86,8 +86,10 @@ has response => ( lazy => 1, ); sub _build_response_constructor_args { - my $self = shift; - { _log => $self->log }; + return +{ + _log => $_[0]->log, + encoding => $_[0]->encoding, + }; } has namespace => (is => 'rw'); @@ -127,7 +129,8 @@ __PACKAGE__->stats_class('Catalyst::Stats'); __PACKAGE__->_encode_check(Encode::FB_CROAK | Encode::LEAVE_SRC); # Remember to update this in Catalyst::Runtime as well! -our $VERSION = '5.90078'; +our $VERSION = '5.90079_005'; +$VERSION = eval $VERSION if $VERSION =~ /_/; # numify for warning-free dev releases sub import { my ( $class, @arguments ) = @_; @@ -495,6 +498,18 @@ Catalyst). # stash is automatically passed to the view for use in a template $c->forward( 'MyApp::View::TT' ); +The stash hash is currently stored in the PSGI C<$env> and is managed by +L. Since it's part of the C<$env> items in +the stash can be accessed in sub applications mounted under your main +L application. For example if you delegate the response of an +action to another L application, that sub application will have +access to all the stash keys of the main one, and if can of course add +more keys of its own. However those new keys will not 'bubble' back up +to the main application. + +For more information the best thing to do is to review the test case: +t/middleware-stash.t in the distribution /t directory. + =cut sub stash { @@ -1024,12 +1039,31 @@ And later: Your log class should implement the methods described in L. +=head2 has_encoding + +Returned True if there's a valid encoding + +=head2 clear_encoding + +Clears the encoding for the current context + =head2 encoding Sets or gets the application encoding. =cut +sub has_encoding { shift->encoding ? 1:0 } + +sub clear_encoding { + my $c = shift; + if(blessed $c) { + $c->encoding(undef); + } else { + $c->debug->error("You can't clear encoding on the application"); + } +} + sub encoding { my $c = shift; my $encoding; @@ -1346,6 +1380,8 @@ sub setup_finalize { =head2 $c->uri_for( $action, \@captures?, @args?, \%query_values? ) +=head2 $c->uri_for( $action, [@captures, @args], \%query_values? ) + Constructs an absolute L object based on the application root, the provided path, and the additional arguments and query parameters provided. When used as a string, provides a textual URI. If you need more flexibility @@ -1385,6 +1421,10 @@ path, use C<< $c->uri_for_action >> instead. # Path to a static resource $c->uri_for('/static/images/logo.png'); +In general the scheme of the generated URI object will follow the incoming request +however if your targeted action or action chain has the Scheme attribute it will +use that instead. + =cut sub uri_for { @@ -1402,30 +1442,38 @@ sub uri_for { ( scalar @args && ref $args[$#args] eq 'HASH' ? pop @args : {} ); carp "uri_for called with undef argument" if grep { ! defined $_ } @args; + + my @encoded_args = (); foreach my $arg (@args) { - utf8::encode($arg) if utf8::is_utf8($arg); - $arg =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; + if(ref($arg)||'' eq 'ARRAY') { + push @encoded_args, [map { + my $encoded = encode_utf8 $_; + $encoded =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; + $encoded; + } @$arg]; + } else { + push @encoded_args, do { + my $encoded = encode_utf8 $arg; + $encoded =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; + $encoded; + } + } } + my $target_action = $path->$_isa('Catalyst::Action') ? $path : undef; if ( $path->$_isa('Catalyst::Action') ) { # action object - s|/|%2F|g for @args; + s|/|%2F|g for @encoded_args; my $captures = [ map { s|/|%2F|g; $_; } - ( scalar @args && ref $args[0] eq 'ARRAY' - ? @{ shift(@args) } + ( scalar @encoded_args && ref $encoded_args[0] eq 'ARRAY' + ? @{ shift(@encoded_args) } : ()) ]; - foreach my $capture (@$captures) { - utf8::encode($capture) if utf8::is_utf8($capture); - $capture =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; - } - my $action = $path; # ->uri_for( $action, \@captures_and_args, \%query_values? ) - if( !@args && $action->number_of_args ) { + if( !@encoded_args && $action->number_of_args ) { my $expanded_action = $c->dispatcher->expand_action( $action ); - my $num_captures = $expanded_action->number_of_captures; - unshift @args, splice @$captures, $num_captures; + unshift @encoded_args, splice @$captures, $num_captures; } $path = $c->dispatcher->uri_for_action($action, $captures); @@ -1437,25 +1485,37 @@ sub uri_for { $path = '/' if $path eq ''; } - unshift(@args, $path); + unshift(@encoded_args, $path); unless (defined $path && $path =~ s!^/!!) { # in-place strip my $namespace = $c->namespace; if (defined $path) { # cheesy hack to handle path '../foo' - $namespace =~ s{(?:^|/)[^/]+$}{} while $args[0] =~ s{^\.\./}{}; + $namespace =~ s{(?:^|/)[^/]+$}{} while $encoded_args[0] =~ s{^\.\./}{}; } - unshift(@args, $namespace || ''); + unshift(@encoded_args, $namespace || ''); } # join args with '/', or a blank string - my $args = join('/', grep { defined($_) } @args); + my $args = join('/', grep { defined($_) } @encoded_args); $args =~ s/\?/%3F/g; # STUPID STUPID SPECIAL CASE $args =~ s!^/+!!; my ($base, $class) = ('/', 'URI::_generic'); if(blessed($c)) { $base = $c->req->base; - $class = ref($base); + if($target_action) { + $target_action = $c->dispatcher->expand_action($target_action); + if(my $s = $target_action->scheme) { + $s = lc($s); + $class = "URI::$s"; + $base->scheme($s); + } else { + $class = ref($base); + } + } else { + $class = ref($base); + } + $base =~ s{(?{$_}; - s/([;\/?:@&=+,\$\[\]%])/$URI::Escape::escapes{$1}/go; + #s/([;\/?:@&=+,\$\[\]%])/$URI::Escape::escapes{$1}/go; ## Commented out because seems to lead to double encoding - JNAP s/ /+/g; my $key = $_; $val = '' unless defined $val; (map { my $param = "$_"; - utf8::encode( $param ) if utf8::is_utf8($param); + $param = encode_utf8($param); # using the URI::Escape pattern here so utf8 chars survive $param =~ s/([^A-Za-z0-9\-_.!~*'() ])/$URI::Escape::escapes{$1}/go; $param =~ s/ /+/g; + + $key = encode_utf8($key); + # using the URI::Escape pattern here so utf8 chars survive + $key =~ s/([^A-Za-z0-9\-_.!~*'() ])/$URI::Escape::escapes{$1}/go; + $key =~ s/ /+/g; + "${key}=$param"; } ( ref $val eq 'ARRAY' ? @$val : $val )); } @keys); } @@ -2017,6 +2083,8 @@ sub finalize_headers { $c->finalize_cookies; + # This currently is a NOOP but I don't want to remove it since I guess people + # might have Response subclasses that use it for something... (JNAP) $c->response->finalize_headers(); # Done @@ -2025,42 +2093,49 @@ sub finalize_headers { =head2 $c->finalize_encoding -Make sure your headers and body are encoded properly IF you set an encoding. +Make sure your body is encoded properly IF you set an encoding. By +default the encoding is UTF-8 but you can disable it by explictly setting the +encoding configuration value to undef. + +We can only encode when the body is a scalar. Methods for encoding via the +streaming interfaces (such as C and C on L +are available). + See L. =cut sub finalize_encoding { my $c = shift; - - my $body = $c->response->body; - - return unless defined($body); - - my $enc = $c->encoding; - - return unless $enc; - - my ($ct, $ct_enc) = $c->response->content_type; - - # Only touch 'text-like' contents - return unless $c->response->content_type =~ /^text|xml$|javascript$/; - - if ($ct_enc && $ct_enc =~ /charset=([^;]*)/) { - if (uc($1) ne uc($enc->mime_name)) { - $c->log->debug("Unicode::Encoding is set to encode in '" . - $enc->mime_name . - "', content type is '$1', not encoding "); - return; - } - } else { - $c->res->content_type($c->res->content_type . "; charset=" . $enc->mime_name); + my $res = $c->res || return; + + # Warn if the set charset is different from the one you put into encoding. We need + # to do this early since encodable_response is false for this condition and we need + # to match the debug output for backcompat (there's a test for this...) -JNAP + if( + $res->content_type_charset and $c->encoding and + (uc($c->encoding->mime_name) ne uc($res->content_type_charset)) + ) { + my $ct = lc($res->content_type_charset); + $c->log->debug("Catalyst encoding config is set to encode in '" . + $c->encoding->mime_name . + "', content type is '$ct', not encoding "); } - # Oh my, I wonder what filehandle responses and streams do... - jnap. - # Encode expects plain scalars (IV, NV or PV) and segfaults on ref's - $c->response->body( $c->encoding->encode( $body, $c->_encode_check ) ) - if ref(\$body) eq 'SCALAR'; + if( + ($res->encodable_response) and + (defined($res->body)) and + (ref(\$res->body) eq 'SCALAR') + ) { + $c->res->body( $c->encoding->encode( $c->res->body, $c->_encode_check ) ); + + # Set the charset if necessary. This might be a bit bonkers since encodable response + # is false when the set charset is not the same as the encoding mimetype (maybe + # confusing action at a distance here.. + # Don't try to set the charset if one already exists + $c->res->content_type($c->res->content_type . "; charset=" . $c->encoding->mime_name) + unless($c->res->content_type_charset); + } } =head2 $c->finalize_output @@ -2274,7 +2349,7 @@ Prepares body parameters. sub prepare_body_parameters { my $c = shift; - $c->engine->prepare_body_parameters( $c, @_ ); + $c->request->prepare_body_parameters( $c, @_ ); } =head2 $c->prepare_connection @@ -2368,6 +2443,10 @@ sub log_request { $method ||= ''; $path = '/' unless length $path; $address ||= ''; + + $path =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + $path = decode_utf8($path); + $c->log->debug(qq/"$method" request for "$path" from "$address"/); $c->log_request_headers($request->headers); @@ -2553,37 +2632,6 @@ Prepares uploads. sub prepare_uploads { my $c = shift; $c->engine->prepare_uploads( $c, @_ ); - - my $enc = $c->encoding; - return unless $enc; - - # Uggg we hook prepare uploads to do the encoding crap on post and query - # parameters! Sorry -jnap - for my $key (qw/ parameters query_parameters body_parameters /) { - for my $value ( values %{ $c->request->{$key} } ) { - # N.B. Check if already a character string and if so do not try to double decode. - # http://www.mail-archive.com/catalyst@lists.scsys.co.uk/msg02350.html - # this avoids exception if we have already decoded content, and is _not_ the - # same as not encoding on output which is bad news (as it does the wrong thing - # for latin1 chars for example).. - $value = $c->_handle_unicode_decoding($value); - } - } - for my $value ( values %{ $c->request->uploads } ) { - # skip if it fails for uploads, as we don't usually want uploads touched - # in any way - for my $inner_value ( ref($value) eq 'ARRAY' ? @{$value} : $value ) { - $inner_value->{filename} = try { - $enc->decode( $inner_value->{filename}, $c->_encode_check ) - } catch { - $c->handle_unicode_encoding_exception({ - param_value => $inner_value->{filename}, - error_msg => $_, - encoding_step => 'uploads', - }); - }; - } - } } =head2 $c->prepare_write @@ -3016,7 +3064,9 @@ sub apply_default_middlewares { return $psgi_app; } -=head2 $c->psgi_app +=head2 App->psgi_app + +=head2 App->to_app Returns a PSGI application code reference for the catalyst application C<$c>. This is the bare application without any middlewares @@ -3027,6 +3077,8 @@ reference of your Catalyst application for use in F<.psgi> files. =cut +*to_app = \&psgi_app; + sub psgi_app { my ($app) = @_; my $psgi = $app->engine->build_psgi_app($app); @@ -3063,8 +3115,14 @@ Sets up the input/output encoding. See L sub setup_encoding { my $c = shift; - my $enc = delete $c->config->{encoding}; - $c->encoding( $enc ) if defined $enc; + if( exists($c->config->{encoding}) && !defined($c->config->{encoding}) ) { + # Ok, so the user has explicitly said "I don't want encoding..." + return; + } else { + my $enc = defined($c->config->{encoding}) ? + delete $c->config->{encoding} : 'UTF-8'; # not sure why we delete it... (JNAP) + $c->encoding($enc); + } } =head2 handle_unicode_encoding_exception @@ -3102,8 +3160,13 @@ sub _handle_unicode_decoding { return $value; } elsif ( ref $value eq 'HASH' ) { - foreach ( values %$value ) { - $_ = $self->_handle_unicode_decoding($_); + foreach (keys %$value) { + my $encoded_key = $self->_handle_param_unicode_decoding($_); + $value->{$encoded_key} = $self->_handle_unicode_decoding($value->{$_}); + + # If the key was encoded we now have two (the original and current so + # delete the original. + delete $value->{$_} if $_ ne $encoded_key; } return $value; } @@ -3118,9 +3181,7 @@ sub _handle_param_unicode_decoding { my $enc = $self->encoding; return try { - Encode::is_utf8( $value ) ? - $value - : $enc->decode( $value, $self->_encode_check ); + $enc->decode( $value, $self->_encode_check ); } catch { $self->handle_unicode_encoding_exception({ @@ -3646,6 +3707,9 @@ C - See L. C - See L +This now defaults to 'UTF-8'. You my turn it off by setting this configuration +value to undef. + =item * C @@ -3991,6 +4055,36 @@ Please see L for more on middleware. On request, decodes all params from encoding into a sequence of logical characters. On response, encodes body into encoding. +By default encoding is now 'UTF-8'. You may turn it off by setting +the encoding configuration to undef. + +Encoding is automatically applied when the content-type is set to +a type that can be encoded. Currently we encode when the content type +matches the following regular expression: + + $content_type =~ /^text|xml$|javascript$/ + +Encoding is set on the application, but it is copied to the response object +so you can override encoding rules per request (See L +for more information). + +Be default we don't automatically encode 'application/json' since the most +popular JSON encoders (such as L which is the library that +L can make use of) will do the UTF8 encoding and decoding automatically. +Having it on in Catalyst could result in double encoding. + +If you are producing JSON response in an unconventional manner (such +as via a template or manual strings) you should perform the UTF8 encoding +manually as well such as to conform to the JSON specification. + +NOTE: We also examine the value of $c->response->content_encoding. If +you set this (like for example 'gzip', and manually gzipping the body) +we assume that you have done all the neccessary encoding yourself, since +we cannot encode the gzipped contents. If you use a plugin like +L we will be updating that plugin to work +with the new UTF8 encoding code, or you can use L +or (probably best) do your compression on a front end proxy. + =head2 Methods =over 4 diff --git a/lib/Catalyst/Action.pm b/lib/Catalyst/Action.pm index 555c939..881c120 100644 --- a/lib/Catalyst/Action.pm +++ b/lib/Catalyst/Action.pm @@ -103,6 +103,10 @@ sub number_of_captures { return $self->attributes->{CaptureArgs}[0] || 0; } +sub scheme { + return exists $_[0]->attributes->{Scheme} ? $_[0]->attributes->{Scheme}[0] : undef; +} + sub list_extra_info { my $self = shift; return { @@ -192,6 +196,10 @@ Returns the number of captures this action expects for Lchain }; + my ($scheme, @more) = map { + exists $_->attributes->{Scheme} ? $_->attributes->{Scheme}[0] : (); + } reverse @chain; + + warn "$self is a chain with two many Scheme attributes (only one is allowed)" + if @more; + + return $scheme; +} + __PACKAGE__->meta->make_immutable; 1; @@ -87,6 +103,10 @@ Catalyst::ActionChain object representing a chain of these actions Returns the total number of captures for the entire chain of actions. +=head2 scheme + +Any defined scheme for the actionchain + =head2 meta Provided by Moose diff --git a/lib/Catalyst/ActionRole/Scheme.pm b/lib/Catalyst/ActionRole/Scheme.pm new file mode 100644 index 0000000..0f02827 --- /dev/null +++ b/lib/Catalyst/ActionRole/Scheme.pm @@ -0,0 +1,114 @@ +package Catalyst::ActionRole::Scheme; + +use Moose::Role; + +requires 'match', 'match_captures', 'list_extra_info'; + +around ['match','match_captures'] => sub { + my ($orig, $self, $ctx, @args) = @_; + my $request_scheme = lc($ctx->req->env->{'psgi.url_scheme'}); + my $match_scheme = lc($self->scheme||''); + + return $request_scheme eq $match_scheme ? $self->$orig($ctx, @args) : 0; +}; + +around 'list_extra_info' => sub { + my ($orig, $self, @args) = @_; + return { + %{ $self->$orig(@args) }, + Scheme => $self->attributes->{Scheme}[0]||'', + }; +}; + +1; + +=head1 NAME + +Catalyst::ActionRole::ConsumesContent - Match on HTTP Request Content-Type + +=head1 SYNOPSIS + + package MyApp::Web::Controller::MyController; + + use base 'Catalyst::Controller'; + + sub start : POST Chained('/') CaptureArg(0) { ... } + + sub is_json : Chained('start') Consumes('application/json') { ... } + sub is_urlencoded : Chained('start') Consumes('application/x-www-form-urlencoded') { ... } + sub is_multipart : Chained('start') Consumes('multipart/form-data') { ... } + + ## Alternatively, for common types... + + sub is_json : Chained('start') Consume(JSON) { ... } + sub is_urlencoded : Chained('start') Consumes(UrlEncoded) { ... } + sub is_multipart : Chained('start') Consumes(Multipart) { ... } + + ## Or allow more than one type + + sub is_more_than_one + : Chained('start') + : Consumes('application/x-www-form-urlencoded') + : Consumes('multipart/form-data') + { + ## ... + } + + 1; + +=head1 DESCRIPTION + +This is an action role that lets your L match on the content +type of the incoming request. + +Generally when there's a PUT or POST request, there's a request content body +with a matching MIME content type. Commonly this will be one of the types +used with classic HTML forms ('application/x-www-form-urlencoded' for example) +but there's nothing stopping you specifying any valid content type. + +For matching purposes, we match strings but the casing is insensitive. + +=head1 REQUIRES + +This role requires the following methods in the consuming class. + +=head2 match + +=head2 match_captures + +Returns 1 if the action matches the existing request and zero if not. + +=head1 METHODS + +This role defines the following methods + +=head2 match + +=head2 match_captures + +Around method modifier that return 1 if the request content type matches one of the +allowed content types (see L) and zero otherwise. + +=head2 allowed_content_types + +An array of strings that are the allowed content types for matching this action. + +=head2 can_consume + +Boolean. Does the current request match content type with what this actionrole +can consume? + +=head2 list_extra_info + +Add the accepted content type to the debug screen. + +=head1 AUTHORS + +Catalyst Contributors, see Catalyst.pm + +=head1 COPYRIGHT + +This library is free software. You can redistribute it and/or modify it under +the same terms as Perl itself. + +=cut diff --git a/lib/Catalyst/Controller.pm b/lib/Catalyst/Controller.pm index 02db77a..f2ccfa8 100644 --- a/lib/Catalyst/Controller.pm +++ b/lib/Catalyst/Controller.pm @@ -372,6 +372,8 @@ sub gather_default_action_roles { push @roles, 'Catalyst::ActionRole::ConsumesContent' if $args{attributes}->{Consumes}; + push @roles, 'Catalyst::ActionRole::Scheme' + if $args{attributes}->{Scheme}; return @roles; } @@ -889,6 +891,39 @@ most accurate matches early in the Chain, and your 'catchall' actions last. See L for more. +=head2 Scheme(...) + +Allows you to specify a URI scheme for the action or action chain. For example +you can required that a given path be C or that it is a websocket endpoint +C or C. For an action chain you may currently only have one defined +Scheme. + + package MyApp::Controller::Root; + + use base 'Catalyst::Controller'; + + sub is_http :Path(scheme) Scheme(http) Args(0) { + my ($self, $c) = @_; + $c->response->body("is_http"); + } + + sub is_https :Path(scheme) Scheme(https) Args(0) { + my ($self, $c) = @_; + $c->response->body("is_https"); + } + +In the above example http://localhost/root/scheme would match the first +action (is_http) but https://localhost/root/scheme would match the second. + +As an added benefit, if an action or action chain defines a Scheme, when using +$c->uri_for the scheme of the generated URL will use what you define in the action +or action chain (the current behavior is to set the scheme based on the current +incoming request). This makes it easier to use uri_for on websites where some +paths are secure and others are not. You may also use this to other schemes +like websockets. + +See L for more. + =head1 OPTIONAL METHODS =head2 _parse_[$name]_attr diff --git a/lib/Catalyst/DispatchType/Chained.pm b/lib/Catalyst/DispatchType/Chained.pm index 05fc514..e29e5b5 100644 --- a/lib/Catalyst/DispatchType/Chained.pm +++ b/lib/Catalyst/DispatchType/Chained.pm @@ -8,6 +8,7 @@ use Catalyst::ActionChain; use Catalyst::Utils; use URI; use Scalar::Util (); +use Encode 2.21 'decode_utf8'; has _endpoints => ( is => 'rw', @@ -102,6 +103,7 @@ sub list { my $parent = "DUMMY"; my $extra = $self->_list_extra_http_methods($endpoint); my $consumes = $self->_list_extra_consumes($endpoint); + my $scheme = $self->_list_extra_scheme($endpoint); my $curr = $endpoint; while ($curr) { if (my $cap = $curr->list_extra_info->{CaptureArgs}) { @@ -133,14 +135,18 @@ sub list { if (defined(my $ct = $p->list_extra_info->{Consumes})) { $name .= ' :'.$ct; } + if (defined(my $s = $p->list_extra_info->{Scheme})) { + $scheme = uc $s; + } unless ($p eq $parents[0]) { $name = "-> ${name}"; } push(@rows, [ '', $name ]); } - push(@rows, [ '', (@rows ? "=> " : '').($extra ? "$extra " : '')."/${endpoint}". ($consumes ? " :$consumes":"" ) ]); - $rows[0][0] = join('/', '', @parts) || '/'; + push(@rows, [ '', (@rows ? "=> " : '').($extra ? "$extra " : ''). ($scheme ? "$scheme: ":'')."/${endpoint}". ($consumes ? " :$consumes":"" ) ]); + my @display_parts = map { $_ =~s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; decode_utf8 $_ } @parts; + $rows[0][0] = join('/', '', @display_parts) || '/'; $paths->row(@$_) for @rows; } @@ -162,6 +168,11 @@ sub _list_extra_consumes { return join(', ', @{$action->list_extra_info->{CONSUMES}}); } +sub _list_extra_scheme { + my ( $self, $action ) = @_; + return unless defined $action->list_extra_info->{Scheme}; + return uc $action->list_extra_info->{Scheme}; +} =head2 $self->match( $c, $path ) @@ -362,9 +373,12 @@ sub register { ); } - $action->attributes->{PathPart} = [ $part ]; + my $encoded_part = URI->new($part)->canonical; + $encoded_part =~ s{(?<=[^/])/+\z}{}; + + $action->attributes->{PathPart} = [ $encoded_part ]; - unshift(@{ $children->{$part} ||= [] }, $action); + unshift(@{ $children->{$encoded_part} ||= [] }, $action); $self->_actions->{'/'.$action->reverse} = $action; diff --git a/lib/Catalyst/DispatchType/Path.pm b/lib/Catalyst/DispatchType/Path.pm index 0578ff4..acf0f3a 100644 --- a/lib/Catalyst/DispatchType/Path.pm +++ b/lib/Catalyst/DispatchType/Path.pm @@ -6,6 +6,7 @@ extends 'Catalyst::DispatchType'; use Text::SimpleTable; use Catalyst::Utils; use URI; +use Encode 2.21 'decode_utf8'; has _paths => ( is => 'rw', @@ -60,7 +61,8 @@ sub list { my $display_path = "/$path/$parts"; $display_path =~ s{/{1,}}{/}g; - + $display_path =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; # deconvert urlencoded for pretty view + $display_path = decode_utf8 $display_path; # URI does encoding $paths->row( $display_path, "/$action" ); } } diff --git a/lib/Catalyst/Dispatcher.pm b/lib/Catalyst/Dispatcher.pm index 6fde402..12040b2 100644 --- a/lib/Catalyst/Dispatcher.pm +++ b/lib/Catalyst/Dispatcher.pm @@ -15,6 +15,7 @@ use Text::SimpleTable; use Tree::Simple; use Tree::Simple::Visitor::FindByPath; use Class::Load qw(load_class try_load_class); +use Encode 2.21 'decode_utf8'; use namespace::clean -except => 'meta'; @@ -108,6 +109,9 @@ sub dispatch { } else { my $path = $c->req->path; + $path =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + $path = decode_utf8($path); + my $error = $path ? qq/Unknown resource "$path"/ : "No default action defined"; @@ -385,10 +389,14 @@ sub prepare_action { s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg for grep { defined } @{$req->captures||[]}; - $c->log->debug( 'Path is "' . $req->match . '"' ) - if ( $c->debug && defined $req->match && length $req->match ); + if($c->debug && defined $req->match && length $req->match) { + my $match = $req->match; + $match =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + $match = decode_utf8($match); + $c->log->debug( 'Path is "' . $match . '"' ) + } - $c->log->debug( 'Arguments are "' . join( '/', @args ) . '"' ) + $c->log->debug( 'Arguments are "' . join( '/', map { decode_utf8 $_ } @args ) . '"' ) if ( $c->debug && @args ); } diff --git a/lib/Catalyst/Engine.pm b/lib/Catalyst/Engine.pm index 225a09a..948f28f 100644 --- a/lib/Catalyst/Engine.pm +++ b/lib/Catalyst/Engine.pm @@ -7,16 +7,12 @@ use CGI::Simple::Cookie; use Data::Dump qw/dump/; use Errno 'EWOULDBLOCK'; use HTML::Entities; -use HTTP::Body; use HTTP::Headers; -use URI::QueryParam; use Plack::Loader; use Catalyst::EngineLoader; -use Encode (); +use Encode 2.21 'decode_utf8'; use Plack::Request::Upload; use Hash::MultiValue; -use utf8; - use namespace::clean -except => 'meta'; # Amount of data to read from input on each pass @@ -593,7 +589,9 @@ sub prepare_query_parameters { # Check for keywords (no = signs) # (yes, index() is faster than a regex :)) if ( index( $query_string, '=' ) < 0 ) { - $c->request->query_keywords($self->unescape_uri($query_string)); + my $keywords = $self->unescape_uri($query_string); + $keywords = decode_utf8 $keywords; + $c->request->query_keywords($keywords); return; } @@ -607,10 +605,13 @@ sub prepare_query_parameters { for my $item ( @params ) { my ($param, $value) - = map { $self->unescape_uri($_) } + = map { decode_utf8($self->unescape_uri($_)) } split( /=/, $item, 2 ); - $param = $self->unescape_uri($item) unless defined $param; + unless(defined $param) { + $param = $self->unescape_uri($item); + $param = decode_utf8 $param; + } if ( exists $query{$param} ) { if ( ref $query{$param} ) { @@ -668,20 +669,26 @@ sub prepare_uploads { my $request = $c->request; return unless $request->_body; + my $enc = $c->encoding; my $uploads = $request->_body->upload; my $parameters = $request->parameters; foreach my $name (keys %$uploads) { + $name = $c->_handle_unicode_decoding($name) if $enc; my $files = $uploads->{$name}; my @uploads; for my $upload (ref $files eq 'ARRAY' ? @$files : ($files)) { my $headers = HTTP::Headers->new( %{ $upload->{headers} } ); + my $filename = $upload->{filename}; + $filename = $c->_handle_unicode_decoding($filename) if $enc; + my $u = Catalyst::Request::Upload->new ( size => $upload->{size}, type => scalar $headers->content_type, + charset => scalar $headers->content_type_charset, headers => $headers, tempname => $upload->{tempname}, - filename => $upload->{filename}, + filename => $filename, ); push @uploads, $u; } diff --git a/lib/Catalyst/Log.pm b/lib/Catalyst/Log.pm index e70197f..a599284 100644 --- a/lib/Catalyst/Log.pm +++ b/lib/Catalyst/Log.pm @@ -141,6 +141,7 @@ sub _send_to_log { if ($self->can('_has_psgi_errors') and $self->_has_psgi_errors) { $self->_psgi_errors->print(@_); } else { + binmode STDERR, ":utf8"; print STDERR @_; } } diff --git a/lib/Catalyst/Middleware/Stash.pm b/lib/Catalyst/Middleware/Stash.pm index bd02b9c..e31f2d6 100644 --- a/lib/Catalyst/Middleware/Stash.pm +++ b/lib/Catalyst/Middleware/Stash.pm @@ -9,12 +9,12 @@ use Carp 'croak'; our @EXPORT_OK = qw(stash get_stash); -sub PSGI_KEY { 'Catalyst.Stash.v1' }; +sub PSGI_KEY () { 'Catalyst.Stash.v1' } sub get_stash { my $env = shift; - return $env->{&PSGI_KEY} || - _init_stash_in($env); + return $env->{+PSGI_KEY} || + croak "You requested a stash, but one does not exist."; } sub stash { @@ -38,16 +38,13 @@ sub _create_stash { }; } -sub _init_stash_in { - my ($env) = @_; - return $env->{&PSGI_KEY} ||= - _create_stash; -} - sub call { my ($self, $env) = @_; - _init_stash_in($env); - return $self->app->($env); + my $new_env = +{ %$env }; + my %stash = %{ ($env->{+PSGI_KEY} || sub {})->() || +{} }; + + $new_env->{+PSGI_KEY} = _create_stash( \%stash ); + return $self->app->($new_env); } =head1 NAME @@ -63,6 +60,15 @@ alone distribution We store a coderef under the C which can be dereferenced with key values or nothing to access the underly hashref. +The stash middleware is designed so that you can 'nest' applications that +use it. If for example you have a L application that is called +by a controller under a parent L application, the child application +will inherit the full stash of the parent BUT any new keys added by the child +will NOT bubble back up to the parent. However, children of children will. + +For more information the current test case t/middleware-stash.t is the best +documentation. + =head1 SUBROUTINES This class defines the following subroutines. @@ -104,7 +110,7 @@ clients. Stash key / value are stored in memory. ["I found $stashed in the stash!"]]; }; -If the stash does not yet exist, we initialize one and return that. +If the stash does not yet exist, an exception is thrown. =head1 METHODS diff --git a/lib/Catalyst/Request.pm b/lib/Catalyst/Request.pm index 671dd51..5e57305 100644 --- a/lib/Catalyst/Request.pm +++ b/lib/Catalyst/Request.pm @@ -10,6 +10,7 @@ use HTTP::Headers; use Stream::Buffered; use Hash::MultiValue; use Scalar::Util; +use HTTP::Body; use Catalyst::Exception; use Moose; @@ -316,7 +317,7 @@ sub prepare_body_chunk { } sub prepare_body_parameters { - my ( $self ) = @_; + my ( $self, $c ) = @_; $self->prepare_body if ! $self->_has_body; @@ -324,9 +325,29 @@ sub prepare_body_parameters { return $self->_use_hash_multivalue ? Hash::MultiValue->new : {}; } + my $params = $self->_body->param; + + # If we have an encoding configured (like UTF-8) in general we expect a client + # to POST with the encoding we fufilled the request in. Otherwise don't do any + # encoding (good change wide chars could be in HTML entity style llike the old + # days -JNAP + + # so, now that HTTP::Body prepared the body params, we gotta 'walk' the structure + # and do any needed decoding. + + # This only does something if the encoding is set via the encoding param. Remember + # this is assuming the client is not bad and responds with what you provided. In + # general you can just use utf8 and get away with it. + # + # I need to see if $c is here since this also doubles as a builder for the object :( + + if($c and $c->encoding) { + $params = $c->_handle_unicode_decoding($params); + } + return $self->_use_hash_multivalue ? - Hash::MultiValue->from_mixed($self->_body->param) : - $self->_body->param; + Hash::MultiValue->from_mixed($params) : + $params; } sub prepare_connection { @@ -938,7 +959,7 @@ sub mangle_params { next unless defined $value; for ( ref $value eq 'ARRAY' ? @$value : $value ) { $_ = "$_"; - utf8::encode( $_ ) if utf8::is_utf8($_); + # utf8::encode($_); } }; diff --git a/lib/Catalyst/Request/Upload.pm b/lib/Catalyst/Request/Upload.pm index d8e58be..6df2dff 100644 --- a/lib/Catalyst/Request/Upload.pm +++ b/lib/Catalyst/Request/Upload.pm @@ -15,6 +15,8 @@ has size => (is => 'rw'); has tempname => (is => 'rw'); has type => (is => 'rw'); has basename => (is => 'ro', lazy_build => 1); +has raw_basename => (is => 'ro', lazy_build => 1); +has charset => (is=>'ro', predicate=>'has_charset'); has fh => ( is => 'rw', @@ -29,17 +31,21 @@ has fh => ( Catalyst::Exception->throw( message => qq/Can't open '$filename': '$!'/ ); } - return $fh; }, ); sub _build_basename { + my $basename = shift->raw_basename; + $basename =~ s|[^\w\.-]+|_|g; + return $basename; +} + +sub _build_raw_basename { my $self = shift; my $basename = $self->filename; $basename =~ s|\\|/|g; $basename = ( File::Spec::Unix->splitpath($basename) )[2]; - $basename =~ s|[^\w\.-]+|_|g; return $basename; } @@ -58,13 +64,16 @@ Catalyst::Request::Upload - handles file upload requests $upload->basename; $upload->copy_to; $upload->fh; + $upload->decoded_fh $upload->filename; $upload->headers; $upload->link_to; $upload->size; $upload->slurp; + $upload->decoded_slurp; $upload->tempname; $upload->type; + $upload->charset; To specify where Catalyst should put the temporary files, set the 'uploadtmp' option in the Catalyst config. If unset, Catalyst will use the system temp dir. @@ -97,10 +106,56 @@ sub copy_to { return File::Copy::copy( $self->tempname, @_ ); } +=head2 $upload->is_utf8_encoded + +Returns true of the upload defines a character set at that value is 'UTF-8'. +This does not try to inspect your upload and make any guesses if the Content +Type charset is undefined. + +=cut + +sub is_utf8_encoded { + my $self = shift; + if(my $charset = $self->charset) { + return $charset eq 'UTF-8' ? 1 : 0; + } + return 0; +} + =head2 $upload->fh Opens a temporary file (see tempname below) and returns an L handle. +This is a filehandle that is opened with no additional IO Layers. + +=head2 $upload->decoded_fh(?$encoding) + +Returns a filehandle that has binmode set to UTF-8 if a UTF-8 character set +is found. This also accepts an override encoding value that you can use to +force a particular L layer. If neither are found the filehandle is +set to :raw. + +This is useful if you are pulling the file into code and inspecting bit and +maybe then sending those bits back as the response. (Please not this is not +a suitable filehandle to set in the body; use C if you are doing that). + +Please note that using this method sets the underlying filehandle IO layer +so once you use this method if you go back and use the C method you +still get the IO layer applied. + +=cut + +sub decoded_fh { + my ($self, $layer) = @_; + my $fh = $self->fh; + + $layer = ":encoding(UTF-8)" if !$layer && $self->is_utf8_encoded; + $layer = ':raw' unless $layer; + + binmode($fh, $layer); + return $fh; +} + =head2 $upload->filename Returns the client-supplied filename. @@ -127,13 +182,17 @@ sub link_to { Returns the size of the uploaded file in bytes. -=head2 $upload->slurp +=head2 $upload->slurp(?$encoding) + +Optionally accepts an argument to define an IO Layer (which is applied to +the filehandle via binmode; if no layer is defined the default is set to +":raw". Returns a scalar containing the contents of the temporary file. Note that this will cause the filehandle pointed to by C<< $upload->fh >> to be reset to the start of the file using seek and the file handle to be put -into binary mode. +into whatever encoding mode is applied. =cut @@ -158,9 +217,39 @@ sub slurp { return $content; } +=head2 $upload->decoded_slurp(?$encoding) + +Works just like C except we use C instead of C to +open a filehandle to slurp. This means if your upload charset is UTF8 +we binmode the filehandle to that encoding. + +=cut + +sub decoded_slurp { + my ( $self, $layer ) = @_; + my $handle = $self->decoded_fh($layer); + + my $content = undef; + $handle->seek(0, IO::File::SEEK_SET); + while ( $handle->sysread( my $buffer, 8192 ) ) { + $content .= $buffer; + } + + $handle->seek(0, IO::File::SEEK_SET); + return $content; +} + =head2 $upload->basename -Returns basename for C. +Returns basename for C. This filters the name through a regexp +C to make it safe for filesystems that don't +like advanced characters. This will of course filter UTF8 characters. +If you need the exact basename unfiltered use C. + +=head2 $upload->raw_basename + +Just like C but without filtering the filename for characters that +don't always write to a filesystem. =head2 $upload->tempname @@ -170,6 +259,11 @@ Returns the path to the temporary file. Returns the client-supplied Content-Type. +=head2 $upload->charset + +The character set information part of the content type, if any. Useful if you +need to figure out any encodings on the file upload. + =head2 meta Provided by Moose diff --git a/lib/Catalyst/Response.pm b/lib/Catalyst/Response.pm index f049ebf..28cf7b7 100644 --- a/lib/Catalyst/Response.pm +++ b/lib/Catalyst/Response.pm @@ -4,9 +4,20 @@ use Moose; use HTTP::Headers; use Moose::Util::TypeConstraints; use namespace::autoclean; +use Scalar::Util 'blessed'; +use Catalyst::Response::Writer; +use Catalyst::Utils (); with 'MooseX::Emulate::Class::Accessor::Fast'; +our $DEFAULT_ENCODE_CONTENT_TYPE_MATCH = qr{text|xml$|javascript$}; + +has encodable_content_type => ( + is => 'rw', + required => 1, + default => sub { $DEFAULT_ENCODE_CONTENT_TYPE_MATCH } +); + has _response_cb => ( is => 'ro', isa => 'CodeRef', @@ -51,7 +62,17 @@ has write_fh => ( builder=>'_build_write_fh', ); -sub _build_write_fh { shift ->_writer } +sub _build_write_fh { + my $writer = $_[0]->_writer; # We need to get the finalize headers side effect... + my $requires_encoding = $_[0]->encodable_response; + my %fields = ( + _writer => $writer, + _encoding => $_[0]->_context->encoding, + _requires_encoding => $requires_encoding, + ); + + return bless \%fields, 'Catalyst::Response::Writer'; +} sub DEMOLISH { my $self = shift; @@ -71,7 +92,7 @@ has finalized_headers => (is => 'rw', default => 0); has headers => ( is => 'rw', isa => 'HTTP::Headers', - handles => [qw(content_encoding content_length content_type header)], + handles => [qw(content_encoding content_length content_type content_type_charset header)], default => sub { HTTP::Headers->new() }, required => 1, lazy => 1, @@ -86,9 +107,9 @@ before [qw(status headers content_encoding content_length content_type header)] my $self = shift; $self->_context->log->warn( - "Useless setting a header value after finalize_headers called." . + "Useless setting a header value after finalize_headers and the response callback has been called." . " Not what you want." ) - if ( $self->finalized_headers && @_ ); + if ( $self->finalized_headers && !$self->_has_response_cb && @_ ); }; sub output { shift->body(@_) } @@ -103,6 +124,10 @@ sub write { $buffer = q[] unless defined $buffer; + if($self->encodable_response) { + $buffer = $self->_context->encoding->encode( $buffer, $self->_context->_encode_check ) + } + my $len = length($buffer); $self->_writer->write($buffer); @@ -116,6 +141,9 @@ sub finalize_headers { sub from_psgi_response { my ($self, $psgi_res) = @_; + if(blessed($psgi_res) && $psgi_res->can('as_psgi')) { + $psgi_res = $psgi_res->as_psgi; + } if(ref $psgi_res eq 'ARRAY') { my ($status, $headers, $body) = @$psgi_res; $self->status($status); @@ -175,6 +203,22 @@ you might want to use a L type of object (Something that implements in the same fashion), or a filehandle GLOB. Catalyst will write it piece by piece into the response. +If you are using a filehandle as the body response you are responsible for +making sure it comforms to the L specification with regards to content +encoding. Unlike with scalar body values or when using the streaming interfaces +we currently do not attempt to normalize and encode your filehandle. In general +this means you should be sure to be sending bytes not UTF8 decoded multibyte +characters. + +Most of the time when you do: + + open(my $fh, '<:raw', $path); + +You should be fine. If you open a filehandle with a L layer you probably +are not fine. You can usually fix this by explicitly using binmode to set +the IOLayer to :raw. Its possible future versions of L will try to +'do the right thing'. + When using a L type of object and no content length has been already set in the response headers Catalyst will make a reasonable attempt to determine the size of the Handle. Depending on the implementation of your @@ -286,6 +330,10 @@ This value is typically set by your view or plugin. For example, L will guess the mime type based on the file it found, while L defaults to C. +=head2 $res->content_type_charset + +Shortcut for $res->headers->content_type_charset; + =head2 $res->cookies Returns a reference to a hash containing cookies to be set. The keys of the @@ -347,6 +395,12 @@ qualified (= C, etc.) or that starts with a slash thing and is not a standard behaviour. You may opt to use uri_for() or uri_for_action() instead. +B If $url is an object that does ->as_string (such as L, which is +what you get from ->uri_for) we automatically call that to stringify. This +should ease the common case usage + + return $c->res->redirect( $c->uri_for(...)); + =cut sub redirect { @@ -356,6 +410,10 @@ sub redirect { my $location = shift; my $status = shift || 302; + if(blessed($location) && $location->can('as_string')) { + $location = $location->as_string; + } + $self->location($location); $self->status($status); } @@ -377,13 +435,39 @@ $res->code is an alias for this, to match HTTP::Response->code. =head2 $res->write( $data ) -Writes $data to the output stream. +Writes $data to the output stream. Calling this method will finalize your +headers and send the headers and status code response to the client (so changing +them afterwards is a waste... be sure to set your headers correctly first). + +You may call this as often as you want throughout your response cycle. You may +even set a 'body' afterward. So for example you might write your HTTP headers +and the HEAD section of your document and then set the body from a template +driven from a database. In some cases this can seem to the client as if you had +a faster overall response (but note that unless your server support chunked +body your content is likely to get queued anyway (L and most other +http 1.1 webservers support this). + +If there is an encoding set, we encode each line of the response (the default +encoding is UTF-8). =head2 $res->write_fh -Returns a PSGI $writer object that has two methods, write and close. You can -close over this object for asynchronous and nonblocking applications. For -example (assuming you are using a supporting server, like L +Returns an instance of L, which is a lightweight +decorator over the PSGI C<$writer> object (see L). + +In addition to proxying the C and C method from the underlying PSGI +writer, this proxy object knows any application wide encoding, and provides a method +C that will properly encode your written lines based upon your +encoding settings. By default in L responses are UTF-8 encoded and this +is the encoding used if you respond via C. If you want to handle +encoding yourself, you can use the C method directly. + +Encoding only applies to content types for which it matters. Currently the following +content types are assumed to need encoding: text (including HTML), xml and javascript. + +We provide access to this object so that you can properly close over it for use in +asynchronous and nonblocking applications. For example (assuming you are using a supporting +server, like L: package AsyncExample::Controller::Root; @@ -413,6 +497,10 @@ example (assuming you are using a supporting server, like L }); } +Like the 'write' method, calling this will finalize headers. Unlike 'write' when you +can this it is assumed you are taking control of the response so the body is never +finalized (there isn't one anyway) and you need to call the close method. + =head2 $res->print( @data ) Prints @data to the output stream, separated by $,. This lets you pass @@ -430,6 +518,8 @@ a $responder) set the response from it. Properly supports streaming and delayed response and / or async IO if running under an expected event loop. +If passed an object, will expect that object to do a method C. + Example: package MyApp::Web::Controller::Test; @@ -449,6 +539,67 @@ Example: Please note this does not attempt to map or nest your PSGI application under the Controller and Action namespace or path. +=head2 encodable_content_type + +This is a regular expression used to determine of the current content type +should be considered encodable. Currently we apply default encoding (usually +UTF8) to text type contents. Here's the default regular expression: + +This would match content types like: + + text/plain + text/html + text/xml + application/javascript + application/xml + application/vnd.user+xml + +B: We don't encode JSON content type responses by default since most +of the JSON serializers that are commonly used for this task will do so +automatically and we don't want to double encode. If you are not using a +tool like L to produce JSON type content, (for example you are using +a template system, or creating the strings manually) you will need to either +encoding the body yourself: + + $c->response->body( $c->encoding->encode( $body, $c->_encode_check ) ); + +Or you can alter the regular expression using this attribute. + +=head2 encodable_response + +Given a L return true if its one that can be encoded. + + make sure there is an encoding set on the response + make sure the content type is encodable + make sure no content type charset has been already set to something different from the global encoding + make sure no content encoding is present. + +Note this does not inspect a body since we do allow automatic encoding on streaming +type responses. + +=cut + +sub encodable_response { + my ($self) = @_; + return 0 unless $self->_context; # Cases like returning a HTTP Exception response you don't have a context here... + return 0 unless $self->_context->encoding; + + my $has_manual_charset = 0; + if(my $charset = $self->content_type_charset) { + $has_manual_charset = (uc($charset) ne uc($self->_context->encoding->mime_name)) ? 1:0; + } + + if( + ($self->content_type =~ m/${\$self->encodable_content_type}/) and + (!$has_manual_charset) and + (!$self->content_encoding || $self->content_encoding eq 'identity' ) + ) { + return 1; + } else { + return 0; + } +} + =head2 DEMOLISH Ensures that the response is flushed and closed at the end of the diff --git a/lib/Catalyst/Response/Writer.pm b/lib/Catalyst/Response/Writer.pm new file mode 100644 index 0000000..55cbdd1 --- /dev/null +++ b/lib/Catalyst/Response/Writer.pm @@ -0,0 +1,65 @@ +package Catalyst::Response::Writer; + +sub write { shift->{_writer}->write(@_) } +sub close { shift->{_writer}->close } + +sub write_encoded { + my ($self, $line) = @_; + if((my $enc = $self->{_encoding}) && $self->{_requires_encoding}) { + # Not going to worry about CHECK arg since Unicode always croaks I think - jnap + $line = $enc->encode($line); + } + + $self->write($line); +} + +=head1 TITLE + +Catalyst::Response::Writer - Proxy over the PSGI Writer + +=head1 SYNOPSIS + + sub myaction : Path { + my ($self, $c) = @_; + my $w = $c->response->writer_fh; + + $w->write("hello world"); + $w->close; + } + +=head1 DESCRIPTION + +This wraps the PSGI writer (see L) +for more. We wrap this object so we can provide some additional methods that +make sense from inside L + +=head1 METHODS + +This class does the following methods + +=head2 write + +=head2 close + +These delegate to the underlying L writer object + +=head2 write_encoded + +If the application defines a response encoding (default is UTF8) and the +content type is a type that needs to be encoded (text types like HTML or XML and +Javascript) we first encode the line you want to write. This is probably the +thing you want to always do. If you use the L<\write> method directly you will +need to handle your own encoding. + +=head1 AUTHORS + +Catalyst Contributors, see Catalyst.pm + +=head1 COPYRIGHT + +This library is free software. You can redistribute it and/or modify +it under the same terms as Perl itself. + +=cut + +1; diff --git a/lib/Catalyst/Runtime.pm b/lib/Catalyst/Runtime.pm index 4cc1f98..416f0de 100644 --- a/lib/Catalyst/Runtime.pm +++ b/lib/Catalyst/Runtime.pm @@ -7,7 +7,8 @@ BEGIN { require 5.008003; } # Remember to update this in Catalyst as well! -our $VERSION = '5.90078'; +our $VERSION = '5.90079_005'; +$VERSION = eval $VERSION if $VERSION =~ /_/; # numify for warning-free dev releases =head1 NAME diff --git a/lib/Catalyst/Utils.pm b/lib/Catalyst/Utils.pm index 5ee7451..1bccecb 100644 --- a/lib/Catalyst/Utils.pm +++ b/lib/Catalyst/Utils.pm @@ -10,7 +10,6 @@ use Cwd; use Class::Load 'is_class_loaded'; use String::RewritePrefix; use Class::Load (); - use namespace::clean; =head1 NAME @@ -503,6 +502,8 @@ sub apply_registered_middleware { return $new_psgi; } + + =head1 PSGI Helpers Utility functions to make it easier to work with PSGI applications under Catalyst diff --git a/t/aggregate/to_app.t b/t/aggregate/to_app.t new file mode 100644 index 0000000..7bcb497 --- /dev/null +++ b/t/aggregate/to_app.t @@ -0,0 +1,11 @@ +use strict; +use warnings; +use FindBin; +use lib "$FindBin::Bin/../lib"; +use TestApp; +use Test::More; + +ok(TestApp->can('to_app')); +is(ref(TestApp->to_app), 'CODE'); + +done_testing; diff --git a/t/aggregate/unit_core_uri_for_multibytechar.t b/t/aggregate/unit_core_uri_for_multibytechar.t index b167818..f6d4f7c 100644 --- a/t/aggregate/unit_core_uri_for_multibytechar.t +++ b/t/aggregate/unit_core_uri_for_multibytechar.t @@ -1,3 +1,4 @@ +use utf8; use strict; use warnings; use FindBin; @@ -38,14 +39,17 @@ is($context->req->uri_with({ name => "\x{6751}\x{702c}\x{5927}\x{8f14}" }), $uri my $action = $context->controller('Action::Chained') ->action_for('roundtrip_urifor_end'); -{ -use utf8; - is($context->uri_for($action, ['hütte'], 'hütte', { test => 'hütte' }), 'http://127.0.0.1/chained/roundtrip_urifor/h%C3%BCtte/h%C3%BCtte?test=h%C3%BCtte', 'uri_for with utf8 captures and args'); -} + +is( + $context->uri_for($action, ['♥'], '♥', { '♥' => '♥'}), + 'http://127.0.0.1/chained/roundtrip_urifor/' . '%E2%99%A5' . '/' . '%E2%99%A5' . '?' . '%E2%99%A5' . '=' . '%E2%99%A5', + 'uri_for with utf8 captures and args'); + +# ^ the match string is purposefully broken up to aid viewing, please to 'fix' it. done_testing; diff --git a/t/aggregate/utf8_content_length.t b/t/aggregate/utf8_content_length.t index 64d4eb8..bf71b8e 100644 --- a/t/aggregate/utf8_content_length.t +++ b/t/aggregate/utf8_content_length.t @@ -29,4 +29,3 @@ my $size = -s $fn; } done_testing; - diff --git a/t/dispatch_on_scheme.t b/t/dispatch_on_scheme.t new file mode 100644 index 0000000..1da72a2 --- /dev/null +++ b/t/dispatch_on_scheme.t @@ -0,0 +1,123 @@ +use warnings; +use strict; +use Test::More; +use HTTP::Request::Common; + +# Test cases for dispatching on URI Scheme + +{ + package MyApp::Controller::Root; + $INC{'MyApp/Controller/Root.pm'} = __FILE__; + + use base 'Catalyst::Controller'; + + sub is_http :Path(scheme) Scheme(http) Args(0) { + my ($self, $c) = @_; + Test::More::is $c->action->scheme, 'http'; + $c->response->body("is_http"); + } + + sub is_https :Path(scheme) Scheme(https) Args(0) { + my ($self, $c) = @_; + Test::More::is $c->action->scheme, 'https'; + $c->response->body("is_https"); + } + + sub base :Chained('/') CaptureArgs(0) { } + + sub is_http_chain :GET Chained('base') PathPart(scheme) Scheme(http) Args(0) { + my ($self, $c) = @_; + Test::More::is $c->action->scheme, 'http'; + $c->response->body("base/is_http"); + } + + sub is_https_chain :Chained('base') PathPart(scheme) Scheme(https) Args(0) { + my ($self, $c) = @_; + Test::More::is $c->action->scheme, 'https'; + $c->response->body("base/is_https"); + } + + sub uri_for1 :Chained('base') Scheme(https) Args(0) { + my ($self, $c) = @_; + Test::More::is $c->action->scheme, 'https'; + $c->response->body($c->uri_for($c->action)->as_string); + } + + sub uri_for2 :Chained('base') Scheme(https) Args(0) { + my ($self, $c) = @_; + Test::More::is $c->action->scheme, 'https'; + $c->response->body($c->uri_for($self->action_for('is_http'))->as_string); + } + + sub uri_for3 :Chained('base') Scheme(http) Args(0) { + my ($self, $c) = @_; + Test::More::is $c->action->scheme, 'http'; + $c->response->body($c->uri_for($self->action_for('endpoint'))->as_string); + } + + sub base2 :Chained('/') CaptureArgs(0) { } + sub link :Chained(base2) Scheme(https) CaptureArgs(0) { } + sub endpoint :Chained(link) Args(0) { + my ($self, $c) = @_; + Test::More::is $c->action->scheme, 'https'; + $c->response->body("end"); + } + + + package MyApp; + use Catalyst; + + Test::More::ok(MyApp->setup, 'setup app'); +} + +use Catalyst::Test 'MyApp'; + +{ + my $res = request "/root/scheme"; + is $res->code, 200, 'OK'; + is $res->content, 'is_http', 'correct body'; +} + +{ + my $res = request "https://localhost/root/scheme"; + is $res->code, 200, 'OK'; + is $res->content, 'is_https', 'correct body'; +} + +{ + my $res = request "/base/scheme"; + is $res->code, 200, 'OK'; + is $res->content, 'base/is_http', 'correct body'; +} + +{ + my $res = request "https://localhost/base/scheme"; + is $res->code, 200, 'OK'; + is $res->content, 'base/is_https', 'correct body'; +} + +{ + my $res = request "https://localhost/base/uri_for1"; + is $res->code, 200, 'OK'; + is $res->content, 'https://localhost/base/uri_for1', 'correct body'; +} + +{ + my $res = request "https://localhost/base/uri_for2"; + is $res->code, 200, 'OK'; + is $res->content, 'http://localhost/root/scheme', 'correct body'; +} + +{ + my $res = request "/base/uri_for3"; + is $res->code, 200, 'OK'; + is $res->content, 'https://localhost/base2/link/endpoint', 'correct body'; +} + +{ + my $res = request "https://localhost/base2/link/endpoint"; + is $res->code, 200, 'OK'; + is $res->content, 'end', 'correct body'; +} + +done_testing; diff --git a/t/lib/TestAppEncoding/Controller/Root.pm b/t/lib/TestAppEncoding/Controller/Root.pm index b82e1bf..1c42bfa 100644 --- a/t/lib/TestAppEncoding/Controller/Root.pm +++ b/t/lib/TestAppEncoding/Controller/Root.pm @@ -8,6 +8,7 @@ __PACKAGE__->config->{namespace} = ''; sub binary : Local { my ($self, $c) = @_; + $c->res->content_type('image/gif'); $c->res->body(do { open(my $fh, '<', $c->path_to('..', '..', 'catalyst_130pix.gif')) or die $!; binmode($fh); @@ -31,12 +32,8 @@ sub utf8_non_ascii_content : Local { my $str = 'ʇsʎlɐʇɐɔ'; # 'catalyst' flipped at http://www.revfad.com/flip.html ok utf8::is_utf8($str), '$str is in UTF8 internally'; - - # encode $str into a sequence of octets and turn off the UTF-8 flag, so that - # we don't get the 'Wide character in syswrite' error in Catalyst::Engine - utf8::encode($str); - ok !utf8::is_utf8($str), '$str is a sequence of octets (byte string)'; - + + $c->res->content_type('text/plain'); $c->res->body($str); } diff --git a/t/lib/TestAppUnicode.pm b/t/lib/TestAppUnicode.pm index 55359f7..8338f3d 100644 --- a/t/lib/TestAppUnicode.pm +++ b/t/lib/TestAppUnicode.pm @@ -3,7 +3,7 @@ use strict; use warnings; use TestLogger; use base qw/Catalyst/; -use Catalyst qw/Unicode::Encoding/; +use Catalyst; __PACKAGE__->config( 'name' => 'TestAppUnicode', diff --git a/t/middleware-stash.t b/t/middleware-stash.t new file mode 100644 index 0000000..baeb108 --- /dev/null +++ b/t/middleware-stash.t @@ -0,0 +1,52 @@ +use warnings; +use strict; + +{ + + package MyAppChild::Controller::User; + $INC{'MyAppChild/Controller/User.pm'} = __FILE__; + + use base 'Catalyst::Controller'; + use Test::More; + + sub stash :Local { + my ($self, $c) = @_; + $c->stash->{inner} = "inner"; + $c->res->body( "inner: ${\$c->stash->{inner}}, outer: ${\$c->stash->{outer}}"); + + is_deeply [sort {$a cmp $b} keys(%{$c->stash})], ['inner','outer'], 'both keys in stash'; + } + + package MyAppChild; + $INC{'MyAppChild.pm'} = __FILE__; + + use Catalyst; + MyAppChild->setup; + + package MyAppParent::Controller::User; + $INC{'MyAppParent/Controller/User.pm'} = __FILE__; + + use base 'Catalyst::Controller'; + use Test::More; + + sub stash :Local { + my ($self, $c) = @_; + $c->stash->{outer} = "outer"; + $c->res->from_psgi_response( MyAppChild->to_app->($c->req->env) ); + + is_deeply [keys(%{$c->stash})], ['outer'], 'only one key in stash'; + } + + package MyAppParent; + use Catalyst; + MyAppParent->setup; + +} + +use Test::More; +use Catalyst::Test 'MyAppParent'; + +my $res = request '/user/stash'; +is $res->content, 'inner: inner, outer: outer', 'got expected response'; + +done_testing; diff --git a/t/psgi-log.t b/t/psgi-log.t index 56b9dad..91a36dc 100644 --- a/t/psgi-log.t +++ b/t/psgi-log.t @@ -72,7 +72,7 @@ my $cmp = TestApp->debug ? '>=' : '=='; my $res = $cb->(GET "/log/info"); my @logs = $handle->logs; cmp_ok(scalar(@logs), $cmp, 1, "psgi.errors: one event output"); - like($logs[0], qr/info$/m, "psgi.errors: event matches test data"); + like($logs[0], qr/info$/m, "psgi.errors: event matches test data") unless TestApp->debug; }; }; diff --git a/t/psgi_utils.t b/t/psgi_utils.t index 078dd82..9c05559 100644 --- a/t/psgi_utils.t +++ b/t/psgi_utils.t @@ -9,6 +9,12 @@ my $psgi_app = sub { }; { + package MyApp::PSGIObject; + + sub as_psgi { + return [200, ['Content-Type' => 'text/plain'], ['as_psgi']]; + }; + package MyApp::Controller::Docs; $INC{'MyApp/Controller/Docs.pm'} = __FILE__; @@ -16,6 +22,12 @@ my $psgi_app = sub { use Plack::Request; use Catalyst::Utils; + sub as_psgi :Local { + my ($self, $c) = @_; + my $as_psgi = bless +{}, 'MyApp::PSGIObject'; + $c->res->from_psgi_response($as_psgi); + } + sub name :Local { my ($self, $c) = @_; my $env = $c->Catalyst::Utils::env_at_action; @@ -122,6 +134,11 @@ use Test::More; use Catalyst::Test 'MyApp'; { + my ($res, $c) = ctx_request('/docs/as_psgi'); + is $res->content, 'as_psgi'; +} + +{ my ($res, $c) = ctx_request('/user/mounted/111?path_prefix=1'); is $c->action, 'user/mounted'; is $res->content, 'http://localhost/user/user/local_example_args1/111'; @@ -367,32 +384,3 @@ use Catalyst::Test 'MyApp'; } done_testing(); - -__END__ - - -use Plack::App::URLMap; -use HTTP::Request::Common; -use HTTP::Message::PSGI; - -my $urlmap = Plack::App::URLMap->new; - -my $app1 = sub { - my $env = shift; - return [200, [], [ - "REQUEST_URI: $env->{REQUEST_URI}, FROM: $env->{MAP_TO}, PATH_INFO: $env->{PATH_INFO}, SCRIPT_NAME $env->{SCRIPT_NAME}"]]; -}; - -$urlmap->map("/" => sub { my $env = shift; $env->{MAP_TO} = '/'; $app1->($env)}); -$urlmap->map("/foo" => sub { my $env = shift; $env->{MAP_TO} = '/foo'; $app1->($env)}); -$urlmap->map("/bar/baz" => sub { my $env = shift; $env->{MAP_TO} = '/foo/bar'; $app1->($env)}); - -my $app = $urlmap->to_app; - -warn $app->(req_to_psgi(GET '/'))->[2]->[0]; -warn $app->(req_to_psgi(GET '/111'))->[2]->[0]; -warn $app->(req_to_psgi(GET '/foo'))->[2]->[0]; -warn $app->(req_to_psgi(GET '/foo/222'))->[2]->[0]; -warn $app->(req_to_psgi(GET '/bar/baz'))->[2]->[0]; -warn $app->(req_to_psgi(GET '/bar/baz/333'))->[2]->[0]; - diff --git a/t/undef-params.t b/t/undef-params.t index a6d7594..d592606 100644 --- a/t/undef-params.t +++ b/t/undef-params.t @@ -33,7 +33,8 @@ use Plack::Test; $SIG{__WARN__} = sub { my $error = shift; - Test::More::is($error, "You called ->params with an undefined value at t/undef-params.t line 20.\n"); + Test::More::is($error, "You called ->params with an undefined value at t/undef-params.t line 20.\n") + unless MyApp->debug; }; MyApp->setup, 'setup app'; diff --git a/t/unicode_plugin_charset_utf8.t b/t/unicode_plugin_charset_utf8.t index 81ba9f7..4c7c0c6 100644 --- a/t/unicode_plugin_charset_utf8.t +++ b/t/unicode_plugin_charset_utf8.t @@ -6,7 +6,7 @@ use lib "$Bin/lib"; use Data::Dumper; BEGIN { - $ENV{TESTAPP_ENCODING} = 'UTF-8'; + # $ENV{TESTAPP_ENCODING} = 'UTF-8'; # This is now default $ENV{TESTAPP_DEBUG} = 0; $ENV{CATALYST_DEBUG} = 0; } @@ -27,6 +27,6 @@ is scalar(@TestLogger::LOGS), 1 or diag Dumper(\@TestLogger::LOGS); like $TestLogger::LOGS[0], qr/content type is 'iso-8859-1'/; -like $TestLogger::ELOGS[0], qr/Unicode::Encoding plugin/; +#like $TestLogger::ELOGS[0], qr/Unicode::Encoding plugin/; #no longer a plugin done_testing; diff --git a/t/unicode_plugin_no_encoding.t b/t/unicode_plugin_no_encoding.t index 7b562f8..feed681 100644 --- a/t/unicode_plugin_no_encoding.t +++ b/t/unicode_plugin_no_encoding.t @@ -18,7 +18,12 @@ my $encode_str = "\x{e3}\x{81}\x{82}"; # e38182 is japanese 'あ' my $decode_str = Encode::decode('utf-8' => $encode_str); my $escape_str = uri_escape_utf8($decode_str); -check_parameter(GET "/?myparam=$escape_str"); +# JNAP - I am removing this test case because I think its not correct. I think +# we do not check the server encoding to determine if the parts of a request URL +# both paths and query should be decoded. I think its always safe to assume utf8 +# encoded urlencoded bits. That is my reading of the spec. Please correct me if +# I am wrong +#check_parameter(GET "/?myparam=$escape_str"); check_parameter(POST '/', Content_Type => 'form-data', Content => [ @@ -33,7 +38,6 @@ sub check_parameter { my ( undef, $c ) = ctx_request(shift); my $myparam = $c->req->param('myparam'); - ok !utf8::is_utf8($myparam); unless ( $c->request->method eq 'POST' ) { is $c->res->output => $encode_str; is $myparam => $encode_str; diff --git a/t/unicode_plugin_request_decode.t b/t/unicode_plugin_request_decode.t index c3b7171..42a9a72 100644 --- a/t/unicode_plugin_request_decode.t +++ b/t/unicode_plugin_request_decode.t @@ -1,6 +1,6 @@ use strict; use warnings; -use Test::More tests => 5 * 5; +use Test::More; use utf8; # setup library path @@ -17,42 +17,19 @@ my $encode_str = "\x{e3}\x{81}\x{82}"; # e38182 is japanese 'あ' my $decode_str = Encode::decode('utf-8' => $encode_str); my $escape_str = uri_escape_utf8($decode_str); -check_parameter(GET "/?foo=$escape_str"); -check_parameter(POST '/', ['foo' => $encode_str]); -check_parameter(POST '/', - Content_Type => 'form-data', - Content => [ - 'foo' => [ - "$Bin/unicode_plugin_request_decode.t", - $encode_str, - ] - ], -); - -check_argument(GET "/$escape_str"); -check_capture(GET "/capture/$escape_str"); - -# sending non-utf8 data -my $non_utf8_data = "%C3%E6%CB%AA"; -check_fallback(GET "/?q=${non_utf8_data}"); -check_fallback(GET "/${non_utf8_data}"); -check_fallback(GET "/capture/${non_utf8_data}"); -check_fallback(POST '/', ['foo' => $non_utf8_data]); - sub check_parameter { my ( undef, $c ) = ctx_request(shift); is $c->res->output => '

It works

'; my $foo = $c->req->param('foo'); - ok utf8::is_utf8($foo); - is $foo => $decode_str; + is $foo, $decode_str; my $other_foo = $c->req->method eq 'POST' ? $c->req->upload('foo') ? $c->req->upload('foo')->filename : $c->req->body_parameters->{foo} : $c->req->query_parameters->{foo}; - ok utf8::is_utf8($other_foo); + is $other_foo => $decode_str; } @@ -61,7 +38,6 @@ sub check_argument { is $c->res->output => '

It works

'; my $foo = $c->req->args->[0]; - ok utf8::is_utf8($foo); is $foo => $decode_str; } @@ -70,7 +46,6 @@ sub check_capture { is $c->res->output => '

It works

'; my $foo = $c->req->captures->[0]; - ok utf8::is_utf8($foo); is $foo => $decode_str; } @@ -78,3 +53,27 @@ sub check_fallback { my ( $res, $c ) = ctx_request(shift); ok(!is_server_error($res->code)) or diag('Response code is: ' . $res->code); } + +check_parameter(GET "/?foo=$escape_str"); +check_parameter(POST '/', ['foo' => $encode_str]); +check_parameter(POST '/', + Content_Type => 'form-data', + Content => [ + 'foo' => [ + "$Bin/unicode_plugin_request_decode.t", + $encode_str, + ] + ], +); + +check_argument(GET "/$escape_str"); +check_capture(GET "/capture/$escape_str"); + +# sending non-utf8 data +my $non_utf8_data = "%C3%E6%CB%AA"; +check_fallback(GET "/?q=${non_utf8_data}"); +check_fallback(GET "/${non_utf8_data}"); +check_fallback(GET "/capture/${non_utf8_data}"); +check_fallback(POST '/', ['foo' => $non_utf8_data]); + +done_testing; diff --git a/t/utf8.txt b/t/utf8.txt new file mode 100644 index 0000000..484d2cb --- /dev/null +++ b/t/utf8.txt @@ -0,0 +1 @@ +

This is stream_body_fh action ♥

diff --git a/t/utf_incoming.t b/t/utf_incoming.t new file mode 100644 index 0000000..3b8e965 --- /dev/null +++ b/t/utf_incoming.t @@ -0,0 +1,378 @@ +use utf8; +use warnings; +use strict; +use Test::More; +use HTTP::Request::Common; +use Encode 2.21 'decode_utf8', 'encode_utf8'; +use File::Spec; +use JSON::MaybeXS; + +# Test cases for incoming utf8 + +{ + package MyApp::Controller::Root; + $INC{'MyApp/Controller/Root.pm'} = __FILE__; + + use base 'Catalyst::Controller'; + + sub heart :Path('♥') { + my ($self, $c) = @_; + $c->response->content_type('text/html'); + $c->response->body("

This is path-heart action ♥

"); + # We let the content length middleware find the length... + } + + sub hat :Path('^') { + my ($self, $c) = @_; + $c->response->content_type('text/html'); + $c->response->body("

This is path-hat action ^

"); + } + + sub uri_for :Path('uri_for') { + my ($self, $c) = @_; + $c->response->content_type('text/html'); + $c->response->body("${\$c->uri_for($c->controller('Root')->action_for('argend'), ['♥'], '♥', {'♥'=>'♥♥'})}"); + } + + sub heart_with_arg :Path('a♥') Args(1) { + my ($self, $c, $arg) = @_; + $c->response->content_type('text/html'); + $c->response->body("

This is path-heart-arg action $arg

"); + Test::More::is $c->req->args->[0], '♥'; + } + + sub base :Chained('/') CaptureArgs(0) { } + sub link :Chained('base') PathPart('♥') Args(0) { + my ($self, $c) = @_; + $c->response->content_type('text/html'); + $c->response->body("

This is base-link action ♥

"); + } + sub arg :Chained('base') PathPart('♥') Args(1) { + my ($self, $c, $arg) = @_; + $c->response->content_type('text/html'); + $c->response->body("

This is base-link action ♥ $arg

"); + } + sub capture :Chained('base') PathPart('♥') CaptureArgs(1) { + my ($self, $c, $arg) = @_; + $c->stash(capture=>$arg); + } + sub argend :Chained('capture') PathPart('♥') Args(1) { + my ($self, $c, $arg) = @_; + $c->response->content_type('text/html'); + + Test::More::is $c->req->args->[0], '♥'; + Test::More::is $c->req->captures->[0], '♥'; + + $c->response->body("

This is base-link action ♥ ${\$c->req->args->[0]}

"); + + # Test to make sure redirect can now take an object (sorry don't have a better place for it + # but wanted test coverage. + my $location = $c->res->redirect( $c->uri_for($c->controller('Root')->action_for('uri_for')) ); + Test::More::ok !ref $location; + } + + sub stream_write :Local { + my ($self, $c) = @_; + $c->response->content_type('text/html'); + $c->response->write("

This is stream_write action ♥

"); + } + + sub stream_write_fh :Local { + my ($self, $c) = @_; + $c->response->content_type('text/html'); + + my $writer = $c->res->write_fh; + $writer->write_encoded('

This is stream_write_fh action ♥

'); + $writer->close; + } + + # Stream a file with utf8 chars directly, you don't need to decode + sub stream_body_fh :Local { + my ($self, $c) = @_; + my $path = File::Spec->catfile('t', 'utf8.txt'); + open(my $fh, '<', $path) || die "trouble: $!"; + $c->response->content_type('text/html'); + $c->response->body($fh); + } + + # If you pull the file contents into a var, NOW you need to specify the + # IO encoding on the FH. Ultimately Plack at the end wants bytes... + sub stream_body_fh2 :Local { + my ($self, $c) = @_; + my $path = File::Spec->catfile('t', 'utf8.txt'); + open(my $fh, '<:encoding(UTF-8)', $path) || die "trouble: $!"; + my $contents = do { local $/; <$fh> }; + + $c->response->content_type('text/html'); + $c->response->body($contents); + } + + sub file_upload :POST Consumes(Multipart) Local { + my ($self, $c) = @_; + Test::More::is $c->req->body_parameters->{'♥'}, '♥♥'; + Test::More::ok my $upload = $c->req->uploads->{file}; + Test::More::is $upload->charset, 'UTF-8'; + + my $text = $upload->slurp; + Test::More::is Encode::decode_utf8($text), "

This is stream_body_fh action ♥

\n"; + + my $decoded_text = $upload->decoded_slurp; + Test::More::is $decoded_text, "

This is stream_body_fh action ♥

\n"; + + Test::More::is $upload->filename, '♥ttachment.txt'; + Test::More::is $upload->raw_basename, '♥ttachment.txt'; + + $c->response->content_type('text/html'); + $c->response->body($decoded_text); + } + + sub json :POST Consumes(JSON) Local { + my ($self, $c) = @_; + my $post = $c->req->body_data; + + Test::More::is $post->{'♥'}, '♥♥'; + $c->response->content_type('application/json'); + + # Encode JSON also encodes to a UTF-8 encoded, binary string. This is why we don't + # have application/json as one of the things we match, otherwise we get double + # encoding. + $c->response->body(JSON::MaybeXS::encode_json($post)); + } + + ## If someone clears encoding, they can do as they wish + sub manual_1 :Local { + my ($self, $c) = @_; + $c->clear_encoding; + $c->res->content_type('text/plain'); + $c->res->content_type_charset('UTF-8'); + $c->response->body( Encode::encode_utf8("manual_1 ♥")); + } + + ## If you do like gzip, well handle that yourself! Basically if you do some sort + ## of content encoding like gzip, you must do on top of the encoding. We will fix + ## the encoding plugins (Catalyst::Plugin::Compress) to do this properly for you. + # + sub gzipped :Local { + require Compress::Zlib; + my ($self, $c) = @_; + $c->res->content_type('text/plain'); + $c->res->content_type_charset('UTF-8'); + $c->res->content_encoding('gzip'); + $c->response->body(Compress::Zlib::memGzip(Encode::encode_utf8("manual_1 ♥"))); + } + + package MyApp; + use Catalyst; + + # Default encoding is now UTF-8 + # MyApp->config(encoding=>'UTF-8'); + + Test::More::ok(MyApp->setup, 'setup app'); +} + +ok my $psgi = MyApp->psgi_app, 'build psgi app'; + +use Catalyst::Test 'MyApp'; + +{ + my $res = request "/root/♥"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), '

This is path-heart action ♥

', 'correct body'; + is $res->content_length, 36, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/root/a♥/♥"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), '

This is path-heart-arg action ♥

', 'correct body'; + is $res->content_length, 40, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/root/^"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), '

This is path-hat action ^

', 'correct body'; + is $res->content_length, 32, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/base/♥"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), '

This is base-link action ♥

', 'correct body'; + is $res->content_length, 35, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + my ($res, $c) = ctx_request POST "/base/♥?♥=♥&♥=♥♥", [a=>1, b=>'', '♥'=>'♥', '♥'=>'♥♥']; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), '

This is base-link action ♥

', 'correct body'; + is $res->content_length, 35, 'correct length'; + is $c->req->parameters->{'♥'}[0], '♥'; + is $c->req->query_parameters->{'♥'}[0], '♥'; + is $c->req->body_parameters->{'♥'}[0], '♥'; + is $c->req->parameters->{'♥'}[0], '♥'; + is $c->req->parameters->{a}, 1; + is $c->req->body_parameters->{a}, 1; + is $res->content_charset, 'UTF-8'; +} + +{ + my ($res, $c) = ctx_request GET "/base/♥?♥♥♥"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), '

This is base-link action ♥

', 'correct body'; + is $res->content_length, 35, 'correct length'; + is $c->req->query_keywords, '♥♥♥'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/base/♥/♥"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), '

This is base-link action ♥ ♥

', 'correct body'; + is $res->content_length, 39, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/base/♥/♥/♥/♥"; + + is decode_utf8($res->content), '

This is base-link action ♥ ♥

', 'correct body'; + is $res->content_length, 39, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + my ($res, $c) = ctx_request POST "/base/♥/♥/♥/♥?♥=♥♥", [a=>1, b=>'2', '♥'=>'♥♥']; + + ## Make sure that the urls we generate work the same + my $uri_for1 = $c->uri_for($c->controller('Root')->action_for('argend'), ['♥'], '♥', {'♥'=>'♥♥'}); + my $uri_for2 = $c->uri_for($c->controller('Root')->action_for('argend'), ['♥', '♥'], {'♥'=>'♥♥'}); + my $uri = $c->req->uri; + + is "$uri_for1", "$uri_for2"; + is "$uri", "$uri_for1"; + + { + my ($res, $c) = ctx_request POST "$uri_for1", [a=>1, b=>'2', '♥'=>'♥♥']; + is $c->req->query_parameters->{'♥'}, '♥♥'; + is $c->req->body_parameters->{'♥'}, '♥♥'; + is $c->req->parameters->{'♥'}[0], '♥♥'; #combined with query and body + is $res->content_charset, 'UTF-8'; + } +} + +{ + my ($res, $c) = ctx_request "/root/uri_for"; + my $url = $c->uri_for($c->controller('Root')->action_for('argend'), ['♥'], '♥', {'♥'=>'♥♥'}); + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), "$url", 'correct body'; #should do nothing + is $res->content, "$url", 'correct body'; + is $res->content_length, 90, 'correct length'; + is $res->content_charset, 'UTF-8'; + + { + my $url = $c->uri_for($c->controller->action_for('heart_with_arg'), '♥'); + is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5', "correct $url"; + } + + { + my $url = $c->uri_for($c->controller->action_for('heart_with_arg'), ['♥']); + is "$url", 'http://localhost/root/a%E2%99%A5/%E2%99%A5', "correct $url"; + } +} + +{ + my $res = request "/root/stream_write"; + + is $res->code, 200, 'OK GET /root/stream_write'; + is decode_utf8($res->content), '

This is stream_write action ♥

', 'correct body'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/root/stream_body_fh"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), "

This is stream_body_fh action ♥

\n", 'correct body'; + is $res->content_charset, 'UTF-8'; + # Not sure why there is a trailing newline above... its not in catalyst code I can see. Not sure + # if is a problem or just an artifact of the why the test stuff works - JNAP +} + +{ + my $res = request "/root/stream_write_fh"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), '

This is stream_write_fh action ♥

', 'correct body'; + #is $res->content_length, 41, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + my $res = request "/root/stream_body_fh2"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), "

This is stream_body_fh action ♥

\n", 'correct body'; + is $res->content_length, 41, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +{ + ok my $path = File::Spec->catfile('t', 'utf8.txt'); + ok my $req = POST '/root/file_upload', + Content_Type => 'form-data', + Content => [encode_utf8('♥')=>encode_utf8('♥♥'), file=>["$path", encode_utf8('♥ttachment.txt'), 'Content-Type' =>'text/html; charset=UTF-8', ]]; + + ok my $res = request $req; + is decode_utf8($res->content), "

This is stream_body_fh action ♥

\n"; +} + +{ + ok my $req = POST '/root/json', + Content_Type => 'application/json', + Content => encode_json +{'♥'=>'♥♥'}; # Note: JSON does the UTF* encoding for us + + ok my $res = request $req; + + ## decode_json expect the binary utf8 string and does the decoded bit for us. + is_deeply decode_json(($res->content)), +{'♥'=>'♥♥'}; +} + +{ + my $res = request "/root/manual_1"; + + is $res->code, 200, 'OK'; + is decode_utf8($res->content), "manual_1 ♥", 'correct body'; + is $res->content_length, 12, 'correct length'; + is $res->content_charset, 'UTF-8'; +} + +SKIP: { + eval { require Compress::Zlib; 1} || do { + skip "Compress::Zlib needed to test gzip encoding", 5 }; + + my $res = request "/root/gzipped"; + ok my $raw_content = $res->content; + ok my $content = Compress::Zlib::memGunzip($raw_content), 'no gunzip error'; + + is $res->code, 200, 'OK'; + is decode_utf8($content), "manual_1 ♥", 'correct body'; + is $res->content_charset, 'UTF-8'; +} + +## should we use binmode on filehandles to force the encoding...? +## Not sure what else to do with multipart here, if docs are enough... + +done_testing;