X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FCatalyst-Runtime.git;a=blobdiff_plain;f=lib%2FCatalyst.pm;h=3ef4bc867d75587a8a743b24da510e798bc42e22;hp=23ac3ad7df1a808d154e0b019ad0ed47df306c45;hb=c017b11bf96db335cf4efce1808d56d13651b10a;hpb=460bb45ba7f4694c2cbb4f1992f14cd9a34b77f7 diff --git a/lib/Catalyst.pm b/lib/Catalyst.pm index 23ac3ad..3ef4bc8 100644 --- a/lib/Catalyst.pm +++ b/lib/Catalyst.pm @@ -50,7 +50,7 @@ use Plack::Middleware::RemoveRedundantBody; use Catalyst::Middleware::Stash; use Plack::Util; use Class::Load 'load_class'; -use Encode 2.21 (); +use Encode 2.21 'decode_utf8', 'encode_utf8'; BEGIN { require 5.008003; } @@ -86,8 +86,10 @@ has response => ( lazy => 1, ); sub _build_response_constructor_args { - my $self = shift; - { _log => $self->log }; + return +{ + _log => $_[0]->log, + encoding => $_[0]->encoding, + }; } has namespace => (is => 'rw'); @@ -127,7 +129,8 @@ __PACKAGE__->stats_class('Catalyst::Stats'); __PACKAGE__->_encode_check(Encode::FB_CROAK | Encode::LEAVE_SRC); # Remember to update this in Catalyst::Runtime as well! -our $VERSION = '5.90080_001'; +our $VERSION = '5.90079_004'; +$VERSION = eval $VERSION if $VERSION =~ /_/; # numify for warning-free dev releases sub import { my ( $class, @arguments ) = @_; @@ -495,6 +498,18 @@ Catalyst). # stash is automatically passed to the view for use in a template $c->forward( 'MyApp::View::TT' ); +The stash hash is currently stored in the PSGI C<$env> and is managed by +L. Since it's part of the C<$env> items in +the stash can be accessed in sub applications mounted under your main +L application. For example if you delegate the response of an +action to another L application, that sub application will have +access to all the stash keys of the main one, and if can of course add +more keys of its own. However those new keys will not 'bubble' back up +to the main application. + +For more information the best thing to do is to review the test case: +t/middleware-stash.t in the distribution /t directory. + =cut sub stash { @@ -998,12 +1013,31 @@ And later: Your log class should implement the methods described in L. +=head2 has_encoding + +Returned True if there's a valid encoding + +=head2 clear_encoding + +Clears the encoding for the current context + =head2 encoding Sets or gets the application encoding. =cut +sub has_encoding { shift->encoding ? 1:0 } + +sub clear_encoding { + my $c = shift; + if(blessed $c) { + $c->encoding(undef); + } else { + $c->debug->error("You can't clear encoding on the application"); + } +} + sub encoding { my $c = shift; my $encoding; @@ -1320,6 +1354,8 @@ sub setup_finalize { =head2 $c->uri_for( $action, \@captures?, @args?, \%query_values? ) +=head2 $c->uri_for( $action, [@captures, @args], \%query_values? ) + Constructs an absolute L object based on the application root, the provided path, and the additional arguments and query parameters provided. When used as a string, provides a textual URI. If you need more flexibility @@ -1359,6 +1395,10 @@ path, use C<< $c->uri_for_action >> instead. # Path to a static resource $c->uri_for('/static/images/logo.png'); +In general the scheme of the generated URI object will follow the incoming request +however if your targeted action or action chain has the Scheme attribute it will +use that instead. + =cut sub uri_for { @@ -1376,30 +1416,38 @@ sub uri_for { ( scalar @args && ref $args[$#args] eq 'HASH' ? pop @args : {} ); carp "uri_for called with undef argument" if grep { ! defined $_ } @args; + + my @encoded_args = (); foreach my $arg (@args) { - utf8::encode($arg) if utf8::is_utf8($arg); - $arg =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; + if(ref($arg)||'' eq 'ARRAY') { + push @encoded_args, [map { + my $encoded = encode_utf8 $_; + $encoded =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; + $encoded; + } @$arg]; + } else { + push @encoded_args, do { + my $encoded = encode_utf8 $arg; + $encoded =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; + $encoded; + } + } } + my $target_action = $path->$_isa('Catalyst::Action') ? $path : undef; if ( $path->$_isa('Catalyst::Action') ) { # action object - s|/|%2F|g for @args; + s|/|%2F|g for @encoded_args; my $captures = [ map { s|/|%2F|g; $_; } - ( scalar @args && ref $args[0] eq 'ARRAY' - ? @{ shift(@args) } + ( scalar @encoded_args && ref $encoded_args[0] eq 'ARRAY' + ? @{ shift(@encoded_args) } : ()) ]; - foreach my $capture (@$captures) { - utf8::encode($capture) if utf8::is_utf8($capture); - $capture =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; - } - my $action = $path; # ->uri_for( $action, \@captures_and_args, \%query_values? ) - if( !@args && $action->number_of_args ) { + if( !@encoded_args && $action->number_of_args ) { my $expanded_action = $c->dispatcher->expand_action( $action ); - my $num_captures = $expanded_action->number_of_captures; - unshift @args, splice @$captures, $num_captures; + unshift @encoded_args, splice @$captures, $num_captures; } $path = $c->dispatcher->uri_for_action($action, $captures); @@ -1411,25 +1459,37 @@ sub uri_for { $path = '/' if $path eq ''; } - unshift(@args, $path); + unshift(@encoded_args, $path); unless (defined $path && $path =~ s!^/!!) { # in-place strip my $namespace = $c->namespace; if (defined $path) { # cheesy hack to handle path '../foo' - $namespace =~ s{(?:^|/)[^/]+$}{} while $args[0] =~ s{^\.\./}{}; + $namespace =~ s{(?:^|/)[^/]+$}{} while $encoded_args[0] =~ s{^\.\./}{}; } - unshift(@args, $namespace || ''); + unshift(@encoded_args, $namespace || ''); } # join args with '/', or a blank string - my $args = join('/', grep { defined($_) } @args); + my $args = join('/', grep { defined($_) } @encoded_args); $args =~ s/\?/%3F/g; # STUPID STUPID SPECIAL CASE $args =~ s!^/+!!; my ($base, $class) = ('/', 'URI::_generic'); if(blessed($c)) { $base = $c->req->base; - $class = ref($base); + if($target_action) { + $target_action = $c->dispatcher->expand_action($target_action); + if(my $s = $target_action->scheme) { + $s = lc($s); + $class = "URI::$s"; + $base->scheme($s); + } else { + $class = ref($base); + } + } else { + $class = ref($base); + } + $base =~ s{(?{$_}; - s/([;\/?:@&=+,\$\[\]%])/$URI::Escape::escapes{$1}/go; + #s/([;\/?:@&=+,\$\[\]%])/$URI::Escape::escapes{$1}/go; ## Commented out because seems to lead to double encoding - JNAP s/ /+/g; my $key = $_; $val = '' unless defined $val; (map { my $param = "$_"; - utf8::encode( $param ) if utf8::is_utf8($param); + $param = encode_utf8($param); # using the URI::Escape pattern here so utf8 chars survive $param =~ s/([^A-Za-z0-9\-_.!~*'() ])/$URI::Escape::escapes{$1}/go; $param =~ s/ /+/g; + + $key = encode_utf8($key); + # using the URI::Escape pattern here so utf8 chars survive + $key =~ s/([^A-Za-z0-9\-_.!~*'() ])/$URI::Escape::escapes{$1}/go; + $key =~ s/ /+/g; + "${key}=$param"; } ( ref $val eq 'ARRAY' ? @$val : $val )); } @keys); } @@ -1775,7 +1841,15 @@ sub execute { if ( my $error = $@ ) { #rethow if this can be handled by middleware - if(blessed $error && ($error->can('as_psgi') || $error->can('code'))) { + if( + blessed $error && ( + $error->can('as_psgi') || + ( + $error->can('code') && + $error->code =~m/^[1-5][0-9][0-9]$/ + ) + ) + ) { foreach my $err (@{$c->error}) { $c->log->error($err); } @@ -1994,6 +2068,8 @@ sub finalize_headers { $c->finalize_cookies; + # This currently is a NOOP but I don't want to remove it since I guess people + # might have Response subclasses that use it for something... (JNAP) $c->response->finalize_headers(); # Done @@ -2002,42 +2078,49 @@ sub finalize_headers { =head2 $c->finalize_encoding -Make sure your headers and body are encoded properly IF you set an encoding. +Make sure your body is encoded properly IF you set an encoding. By +default the encoding is UTF-8 but you can disable it by explictly setting the +encoding configuration value to undef. + +We can only encode when the body is a scalar. Methods for encoding via the +streaming interfaces (such as C and C on L +are available). + See L. =cut sub finalize_encoding { my $c = shift; - - my $body = $c->response->body; - - return unless defined($body); - - my $enc = $c->encoding; - - return unless $enc; - - my ($ct, $ct_enc) = $c->response->content_type; - - # Only touch 'text-like' contents - return unless $c->response->content_type =~ /^text|xml$|javascript$/; - - if ($ct_enc && $ct_enc =~ /charset=([^;]*)/) { - if (uc($1) ne uc($enc->mime_name)) { - $c->log->debug("Unicode::Encoding is set to encode in '" . - $enc->mime_name . - "', content type is '$1', not encoding "); - return; - } - } else { - $c->res->content_type($c->res->content_type . "; charset=" . $enc->mime_name); + my $res = $c->res || return; + + # Warn if the set charset is different from the one you put into encoding. We need + # to do this early since encodable_response is false for this condition and we need + # to match the debug output for backcompat (there's a test for this...) -JNAP + if( + $res->content_type_charset and $c->encoding and + (uc($c->encoding->mime_name) ne uc($res->content_type_charset)) + ) { + my $ct = lc($res->content_type_charset); + $c->log->debug("Catalyst encoding config is set to encode in '" . + $c->encoding->mime_name . + "', content type is '$ct', not encoding "); } - # Oh my, I wonder what filehandle responses and streams do... - jnap. - # Encode expects plain scalars (IV, NV or PV) and segfaults on ref's - $c->response->body( $c->encoding->encode( $body, $c->_encode_check ) ) - if ref(\$body) eq 'SCALAR'; + if( + ($res->encodable_response) and + (defined($res->body)) and + (ref(\$res->body) eq 'SCALAR') + ) { + $c->res->body( $c->encoding->encode( $c->res->body, $c->_encode_check ) ); + + # Set the charset if necessary. This might be a bit bonkers since encodable response + # is false when the set charset is not the same as the encoding mimetype (maybe + # confusing action at a distance here.. + # Don't try to set the charset if one already exists + $c->res->content_type($c->res->content_type . "; charset=" . $c->encoding->mime_name) + unless($c->res->content_type_charset); + } } =head2 $c->finalize_output @@ -2101,7 +2184,15 @@ sub handle_request { $status = $c->finalize; } catch { #rethow if this can be handled by middleware - if(blessed $_ && ($_->can('as_psgi') || $_->can('code'))) { + if( + blessed($_) && ( + $_->can('as_psgi') || + ( + $_->can('code') && + $_->code =~m/^[1-5][0-9][0-9]$/ + ) + ) + ) { $_->can('rethrow') ? $_->rethrow : croak $_; } chomp(my $error = $_); @@ -2251,7 +2342,7 @@ Prepares body parameters. sub prepare_body_parameters { my $c = shift; - $c->engine->prepare_body_parameters( $c, @_ ); + $c->request->prepare_body_parameters( $c, @_ ); } =head2 $c->prepare_connection @@ -2345,6 +2436,10 @@ sub log_request { $method ||= ''; $path = '/' unless length $path; $address ||= ''; + + $path =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; + $path = decode_utf8($path); + $c->log->debug(qq/"$method" request for "$path" from "$address"/); $c->log_request_headers($request->headers); @@ -2530,37 +2625,6 @@ Prepares uploads. sub prepare_uploads { my $c = shift; $c->engine->prepare_uploads( $c, @_ ); - - my $enc = $c->encoding; - return unless $enc; - - # Uggg we hook prepare uploads to do the encoding crap on post and query - # parameters! Sorry -jnap - for my $key (qw/ parameters query_parameters body_parameters /) { - for my $value ( values %{ $c->request->{$key} } ) { - # N.B. Check if already a character string and if so do not try to double decode. - # http://www.mail-archive.com/catalyst@lists.scsys.co.uk/msg02350.html - # this avoids exception if we have already decoded content, and is _not_ the - # same as not encoding on output which is bad news (as it does the wrong thing - # for latin1 chars for example).. - $value = $c->_handle_unicode_decoding($value); - } - } - for my $value ( values %{ $c->request->uploads } ) { - # skip if it fails for uploads, as we don't usually want uploads touched - # in any way - for my $inner_value ( ref($value) eq 'ARRAY' ? @{$value} : $value ) { - $inner_value->{filename} = try { - $enc->decode( $inner_value->{filename}, $c->_encode_check ) - } catch { - $c->handle_unicode_encoding_exception({ - param_value => $inner_value->{filename}, - error_msg => $_, - encoding_step => 'uploads', - }); - }; - } - } } =head2 $c->prepare_write @@ -3044,8 +3108,14 @@ Sets up the input/output encoding. See L sub setup_encoding { my $c = shift; - my $enc = delete $c->config->{encoding}; - $c->encoding( $enc ) if defined $enc; + if( exists($c->config->{encoding}) && !defined($c->config->{encoding}) ) { + # Ok, so the user has explicitly said "I don't want encoding..." + return; + } else { + my $enc = defined($c->config->{encoding}) ? + delete $c->config->{encoding} : 'UTF-8'; # not sure why we delete it... (JNAP) + $c->encoding($enc); + } } =head2 handle_unicode_encoding_exception @@ -3083,8 +3153,13 @@ sub _handle_unicode_decoding { return $value; } elsif ( ref $value eq 'HASH' ) { - foreach ( values %$value ) { - $_ = $self->_handle_unicode_decoding($_); + foreach (keys %$value) { + my $encoded_key = $self->_handle_param_unicode_decoding($_); + $value->{$encoded_key} = $self->_handle_unicode_decoding($value->{$_}); + + # If the key was encoded we now have two (the original and current so + # delete the original. + delete $value->{$_} if $_ ne $encoded_key; } return $value; } @@ -3099,9 +3174,7 @@ sub _handle_param_unicode_decoding { my $enc = $self->encoding; return try { - Encode::is_utf8( $value ) ? - $value - : $enc->decode( $value, $self->_encode_check ); + $enc->decode( $value, $self->_encode_check ); } catch { $self->handle_unicode_encoding_exception({ @@ -3598,6 +3671,9 @@ C - See L. C - See L +This now defaults to 'UTF-8'. You my turn it off by setting this configuration +value to undef. + =item * C @@ -3943,6 +4019,36 @@ Please see L for more on middleware. On request, decodes all params from encoding into a sequence of logical characters. On response, encodes body into encoding. +By default encoding is now 'UTF-8'. You may turn it off by setting +the encoding configuration to undef. + +Encoding is automatically applied when the content-type is set to +a type that can be encoded. Currently we encode when the content type +matches the following regular expression: + + $content_type =~ /^text|xml$|javascript$/ + +Encoding is set on the application, but it is copied to the response object +so you can override encoding rules per request (See L +for more information). + +Be default we don't automatically encode 'application/json' since the most +popular JSON encoders (such as L which is the library that +L can make use of) will do the UTF8 encoding and decoding automatically. +Having it on in Catalyst could result in double encoding. + +If you are producing JSON response in an unconventional manner (such +as via a template or manual strings) you should perform the UTF8 encoding +manually as well such as to conform to the JSON specification. + +NOTE: We also examine the value of $c->response->content_encoding. If +you set this (like for example 'gzip', and manually gzipping the body) +we assume that you have done all the neccessary encoding yourself, since +we cannot encode the gzipped contents. If you use a plugin like +L we will be updating that plugin to work +with the new UTF8 encoding code, or you can use L +or (probably best) do your compression on a front end proxy. + =head2 Methods =over 4