X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FCatalyst-Runtime.git;a=blobdiff_plain;f=lib%2FCatalyst%2FEngine.pm;h=fdd3df9046f4fd849e12fabc04e78ecafb402128;hp=67993f199e8988c9f2c308bbf336ed4bb88ecb22;hb=fbd5d4fa65e4e10dd160ae5637e4b94a5b0d54f4;hpb=9560b708797123875e4743b5dac70c69fbc4a3ef diff --git a/lib/Catalyst/Engine.pm b/lib/Catalyst/Engine.pm index 67993f1..fdd3df9 100644 --- a/lib/Catalyst/Engine.pm +++ b/lib/Catalyst/Engine.pm @@ -7,41 +7,37 @@ use CGI::Simple::Cookie; use Data::Dump qw/dump/; use Errno 'EWOULDBLOCK'; use HTML::Entities; -use HTTP::Body; use HTTP::Headers; -use URI::QueryParam; -use Moose::Util::TypeConstraints; use Plack::Loader; -use Plack::Middleware::Conditional; -use Plack::Middleware::ReverseProxy; - +use Catalyst::EngineLoader; +use Encode 2.21 'decode_utf8', 'encode', 'decode'; +use Plack::Request::Upload; +use Hash::MultiValue; use namespace::clean -except => 'meta'; - -has env => (is => 'ro', writer => '_set_env', clearer => '_clear_env'); - -# input position and length -has read_length => (is => 'rw'); -has read_position => (is => 'rw'); - -has _prepared_write => (is => 'rw'); - -has _response_cb => ( - is => 'ro', - isa => 'CodeRef', - writer => '_set_response_cb', - clearer => '_clear_response_cb', -); - -has _writer => ( - is => 'ro', - isa => duck_type([qw(write close)]), - writer => '_set_writer', - clearer => '_clear_writer', -); +use utf8; # Amount of data to read from input on each pass our $CHUNKSIZE = 64 * 1024; +# XXX - this is only here for compat, do not use! +has env => ( is => 'rw', writer => '_set_env' , weak_ref=>1); +my $WARN_ABOUT_ENV = 0; +around env => sub { + my ($orig, $self, @args) = @_; + if(@args) { + warn "env as a writer is deprecated, you probably need to upgrade Catalyst::Engine::PSGI" + unless $WARN_ABOUT_ENV++; + return $self->_set_env(@args); + } + return $self->$orig; +}; + +# XXX - Only here for Engine::PSGI compat +sub prepare_connection { + my ($self, $ctx) = @_; + $ctx->request->prepare_connection; +} + =head1 NAME Catalyst::Engine - The Catalyst Engine @@ -57,30 +53,123 @@ See L. =head2 $self->finalize_body($c) -Finalize body. Prints the response output. +Finalize body. Prints the response output as blocking stream if it looks like +a filehandle, otherwise write it out all in one go. If there is no body in +the response, we assume you are handling it 'manually', such as for nonblocking +style or asynchronous streaming responses. You do this by calling L +several times (which sends HTTP headers if needed) or you close over +C<< $response->write_fh >>. + +See L and L for more. =cut sub finalize_body { my ( $self, $c ) = @_; - my $body = $c->response->body; - no warnings 'uninitialized'; - if ( blessed($body) && $body->can('read') or ref($body) eq 'GLOB' ) { - my $got; - do { - $got = read $body, my ($buffer), $CHUNKSIZE; - $got = 0 unless $self->write( $c, $buffer ); - } while $got > 0; - - close $body; - } - else { - $self->write( $c, $body ); - } + my $res = $c->response; # We use this all over + + ## If we've asked for the write 'filehandle' that means the application is + ## doing something custom and is expected to close the response + return if $res->_has_write_fh; + + my $body = $res->body; # save some typing + if($res->_has_response_cb) { + ## we have not called the response callback yet, so we are safe to send + ## the whole body to PSGI + + my @headers; + $res->headers->scan(sub { push @headers, @_ }); + + # We need to figure out what kind of body we have and normalize it to something + # PSGI can deal with + if(defined $body) { + # Handle objects first + if(blessed($body)) { + if($body->can('getline')) { + # Body is an IO handle that meets the PSGI spec. Nothing to normalize + } elsif($body->can('read')) { + + # In the past, Catalyst only looked for ->read not ->getline. It is very possible + # that one might have an object that respected read but did not have getline. + # As a result, we need to handle this case for backcompat. + + # We will just do the old loop for now. In a future version of Catalyst this support + # will be removed and one will have to rewrite their custom object or use + # Plack::Middleware::AdaptFilehandleRead. In anycase support for this is officially + # deprecated and described as such as of 5.90060 + + my $got; + do { + $got = read $body, my ($buffer), $CHUNKSIZE; + $got = 0 unless $self->write($c, $buffer ); + } while $got > 0; + + close $body; + return; + } else { + # Looks like for backcompat reasons we need to be able to deal + # with stringyfiable objects. + $body = ["$body"]; + } + } elsif(ref $body) { + if( (ref($body) eq 'GLOB') or (ref($body) eq 'ARRAY')) { + # Again, PSGI can just accept this, no transform needed. We don't officially + # document the body as arrayref at this time (and there's not specific test + # cases. we support it because it simplifies some plack compatibility logic + # and we might make it official at some point. + } else { + $c->log->error("${\ref($body)} is not a valid value for Response->body"); + return; + } + } else { + # Body is defined and not an object or reference. We assume a simple value + # and wrap it in an array for PSGI + $body = [$body]; + } + } else { + # There's no body... + $body = []; + } + $res->_response_cb->([ $res->status, \@headers, $body]); + $res->_clear_response_cb; + + } else { + ## Now, if there's no response callback anymore, that means someone has + ## called ->write in order to stream 'some stuff along the way'. I think + ## for backcompat we still need to handle a ->body. I guess I could see + ## someone calling ->write to presend some stuff, and then doing the rest + ## via ->body, like in a template. + + ## We'll just use the old, existing code for this (or most of it) + + if(my $body = $res->body) { + + if ( blessed($body) && $body->can('read') or ref($body) eq 'GLOB' ) { + + ## In this case we have no choice and will fall back on the old + ## manual streaming stuff. Not optimal. This is deprecated as of 5.900560+ + + my $got; + do { + $got = read $body, my ($buffer), $CHUNKSIZE; + $got = 0 unless $self->write($c, $buffer ); + } while $got > 0; + + close $body; + } + else { + + # Case where body was set after calling ->write. We'd prefer not to + # support this, but I can see some use cases with the way most of the + # views work. Since body has already been encoded, we need to do + # an 'unencoded_write' here. + $self->unencoded_write( $c, $body ); + } + } - $self->_writer->close; - $self->_clear_writer; - $self->_clear_env; + $res->_writer->close; + $res->_clear_writer; + } return; } @@ -115,6 +204,11 @@ sub finalize_cookies { -httponly => $val->{httponly} || 0, ) ); + if (!defined $cookie) { + $c->log->warn("undef passed in '$name' cookie value - not setting cookie") + if $c->debug; + next; + } push @cookies, $cookie->as_string; } @@ -155,6 +249,14 @@ sub finalize_error { $c->res->content_type('text/html; charset=utf-8'); my $name = ref($c)->config->{name} || join(' ', split('::', ref $c)); + + # Prevent Catalyst::Plugin::Unicode::Encoding from running. + # This is a little nasty, but it's the best way to be clean whether or + # not the user has an encoding plugin. + + if ($c->can('encoding')) { + $c->{encoding} = ''; + } my ( $title, $error, $infos ); if ( $c->debug ) { @@ -171,7 +273,6 @@ sub finalize_error { $name = "

$name

"; # Don't show context in the dump - $c->req->_clear_context; $c->res->_clear_context; # Don't show body parser in the dump @@ -200,6 +301,7 @@ sub finalize_error { (pt) Por favor volte mais tarde (ru) Попробуйте еще раз позже (ua) Спробуйте ще раз пізніше +(it) Per favore riprova più tardi $name = ''; @@ -303,39 +405,29 @@ sub finalize_error { - # Trick IE. Old versions of IE would display their own error page instead # of ours if we'd give it less than 512 bytes. $c->res->{body} .= ( ' ' x 512 ); + $c->res->{body} = Encode::encode("UTF-8", $c->res->{body}); + # Return 500 $c->res->status(500); } =head2 $self->finalize_headers($c) -Abstract method, allows engines to write headers to response +Allows engines to write headers to response =cut sub finalize_headers { my ($self, $ctx) = @_; - my @headers; - $ctx->response->headers->scan(sub { push @headers, @_ }); - - $self->_set_writer($self->_response_cb->([ $ctx->response->status, \@headers ])); - $self->_clear_response_cb; - + $ctx->finalize_headers unless $ctx->response->finalized_headers; return; } -=head2 $self->finalize_read($c) - -=cut - -sub finalize_read { } - =head2 $self->finalize_uploads($c) Clean up after uploads, deleting temp files. @@ -345,6 +437,8 @@ Clean up after uploads, deleting temp files. sub finalize_uploads { my ( $self, $c ) = @_; + # N.B. This code is theoretically entirely unneeded due to ->cleanup(1) + # on the HTTP::Body object. my $request = $c->request; foreach my $key (keys %{ $request->uploads }) { my $upload = $request->uploads->{$key}; @@ -363,33 +457,7 @@ sets up the L object body using L sub prepare_body { my ( $self, $c ) = @_; - my $appclass = ref($c) || $c; - if ( my $length = $self->read_length ) { - my $request = $c->request; - unless ( $request->_body ) { - my $type = $request->header('Content-Type'); - $request->_body(HTTP::Body->new( $type, $length )); - $request->_body->tmpdir( $appclass->config->{uploadtmp} ) - if exists $appclass->config->{uploadtmp}; - } - - # Check for definedness as you could read '0' - while ( defined ( my $buffer = $self->read($c) ) ) { - $c->prepare_body_chunk($buffer); - } - - # paranoia against wrong Content-Length header - my $remaining = $length - $self->read_position; - if ( $remaining > 0 ) { - $self->finalize_read($c); - Catalyst::Exception->throw( - "Wrong Content-Length value: $length" ); - } - } - else { - # Defined but will cause all body code to be skipped - $c->request->_body(0); - } + $c->request->prepare_body; } =head2 $self->prepare_body_chunk($c) @@ -398,10 +466,11 @@ Add a chunk to the request body. =cut +# XXX - Can this be deleted? sub prepare_body_chunk { my ( $self, $c, $chunk ) = @_; - $c->request->_body->add($chunk); + $c->request->prepare_body_chunk($chunk); } =head2 $self->prepare_body_parameters($c) @@ -413,94 +482,22 @@ Sets up parameters from body. sub prepare_body_parameters { my ( $self, $c ) = @_; - return unless $c->request->_body; - - $c->request->body_parameters( $c->request->_body->param ); -} - -=head2 $self->prepare_connection($c) - -Abstract method implemented in engines. - -=cut - -sub prepare_connection { - my ($self, $ctx) = @_; - - my $env = $self->env; - my $request = $ctx->request; - - $request->address( $env->{REMOTE_ADDR} ); - $request->hostname( $env->{REMOTE_HOST} ) - if exists $env->{REMOTE_HOST}; - $request->protocol( $env->{SERVER_PROTOCOL} ); - $request->remote_user( $env->{REMOTE_USER} ); - $request->method( $env->{REQUEST_METHOD} ); - $request->secure( $env->{'psgi.url_scheme'} eq 'https' ? 1 : 0 ); - - return; -} - -=head2 $self->prepare_cookies($c) - -Parse cookies from header. Sets a L object. - -=cut - -sub prepare_cookies { - my ( $self, $c ) = @_; - - if ( my $header = $c->request->header('Cookie') ) { - $c->req->cookies( { CGI::Simple::Cookie->parse($header) } ); - } -} - -=head2 $self->prepare_headers($c) - -=cut - -sub prepare_headers { - my ($self, $ctx) = @_; - - my $env = $self->env; - my $headers = $ctx->request->headers; - - for my $header (keys %{ $env }) { - next unless $header =~ /^(HTTP|CONTENT|COOKIE)/i; - (my $field = $header) =~ s/^HTTPS?_//; - $field =~ tr/_/-/; - $headers->header($field => $env->{$header}); - } + $c->request->prepare_body_parameters; } =head2 $self->prepare_parameters($c) -sets up parameters from query and post parameters. +Sets up parameters from query and post parameters. +If parameters have already been set up will clear +existing parameters and set up again. =cut sub prepare_parameters { my ( $self, $c ) = @_; - my $request = $c->request; - my $parameters = $request->parameters; - my $body_parameters = $request->body_parameters; - my $query_parameters = $request->query_parameters; - # We copy, no references - foreach my $name (keys %$query_parameters) { - my $param = $query_parameters->{$name}; - $parameters->{$name} = ref $param eq 'ARRAY' ? [ @$param ] : $param; - } - - # Merge query and body parameters - foreach my $name (keys %$body_parameters) { - my $param = $body_parameters->{$name}; - my @values = ref $param eq 'ARRAY' ? @$param : ($param); - if ( my $existing = $parameters->{$name} ) { - unshift(@values, (ref $existing eq 'ARRAY' ? @$existing : $existing)); - } - $parameters->{$name} = @values > 1 ? \@values : $values[0]; - } + $c->request->_clear_parameters; + return $c->request->parameters; } =head2 $self->prepare_path($c) @@ -512,7 +509,7 @@ abstract method, implemented by engines. sub prepare_path { my ($self, $ctx) = @_; - my $env = $self->env; + my $env = $ctx->request->env; my $scheme = $ctx->request->secure ? 'https' : 'http'; my $host = $env->{HTTP_HOST} || $env->{SERVER_NAME}; @@ -520,10 +517,24 @@ sub prepare_path { my $base_path = $env->{SCRIPT_NAME} || "/"; # set the request URI - my $req_uri = $env->{REQUEST_URI}; - $req_uri =~ s/\?.*$//; - my $path = $req_uri; - $path =~ s{^/+}{}; + my $path; + if (!$ctx->config->{use_request_uri_for_path}) { + my $path_info = $env->{PATH_INFO}; + if ( exists $env->{REDIRECT_URL} ) { + $base_path = $env->{REDIRECT_URL}; + $base_path =~ s/\Q$path_info\E$//; + } + $path = $base_path . $path_info; + $path =~ s{^/+}{}; + $path =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; + $path =~ s/\?/%3F/g; # STUPID STUPID SPECIAL CASE + } + else { + my $req_uri = $env->{REQUEST_URI}; + $req_uri =~ s/\?.*$//; + $path = $req_uri; + $path =~ s{^/+}{}; + } # Using URI directly is way too slow, so we construct the URLs manually my $uri_class = "URI::$scheme"; @@ -561,63 +572,54 @@ process the query string and extract query parameters. sub prepare_query_parameters { my ($self, $c) = @_; + my $env = $c->request->env; + my $do_not_decode_query = $c->config->{do_not_decode_query}; + my $default_query_encoding = $c->config->{default_query_encoding} || + ($c->config->{decode_query_using_global_encoding} ? + $c->encoding : 'UTF-8'); + + my $decoder = sub { + my $str = shift; + return $str if $do_not_decode_query; + return $str unless $default_query_encoding; + return decode( $default_query_encoding, $str); + }; - my $query_string = exists $self->env->{QUERY_STRING} - ? $self->env->{QUERY_STRING} + my $query_string = exists $env->{QUERY_STRING} + ? $env->{QUERY_STRING} : ''; # Check for keywords (no = signs) # (yes, index() is faster than a regex :)) if ( index( $query_string, '=' ) < 0 ) { - $c->request->query_keywords( $self->unescape_uri($query_string) ); + my $keywords = $self->unescape_uri($query_string); + $keywords = $decoder->($keywords); + $c->request->query_keywords($keywords); return; } - my %query; - - # replace semi-colons - $query_string =~ s/;/&/g; + $query_string =~ s/\A[&;]+//; - my @params = grep { length $_ } split /&/, $query_string; - - for my $item ( @params ) { - - my ($param, $value) - = map { $self->unescape_uri($_) } - split( /=/, $item, 2 ); - - $param = $self->unescape_uri($item) unless defined $param; - - if ( exists $query{$param} ) { - if ( ref $query{$param} ) { - push @{ $query{$param} }, $value; - } - else { - $query{$param} = [ $query{$param}, $value ]; - } - } - else { - $query{$param} = $value; - } - } + my $p = Hash::MultiValue->new( + map { defined $_ ? $decoder->($self->unescape_uri($_)) : $_ } + map { ( split /=/, $_, 2 )[0,1] } # slice forces two elements + split /[&;]+/, $query_string + ); - $c->request->query_parameters( \%query ); + $c->request->query_parameters( $c->request->_use_hash_multivalue ? $p : $p->mixed ); } =head2 $self->prepare_read($c) -prepare to read from the engine. +Prepare to read by initializing the Content-Length from headers. =cut sub prepare_read { my ( $self, $c ) = @_; - # Initialize the read position - $self->read_position(0); - # Initialize the amount of data we think we need to read - $self->read_length( $c->request->header('Content-Length') || 0 ); + $c->request->_read_length; } =head2 $self->prepare_request(@arguments) @@ -628,7 +630,10 @@ Populate the context object from the request object. sub prepare_request { my ($self, $ctx, %args) = @_; - $self->_set_env($args{env}); + $ctx->log->psgienv($args{env}) if $ctx->log->can('psgienv'); + $ctx->request->_set_env($args{env}); + $self->_set_env($args{env}); # Nasty back compat! + $ctx->response->_set_response_cb($args{response_cb}); } =head2 $self->prepare_uploads($c) @@ -641,20 +646,26 @@ sub prepare_uploads { my $request = $c->request; return unless $request->_body; + my $enc = $c->encoding; my $uploads = $request->_body->upload; my $parameters = $request->parameters; foreach my $name (keys %$uploads) { my $files = $uploads->{$name}; + $name = $c->_handle_unicode_decoding($name) if $enc; my @uploads; for my $upload (ref $files eq 'ARRAY' ? @$files : ($files)) { my $headers = HTTP::Headers->new( %{ $upload->{headers} } ); + my $filename = $upload->{filename}; + $filename = $c->_handle_unicode_decoding($filename) if $enc; + my $u = Catalyst::Request::Upload->new ( size => $upload->{size}, - type => $headers->content_type, + type => scalar $headers->content_type, + charset => scalar $headers->content_type_charset, headers => $headers, tempname => $upload->{tempname}, - filename => $upload->{filename}, + filename => $filename, ); push @uploads, $u; } @@ -677,13 +688,31 @@ sub prepare_uploads { } } -=head2 $self->prepare_write($c) +=head2 $self->write($c, $buffer) + +Writes the buffer to the client. + +=cut + +sub write { + my ( $self, $c, $buffer ) = @_; + + $c->response->write($buffer); +} + +=head2 $self->unencoded_write($c, $buffer) -Abstract method. Implemented by the engines. +Writes the buffer to the client without encoding. Necessary for +already encoded buffers. Used when a $c->write has been done +followed by $c->res->body. =cut -sub prepare_write { } +sub unencoded_write { + my ( $self, $c, $buffer ) = @_; + + $c->response->unencoded_write($buffer); +} =head2 $self->read($c, [$maxlength]) @@ -696,33 +725,10 @@ Maintains the read_length and read_position counters as data is read. sub read { my ( $self, $c, $maxlength ) = @_; - my $remaining = $self->read_length - $self->read_position; - $maxlength ||= $CHUNKSIZE; - - # Are we done reading? - if ( $remaining <= 0 ) { - $self->finalize_read($c); - return; - } - - my $readlen = ( $remaining > $maxlength ) ? $maxlength : $remaining; - my $rc = $self->read_chunk( $c, my $buffer, $readlen ); - if ( defined $rc ) { - if (0 == $rc) { # Nothing more to read even though Content-Length - # said there should be. - $self->finalize_read; - return; - } - $self->read_position( $self->read_position + $rc ); - return $buffer; - } - else { - Catalyst::Exception->throw( - message => "Unknown error reading input: $!" ); - } + $c->request->read($maxlength); } -=head2 $self->read_chunk($c, $buffer, $length) +=head2 $self->read_chunk($c, \$buffer, $length) Each engine implements read_chunk as its preferred way of reading a chunk of data. Returns the number of bytes read. A return of 0 indicates that @@ -732,86 +738,64 @@ there is no more data to be read. sub read_chunk { my ($self, $ctx) = (shift, shift); - return $self->env->{'psgi.input'}->read(@_); + return $ctx->request->read_chunk(@_); } -=head2 $self->read_length - -The length of input data to be read. This is obtained from the Content-Length -header. - -=head2 $self->read_position - -The amount of input data that has already been read. - =head2 $self->run($app, $server) Start the engine. Builds a PSGI application and calls the -run method on the server passed in.. +run method on the server passed in, which then causes the +engine to loop, handling requests.. =cut sub run { - my ($self, $app, $server, @args) = @_; - $server ||= Plack::Loader->auto(); # We're not being called from a script, - # so auto detect mod_perl or whatever - # FIXME - Do something sensible with the options we're passed - $server->run($self->build_psgi_app($app, @args)); + my ($self, $app, $psgi, @args) = @_; + # @args left here rather than just a $options, $server for back compat with the + # old style scripts which send a few args, then a hashref + + # They should never actually be used in the normal case as the Plack engine is + # passed in got all the 'standard' args via the loader in the script already. + + # FIXME - we should stash the options in an attribute so that custom args + # like Gitalist's --git_dir are possible to get from the app without stupid tricks. + my $server = pop @args if (scalar @args && blessed $args[-1]); + my $options = pop @args if (scalar @args && ref($args[-1]) eq 'HASH'); + # Back compat hack for applications with old (non Catalyst::Script) scripts to work in FCGI. + if (scalar @args && !ref($args[0])) { + if (my $listen = shift @args) { + $options->{listen} ||= [$listen]; + } + } + if (! $server ) { + $server = Catalyst::EngineLoader->new(application_name => ref($self))->auto(%$options); + # We're not being called from a script, so auto detect what backend to + # run on. This should never happen, as mod_perl never calls ->run, + # instead the $app->handle method is called per request. + $app->log->warn("Not supplied a Plack engine, falling back to engine auto-loader (are your scripts ancient?)") + } + $app->run_options($options); + $server->run($psgi, $options); } =head2 build_psgi_app ($app, @args) -Builds and returns a PSGI application closure, wrapping it in the reverse proxy -middleware if the using_frontend_proxy config setting is set. +Builds and returns a PSGI application closure. (Raw, not wrapped in middleware) =cut sub build_psgi_app { my ($self, $app, @args) = @_; - my $psgi_app = sub { + return sub { my ($env) = @_; return sub { my ($respond) = @_; - $self->_set_response_cb($respond); - $app->handle_request(env => $env); + confess("Did not get a response callback for writer, cannot continue") unless $respond; + $app->handle_request(env => $env, response_cb => $respond); }; }; - - $psgi_app = Plack::Middleware::Conditional->wrap( - $psgi_app, - condition => sub { - my ($env) = @_; - return if $app->config->{ignore_frontend_proxy}; - return $env->{REMOTE_ADDR} eq '127.0.0.1' || $app->config->{using_frontend_proxy}; - }, - builder => sub { Plack::Middleware::ReverseProxy->wrap($_[0]) }, - ); - - return $psgi_app; -} - -=head2 $self->write($c, $buffer) - -Writes the buffer to the client. - -=cut - -sub write { - my ( $self, $c, $buffer ) = @_; - - unless ( $self->_prepared_write ) { - $self->prepare_write($c); - $self->_prepared_write(1); - } - - return 0 if !defined $buffer; - - my $len = length($buffer); - $self->_writer->write($buffer); - - return $len; } =head2 $self->unescape_uri($uri) @@ -835,15 +819,15 @@ sub unescape_uri { =head2 $self->env -Hash containing enviroment variables including many special variables inserted +Hash containing environment variables including many special variables inserted by WWW server - like SERVER_*, REMOTE_*, HTTP_* ... -Before accesing enviroment variables consider whether the same information is +Before accessing environment variables consider whether the same information is not directly available via Catalyst objects $c->request, $c->engine ... -BEWARE: If you really need to access some enviroment variable from your Catalyst +BEWARE: If you really need to access some environment variable from your Catalyst application you should use $c->engine->env->{VARNAME} instead of $ENV{VARNAME}, -as in some enviroments the %ENV hash does not contain what you would expect. +as in some environments the %ENV hash does not contain what you would expect. =head1 AUTHORS @@ -856,4 +840,6 @@ the same terms as Perl itself. =cut +__PACKAGE__->meta->make_immutable; + 1;