X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FCatalyst%2FEngine%2FCGI.pm;h=076f5b072bf8d07bb7dffc52289976eef26832cc;hb=fef8c827fc1a87b1c32c487e330b5e967874f581;hp=8950d73dd04dfa0f790337b70dfd92c26267a02c;hpb=6bd2b72cca4b754f154ed1eca98151ac390d9cb5;p=catagits%2FCatalyst-Runtime.git diff --git a/lib/Catalyst/Engine/CGI.pm b/lib/Catalyst/Engine/CGI.pm index 8950d73..076f5b0 100644 --- a/lib/Catalyst/Engine/CGI.pm +++ b/lib/Catalyst/Engine/CGI.pm @@ -1,13 +1,9 @@ package Catalyst::Engine::CGI; -use strict; -use base 'Catalyst::Engine'; +use Moose; +extends 'Catalyst::Engine'; -use CGI; -use URI; -use URI::http; - -__PACKAGE__->mk_accessors('cgi'); +has _header_buf => (is => 'rw', clearer => '_clear_header_buf', predicate => '_has_header_buf'); =head1 NAME @@ -30,227 +26,300 @@ appropriate engine module. =head1 DESCRIPTION -This is the Catalyst engine specialized for the CGI environment (using the -C and C modules). Normally Catalyst will select the -appropriate engine according to the environment that it detects, however you -can force Catalyst to use the CGI engine by specifying the following in your -application module: - - use Catalyst qw(-Engine=CGI); +This is the Catalyst engine specialized for the CGI environment. -The performance of this way of using Catalyst is not expected to be -useful in production applications, but it may be helpful for development. +=head1 PATH DECODING -=head1 METHODS +Most web server environments pass the requested path to the application using environment variables, +from which Catalyst has to reconstruct the request base (i.e. the top level path to / in the application, +exposed as C<< $c->request->base >>) and the request path below that base. -=over 4 +There are two methods of doing this, both of which have advantages and disadvantages. Which method is used +is determined by the C<< $c->config(use_request_uri_for_path) >> setting (which can either be true or false). -=item $c->cgi +=head2 use_request_uri_for_path => 0 -This config parameter contains the C object. +This is the default (and the) traditional method that Catalyst has used for determining the path information. +The path is synthesised from a combination of the C and C environment variables. +The allows the application to behave correctly when C is being used to redirect requests +into the application, as these variables are adjusted by mod_rewrite to take account for the redirect. -=back - -=head1 OVERLOADED METHODS +However this method has the major disadvantage that it is impossible to correctly decode some elements +of the path, as RFC 3875 says: "C<< Unlike a URI path, the PATH_INFO is not URL-encoded, and cannot +contain path-segment parameters. >>" This means PATH_INFO is B decoded, and therefore Catalyst +can't distinguish / vs %2F in paths (in addition to other encoded values). -This class overloads some methods from C. +=head2 use_request_uri_for_path => 1 -=over 4 +This method uses the C and C environment variables. As C is never +decoded, this means that applications using this mode can correctly handle URIs including the %2F character +(i.e. with C set to C in Apache). -=item $c->finalize_body +Given that this method of path resolution is provably more correct, it is recommended that you use +this unless you have a specific need to deploy your application in a non-standard environment, and you are +aware of the implications of not being able to handle encoded URI paths correctly. -Prints the response output to STDOUT. +However it also means that in a number of cases when the app isn't installed directly at a path, but instead +is having paths rewritten into it (e.g. as a .cgi/fcgi in a public_html directory, with mod_rewrite in a +.htaccess file, or when SSI is used to rewrite pages into the app, or when sub-paths of the app are exposed +at other URIs than that which the app is 'normally' based at with C), the resolution of +C<< $c->request->base >> will be incorrect. -=cut +=head1 OVERLOADED METHODS -sub finalize_body { - my $c = shift; - print $c->response->output; -} +This class overloads some methods from C. -=item $c->finalize_headers +=head2 $self->finalize_headers($c) =cut sub finalize_headers { - my $c = shift; + my ( $self, $c ) = @_; $c->response->header( Status => $c->response->status ); - print $c->response->headers->as_string("\015\012"); - print "\015\012"; + $self->_header_buf($c->response->headers->as_string("\015\012") . "\015\012"); } -=item $c->prepare_body +=head2 $self->prepare_connection($c) =cut -sub prepare_body { - my $c = shift; +sub prepare_connection { + my ( $self, $c ) = @_; + local (*ENV) = $self->env || \%ENV; - # XXX this is undocumented in CGI.pm. If Content-Type is not - # application/x-www-form-urlencoded or multipart/form-data - # CGI.pm will read STDIN into a param, POSTDATA. + my $request = $c->request; + $request->address( $ENV{REMOTE_ADDR} ); - $c->request->body( $c->cgi->param('POSTDATA') ); -} + PROXY_CHECK: + { + unless ( ref($c)->config->{using_frontend_proxy} ) { + last PROXY_CHECK if $ENV{REMOTE_ADDR} ne '127.0.0.1'; + last PROXY_CHECK if ref($c)->config->{ignore_frontend_proxy}; + } + last PROXY_CHECK unless $ENV{HTTP_X_FORWARDED_FOR}; + + # If we are running as a backend server, the user will always appear + # as 127.0.0.1. Select the most recent upstream IP (last in the list) + my ($ip) = $ENV{HTTP_X_FORWARDED_FOR} =~ /([^,\s]+)$/; + $request->address($ip); + if ( defined $ENV{HTTP_X_FORWARDED_PORT} ) { + $ENV{SERVER_PORT} = $ENV{HTTP_X_FORWARDED_PORT}; + } + } -=item $c->prepare_connection + $request->hostname( $ENV{REMOTE_HOST} ) if exists $ENV{REMOTE_HOST}; + $request->protocol( $ENV{SERVER_PROTOCOL} ); + $request->user( $ENV{REMOTE_USER} ); # XXX: Deprecated. See Catalyst::Request for removal information + $request->remote_user( $ENV{REMOTE_USER} ); + $request->method( $ENV{REQUEST_METHOD} ); -=cut + if ( $ENV{HTTPS} && uc( $ENV{HTTPS} ) eq 'ON' ) { + $request->secure(1); + } -sub prepare_connection { - my $c = shift; - $c->req->hostname( $ENV{REMOTE_HOST} ); - $c->req->address( $ENV{REMOTE_ADDR} ); + if ( $ENV{SERVER_PORT} == 443 ) { + $request->secure(1); + } + binmode(STDOUT); # Ensure we are sending bytes. } -=item $c->prepare_headers +=head2 $self->prepare_headers($c) =cut sub prepare_headers { - my $c = shift; + my ( $self, $c ) = @_; + local (*ENV) = $self->env || \%ENV; + my $headers = $c->request->headers; + # Read headers from %ENV + foreach my $header ( keys %ENV ) { + next unless $header =~ /^(?:HTTP|CONTENT|COOKIE)/i; + ( my $field = $header ) =~ s/^HTTPS?_//; + $headers->header( $field => $ENV{$header} ); + } +} - while ( my ( $header, $value ) = each %ENV ) { +=head2 $self->prepare_path($c) - next unless $header =~ /^(HTTP|CONTENT)/i; +=cut - ( my $field = $header ) =~ s/^HTTPS?_//; +# Please don't touch this method without adding tests in +# t/aggregate/unit_core_engine_cgi-prepare_path.t +sub prepare_path { + my ( $self, $c ) = @_; + local (*ENV) = $self->env || \%ENV; - $c->req->headers->header( $field => $value ); + my $scheme = $c->request->secure ? 'https' : 'http'; + my $host = $ENV{HTTP_HOST} || $ENV{SERVER_NAME}; + my $port = $ENV{SERVER_PORT} || 80; + + # fix up for IIS + if ($ENV{SERVER_SOFTWARE} && $ENV{SERVER_SOFTWARE} =~ m{IIS/[6-9]\.\d}) { + $ENV{PATH_INFO} =~ s/^\Q$ENV{SCRIPT_NAME}\E//; } - $c->req->method( $ENV{REQUEST_METHOD} || 'GET' ); -} + my $script_name = $ENV{SCRIPT_NAME}; + $script_name =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go if $script_name; -=item $c->prepare_parameters + my $base_path; + if ( exists $ENV{REDIRECT_URL} ) { + $base_path = $ENV{REDIRECT_URL}; + $base_path =~ s/\Q$ENV{PATH_INFO}\E$//; + } + else { + $base_path = $script_name || '/'; + } -=cut + # If we are running as a backend proxy, get the true hostname + PROXY_CHECK: + { + unless ( ref($c)->config->{using_frontend_proxy} ) { + last PROXY_CHECK if $host !~ /localhost|127.0.0.1/; + last PROXY_CHECK if ref($c)->config->{ignore_frontend_proxy}; + } + last PROXY_CHECK unless $ENV{HTTP_X_FORWARDED_HOST}; -sub prepare_parameters { - my $c = shift; - - my ( @params ); - - if ( $c->request->method eq 'POST' ) { - for my $param ( $c->cgi->url_param ) { - for my $value ( $c->cgi->url_param($param) ) { - push ( @params, $param, $value ); - } + $host = $ENV{HTTP_X_FORWARDED_HOST}; + + # backend could be on any port, so + # assume frontend is on the default port + $port = $c->request->secure ? 443 : 80; + if ( $ENV{HTTP_X_FORWARDED_PORT} ) { + $port = $ENV{HTTP_X_FORWARDED_PORT}; } } - for my $param ( $c->cgi->param ) { - for my $value ( $c->cgi->param($param) ) { - push ( @params, $param, $value ); + my $path_info = $ENV{PATH_INFO}; + if ($c->config->{use_request_uri_for_path}) { + # RFC 3875: "Unlike a URI path, the PATH_INFO is not URL-encoded, + # and cannot contain path-segment parameters." This means PATH_INFO + # is always decoded, and the script can't distinguish / vs %2F. + # See https://issues.apache.org/bugzilla/show_bug.cgi?id=35256 + # Here we try to resurrect the original encoded URI from REQUEST_URI. + if (my $req_uri = $ENV{REQUEST_URI}) { + if (defined $script_name) { + $req_uri =~ s/^\Q$script_name\E//; + } + $req_uri =~ s/\?.*$//; + $path_info = $req_uri if $req_uri; } } - - $c->request->param(\@params); -} -=item $c->prepare_path + # set the request URI + my $path = $base_path . ( $path_info || '' ); + $path =~ s{^/+}{}; -=cut + # Using URI directly is way too slow, so we construct the URLs manually + my $uri_class = "URI::$scheme"; -sub prepare_path { - my $c = shift; + # HTTP_HOST will include the port even if it's 80/443 + $host =~ s/:(?:80|443)$//; - my $base; - { - my $scheme = $ENV{HTTPS} ? 'https' : 'http'; - my $host = $ENV{HTTP_HOST} || $ENV{SERVER_NAME}; - my $port = $ENV{SERVER_PORT} || 80; - my $path = $ENV{SCRIPT_NAME} || '/'; - - $base = URI->new; - $base->scheme($scheme); - $base->host($host); - $base->port($port); - $base->path($path); - - $base = $base->canonical->as_string; + if ( $port !~ /^(?:80|443)$/ && $host !~ /:/ ) { + $host .= ":$port"; } - my $path = $ENV{PATH_INFO} || '/'; - $path =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/eg; - $path =~ s/^\///; + # Escape the path + $path =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; + $path =~ s/\?/%3F/g; # STUPID STUPID SPECIAL CASE + + my $query = $ENV{QUERY_STRING} ? '?' . $ENV{QUERY_STRING} : ''; + my $uri = $scheme . '://' . $host . '/' . $path . $query; + + $c->request->uri( bless(\$uri, $uri_class)->canonical ); + + # set the base URI + # base must end in a slash + $base_path .= '/' unless $base_path =~ m{/$}; - $c->req->base($base); - $c->req->path($path); + my $base_uri = $scheme . '://' . $host . $base_path; + + $c->request->base( bless \$base_uri, $uri_class ); } -=item $c->prepare_request +=head2 $self->prepare_query_parameters($c) =cut -sub prepare_request { - my ( $c, $cgi ) = @_; - $c->cgi( $cgi || CGI->new ); - $c->cgi->_reset_globals; -} +around prepare_query_parameters => sub { + my $orig = shift; + my ( $self, $c ) = @_; + local (*ENV) = $self->env || \%ENV; + + if ( $ENV{QUERY_STRING} ) { + $self->$orig( $c, $ENV{QUERY_STRING} ); + } +}; -=item $c->prepare_uploads +=head2 $self->prepare_request($c, (env => \%env)) =cut -sub prepare_uploads { - my $c = shift; +sub prepare_request { + my ( $self, $c, %args ) = @_; + + if ( $args{env} ) { + $self->env( $args{env} ); + } +} + +=head2 $self->prepare_write($c) - my @uploads; - - for my $param ( $c->cgi->param ) { - - my @values = $c->cgi->param($param); +Enable autoflush on the output handle for CGI-based engines. - next unless ref( $values[0] ); +=cut - for my $fh (@values) { +around prepare_write => sub { + *STDOUT->autoflush(1); + return shift->(@_); +}; - next unless my $size = ( stat $fh )[7]; +=head2 $self->write($c, $buffer) - my $info = $c->cgi->uploadInfo($fh); - my $tempname = $c->cgi->tmpFileName($fh); - my $type = $info->{'Content-Type'}; - my $disposition = $info->{'Content-Disposition'}; - my $filename = ( $disposition =~ / filename="([^;]*)"/ )[0]; +Writes the buffer to the client. - my $upload = Catalyst::Request::Upload->new( - filename => $filename, - size => $size, - tempname => $tempname, - type => $type - ); - - push( @uploads, $param, $upload ); - } +=cut + +around write => sub { + my $orig = shift; + my ( $self, $c, $buffer ) = @_; + + # Prepend the headers if they have not yet been sent + if ( $self->_has_header_buf ) { + $buffer = $self->_clear_header_buf . $buffer; } - - $c->request->upload(\@uploads); -} -=item $c->run + return $self->$orig( $c, $buffer ); +}; + +=head2 $self->read_chunk($c, $buffer, $length) =cut -sub run { shift->handler } +sub read_chunk { shift; shift; *STDIN->sysread(@_); } + +=head2 $self->run + +=cut -=back +sub run { shift; shift->handle_request( env => \%ENV ) } =head1 SEE ALSO -L. +L, L -=head1 AUTHOR +=head1 AUTHORS -Sebastian Riedel, C +Catalyst Contributors, see Catalyst.pm =head1 COPYRIGHT -This program is free software, you can redistribute it and/or modify it under +This library is free software. You can redistribute it and/or modify it under the same terms as Perl itself. =cut +no Moose; 1;