X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FCatalyst%2FEngine%2FCGI.pm;h=bd670dade534524ac6a9ac5f5420a278ed698673;hb=13985c0aedd0003d3a722c6a729ef19e998f891c;hp=3f2ef67d3d13bf700c782b18038a5488264d6786;hpb=e8b299689361f7f8538d0f7adf70fc86fecba8b2;p=catagits%2FCatalyst-Runtime.git diff --git a/lib/Catalyst/Engine/CGI.pm b/lib/Catalyst/Engine/CGI.pm index 3f2ef67..bd670da 100644 --- a/lib/Catalyst/Engine/CGI.pm +++ b/lib/Catalyst/Engine/CGI.pm @@ -28,6 +28,43 @@ appropriate engine module. This is the Catalyst engine specialized for the CGI environment. +=head1 PATH DECODING + +Most web server environments pass the requested path to the application using environment variables, +from which Catalyst has to reconstruct the request base (i.e. the top level path to / in the application, +exposed as C<< $c->request->base >>) and the request path below that base. + +There are two methods of doing this, both of which have advantages and disadvantages. Which method is used +is determined by the C<< $c->config(use_request_uri_for_path) >> setting (which can either be true or false). + +=head2 use_request_uri_for_path => 0 + +This is the default (and the traditional) method that Catalyst has used for determining the path information. +The path is synthesised from a combination of the C<PATH_INFO> and C<SCRIPT_NAME> environment variables. +This allows the application to behave correctly when C<mod_rewrite> is being used to redirect requests +into the application, as these variables are adjusted by mod_rewrite to take account of the redirect. + +However this method has the major disadvantage that it is impossible to correctly decode some elements +of the path, as RFC 3875 says: "C<< Unlike a URI path, the PATH_INFO is not URL-encoded, and cannot +contain path-segment parameters. >>" This means PATH_INFO is B<always> decoded, and therefore Catalyst +can't distinguish / vs %2F in paths (in addition to other encoded values). 
+ +=head2 use_request_uri_for_path => 1 + +This method uses the C<REQUEST_URI> and C<SCRIPT_NAME> environment variables. As C<REQUEST_URI> is never +decoded, this means that applications using this mode can correctly handle URIs including the %2F character +(i.e. with C<AllowEncodedSlashes> set to C<On> in Apache). + +Given that this method of path resolution is provably more correct, it is recommended that you use +this unless you have a specific need to deploy your application in a non-standard environment, and you are +aware of the implications of not being able to handle encoded URI paths correctly. + +However it also means that in a number of cases when the app isn't installed directly at a path, but instead +is having paths rewritten into it (e.g. as a .cgi/fcgi in a public_html directory, with mod_rewrite in a +.htaccess file, or when SSI is used to rewrite pages into the app, or when sub-paths of the app are exposed +at other URIs than that which the app is 'normally' based at with C<mod_rewrite>), the resolution of +C<< $c->request->base >> will be incorrect. + =head1 OVERLOADED METHODS This class overloads some methods from C<Catalyst::Engine>. @@ -117,6 +154,12 @@ sub prepare_path { my $scheme = $c->request->secure ? 'https' : 'http'; my $host = $ENV{HTTP_HOST} || $ENV{SERVER_NAME}; my $port = $ENV{SERVER_PORT} || 80; + + # fix up for IIS + if ($ENV{SERVER_SOFTWARE} && $ENV{SERVER_SOFTWARE} =~ m{IIS/[6-9]\.\d}) { + $ENV{PATH_INFO} =~ s/^\Q$ENV{SCRIPT_NAME}\E//; + } + my $script_name = $ENV{SCRIPT_NAME}; $script_name =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go if $script_name; @@ -148,33 +191,25 @@ sub prepare_path { } } - # RFC 3875: "Unlike a URI path, the PATH_INFO is not URL-encoded, - # and cannot contain path-segment parameters." This means PATH_INFO - # is always decoded, and the script can't distinguish / vs %2F. - # See https://issues.apache.org/bugzilla/show_bug.cgi?id=35256 - # Here we try to resurrect the original encoded URI from REQUEST_URI. 
my $path_info = $ENV{PATH_INFO}; -# if (my $req_uri = $ENV{REQUEST_URI}) { -# $req_uri =~ s/^\Q$base_path\E//; -# $req_uri =~ s/\?.*$//; -# if ($req_uri && $req_uri ne '/') { - # This means that REQUEST_URI needs information from PATH_INFO - # prepending to it to be useful, otherwise the sub path which is - # being redirected to becomes the app base address which is - # incorrect. -# my ($match) = $req_uri =~ m{^(/?[^/]+)}; -# my ($path_info_part) = $path_info =~ m|^(.*?\Q$match\E)|; -# substr($req_uri, 0, length($match), $path_info_part) -# if $path_info_part; -# $path_info = $req_uri; -# } -# } - $path_info =~ s/%2F/%252F/g; + if ($c->config->{use_request_uri_for_path}) { + # RFC 3875: "Unlike a URI path, the PATH_INFO is not URL-encoded, + # and cannot contain path-segment parameters." This means PATH_INFO + # is always decoded, and the script can't distinguish / vs %2F. + # See https://issues.apache.org/bugzilla/show_bug.cgi?id=35256 + # Here we try to resurrect the original encoded URI from REQUEST_URI. + if (my $req_uri = $ENV{REQUEST_URI}) { + if (defined $script_name) { + $req_uri =~ s/^\Q$script_name\E//; + } + $req_uri =~ s/\?.*$//; + $path_info = $req_uri if $req_uri; + } + } + # set the request URI - warn("Base path $base_path, path_info $path_info"); my $path = $base_path . ( $path_info || '' ); $path =~ s{^/+}{}; - $base_path .= '/' unless $base_path =~ m{/$}; # Using URI directly is way too slow, so we construct the URLs manually my $uri_class = "URI::$scheme"; @@ -226,7 +261,7 @@ sub prepare_request { my ( $self, $c, %args ) = @_; if ( $args{env} ) { - $self->env( $args{env} ); + $self->_set_env( $args{env} ); } } @@ -253,7 +288,10 @@ around write => sub { # Prepend the headers if they have not yet been sent if ( $self->_has_header_buf ) { - $buffer = $self->_clear_header_buf . $buffer; + my $headers = $self->_clear_header_buf; + + $buffer = defined $buffer + ? $headers . $buffer : $headers; } return $self->$orig( $c, $buffer );