This is the Catalyst engine specialized for the CGI environment.
+ =head1 PATH DECODING
+
+ Most web server environments pass the requested path to the application using environment variables,
+ from which Catalyst has to reconstruct the request base (i.e. the top level path to / in the application,
+ exposed as C<< $c->request->base >>) and the request path below that base.
+
+ There are two methods of doing this, both of which have advantages and disadvantages. Which method is used
+ is determined by the C<< $c->config(use_request_uri_for_path) >> setting (which can either be true or false).
+
+ =head2 use_request_uri_for_path => 0
+
+ This is the default (and the traditional) method that Catalyst has used for determining the path information.
+ The path is synthesised from a combination of the C<PATH_INFO> and C<SCRIPT_NAME> environment variables.
+ This allows the application to behave correctly when C<mod_rewrite> is being used to redirect requests
+ into the application, as these variables are adjusted by mod_rewrite to take account of the redirect.
+
+ However this method has the major disadvantage that it is impossible to correctly decode some elements
+ of the path, as RFC 3875 says: "C<< Unlike a URI path, the PATH_INFO is not URL-encoded, and cannot
+ contain path-segment parameters. >>" This means PATH_INFO is B<always> decoded, and therefore Catalyst
+ can't distinguish / vs %2F in paths (in addition to other encoded values).
+
+ =head2 use_request_uri_for_path => 1
+
+ This method uses the C<REQUEST_URI> and C<SCRIPT_NAME> environment variables. As C<REQUEST_URI> is never
+ decoded, this means that applications using this mode can correctly handle URIs including the %2F character
+ (i.e. with C<AllowEncodedSlashes> set to C<On> in Apache).
+
+ Given that this method of path resolution is provably more correct, it is recommended that you use
+ this unless you have a specific need to deploy your application in a non-standard environment, and you are
+ aware of the implications of not being able to handle encoded URI paths correctly.
+
+ However, using C<REQUEST_URI> also means that in a number of cases when the app isn't installed directly at a path, but instead
+ is having paths rewritten into it (e.g. as a .cgi/fcgi in a public_html directory, with mod_rewrite in a
+ .htaccess file, or when SSI is used to rewrite pages into the app, or when sub-paths of the app are exposed
+ at other URIs than that which the app is 'normally' based at with C<mod_rewrite>), the resolution of
+ C<< $c->request->base >> will be incorrect.
+
=head1 OVERLOADED METHODS
This class overloads some methods from C<Catalyst::Engine>.
my $scheme = $c->request->secure ? 'https' : 'http';
my $host = $ENV{HTTP_HOST} || $ENV{SERVER_NAME};
my $port = $ENV{SERVER_PORT} || 80;
+
+ # fix up for IIS, which includes SCRIPT_NAME at the start of PATH_INFO
+ if ($ENV{SERVER_SOFTWARE} && $ENV{SERVER_SOFTWARE} =~ m{IIS/[6-9]\.\d}) {
+ $ENV{PATH_INFO} =~ s/^\Q$ENV{SCRIPT_NAME}\E//;
+ }
+
my $script_name = $ENV{SCRIPT_NAME};
$script_name =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go if $script_name;
my $base_path;
if ( exists $ENV{REDIRECT_URL} ) {
$base_path = $ENV{REDIRECT_URL};
- $base_path =~ s/$ENV{PATH_INFO}$//;
+ $base_path =~ s/\Q$ENV{PATH_INFO}\E$//;
}
else {
$base_path = $script_name || '/';
}
}
- # RFC 3875: "Unlike a URI path, the PATH_INFO is not URL-encoded,
- # and cannot contain path-segment parameters." This means PATH_INFO
- # is always decoded, and the script can't distinguish / vs %2F.
- # See https://issues.apache.org/bugzilla/show_bug.cgi?id=35256
- # Here we try to resurrect the original encoded URI from REQUEST_URI.
my $path_info = $ENV{PATH_INFO};
- if (my $req_uri = $ENV{REQUEST_URI}) {
- $req_uri =~ s/^\Q$base_path\E//;
- $req_uri =~ s/\?.*$//;
- if ($req_uri) {
- # Note that if REQUEST_URI doesn't start with a /, then the user
- # is probably using mod_rewrite or something to rewrite requests
- # into a sub-path of their application..
- # This means that REQUEST_URI needs information from PATH_INFO
- # prepending to it to be useful, otherwise the sub path which is
- # being redirected to becomes the app base address which is
- # incorrect.
- if (substr($req_uri, 0, 1) ne '/') {
- my ($match) = $req_uri =~ m|^([^/]+)|;
- my ($path_info_part) = $path_info =~ m|^(.*?\Q$match\E)|;
- substr($req_uri, 0, length($match), $path_info_part)
- if $path_info_part;
+ if ($c->config->{use_request_uri_for_path}) {
+ # RFC 3875: "Unlike a URI path, the PATH_INFO is not URL-encoded,
+ # and cannot contain path-segment parameters." This means PATH_INFO
+ # is always decoded, and the script can't distinguish / vs %2F.
+ # See https://issues.apache.org/bugzilla/show_bug.cgi?id=35256
+ # Here we try to resurrect the original encoded URI from REQUEST_URI.
+ if (my $req_uri = $ENV{REQUEST_URI}) {
+ if (defined $script_name) {
+ $req_uri =~ s/^\Q$script_name\E//;
}
- $path_info = $req_uri;
+ $req_uri =~ s/\?.*$//;
+ $path_info = $req_uri if $req_uri;
}
}
# mod_rewrite to app root for non / based app
{
- my $r = get_req (
+ my $r = get_req (0,
REDIRECT_URL => '/comics/',
SCRIPT_NAME => '/comics/dispatch.cgi',
REQUEST_URI => '/comics/',
);
- is ''.$r->uri, 'http://www.foo.com/comics/';
- is ''.$r->base, 'http://www.foo.com/comics/';
+ is ''.$r->uri, 'http://www.foo.com/comics/', 'uri is correct';
+ is ''.$r->base, 'http://www.foo.com/comics/', 'base is correct';
}
# mod_rewrite to sub path under app root for non / based app
{
- my $r = get_req (
+ my $r = get_req (0,
PATH_INFO => '/foo/bar.gif',
REDIRECT_URL => '/comics/foo/bar.gif',
SCRIPT_NAME => '/comics/dispatch.cgi',
# Standard CGI hit for non / based app
{
- my $r = get_req (
+ my $r = get_req (0,
PATH_INFO => '/static/css/blueprint/screen.css',
SCRIPT_NAME => '/~bobtfish/Gitalist/script/gitalist.cgi',
REQUEST_URI => '/~bobtfish/Gitalist/script/gitalist.cgi/static/css/blueprint/screen.css',
}
# / %2F %252F escaping case.
{
- my $r = get_req (
+ my $r = get_req (1,
PATH_INFO => '/%2F/%2F',
SCRIPT_NAME => '/~bobtfish/Gitalist/script/gitalist.cgi',
REQUEST_URI => '/~bobtfish/Gitalist/script/gitalist.cgi/%252F/%252F',
);
- is ''.$r->uri, 'http://www.foo.com/~bobtfish/Gitalist/script/gitalist.cgi/%252F/%252F';
- is ''.$r->base, 'http://www.foo.com/~bobtfish/Gitalist/script/gitalist.cgi/';
+ is ''.$r->uri, 'http://www.foo.com/~bobtfish/Gitalist/script/gitalist.cgi/%252F/%252F', 'uri correct';
+ is ''.$r->base, 'http://www.foo.com/~bobtfish/Gitalist/script/gitalist.cgi/', 'base correct';
}
# Using rewrite rules to ask for a sub-path in your app.
# E.g. RewriteRule ^(.*)$ /path/to/fastcgi/domainprofi.fcgi/iframeredirect$1 [L,NS]
{
- my $r = get_req (
+ my $r = get_req (0,
PATH_INFO => '/iframeredirect/info',
SCRIPT_NAME => '',
REQUEST_URI => '/info',
# nginx example from espent with path /"foo"
{
- my $r = get_req (
+ my $r = get_req (0,
PATH_INFO => '"foo"',
SCRIPT_NAME => '/',
REQUEST_URI => '/%22foo%22',
# nginx example from espent with path /"foo" and the app based at /oslobilder
{
- my $r = get_req (
+ my $r = get_req (1,
PATH_INFO => 'oslobilder/"foo"',
SCRIPT_NAME => '/oslobilder/',
REQUEST_URI => '/oslobilder/%22foo%22',
);
- is ''.$r->path, '%22foo%22';
- is ''.$r->uri, 'http://www.foo.com/oslobilder/%22foo%22';
- is ''.$r->base, 'http://www.foo.com/oslobilder/';
+ is ''.$r->path, '%22foo%22', 'path correct';
+ is ''.$r->uri, 'http://www.foo.com/oslobilder/%22foo%22', 'uri correct';
+ is ''.$r->base, 'http://www.foo.com/oslobilder/', 'base correct';
}
+# CGI hit on IIS for non / based app
+{
- my $r = get_req (
++ my $r = get_req(0,
+ SERVER_SOFTWARE => 'Microsoft-IIS/6.0',
+ PATH_INFO => '/bobtfish/Gitalist/script/gitalist.cgi/static/css/blueprint/screen.css',
+ SCRIPT_NAME => '/bobtfish/Gitalist/script/gitalist.cgi',
+ PATH_TRANSLATED =>
+'C:\\Inetpub\\vhosts\\foo.com\\httpdocs\\bobtfish\\Gitalist\\script\\gitalist.cgi\\static\\css\\blueprint\\screen.css',
+ );
+ is ''.$r->uri, 'http://www.foo.com/bobtfish/Gitalist/script/gitalist.cgi/static/css/blueprint/screen.css';
+ is ''.$r->base, 'http://www.foo.com/bobtfish/Gitalist/script/gitalist.cgi/';
+}
+
+ {
+ my $r = get_req (0,
+ PATH_INFO => '/auth/login',
+ SCRIPT_NAME => '/tx',
+ REQUEST_URI => '/login',
+ );
+ is ''.$r->path, 'auth/login', 'path correct';
+ is ''.$r->uri, 'http://www.foo.com/tx/auth/login', 'uri correct';
+ is ''.$r->base, 'http://www.foo.com/tx/', 'base correct';
+ }
+
+ # test req->base and c->uri_for work correctly after an internally redirected request
+ # (i.e. REDIRECT_URL set) when the PATH_INFO contains regex metacharacters
+ {
+ my $path = '/engine/request/uri/Rx(here)';
+ my $r = get_req (0,
+ SCRIPT_NAME => '/',
+ PATH_INFO => $path,
+ REQUEST_URI => $path,
+ REDIRECT_URL => $path,
+ );
+
+ is $r->path, 'engine/request/uri/Rx(here)', 'URI contains correct path';
+ is $r->base, 'http://www.foo.com/', 'Base is correct';
+ }
-
# FIXME - Test proxy logic
# - Test query string
# - Test non standard port numbers
# - Test scheme (secure request on port 80)
sub get_req {
+ my $use_request_uri_for_path = shift;
+
my %template = (
HTTP_HOST => 'www.foo.com',
PATH_INFO => '/',
local %ENV = (%template, @_);
my $i = TestApp->new;
+ $i->setup_finished(0);
+ $i->config(use_request_uri_for_path => $use_request_uri_for_path);
+ $i->setup_finished(1);
$i->engine(Catalyst::Engine::CGI->new);
$i->engine->prepare_path($i);
return $i->req;