Add a pile of docs for the new use_request_uri_for_path setting
[catagits/Catalyst-Runtime.git] / lib / Catalyst / Engine / CGI.pm
CommitLineData
fc7ec1d9 1package Catalyst::Engine::CGI;
2
7fa2c9c1 3use Moose;
4extends 'Catalyst::Engine';
e2fd5b5f 5
02570318 6has _header_buf => (is => 'rw', clearer => '_clear_header_buf', predicate => '_has_header_buf');
84528885 7
fc7ec1d9 8=head1 NAME
9
10Catalyst::Engine::CGI - The CGI Engine
11
12=head1 SYNOPSIS
13
23f9d934 14A script using the Catalyst::Engine::CGI module might look like:
15
9a33da6a 16 #!/usr/bin/perl -w
17
18 use strict;
19 use lib '/path/to/MyApp/lib';
20 use MyApp;
21
22 MyApp->run;
23
23f9d934 24The application module (C<MyApp>) would use C<Catalyst>, which loads the
25appropriate engine module.
fc7ec1d9 26
27=head1 DESCRIPTION
28
fbcc39ad 29This is the Catalyst engine specialized for the CGI environment.
e2fd5b5f 30
a48f9753 31=head1 PATH DECODING
32
33Most web server environments pass the requested path to the application using environment variables,
34from which Catalyst has to reconstruct the request base (i.e. the top level path to / in the application,
35exposed as C<< $c->request->base >>) and the request path below that base.
36
37There are two methods of doing this, both of which have advantages and disadvantages. Which method is used
38is determined by the C<< $c->config(use_request_uri_for_path) >> setting (which can either be true or false).
39
40=head2 use_request_uri_for_path => 0
41
42This is the default (and the) traditional method that Catalyst has used for determining the path information.
43The path is synthesised from a combination of the C<PATH_INFO> and C<SCRIPT_NAME> environment variables.
44The allows the application to behave correctly when C<mod_rewrite> is being used to redirect requests
45into the application, as these variables are adjusted by mod_rewrite to take account for the redirect.
46
47However this method has the major disadvantage that it is impossible to correctly decode some elements
48of the path, as RFC 3875 says: "C<< Unlike a URI path, the PATH_INFO is not URL-encoded, and cannot
49contain path-segment parameters. >>" This means PATH_INFO is B<always> decoded, and therefore Catalyst
50can't distinguish / vs %2F in paths.
51
52=head2 use_request_uri_for_path => 1
53
54This method uses the C<REQUEST_URI> and C<SCRIPT_NAME> environment variables. As C<REQUEST_URI> is never
55decoded, this means that applications using this mode can correctly handle URIs including the %2F character
56(i.e. with C<AllowEncodedSlashes> set to C<On> in Apache).
57
58However it also means that in a number of cases when the app isn't installed directly at a path, but instead
59is having paths rewritten into it (e.g. as a .cgi/fcgi in a public_html directory, with mod_rewrite in a
60.htaccess file, or when SSI is used to rewrite pages into the app, or when sub-paths of the app are exposed
61at other URIs than that which the app is 'normally' based at with C<mod_rewrite>), the resolution of
62C<< $c->request->base >> will be incorrect.
63
23f9d934 64=head1 OVERLOADED METHODS
fc7ec1d9 65
fbcc39ad 66This class overloads some methods from C<Catalyst::Engine>.
fc7ec1d9 67
b5ecfcf0 68=head2 $self->finalize_headers($c)
fc7ec1d9 69
70=cut
71
fbcc39ad 72sub finalize_headers {
73 my ( $self, $c ) = @_;
06e1b616 74
fbcc39ad 75 $c->response->header( Status => $c->response->status );
06e1b616 76
02570318 77 $self->_header_buf($c->response->headers->as_string("\015\012") . "\015\012");
fc7ec1d9 78}
79
b5ecfcf0 80=head2 $self->prepare_connection($c)
fc7ec1d9 81
82=cut
83
fbcc39ad 84sub prepare_connection {
85 my ( $self, $c ) = @_;
b5ecfcf0 86 local (*ENV) = $self->env || \%ENV;
4f5ebacd 87
7fa2c9c1 88 my $request = $c->request;
89 $request->address( $ENV{REMOTE_ADDR} );
4f5ebacd 90
91 PROXY_CHECK:
fbcc39ad 92 {
df960201 93 unless ( ref($c)->config->{using_frontend_proxy} ) {
fbcc39ad 94 last PROXY_CHECK if $ENV{REMOTE_ADDR} ne '127.0.0.1';
df960201 95 last PROXY_CHECK if ref($c)->config->{ignore_frontend_proxy};
5b387dfc 96 }
fbcc39ad 97 last PROXY_CHECK unless $ENV{HTTP_X_FORWARDED_FOR};
4f5ebacd 98
fbcc39ad 99 # If we are running as a backend server, the user will always appear
100 # as 127.0.0.1. Select the most recent upstream IP (last in the list)
101 my ($ip) = $ENV{HTTP_X_FORWARDED_FOR} =~ /([^,\s]+)$/;
7fa2c9c1 102 $request->address($ip);
64d1c3cd 103 if ( defined $ENV{HTTP_X_FORWARDED_PORT} ) {
104 $ENV{SERVER_PORT} = $ENV{HTTP_X_FORWARDED_PORT};
105 }
fc7ec1d9 106 }
08cf3dd6 107
8fc0d39e 108 $request->hostname( $ENV{REMOTE_HOST} ) if exists $ENV{REMOTE_HOST};
7fa2c9c1 109 $request->protocol( $ENV{SERVER_PROTOCOL} );
8026359e 110 $request->user( $ENV{REMOTE_USER} ); # XXX: Deprecated. See Catalyst::Request for removal information
111 $request->remote_user( $ENV{REMOTE_USER} );
7fa2c9c1 112 $request->method( $ENV{REQUEST_METHOD} );
fbcc39ad 113
114 if ( $ENV{HTTPS} && uc( $ENV{HTTPS} ) eq 'ON' ) {
7fa2c9c1 115 $request->secure(1);
5b387dfc 116 }
bfde09a2 117
fbcc39ad 118 if ( $ENV{SERVER_PORT} == 443 ) {
7fa2c9c1 119 $request->secure(1);
fbcc39ad 120 }
afdffc63 121 binmode(STDOUT); # Ensure we are sending bytes.
fc7ec1d9 122}
123
b5ecfcf0 124=head2 $self->prepare_headers($c)
fc7ec1d9 125
126=cut
127
fbcc39ad 128sub prepare_headers {
129 my ( $self, $c ) = @_;
b5ecfcf0 130 local (*ENV) = $self->env || \%ENV;
7fa2c9c1 131 my $headers = $c->request->headers;
fbcc39ad 132 # Read headers from %ENV
c82ed742 133 foreach my $header ( keys %ENV ) {
fbcc39ad 134 next unless $header =~ /^(?:HTTP|CONTENT|COOKIE)/i;
135 ( my $field = $header ) =~ s/^HTTPS?_//;
7fa2c9c1 136 $headers->header( $field => $ENV{$header} );
fbcc39ad 137 }
138}
316bf0f0 139
b5ecfcf0 140=head2 $self->prepare_path($c)
316bf0f0 141
fbcc39ad 142=cut
316bf0f0 143
eb3abf96 144# Please don't touch this method without adding tests in
145# t/aggregate/unit_core_engine_cgi-prepare_path.t
fbcc39ad 146sub prepare_path {
147 my ( $self, $c ) = @_;
b5ecfcf0 148 local (*ENV) = $self->env || \%ENV;
fbcc39ad 149
4f5ebacd 150 my $scheme = $c->request->secure ? 'https' : 'http';
294f78ca 151 my $host = $ENV{HTTP_HOST} || $ENV{SERVER_NAME};
152 my $port = $ENV{SERVER_PORT} || 80;
8bf285ed 153 my $script_name = $ENV{SCRIPT_NAME};
154 $script_name =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go if $script_name;
155
0bcb98c7 156 my $base_path;
157 if ( exists $ENV{REDIRECT_URL} ) {
158 $base_path = $ENV{REDIRECT_URL};
4dfe7bde 159 $base_path =~ s/\Q$ENV{PATH_INFO}\E$//;
0bcb98c7 160 }
161 else {
8bf285ed 162 $base_path = $script_name || '/';
0bcb98c7 163 }
4f5ebacd 164
fbcc39ad 165 # If we are running as a backend proxy, get the true hostname
4f5ebacd 166 PROXY_CHECK:
fbcc39ad 167 {
df960201 168 unless ( ref($c)->config->{using_frontend_proxy} ) {
fbcc39ad 169 last PROXY_CHECK if $host !~ /localhost|127.0.0.1/;
df960201 170 last PROXY_CHECK if ref($c)->config->{ignore_frontend_proxy};
316bf0f0 171 }
fbcc39ad 172 last PROXY_CHECK unless $ENV{HTTP_X_FORWARDED_HOST};
316bf0f0 173
fbcc39ad 174 $host = $ENV{HTTP_X_FORWARDED_HOST};
4f5ebacd 175
176 # backend could be on any port, so
fbcc39ad 177 # assume frontend is on the default port
178 $port = $c->request->secure ? 443 : 80;
64d1c3cd 179 if ( $ENV{HTTP_X_FORWARDED_PORT} ) {
180 $port = $ENV{HTTP_X_FORWARDED_PORT};
181 }
316bf0f0 182 }
183
8bf285ed 184 my $path_info = $ENV{PATH_INFO};
17affec1 185 if ($c->config->{use_request_uri_for_path}) {
f238bbd9 186 # RFC 3875: "Unlike a URI path, the PATH_INFO is not URL-encoded,
187 # and cannot contain path-segment parameters." This means PATH_INFO
188 # is always decoded, and the script can't distinguish / vs %2F.
189 # See https://issues.apache.org/bugzilla/show_bug.cgi?id=35256
190 # Here we try to resurrect the original encoded URI from REQUEST_URI.
081ea922 191 if (my $req_uri = $ENV{REQUEST_URI}) {
f238bbd9 192 if (defined $script_name) {
193 $req_uri =~ s/^\Q$script_name\E//;
081ea922 194 }
f238bbd9 195 $req_uri =~ s/\?.*$//;
196 $path_info = $req_uri if $req_uri;
081ea922 197 }
198 }
199
8d3c800b 200 # set the request URI
8bf285ed 201 my $path = $base_path . ( $path_info || '' );
fbcc39ad 202 $path =~ s{^/+}{};
b0ad47c1 203
933ba403 204 # Using URI directly is way too slow, so we construct the URLs manually
205 my $uri_class = "URI::$scheme";
b0ad47c1 206
de19de2e 207 # HTTP_HOST will include the port even if it's 80/443
208 $host =~ s/:(?:80|443)$//;
b0ad47c1 209
de19de2e 210 if ( $port !~ /^(?:80|443)$/ && $host !~ /:/ ) {
933ba403 211 $host .= ":$port";
212 }
b0ad47c1 213
933ba403 214 # Escape the path
215 $path =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go;
216 $path =~ s/\?/%3F/g; # STUPID STUPID SPECIAL CASE
b0ad47c1 217
933ba403 218 my $query = $ENV{QUERY_STRING} ? '?' . $ENV{QUERY_STRING} : '';
219 my $uri = $scheme . '://' . $host . '/' . $path . $query;
220
ca78941c 221 $c->request->uri( bless(\$uri, $uri_class)->canonical );
933ba403 222
8d3c800b 223 # set the base URI
224 # base must end in a slash
225 $base_path .= '/' unless $base_path =~ m{/$};
b0ad47c1 226
8d3c800b 227 my $base_uri = $scheme . '://' . $host . $base_path;
228
67936fd7 229 $c->request->base( bless \$base_uri, $uri_class );
e7c0c583 230}
fc7ec1d9 231
b5ecfcf0 232=head2 $self->prepare_query_parameters($c)
fc7ec1d9 233
234=cut
235
4090e3bb 236around prepare_query_parameters => sub {
237 my $orig = shift;
fbcc39ad 238 my ( $self, $c ) = @_;
b5ecfcf0 239 local (*ENV) = $self->env || \%ENV;
240
f8109766 241 if ( $ENV{QUERY_STRING} ) {
4090e3bb 242 $self->$orig( $c, $ENV{QUERY_STRING} );
f8109766 243 }
4090e3bb 244};
e7c0c583 245
b5ecfcf0 246=head2 $self->prepare_request($c, (env => \%env))
84528885 247
248=cut
249
250sub prepare_request {
251 my ( $self, $c, %args ) = @_;
252
253 if ( $args{env} ) {
b5ecfcf0 254 $self->env( $args{env} );
84528885 255 }
256}
257
b5ecfcf0 258=head2 $self->prepare_write($c)
bfde09a2 259
fbcc39ad 260Enable autoflush on the output handle for CGI-based engines.
bfde09a2 261
fbcc39ad 262=cut
e7c0c583 263
4090e3bb 264around prepare_write => sub {
4f5ebacd 265 *STDOUT->autoflush(1);
4090e3bb 266 return shift->(@_);
267};
e7c0c583 268
e512dd24 269=head2 $self->write($c, $buffer)
270
271Writes the buffer to the client.
272
273=cut
274
4090e3bb 275around write => sub {
276 my $orig = shift;
e512dd24 277 my ( $self, $c, $buffer ) = @_;
278
279 # Prepend the headers if they have not yet been sent
02570318 280 if ( $self->_has_header_buf ) {
281 $buffer = $self->_clear_header_buf . $buffer;
e512dd24 282 }
7fa2c9c1 283
4090e3bb 284 return $self->$orig( $c, $buffer );
285};
e512dd24 286
b5ecfcf0 287=head2 $self->read_chunk($c, $buffer, $length)
e7c0c583 288
fbcc39ad 289=cut
e7c0c583 290
4f5ebacd 291sub read_chunk { shift; shift; *STDIN->sysread(@_); }
e7c0c583 292
b5ecfcf0 293=head2 $self->run
bfde09a2 294
fbcc39ad 295=cut
bfde09a2 296
0c913601 297sub run { shift; shift->handle_request( env => \%ENV ) }
fc7ec1d9 298
fc7ec1d9 299=head1 SEE ALSO
300
2f381252 301L<Catalyst>, L<Catalyst::Engine>
fbcc39ad 302
303=head1 AUTHORS
304
2f381252 305Catalyst Contributors, see Catalyst.pm
fc7ec1d9 306
307=head1 COPYRIGHT
308
536bee89 309This library is free software. You can redistribute it and/or modify it under
fc7ec1d9 310the same terms as Perl itself.
311
312=cut
4090e3bb 313no Moose;
fc7ec1d9 314
3151;