Merge 'fix_request_uri' into 'trunk'
[catagits/Catalyst-Runtime.git] / lib / Catalyst / Engine / CGI.pm
CommitLineData
fc7ec1d9 1package Catalyst::Engine::CGI;
2
7fa2c9c1 3use Moose;
4extends 'Catalyst::Engine';
e2fd5b5f 5
02570318 6has _header_buf => (is => 'rw', clearer => '_clear_header_buf', predicate => '_has_header_buf');
84528885 7
fc7ec1d9 8=head1 NAME
9
10Catalyst::Engine::CGI - The CGI Engine
11
12=head1 SYNOPSIS
13
23f9d934 14A script using the Catalyst::Engine::CGI module might look like:
15
9a33da6a 16 #!/usr/bin/perl -w
17
18 use strict;
19 use lib '/path/to/MyApp/lib';
20 use MyApp;
21
22 MyApp->run;
23
23f9d934 24The application module (C<MyApp>) would use C<Catalyst>, which loads the
25appropriate engine module.
fc7ec1d9 26
27=head1 DESCRIPTION
28
fbcc39ad 29This is the Catalyst engine specialized for the CGI environment.
e2fd5b5f 30
a48f9753 31=head1 PATH DECODING
32
33Most web server environments pass the requested path to the application using environment variables,
34from which Catalyst has to reconstruct the request base (i.e. the top level path to / in the application,
35exposed as C<< $c->request->base >>) and the request path below that base.
36
37There are two methods of doing this, both of which have advantages and disadvantages. Which method is used
38is determined by the C<< $c->config(use_request_uri_for_path) >> setting (which can either be true or false).
39
40=head2 use_request_uri_for_path => 0
41
42This is the default (and the) traditional method that Catalyst has used for determining the path information.
43The path is synthesised from a combination of the C<PATH_INFO> and C<SCRIPT_NAME> environment variables.
44The allows the application to behave correctly when C<mod_rewrite> is being used to redirect requests
45into the application, as these variables are adjusted by mod_rewrite to take account for the redirect.
46
47However this method has the major disadvantage that it is impossible to correctly decode some elements
48of the path, as RFC 3875 says: "C<< Unlike a URI path, the PATH_INFO is not URL-encoded, and cannot
49contain path-segment parameters. >>" This means PATH_INFO is B<always> decoded, and therefore Catalyst
ff341e2c 50can't distinguish / vs %2F in paths (in addition to other encoded values).
a48f9753 51
52=head2 use_request_uri_for_path => 1
53
54This method uses the C<REQUEST_URI> and C<SCRIPT_NAME> environment variables. As C<REQUEST_URI> is never
55decoded, this means that applications using this mode can correctly handle URIs including the %2F character
56(i.e. with C<AllowEncodedSlashes> set to C<On> in Apache).
57
ff341e2c 58Given that this method of path resolution is provably more correct, it is recommended that you use
59this unless you have a specific need to deploy your application in a non-standard environment, and you are
60aware of the implications of not being able to handle encoded URI paths correctly.
61
a48f9753 62However it also means that in a number of cases when the app isn't installed directly at a path, but instead
63is having paths rewritten into it (e.g. as a .cgi/fcgi in a public_html directory, with mod_rewrite in a
64.htaccess file, or when SSI is used to rewrite pages into the app, or when sub-paths of the app are exposed
65at other URIs than that which the app is 'normally' based at with C<mod_rewrite>), the resolution of
66C<< $c->request->base >> will be incorrect.
67
23f9d934 68=head1 OVERLOADED METHODS
fc7ec1d9 69
fbcc39ad 70This class overloads some methods from C<Catalyst::Engine>.
fc7ec1d9 71
b5ecfcf0 72=head2 $self->finalize_headers($c)
fc7ec1d9 73
74=cut
75
fbcc39ad 76sub finalize_headers {
77 my ( $self, $c ) = @_;
06e1b616 78
fbcc39ad 79 $c->response->header( Status => $c->response->status );
06e1b616 80
02570318 81 $self->_header_buf($c->response->headers->as_string("\015\012") . "\015\012");
fc7ec1d9 82}
83
b5ecfcf0 84=head2 $self->prepare_connection($c)
fc7ec1d9 85
86=cut
87
fbcc39ad 88sub prepare_connection {
89 my ( $self, $c ) = @_;
b5ecfcf0 90 local (*ENV) = $self->env || \%ENV;
4f5ebacd 91
7fa2c9c1 92 my $request = $c->request;
93 $request->address( $ENV{REMOTE_ADDR} );
4f5ebacd 94
95 PROXY_CHECK:
fbcc39ad 96 {
df960201 97 unless ( ref($c)->config->{using_frontend_proxy} ) {
fbcc39ad 98 last PROXY_CHECK if $ENV{REMOTE_ADDR} ne '127.0.0.1';
df960201 99 last PROXY_CHECK if ref($c)->config->{ignore_frontend_proxy};
5b387dfc 100 }
fbcc39ad 101 last PROXY_CHECK unless $ENV{HTTP_X_FORWARDED_FOR};
4f5ebacd 102
fbcc39ad 103 # If we are running as a backend server, the user will always appear
104 # as 127.0.0.1. Select the most recent upstream IP (last in the list)
105 my ($ip) = $ENV{HTTP_X_FORWARDED_FOR} =~ /([^,\s]+)$/;
7fa2c9c1 106 $request->address($ip);
64d1c3cd 107 if ( defined $ENV{HTTP_X_FORWARDED_PORT} ) {
108 $ENV{SERVER_PORT} = $ENV{HTTP_X_FORWARDED_PORT};
109 }
fc7ec1d9 110 }
08cf3dd6 111
8fc0d39e 112 $request->hostname( $ENV{REMOTE_HOST} ) if exists $ENV{REMOTE_HOST};
7fa2c9c1 113 $request->protocol( $ENV{SERVER_PROTOCOL} );
8026359e 114 $request->user( $ENV{REMOTE_USER} ); # XXX: Deprecated. See Catalyst::Request for removal information
115 $request->remote_user( $ENV{REMOTE_USER} );
7fa2c9c1 116 $request->method( $ENV{REQUEST_METHOD} );
fbcc39ad 117
118 if ( $ENV{HTTPS} && uc( $ENV{HTTPS} ) eq 'ON' ) {
7fa2c9c1 119 $request->secure(1);
5b387dfc 120 }
bfde09a2 121
fbcc39ad 122 if ( $ENV{SERVER_PORT} == 443 ) {
7fa2c9c1 123 $request->secure(1);
fbcc39ad 124 }
afdffc63 125 binmode(STDOUT); # Ensure we are sending bytes.
fc7ec1d9 126}
127
b5ecfcf0 128=head2 $self->prepare_headers($c)
fc7ec1d9 129
130=cut
131
fbcc39ad 132sub prepare_headers {
133 my ( $self, $c ) = @_;
b5ecfcf0 134 local (*ENV) = $self->env || \%ENV;
7fa2c9c1 135 my $headers = $c->request->headers;
fbcc39ad 136 # Read headers from %ENV
c82ed742 137 foreach my $header ( keys %ENV ) {
fbcc39ad 138 next unless $header =~ /^(?:HTTP|CONTENT|COOKIE)/i;
139 ( my $field = $header ) =~ s/^HTTPS?_//;
7fa2c9c1 140 $headers->header( $field => $ENV{$header} );
fbcc39ad 141 }
142}
316bf0f0 143
b5ecfcf0 144=head2 $self->prepare_path($c)
316bf0f0 145
fbcc39ad 146=cut
316bf0f0 147
eb3abf96 148# Please don't touch this method without adding tests in
149# t/aggregate/unit_core_engine_cgi-prepare_path.t
fbcc39ad 150sub prepare_path {
151 my ( $self, $c ) = @_;
b5ecfcf0 152 local (*ENV) = $self->env || \%ENV;
fbcc39ad 153
4f5ebacd 154 my $scheme = $c->request->secure ? 'https' : 'http';
294f78ca 155 my $host = $ENV{HTTP_HOST} || $ENV{SERVER_NAME};
156 my $port = $ENV{SERVER_PORT} || 80;
8bf285ed 157 my $script_name = $ENV{SCRIPT_NAME};
158 $script_name =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go if $script_name;
159
0bcb98c7 160 my $base_path;
161 if ( exists $ENV{REDIRECT_URL} ) {
162 $base_path = $ENV{REDIRECT_URL};
4dfe7bde 163 $base_path =~ s/\Q$ENV{PATH_INFO}\E$//;
0bcb98c7 164 }
165 else {
8bf285ed 166 $base_path = $script_name || '/';
0bcb98c7 167 }
4f5ebacd 168
fbcc39ad 169 # If we are running as a backend proxy, get the true hostname
4f5ebacd 170 PROXY_CHECK:
fbcc39ad 171 {
df960201 172 unless ( ref($c)->config->{using_frontend_proxy} ) {
fbcc39ad 173 last PROXY_CHECK if $host !~ /localhost|127.0.0.1/;
df960201 174 last PROXY_CHECK if ref($c)->config->{ignore_frontend_proxy};
316bf0f0 175 }
fbcc39ad 176 last PROXY_CHECK unless $ENV{HTTP_X_FORWARDED_HOST};
316bf0f0 177
fbcc39ad 178 $host = $ENV{HTTP_X_FORWARDED_HOST};
4f5ebacd 179
180 # backend could be on any port, so
fbcc39ad 181 # assume frontend is on the default port
182 $port = $c->request->secure ? 443 : 80;
64d1c3cd 183 if ( $ENV{HTTP_X_FORWARDED_PORT} ) {
184 $port = $ENV{HTTP_X_FORWARDED_PORT};
185 }
316bf0f0 186 }
187
8bf285ed 188 my $path_info = $ENV{PATH_INFO};
17affec1 189 if ($c->config->{use_request_uri_for_path}) {
f238bbd9 190 # RFC 3875: "Unlike a URI path, the PATH_INFO is not URL-encoded,
191 # and cannot contain path-segment parameters." This means PATH_INFO
192 # is always decoded, and the script can't distinguish / vs %2F.
193 # See https://issues.apache.org/bugzilla/show_bug.cgi?id=35256
194 # Here we try to resurrect the original encoded URI from REQUEST_URI.
081ea922 195 if (my $req_uri = $ENV{REQUEST_URI}) {
f238bbd9 196 if (defined $script_name) {
197 $req_uri =~ s/^\Q$script_name\E//;
081ea922 198 }
f238bbd9 199 $req_uri =~ s/\?.*$//;
200 $path_info = $req_uri if $req_uri;
081ea922 201 }
202 }
203
8d3c800b 204 # set the request URI
8bf285ed 205 my $path = $base_path . ( $path_info || '' );
fbcc39ad 206 $path =~ s{^/+}{};
b0ad47c1 207
933ba403 208 # Using URI directly is way too slow, so we construct the URLs manually
209 my $uri_class = "URI::$scheme";
b0ad47c1 210
de19de2e 211 # HTTP_HOST will include the port even if it's 80/443
212 $host =~ s/:(?:80|443)$//;
b0ad47c1 213
de19de2e 214 if ( $port !~ /^(?:80|443)$/ && $host !~ /:/ ) {
933ba403 215 $host .= ":$port";
216 }
b0ad47c1 217
933ba403 218 # Escape the path
219 $path =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go;
220 $path =~ s/\?/%3F/g; # STUPID STUPID SPECIAL CASE
b0ad47c1 221
933ba403 222 my $query = $ENV{QUERY_STRING} ? '?' . $ENV{QUERY_STRING} : '';
223 my $uri = $scheme . '://' . $host . '/' . $path . $query;
224
ca78941c 225 $c->request->uri( bless(\$uri, $uri_class)->canonical );
933ba403 226
8d3c800b 227 # set the base URI
228 # base must end in a slash
229 $base_path .= '/' unless $base_path =~ m{/$};
b0ad47c1 230
8d3c800b 231 my $base_uri = $scheme . '://' . $host . $base_path;
232
67936fd7 233 $c->request->base( bless \$base_uri, $uri_class );
e7c0c583 234}
fc7ec1d9 235
b5ecfcf0 236=head2 $self->prepare_query_parameters($c)
fc7ec1d9 237
238=cut
239
4090e3bb 240around prepare_query_parameters => sub {
241 my $orig = shift;
fbcc39ad 242 my ( $self, $c ) = @_;
b5ecfcf0 243 local (*ENV) = $self->env || \%ENV;
244
f8109766 245 if ( $ENV{QUERY_STRING} ) {
4090e3bb 246 $self->$orig( $c, $ENV{QUERY_STRING} );
f8109766 247 }
4090e3bb 248};
e7c0c583 249
b5ecfcf0 250=head2 $self->prepare_request($c, (env => \%env))
84528885 251
252=cut
253
254sub prepare_request {
255 my ( $self, $c, %args ) = @_;
256
257 if ( $args{env} ) {
b5ecfcf0 258 $self->env( $args{env} );
84528885 259 }
260}
261
b5ecfcf0 262=head2 $self->prepare_write($c)
bfde09a2 263
fbcc39ad 264Enable autoflush on the output handle for CGI-based engines.
bfde09a2 265
fbcc39ad 266=cut
e7c0c583 267
4090e3bb 268around prepare_write => sub {
4f5ebacd 269 *STDOUT->autoflush(1);
4090e3bb 270 return shift->(@_);
271};
e7c0c583 272
e512dd24 273=head2 $self->write($c, $buffer)
274
275Writes the buffer to the client.
276
277=cut
278
4090e3bb 279around write => sub {
280 my $orig = shift;
e512dd24 281 my ( $self, $c, $buffer ) = @_;
282
283 # Prepend the headers if they have not yet been sent
02570318 284 if ( $self->_has_header_buf ) {
285 $buffer = $self->_clear_header_buf . $buffer;
e512dd24 286 }
7fa2c9c1 287
4090e3bb 288 return $self->$orig( $c, $buffer );
289};
e512dd24 290
b5ecfcf0 291=head2 $self->read_chunk($c, $buffer, $length)
e7c0c583 292
fbcc39ad 293=cut
e7c0c583 294
4f5ebacd 295sub read_chunk { shift; shift; *STDIN->sysread(@_); }
e7c0c583 296
b5ecfcf0 297=head2 $self->run
bfde09a2 298
fbcc39ad 299=cut
bfde09a2 300
0c913601 301sub run { shift; shift->handle_request( env => \%ENV ) }
fc7ec1d9 302
fc7ec1d9 303=head1 SEE ALSO
304
2f381252 305L<Catalyst>, L<Catalyst::Engine>
fbcc39ad 306
307=head1 AUTHORS
308
2f381252 309Catalyst Contributors, see Catalyst.pm
fc7ec1d9 310
311=head1 COPYRIGHT
312
536bee89 313This library is free software. You can redistribute it and/or modify it under
fc7ec1d9 314the same terms as Perl itself.
315
316=cut
4090e3bb 317no Moose;
fc7ec1d9 318
3191;