From: Andy Grundman Date: Thu, 29 Mar 2007 02:58:01 +0000 (+0000) Subject: Refactored prepare_path and prepare_query_parameters to avoid the use of URI.pm and... X-Git-Tag: 5.7099_04~206 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=catagits%2FCatalyst-Runtime.git;a=commitdiff_plain;h=933ba40380c86f9642bcfbee446a04d48efe4544 Refactored prepare_path and prepare_query_parameters to avoid the use of URI.pm and improve performance. Added ->req->keywords method. --- diff --git a/Changes b/Changes index 91bcbf4..7856ab0 100644 --- a/Changes +++ b/Changes @@ -1,13 +1,18 @@ This file documents the revision history for Perl extension Catalyst. 5.7008 XXXX-XX-XX - - Sending SIGHUP to the dev server will now cause it to restart. - - Allow "0" for a path in uri_for. - - Performance improvements to uri_for by inlining encoding. + - Many performance improvements by not using URI.pm: + * $c->uri_for (approx. 8x faster) + * $c->engine->prepare_path (approx. 27x faster) + * $c->engine->prepare_query_parameters (approx. 5x faster) - Updated HTTP::Body dependency to 0.9 which fixes the following issues: * Handle when IE sometimes sends an extra CRLF after the POST body. * Empty fields in multipart/form-data POSTs are no longer ignored. * Uploaded files with the name "0" are no longer ignored. + - Added $c->request->keywords for getting the keywords (a query string with + no parameters). + - Sending SIGHUP to the dev server will now cause it to restart. + - Allow "0" for a path in uri_for. 5.7007 2007-03-13 14:18:00 - Performance and stability improvements to the built-in HTTP server. diff --git a/lib/Catalyst.pm b/lib/Catalyst.pm index ba30a67..40791cd 100644 --- a/lib/Catalyst.pm +++ b/lib/Catalyst.pm @@ -19,6 +19,8 @@ use Path::Class::Dir (); use Path::Class::File (); use Time::HiRes qw/gettimeofday tv_interval/; use URI (); +use URI::http; +use URI::https; use Scalar::Util qw/weaken blessed/; use Tree::Simple qw/use_weak_refs/; use Tree::Simple::Visitor::FindByUID; diff --git a/lib/Catalyst/Engine.pm b/lib/Catalyst/Engine.pm index 6e8c74d..564cb42 100644 --- a/lib/Catalyst/Engine.pm +++ b/lib/Catalyst/Engine.pm @@ -7,6 +7,7 @@ use Data::Dump qw/dump/; use HTML::Entities; use HTTP::Body; use HTTP::Headers; +use URI::Escape (); use URI::QueryParam; use Scalar::Util (); @@ -446,16 +447,42 @@ process the query string and extract query parameters. sub prepare_query_parameters { my ( $self, $c, $query_string ) = @_; + + # Check for keywords (no = signs) + if ( index( $query_string, '=' ) < 0 ) { + $c->request->keywords( $self->unescape_uri($query_string) ); + return; + } + + my %query; # replace semi-colons $query_string =~ s/;/&/g; + + my @params = split /&/, $query_string; - my $u = URI->new( '', 'http' ); - $u->query($query_string); - for my $key ( $u->query_param ) { - my @vals = $u->query_param($key); - $c->request->query_parameters->{$key} = @vals > 1 ? [@vals] : $vals[0]; + for my $item ( @params ) { + + my ($param, $value) + = map { $self->unescape_uri($_) } + split( /=/, $item ); + + $param = $self->unescape_uri($item) unless defined $param; + + if ( exists $query{$param} ) { + if ( ref $query{$param} ) { + push @{ $query{$param} }, $value; + } + else { + $query{$param} = [ $query{$param}, $value ]; + } + } + else { + $query{$param} = $value; + } } + + $c->request->query_parameters( \%query ); } =head2 $self->prepare_read($c) @@ -607,6 +634,21 @@ sub write { print STDOUT $buffer; } +=head2 $self->unescape_uri($uri) + +Unescapes a given URI using the most efficient method available. Engines such +as Apache may implement this using Apache's C-based modules, for example. + +=cut + +sub unescape_uri { + my $self = shift; + + my $e = URI::Escape::uri_unescape(@_); + $e =~ s/\+/ /g; + + return $e; +} =head2 $self->finalize_output diff --git a/lib/Catalyst/Engine/CGI.pm b/lib/Catalyst/Engine/CGI.pm index a86023a..2ee2e01 100644 --- a/lib/Catalyst/Engine/CGI.pm +++ b/lib/Catalyst/Engine/CGI.pm @@ -3,9 +3,7 @@ package Catalyst::Engine::CGI; use strict; use base 'Catalyst::Engine'; use NEXT; -use URI; -my $uri_proto=URI->new(); __PACKAGE__->mk_accessors('env'); =head1 NAME @@ -137,26 +135,36 @@ sub prepare_path { $port = $c->request->secure ? 443 : 80; } - # set the base URI - # base must end in a slash - $base_path .= '/' unless ( $base_path =~ /\/$/ ); - + # set the request URI my $path = $base_path . ( $ENV{PATH_INFO} || '' ); $path =~ s{^/+}{}; + + # Using URI directly is way too slow, so we construct the URLs manually + my $uri_class = "URI::$scheme"; + + # HTTP_HOST will include the port even if it's 80 + $host =~ s/:80$//; + + if ( $port != 80 && $host !~ /:/ ) { + $host .= ":$port"; + } + + # Escape the path + $path =~ s/([^$URI::uric])/$URI::Escape::escapes{$1}/go; + $path =~ s/\?/%3F/g; # STUPID STUPID SPECIAL CASE + + my $query = $ENV{QUERY_STRING} ? '?' . $ENV{QUERY_STRING} : ''; + my $uri = $scheme . '://' . $host . '/' . $path . $query; + + $c->request->uri( bless \$uri, $uri_class ); + + # set the base URI + # base must end in a slash + $base_path .= '/' unless $base_path =~ m{/$}; + + my $base_uri = $scheme . '://' . $host . $base_path; - my $uri = $uri_proto->clone; - $uri->scheme($scheme); - $uri->host($host); - $uri->port($port); - $uri->path($path); - $uri->query( $ENV{QUERY_STRING} ) if $ENV{QUERY_STRING}; - - # sanitize the URI - $uri = $uri->canonical; - $c->request->uri($uri); - my $base = $uri->clone; - $base->path_query($base_path); - $c->request->base($base); + $c->request->base( bless \$base_uri, $uri_class ); } =head2 $self->prepare_query_parameters($c) diff --git a/lib/Catalyst/Request.pm b/lib/Catalyst/Request.pm index 496682d..42b3c8a 100644 --- a/lib/Catalyst/Request.pm +++ b/lib/Catalyst/Request.pm @@ -9,7 +9,7 @@ use utf8; use URI::QueryParam; __PACKAGE__->mk_accessors( - qw/action address arguments cookies headers match method + qw/action address arguments cookies headers keywords match method protocol query_parameters secure captures uri user/ ); @@ -51,6 +51,7 @@ Catalyst::Request - provides information about the current client request $req->headers; $req->hostname; $req->input; + $req->keywords; $req->match; $req->method; $req->param; @@ -259,6 +260,15 @@ sub hostname { Alias for $req->body. +=head2 $req->keywords + +Contains the keywords portion of a query string, when no '=' signs are +present. + + http://localhost/path?some+keywords + + $c->request->keywords will contain 'some keywords' + =head2 $req->match This contains the matching part of a Regex action. Otherwise diff --git a/t/live_engine_request_parameters.t b/t/live_engine_request_parameters.t index b677dc2..40e517c 100644 --- a/t/live_engine_request_parameters.t +++ b/t/live_engine_request_parameters.t @@ -6,7 +6,7 @@ use warnings; use FindBin; use lib "$FindBin::Bin/lib"; -use Test::More tests => 29; +use Test::More tests => 30; use Catalyst::Test 'TestApp'; use Catalyst::Request; @@ -103,14 +103,15 @@ use URI; }; my $request = POST( - 'http://localhost/dump/request/a/b?query_string', + 'http://localhost/dump/request/a/b?query+string', 'Content' => $parameters, 'Content-Type' => 'application/x-www-form-urlencoded' ); ok( my $response = request($request), 'Request' ); ok( eval '$creq = ' . $response->content, 'Unserialize Catalyst::Request' ); - is( $creq->{uri}->query, 'query_string', 'Catalyst::Request POST query_string' ); + is( $creq->{uri}->query, 'query+string', 'Catalyst::Request POST query_string' ); + is( $creq->keywords, 'query string', 'Catalyst::Request keywords' ); is_deeply( $creq->{parameters}, $parameters, 'Catalyst::Request parameters' ); ok( $response = request('http://localhost/dump/request/a/b?x=1&y=1&z=1'), 'Request' );