fixed url path segment match regex so that trailing slashes in /path/info/ + query...
[catagits/Web-Simple.git] / lib / Web / Simple / DispatchParser.pm
CommitLineData
920d6222 1package Web::Simple::DispatchParser;
2
3use strict;
4use warnings FATAL => 'all';
5
a5917caa 6sub DEBUG () { 0 }
7
8BEGIN {
9 if ($ENV{WEB_SIMPLE_DISPATCHPARSER_DEBUG}) {
10 no warnings 'redefine';
11 *DEBUG = sub () { 1 }
12 }
13}
14
15sub diag { if (DEBUG) { warn $_[0] } }
16
920d6222 17sub new { bless({}, ref($_[0])||$_[0]) }
18
19sub _blam {
20 my ($self, $error) = @_;
a4ec359d 21 my $hat = (' ' x (pos||0)).'^';
920d6222 22 die "Error parsing dispatch specification: ${error}\n
23${_}
24${hat} here\n";
25}
26
27sub parse_dispatch_specification {
28 my ($self, $spec) = @_;
c6ea9542 29 return $self->_parse_spec($spec);
30}
31
32sub _parse_spec {
b0420ad6 33 my ($self, $spec, $nested) = @_;
c6ea9542 34 for ($_[1]) {
920d6222 35 my @match;
920d6222 36 /^\G\s*/; # eat leading whitespace
37 PARSE: { do {
c6ea9542 38 push @match, $self->_parse_spec_section($_)
920d6222 39 or $self->_blam("Unable to work out what the next section is");
b0420ad6 40 if (/\G\)/gc) {
41 $self->_blam("Found closing ) with no opening (") unless $nested;
42 last PARSE;
43 }
920d6222 44 last PARSE if (pos == length);
c6ea9542 45 $match[-1] = $self->_parse_spec_combinator($_, $match[-1])
46 or $self->_blam('No valid combinator - expected + or |');
920d6222 47 } until (pos == length) }; # accept trailing whitespace
b0420ad6 48 if ($nested and pos == length) {
a4ec359d 49 pos = $nested - 1;
b0420ad6 50 $self->_blam("No closing ) found for opening (");
51 }
920d6222 52 return $match[0] if (@match == 1);
53 return sub {
54 my $env = { %{$_[0]} };
55 my $new_env;
56 my @got;
57 foreach my $match (@match) {
58 if (my @this_got = $match->($env)) {
59 my %change_env = %{shift(@this_got)};
60 @{$env}{keys %change_env} = values %change_env;
61 @{$new_env}{keys %change_env} = values %change_env;
62 push @got, @this_got;
63 } else {
64 return;
65 }
66 }
67 return ($new_env, @got);
68 };
69 }
70}
71
9b9866ae 72sub _parse_spec_combinator {
73 my ($self, $spec, $match) = @_;
74 for ($_[1]) {
75
76 /\G\+/gc and
77 return $match;
78
79 /\G\|/gc and
80 return do {
81 my @match = $match;
82 PARSE: { do {
83 push @match, $self->_parse_spec_section($_)
84 or $self->_blam("Unable to work out what the next section is");
85 last PARSE if (pos == length);
86 last PARSE unless /\G\|/gc; # give up when next thing isn't |
87 } until (pos == length) }; # accept trailing whitespace
88 return sub {
89 foreach my $try (@match) {
90 if (my @ret = $try->(@_)) {
91 return @ret;
92 }
93 }
94 return;
95 };
96 };
97 }
98 return;
99}
100
920d6222 101sub _parse_spec_section {
102 my ($self) = @_;
103 for ($_[1]) {
104
105 # GET POST PUT HEAD ...
106
107 /\G([A-Z]+)/gc and
108 return $self->_http_method_match($_, $1);
109
110 # /...
111
112 /\G(?=\/)/gc and
113 return $self->_url_path_match($_);
114
c6ea9542 115 # .* and .html
116
117 /\G\.(\*|\w+)/gc and
920d6222 118 return $self->_url_extension_match($_, $1);
b0420ad6 119
2ee4ab06 120 # (...)
b0420ad6 121
122 /\G\(/gc and
123 return $self->_parse_spec($_, pos);
2ee4ab06 124
125 # !something
126
127 /\G!/gc and
128 return do {
129 my $match = $self->_parse_spec_section($_);
130 return sub {
131 return {} unless $match->(@_);
132 return;
133 };
134 };
920d6222 135
9b9866ae 136 # ?<param spec>
137 /\G\?/gc and
138 return $self->_parse_param_handler($_, 'query');
53d47b78 139
140 # %<param spec>
141 /\G\%/gc and
142 return $self->_parse_param_handler($_, 'body');
c6ea9542 143 }
9b9866ae 144 return; # () will trigger the blam in our caller
c6ea9542 145}
146
920d6222 147sub _http_method_match {
148 my ($self, $str, $method) = @_;
920d6222 149 sub { shift->{REQUEST_METHOD} eq $method ? {} : () };
150}
151
152sub _url_path_match {
153 my ($self) = @_;
920d6222 154 for ($_[1]) {
155 my @path;
da8429c9 156 my $full_path = '$';
157 PATH: while (/\G\//gc) {
158 /\G\.\.\./gc
159 and do {
160 $full_path = '';
161 last PATH;
162 };
920d6222 163 push @path, $self->_url_path_segment_match($_)
164 or $self->_blam("Couldn't parse path match segment");
165 }
da8429c9 166 my $re = '^()'.join('/','',@path).($full_path ? '$' : '(/.*)$');
167 $re = qr/$re/;
168 if ($full_path) {
169 return sub {
170 if (my @cap = (shift->{PATH_INFO} =~ /$re/)) {
171 $cap[0] = {}; return @cap;
172 }
173 return ();
174 };
175 }
920d6222 176 return sub {
177 if (my @cap = (shift->{PATH_INFO} =~ /$re/)) {
da8429c9 178 $cap[0] = { PATH_INFO => pop(@cap) }; return @cap;
920d6222 179 }
180 return ();
181 };
182 }
183 return;
184}
185
186sub _url_path_segment_match {
187 my ($self) = @_;
188 for ($_[1]) {
189 # trailing / -> require / on end of URL
6c0f599a 190 /\G(?:(?=[+|\)])|$)/gc and
920d6222 191 return '$';
192 # word chars only -> exact path part match
193 /\G(\w+)/gc and
194 return "\Q$1";
28f3dfd5 195 # ** -> capture unlimited path parts
196 /\G\*\*/gc and
197 return '(.*?[^/])';
920d6222 198 # * -> capture path part
199 /\G\*/gc and
200 return '([^/]+)';
201 }
202 return ();
203}
204
205sub _url_extension_match {
206 my ($self, $str, $extension) = @_;
c6ea9542 207 if ($extension eq '*') {
208 sub {
209 if ((my $tmp = shift->{PATH_INFO}) =~ s/\.(\w+)$//) {
210 ({ PATH_INFO => $tmp }, $1);
211 } else {
212 ();
213 }
214 };
215 } else {
216 sub {
217 if ((my $tmp = shift->{PATH_INFO}) =~ s/\.\Q${extension}\E$//) {
218 ({ PATH_INFO => $tmp });
219 } else {
220 ();
221 }
222 };
223 }
920d6222 224}
225
9b9866ae 226sub _parse_param_handler {
227 my ($self, $spec, $type) = @_;
228
229 require Web::Simple::ParamParser;
230 my $unpacker = Web::Simple::ParamParser->can("get_unpacked_${type}_from");
231
232 for ($_[1]) {
eb9e0e25 233 my (@required, @single, %multi, $star, $multistar, %positional, $have_kw);
234 my $pos_idx = 0;
9b9866ae 235 PARAM: { do {
236
eb9e0e25 237 # ?:foo or ?@:foo
9b9866ae 238
eb9e0e25 239 my $is_kw = /\G\:/gc;
9b9866ae 240
241 # ?@foo or ?@*
242
eb9e0e25 243 my $multi = /\G\@/gc;
9b9866ae 244
245 # @* or *
246
a5917caa 247 if (/\G\*/gc) {
9b9866ae 248
eb9e0e25 249 $self->_blam("* is always named; no need to supply :") if $is_kw;
250
9b9866ae 251 $multi ? ($multistar = 1) : ($star = 1);
a5917caa 252
eb9e0e25 253 $have_kw = 1;
254
a5917caa 255 if ($star && $multistar) {
256 $self->_blam("Can't use * and \@* in the same parameter match");
257 }
9b9866ae 258 } else {
259
260 # @foo= or foo= or @foo~ or foo~
6c0f599a 261
9b9866ae 262 /\G(\w+)/gc or $self->_blam('Expected parameter name');
263
264 my $name = $1;
265
266 # check for = or ~ on the end
267
268 /\G\=/gc
269 ? push(@required, $name)
270 : (/\G\~/gc or $self->_blam('Expected = or ~ after parameter name'));
271
272 # record the key in the right category depending on the multi (@) flag
273
a5917caa 274 $multi ? ($multi{$name} = 1) : (push @single, $name);
eb9e0e25 275
276 # record positional or keyword
277
278 $is_kw ? ($have_kw = 1) : ($positional{$name} = $pos_idx++);
9b9866ae 279 }
280 } while (/\G\&/gc) }
281
282 return sub {
283 my $raw = $unpacker->($_[0]);
284 foreach my $name (@required) {
285 return unless exists $raw->{$name};
286 }
eb9e0e25 287 my (%p, %done);
288 my @p = (undef) x $pos_idx;
9b9866ae 289 foreach my $name (
290 @single,
291 ($star
292 ? (grep { !exists $multi{$_} } keys %$raw)
293 : ()
294 )
295 ) {
eb9e0e25 296 if (exists $raw->{$name}) {
297 if (exists $positional{$name}) {
298 $p[$positional{$name}] = $raw->{$name}->[-1];
299 } else {
300 $p{$name} = $raw->{$name}->[-1];
301 }
302 }
303 $done{$name} = 1;
9b9866ae 304 }
305 foreach my $name (
306 keys %multi,
307 ($multistar
eb9e0e25 308 ? (grep { !exists $done{$_} && !exists $multi{$_} } keys %$raw)
9b9866ae 309 : ()
310 )
311 ) {
eb9e0e25 312 if (exists $positional{$name}) {
313 $p[$positional{$name}] = $raw->{$name}||[];
314 } else {
315 $p{$name} = $raw->{$name}||[];
316 }
9b9866ae 317 }
eb9e0e25 318 $p[$pos_idx] = \%p if $have_kw;
319 return ({}, @p);
9b9866ae 320 };
321 }
322}
323
920d6222 3241;