Commit | Line | Data |
01dd4e4f |
1 | package SQL::Abstract::Tree; |
2 | |
c54740ba |
3 | # DO NOT edit away without talking to riba first, he will just put it back |
4 | # BEGIN pre-Moo2 import block |
5 | BEGIN { |
6 | require warnings; |
7 | my $initial_fatal_bits = (${^WARNING_BITS}||'') & $warnings::DeadBits{all}; |
05593cfc |
8 | |
c54740ba |
9 | local $ENV{PERL_STRICTURES_EXTRA} = 0; |
05593cfc |
10 | # load all of these now, so that lazy-loading does not escape |
11 | # the current PERL_STRICTURES_EXTRA setting |
12 | require Sub::Quote; |
13 | require Sub::Defer; |
14 | require Moo; |
15 | require Moo::Object; |
16 | require Method::Generate::Accessor; |
17 | require Method::Generate::Constructor; |
18 | |
19 | Moo->import; |
20 | Sub::Quote->import('quote_sub'); |
c54740ba |
21 | ${^WARNING_BITS} &= ( $initial_fatal_bits | ~ $warnings::DeadBits{all} ); |
22 | } |
23 | # END pre-Moo2 import block |
24 | |
01dd4e4f |
25 | use strict; |
26 | use warnings; |
b3b79607 |
27 | no warnings 'qw'; |
01dd4e4f |
28 | |
c54740ba |
29 | use Carp; |
1536de15 |
30 | |
0769ac0e |
31 | my $op_look_ahead = '(?: (?= [\s\)\(\;] ) | \z)'; |
b3b79607 |
32 | my $op_look_behind = '(?: (?<= [\,\s\)\(] ) | \A )'; |
33 | |
0769ac0e |
34 | my $quote_left = qr/[\`\'\"\[]/; |
35 | my $quote_right = qr/[\`\'\"\]]/; |
01dd4e4f |
36 | |
4e914a7c |
37 | my $placeholder_re = qr/(?: \? | \$\d+ )/x; |
38 | |
01dd4e4f |
39 | # These SQL keywords always signal end of the current expression (except inside |
40 | # of a parenthesized subexpression). |
0769ac0e |
41 | # Format: A list of strings that will be compiled to extended syntax ie. |
01dd4e4f |
42 | # /.../x) regexes, without capturing parentheses. They will be automatically |
0769ac0e |
43 | # anchored to op boundaries (excluding quotes) to match the whole token. |
44 | my @expression_start_keywords = ( |
01dd4e4f |
45 | 'SELECT', |
7853a177 |
46 | 'UPDATE', |
6c4d8eb8 |
47 | 'SET', |
7853a177 |
48 | 'INSERT \s+ INTO', |
49 | 'DELETE \s+ FROM', |
3d910890 |
50 | 'FROM', |
01dd4e4f |
51 | '(?: |
52 | (?: |
0769ac0e |
53 | (?: (?: LEFT | RIGHT | FULL ) \s+ )? |
54 | (?: (?: CROSS | INNER | OUTER ) \s+ )? |
01dd4e4f |
55 | )? |
56 | JOIN |
57 | )', |
58 | 'ON', |
59 | 'WHERE', |
efc991a0 |
60 | '(?: DEFAULT \s+ )? VALUES', |
01dd4e4f |
61 | 'GROUP \s+ BY', |
62 | 'HAVING', |
63 | 'ORDER \s+ BY', |
c0eaa9fd |
64 | 'SKIP', |
81b3e585 |
65 | 'FETCH', |
c0eaa9fd |
66 | 'FIRST', |
01dd4e4f |
67 | 'LIMIT', |
68 | 'OFFSET', |
69 | 'FOR', |
70 | 'UNION', |
71 | 'INTERSECT', |
72 | 'EXCEPT', |
820bb1f5 |
73 | 'BEGIN \s+ WORK', |
74 | 'COMMIT', |
75 | 'ROLLBACK \s+ TO \s+ SAVEPOINT', |
76 | 'ROLLBACK', |
77 | 'SAVEPOINT', |
78 | 'RELEASE \s+ SAVEPOINT', |
01dd4e4f |
79 | 'RETURNING', |
80 | ); |
81 | |
b3b79607 |
82 | my $expr_start_re = join ("\n\t|\n", @expression_start_keywords ); |
83 | $expr_start_re = qr/ $op_look_behind (?i: $expr_start_re ) $op_look_ahead /x; |
0769ac0e |
84 | |
01dd4e4f |
85 | # These are binary operator keywords always a single LHS and RHS |
86 | # * AND/OR are handled separately as they are N-ary |
87 | # * so is NOT as being unary |
3af02ccb |
88 | # * BETWEEN without parentheses around the ANDed arguments (which |
89 | # makes it a non-binary op) is detected and accommodated in |
01dd4e4f |
90 | # _recurse_parse() |
6c4d8eb8 |
91 | # * AS is not really an operator but is handled here as it's also LHS/RHS |
01dd4e4f |
92 | |
0769ac0e |
93 | # this will be included in the $binary_op_re, the distinction is interesting during |
1ec9b9e3 |
94 | # testing as one is tighter than the other, plus alphanum cmp ops have different |
95 | # look ahead/behind (e.g. "x"="y" ) |
96 | my @alphanum_cmp_op_keywords = (qw/< > != <> = <= >= /); |
97 | my $alphanum_cmp_op_re = join ("\n\t|\n", map |
0769ac0e |
98 | { "(?: (?<= [\\w\\s] | $quote_right ) | \\A )" . quotemeta ($_) . "(?: (?= [\\w\\s] | $quote_left ) | \\z )" } |
1ec9b9e3 |
99 | @alphanum_cmp_op_keywords |
01dd4e4f |
100 | ); |
1ec9b9e3 |
101 | $alphanum_cmp_op_re = qr/$alphanum_cmp_op_re/x; |
0769ac0e |
102 | |
103 | my $binary_op_re = '(?: NOT \s+)? (?:' . join ('|', qw/IN BETWEEN R?LIKE/) . ')'; |
b3b79607 |
104 | $binary_op_re = join "\n\t|\n", |
6c4d8eb8 |
105 | "$op_look_behind (?i: $binary_op_re | AS ) $op_look_ahead", |
1ec9b9e3 |
106 | $alphanum_cmp_op_re, |
b3b79607 |
107 | $op_look_behind . 'IS (?:\s+ NOT)?' . "(?= \\s+ NULL \\b | $op_look_ahead )", |
108 | ; |
b7b0f832 |
109 | $binary_op_re = qr/$binary_op_re/x; |
0769ac0e |
110 | |
4247c384 |
111 | my $rno_re = qr/ROW_NUMBER \s* \( \s* \) \s+ OVER/ix; |
112 | |
113 | my $unary_op_re = 'NOT \s+ EXISTS | NOT | ' . $rno_re; |
6f2a5b66 |
114 | $unary_op_re = join "\n\t|\n", |
115 | "$op_look_behind (?i: $unary_op_re ) $op_look_ahead", |
116 | ; |
117 | $unary_op_re = qr/$unary_op_re/x; |
118 | |
ad591616 |
119 | my $asc_desc_re = qr/$op_look_behind (?i: ASC | DESC ) $op_look_ahead /x; |
120 | my $and_or_re = qr/$op_look_behind (?i: AND | OR ) $op_look_ahead /x; |
6f2a5b66 |
121 | |
ad591616 |
122 | my $tokenizer_re = join("\n\t|\n", |
b3b79607 |
123 | $expr_start_re, |
0769ac0e |
124 | $binary_op_re, |
6f2a5b66 |
125 | $unary_op_re, |
ad591616 |
126 | $asc_desc_re, |
127 | $and_or_re, |
1ec9b9e3 |
128 | $op_look_behind . ' \* ' . $op_look_ahead, |
257ecc8a |
129 | (map { quotemeta $_ } qw/, ( )/), |
4e914a7c |
130 | $placeholder_re, |
0769ac0e |
131 | ); |
01dd4e4f |
132 | |
ad591616 |
133 | # this one *is* capturing for the split below |
b3b79607 |
134 | # splits on whitespace if all else fails |
3af02ccb |
135 | # has to happen before the composing qr's are anchored (below) |
ad591616 |
136 | $tokenizer_re = qr/ \s* ( $tokenizer_re ) \s* | \s+ /x; |
b3b79607 |
137 | |
138 | # Parser states for _recurse_parse() |
139 | use constant PARSE_TOP_LEVEL => 0; |
140 | use constant PARSE_IN_EXPR => 1; |
141 | use constant PARSE_IN_PARENS => 2; |
142 | use constant PARSE_IN_FUNC => 3; |
143 | use constant PARSE_RHS => 4; |
6f2a5b66 |
144 | use constant PARSE_LIST_ELT => 5; |
b3b79607 |
145 | |
ad591616 |
146 | my $expr_term_re = qr/$expr_start_re | \)/x; |
147 | my $rhs_term_re = qr/ $expr_term_re | $binary_op_re | $unary_op_re | $asc_desc_re | $and_or_re | \, /x; |
1ec9b9e3 |
148 | my $all_std_keywords_re = qr/ $rhs_term_re | \( | $placeholder_re /x; |
ad591616 |
149 | |
150 | # anchor everything - even though keywords are separated by the tokenizer, leakage may occur |
151 | for ( |
152 | $quote_left, |
153 | $quote_right, |
154 | $placeholder_re, |
155 | $expr_start_re, |
1ec9b9e3 |
156 | $alphanum_cmp_op_re, |
ad591616 |
157 | $binary_op_re, |
158 | $unary_op_re, |
159 | $asc_desc_re, |
160 | $and_or_re, |
161 | $expr_term_re, |
162 | $rhs_term_re, |
ad591616 |
163 | $all_std_keywords_re, |
164 | ) { |
165 | $_ = qr/ \A $_ \z /x; |
166 | } |
167 | |
b4085a1a |
168 | # what can be bunched together under one MISC in an AST |
169 | my $compressable_node_re = qr/^ \- (?: MISC | LITERAL | PLACEHOLDER ) $/x; |
01dd4e4f |
170 | |
7e5600e9 |
171 | my %indents = ( |
7853a177 |
172 | select => 0, |
173 | update => 0, |
174 | 'insert into' => 0, |
175 | 'delete from' => 0, |
3d910890 |
176 | from => 1, |
91916220 |
177 | where => 0, |
7853a177 |
178 | join => 1, |
179 | 'left join' => 1, |
180 | on => 2, |
2867f4f5 |
181 | having => 0, |
91916220 |
182 | 'group by' => 0, |
183 | 'order by' => 0, |
7853a177 |
184 | set => 1, |
185 | into => 1, |
91916220 |
186 | values => 1, |
c0eaa9fd |
187 | limit => 1, |
188 | offset => 1, |
189 | skip => 1, |
190 | first => 1, |
7e5600e9 |
191 | ); |
192 | |
c54740ba |
193 | |
194 | has [qw( |
195 | newline indent_string indent_amount fill_in_placeholders placeholder_surround |
196 | )] => (is => 'ro'); |
197 | |
198 | has [qw( indentmap colormap )] => ( is => 'ro', default => quote_sub('{}') ); |
199 | |
200 | # class global is in fact desired |
201 | my $merger; |
202 | |
203 | sub BUILDARGS { |
204 | my $class = shift; |
205 | my $args = ref $_[0] eq 'HASH' ? $_[0] : {@_}; |
206 | |
207 | if (my $p = delete $args->{profile}) { |
208 | my %extra_args; |
209 | if ($p eq 'console') { |
210 | %extra_args = ( |
211 | fill_in_placeholders => 1, |
212 | placeholder_surround => ['?/', ''], |
213 | indent_string => ' ', |
214 | indent_amount => 2, |
215 | newline => "\n", |
216 | colormap => {}, |
217 | indentmap => \%indents, |
218 | |
219 | ! ( eval { require Term::ANSIColor } ) ? () : do { |
aafbf833 |
220 | my $c = \&Term::ANSIColor::color; |
6d388c84 |
221 | |
222 | my $red = [$c->('red') , $c->('reset')]; |
223 | my $cyan = [$c->('cyan') , $c->('reset')]; |
224 | my $green = [$c->('green') , $c->('reset')]; |
225 | my $yellow = [$c->('yellow') , $c->('reset')]; |
226 | my $blue = [$c->('blue') , $c->('reset')]; |
227 | my $magenta = [$c->('magenta'), $c->('reset')]; |
228 | my $b_o_w = [$c->('black on_white'), $c->('reset')]; |
aafbf833 |
229 | ( |
fb98df48 |
230 | placeholder_surround => [$c->('black on_magenta'), $c->('reset')], |
aafbf833 |
231 | colormap => { |
6d388c84 |
232 | 'begin work' => $b_o_w, |
233 | commit => $b_o_w, |
234 | rollback => $b_o_w, |
235 | savepoint => $b_o_w, |
236 | 'rollback to savepoint' => $b_o_w, |
237 | 'release savepoint' => $b_o_w, |
238 | |
239 | select => $red, |
240 | 'insert into' => $red, |
241 | update => $red, |
242 | 'delete from' => $red, |
243 | |
244 | set => $cyan, |
245 | from => $cyan, |
246 | |
247 | where => $green, |
248 | values => $yellow, |
249 | |
250 | join => $magenta, |
251 | 'left join' => $magenta, |
252 | on => $blue, |
253 | |
254 | 'group by' => $yellow, |
2867f4f5 |
255 | having => $yellow, |
6d388c84 |
256 | 'order by' => $yellow, |
257 | |
258 | skip => $green, |
259 | first => $green, |
260 | limit => $green, |
261 | offset => $green, |
aafbf833 |
262 | } |
263 | ); |
c54740ba |
264 | }, |
265 | ); |
266 | } |
267 | elsif ($p eq 'console_monochrome') { |
268 | %extra_args = ( |
269 | fill_in_placeholders => 1, |
270 | placeholder_surround => ['?/', ''], |
271 | indent_string => ' ', |
272 | indent_amount => 2, |
273 | newline => "\n", |
274 | indentmap => \%indents, |
275 | ); |
276 | } |
277 | elsif ($p eq 'html') { |
278 | %extra_args = ( |
279 | fill_in_placeholders => 1, |
280 | placeholder_surround => ['<span class="placeholder">', '</span>'], |
281 | indent_string => ' ', |
282 | indent_amount => 2, |
283 | newline => "<br />\n", |
284 | colormap => { map { |
285 | (my $class = $_) =~ s/\s+/-/g; |
286 | ( $_ => [ qq|<span class="$class">|, '</span>' ] ) |
287 | } ( |
288 | keys %indents, |
289 | qw(commit rollback savepoint), |
290 | 'begin work', 'rollback to savepoint', 'release savepoint', |
291 | ) }, |
292 | indentmap => \%indents, |
293 | ); |
294 | } |
295 | elsif ($p eq 'none') { |
296 | # nada |
297 | } |
298 | else { |
299 | croak "No such profile '$p'"; |
300 | } |
301 | |
302 | # see if we got any duplicates and merge if needed |
303 | if (scalar grep { exists $args->{$_} } keys %extra_args) { |
304 | # heavy-duty merge |
305 | $args = ($merger ||= do { |
306 | require Hash::Merge; |
307 | my $m = Hash::Merge->new; |
308 | |
309 | $m->specify_behavior({ |
310 | SCALAR => { |
311 | SCALAR => sub { $_[1] }, |
312 | ARRAY => sub { [ $_[0], @{$_[1]} ] }, |
313 | HASH => sub { $_[1] }, |
314 | }, |
315 | ARRAY => { |
316 | SCALAR => sub { $_[1] }, |
317 | ARRAY => sub { $_[1] }, |
318 | HASH => sub { $_[1] }, |
319 | }, |
320 | HASH => { |
321 | SCALAR => sub { $_[1] }, |
322 | ARRAY => sub { [ values %{$_[0]}, @{$_[1]} ] }, |
323 | HASH => sub { Hash::Merge::_merge_hashes( $_[0], $_[1] ) }, |
324 | }, |
325 | }, 'SQLA::Tree Behavior' ); |
326 | |
327 | $m; |
328 | })->merge(\%extra_args, $args ); |
329 | |
330 | } |
331 | else { |
332 | $args = { %extra_args, %$args }; |
333 | } |
334 | } |
335 | |
336 | $args; |
75c3a063 |
337 | } |
d695b0ad |
338 | |
01dd4e4f |
339 | sub parse { |
d695b0ad |
340 | my ($self, $s) = @_; |
01dd4e4f |
341 | |
70c6f0e9 |
342 | return [] unless defined $s; |
343 | |
01dd4e4f |
344 | # tokenize string, and remove all optional whitespace |
345 | my $tokens = []; |
346 | foreach my $token (split $tokenizer_re, $s) { |
b3b79607 |
347 | push @$tokens, $token if ( |
348 | defined $token |
349 | and |
350 | length $token |
09931431 |
351 | and |
b3b79607 |
352 | $token =~ /\S/ |
353 | ); |
01dd4e4f |
354 | } |
6f2a5b66 |
355 | |
356 | return [ $self->_recurse_parse($tokens, PARSE_TOP_LEVEL) ]; |
01dd4e4f |
357 | } |
358 | |
359 | sub _recurse_parse { |
d695b0ad |
360 | my ($self, $tokens, $state) = @_; |
01dd4e4f |
361 | |
6f2a5b66 |
362 | my @left; |
01dd4e4f |
363 | while (1) { # left-associative parsing |
364 | |
6f2a5b66 |
365 | if ( ! @$tokens |
01dd4e4f |
366 | or |
6f2a5b66 |
367 | ($state == PARSE_IN_PARENS && $tokens->[0] eq ')') |
01dd4e4f |
368 | or |
6f2a5b66 |
369 | ($state == PARSE_IN_EXPR && $tokens->[0] =~ $expr_term_re ) |
0769ac0e |
370 | or |
6f2a5b66 |
371 | ($state == PARSE_RHS && $tokens->[0] =~ $rhs_term_re ) |
01dd4e4f |
372 | or |
ad591616 |
373 | ($state == PARSE_LIST_ELT && ( $tokens->[0] eq ',' or $tokens->[0] =~ $expr_term_re ) ) |
01dd4e4f |
374 | ) { |
6f2a5b66 |
375 | return @left; |
01dd4e4f |
376 | } |
377 | |
378 | my $token = shift @$tokens; |
379 | |
380 | # nested expression in () |
381 | if ($token eq '(' ) { |
6f2a5b66 |
382 | my @right = $self->_recurse_parse($tokens, PARSE_IN_PARENS); |
383 | $token = shift @$tokens or croak "missing closing ')' around block " . $self->unparse(\@right); |
384 | $token eq ')' or croak "unexpected token '$token' terminating block " . $self->unparse(\@right); |
385 | |
386 | push @left, [ '-PAREN' => \@right ]; |
387 | } |
388 | |
389 | # AND/OR |
ad591616 |
390 | elsif ($token =~ $and_or_re) { |
6f2a5b66 |
391 | my $op = uc $token; |
01dd4e4f |
392 | |
6f2a5b66 |
393 | my @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
394 | |
395 | # Merge chunks if "logic" matches |
396 | @left = [ $op => [ @left, (@right and $op eq $right[0][0]) |
397 | ? @{ $right[0][1] } |
398 | : @right |
399 | ] ]; |
01dd4e4f |
400 | } |
b3b79607 |
401 | |
6f2a5b66 |
402 | # LIST (,) |
403 | elsif ($token eq ',') { |
404 | |
405 | my @right = $self->_recurse_parse($tokens, PARSE_LIST_ELT); |
406 | |
407 | # deal with malformed lists ( foo, bar, , baz ) |
408 | @right = [] unless @right; |
01dd4e4f |
409 | |
6f2a5b66 |
410 | @right = [ -MISC => [ @right ] ] if @right > 1; |
411 | |
412 | if (!@left) { |
413 | @left = [ -LIST => [ [], @right ] ]; |
414 | } |
415 | elsif ($left[0][0] eq '-LIST') { |
416 | push @{$left[0][1]}, (@{$right[0]} and $right[0][0] eq '-LIST') |
417 | ? @{$right[0][1]} |
418 | : @right |
419 | ; |
01dd4e4f |
420 | } |
421 | else { |
6f2a5b66 |
422 | @left = [ -LIST => [ @left, @right ] ]; |
01dd4e4f |
423 | } |
424 | } |
6f2a5b66 |
425 | |
01dd4e4f |
426 | # binary operator keywords |
6f2a5b66 |
427 | elsif ($token =~ $binary_op_re) { |
01dd4e4f |
428 | my $op = uc $token; |
6f2a5b66 |
429 | |
430 | my @right = $self->_recurse_parse($tokens, PARSE_RHS); |
01dd4e4f |
431 | |
432 | # A between with a simple LITERAL for a 1st RHS argument needs a |
433 | # rerun of the search to (hopefully) find the proper AND construct |
6f2a5b66 |
434 | if ($op eq 'BETWEEN' and $right[0] eq '-LITERAL') { |
435 | unshift @$tokens, $right[1][0]; |
436 | @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
437 | } |
438 | |
08e16360 |
439 | push @left, [$op => [ (@left ? pop @left : ''), @right ]]; |
01dd4e4f |
440 | } |
6f2a5b66 |
441 | |
442 | # unary op keywords |
443 | elsif ( $token =~ $unary_op_re ) { |
01dd4e4f |
444 | my $op = uc $token; |
4247c384 |
445 | |
446 | # normalize RNO explicitly |
447 | $op = 'ROW_NUMBER() OVER' if $op =~ /^$rno_re$/; |
448 | |
6f2a5b66 |
449 | my @right = $self->_recurse_parse ($tokens, PARSE_RHS); |
450 | |
451 | push @left, [ $op => \@right ]; |
01dd4e4f |
452 | } |
6f2a5b66 |
453 | |
454 | # expression terminator keywords |
455 | elsif ( $token =~ $expr_start_re ) { |
01dd4e4f |
456 | my $op = uc $token; |
6f2a5b66 |
457 | my @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
458 | |
6f2a5b66 |
459 | push @left, [ $op => \@right ]; |
01dd4e4f |
460 | } |
6f2a5b66 |
461 | |
462 | # a '?' |
4e914a7c |
463 | elsif ( $token =~ $placeholder_re) { |
6f2a5b66 |
464 | push @left, [ -PLACEHOLDER => [ $token ] ]; |
465 | } |
466 | |
467 | # check if the current token is an unknown op-start |
1ec9b9e3 |
468 | elsif (@$tokens and ($tokens->[0] eq '(' or $tokens->[0] =~ $placeholder_re ) ) { |
6f2a5b66 |
469 | push @left, [ $token => [ $self->_recurse_parse($tokens, PARSE_RHS) ] ]; |
4e914a7c |
470 | } |
6f2a5b66 |
471 | |
b3b79607 |
472 | # we're now in "unknown token" land - start eating tokens until |
6f30911f |
473 | # we see something familiar, OR in the case of RHS (binop) stop |
474 | # after the first token |
e2a120ce |
475 | # Also stop processing when we could end up with an unknown func |
01dd4e4f |
476 | else { |
6f2a5b66 |
477 | my @lits = [ -LITERAL => [$token] ]; |
b3b79607 |
478 | |
b4085a1a |
479 | unshift @lits, pop @left if @left == 1; |
480 | |
6f30911f |
481 | unless ( $state == PARSE_RHS ) { |
e2a120ce |
482 | while ( |
483 | @$tokens |
484 | and |
485 | $tokens->[0] !~ $all_std_keywords_re |
486 | and |
487 | ! ( @$tokens > 1 and $tokens->[1] eq '(' ) |
488 | ) { |
6f30911f |
489 | push @lits, [ -LITERAL => [ shift @$tokens ] ]; |
b4085a1a |
490 | } |
6f30911f |
491 | } |
6f2a5b66 |
492 | |
6f2a5b66 |
493 | @lits = [ -MISC => [ @lits ] ] if @lits > 1; |
494 | |
495 | push @left, @lits; |
496 | } |
b3b79607 |
497 | |
b4085a1a |
498 | # compress -LITERAL -MISC and -PLACEHOLDER pieces into a single |
499 | # -MISC container |
500 | if (@left > 1) { |
501 | my $i = 0; |
502 | while ($#left > $i) { |
503 | if ($left[$i][0] =~ $compressable_node_re and $left[$i+1][0] =~ $compressable_node_re) { |
504 | splice @left, $i, 2, [ -MISC => [ |
505 | map { $_->[0] eq '-MISC' ? @{$_->[1]} : $_ } (@left[$i, $i+1]) |
506 | ]]; |
507 | } |
508 | else { |
509 | $i++; |
510 | } |
511 | } |
512 | } |
513 | |
514 | return @left if $state == PARSE_RHS; |
6f30911f |
515 | |
b4085a1a |
516 | # deal with post-fix operators |
517 | if (@$tokens) { |
518 | # asc/desc |
6f2a5b66 |
519 | if ($tokens->[0] =~ $asc_desc_re) { |
1ec9b9e3 |
520 | @left = [ ('-' . uc (shift @$tokens)) => [ @left ] ]; |
6f2a5b66 |
521 | } |
01dd4e4f |
522 | } |
523 | } |
524 | } |
525 | |
d695b0ad |
526 | sub format_keyword { |
527 | my ($self, $keyword) = @_; |
528 | |
1536de15 |
529 | if (my $around = $self->colormap->{lc $keyword}) { |
d695b0ad |
530 | $keyword = "$around->[0]$keyword$around->[1]"; |
531 | } |
532 | |
533 | return $keyword |
534 | } |
535 | |
728f26a2 |
536 | my %starters = ( |
537 | select => 1, |
538 | update => 1, |
539 | 'insert into' => 1, |
540 | 'delete from' => 1, |
541 | ); |
542 | |
f2ab166a |
543 | sub pad_keyword { |
a24cc3a0 |
544 | my ($self, $keyword, $depth) = @_; |
e171c446 |
545 | |
546 | my $before = ''; |
1536de15 |
547 | if (defined $self->indentmap->{lc $keyword}) { |
548 | $before = $self->newline . $self->indent($depth + $self->indentmap->{lc $keyword}); |
a24cc3a0 |
549 | } |
728f26a2 |
550 | $before = '' if $depth == 0 and defined $starters{lc $keyword}; |
e4570c8e |
551 | return [$before, '']; |
a24cc3a0 |
552 | } |
553 | |
1536de15 |
554 | sub indent { ($_[0]->indent_string||'') x ( ( $_[0]->indent_amount || 0 ) * $_[1] ) } |
a24cc3a0 |
555 | |
a97eb57c |
556 | sub _is_key { |
557 | my ($self, $tree) = @_; |
0569a14f |
558 | $tree = $tree->[0] while ref $tree; |
559 | |
a97eb57c |
560 | defined $tree && defined $self->indentmap->{lc $tree}; |
0569a14f |
561 | } |
562 | |
9d11f0d4 |
563 | sub fill_in_placeholder { |
fb272e73 |
564 | my ($self, $bindargs) = @_; |
565 | |
566 | if ($self->fill_in_placeholders) { |
ad46269d |
567 | my $val = shift @{$bindargs} || ''; |
4712657d |
568 | my $quoted = $val =~ s/^(['"])(.*)\1$/$2/; |
9d11f0d4 |
569 | my ($left, $right) = @{$self->placeholder_surround}; |
fb272e73 |
570 | $val =~ s/\\/\\\\/g; |
571 | $val =~ s/'/\\'/g; |
4712657d |
572 | $val = qq('$val') if $quoted; |
573 | return qq($left$val$right) |
fb272e73 |
574 | } |
575 | return '?' |
576 | } |
577 | |
3a247d23 |
578 | # FIXME - terrible name for a user facing API |
01dd4e4f |
579 | sub unparse { |
3a247d23 |
580 | my ($self, $tree, $bindargs) = @_; |
581 | $self->_unparse($tree, [@{$bindargs||[]}], 0); |
582 | } |
a24cc3a0 |
583 | |
3a247d23 |
584 | sub _unparse { |
585 | my ($self, $tree, $bindargs, $depth) = @_; |
01dd4e4f |
586 | |
0769ac0e |
587 | if (not $tree or not @$tree) { |
01dd4e4f |
588 | return ''; |
589 | } |
a24cc3a0 |
590 | |
007f0853 |
591 | # FIXME - needs a config switch to disable |
c01ac648 |
592 | $self->_parenthesis_unroll($tree); |
007f0853 |
593 | |
6f2a5b66 |
594 | my ($op, $args) = @{$tree}[0,1]; |
0769ac0e |
595 | |
6f2a5b66 |
596 | if (! defined $op or (! ref $op and ! defined $args) ) { |
0769ac0e |
597 | require Data::Dumper; |
598 | Carp::confess( sprintf ( "Internal error - malformed branch at depth $depth:\n%s", |
599 | Data::Dumper::Dumper($tree) |
600 | ) ); |
601 | } |
a24cc3a0 |
602 | |
6f2a5b66 |
603 | if (ref $op) { |
3a247d23 |
604 | return join (' ', map $self->_unparse($_, $bindargs, $depth), @$tree); |
01dd4e4f |
605 | } |
6f2a5b66 |
606 | elsif ($op eq '-LITERAL') { # literal has different sig |
607 | return $args->[0]; |
01dd4e4f |
608 | } |
6f2a5b66 |
609 | elsif ($op eq '-PLACEHOLDER') { |
4e914a7c |
610 | return $self->fill_in_placeholder($bindargs); |
611 | } |
6f2a5b66 |
612 | elsif ($op eq '-PAREN') { |
c4d7cfcf |
613 | return sprintf ('( %s )', |
6f2a5b66 |
614 | join (' ', map { $self->_unparse($_, $bindargs, $depth + 2) } @{$args} ) |
e4570c8e |
615 | . |
6f2a5b66 |
616 | ($self->_is_key($args) |
e4570c8e |
617 | ? ( $self->newline||'' ) . $self->indent($depth + 1) |
618 | : '' |
619 | ) |
620 | ); |
01dd4e4f |
621 | } |
ad591616 |
622 | elsif ($op eq 'AND' or $op eq 'OR' or $op =~ $binary_op_re ) { |
6f2a5b66 |
623 | return join (" $op ", map $self->_unparse($_, $bindargs, $depth), @{$args}); |
624 | } |
625 | elsif ($op eq '-LIST' ) { |
626 | return join (', ', map $self->_unparse($_, $bindargs, $depth), @{$args}); |
01dd4e4f |
627 | } |
6f2a5b66 |
628 | elsif ($op eq '-MISC' ) { |
629 | return join (' ', map $self->_unparse($_, $bindargs, $depth), @{$args}); |
b3b79607 |
630 | } |
73835ff0 |
631 | elsif ($op =~ qr/^-(ASC|DESC)$/ ) { |
632 | my $dir = $1; |
633 | return join (' ', (map $self->_unparse($_, $bindargs, $depth), @{$args}), $dir); |
634 | } |
01dd4e4f |
635 | else { |
6f2a5b66 |
636 | my ($l, $r) = @{$self->pad_keyword($op, $depth)}; |
87abf9bc |
637 | |
638 | my $rhs = $self->_unparse($args, $bindargs, $depth); |
639 | |
640 | return sprintf "$l%s$r", join( |
6f2a5b66 |
641 | ( ref $args eq 'ARRAY' and @{$args} == 1 and $args->[0][0] eq '-PAREN' ) |
c4d7cfcf |
642 | ? '' # mysql-- |
643 | : ' ' |
644 | , |
87abf9bc |
645 | $self->format_keyword($op), |
646 | (length $rhs ? $rhs : () ), |
647 | ); |
01dd4e4f |
648 | } |
649 | } |
650 | |
bb54fcb4 |
651 | # All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics |
652 | my @unrollable_ops = ( |
653 | 'ON', |
654 | 'WHERE', |
655 | 'GROUP \s+ BY', |
656 | 'HAVING', |
657 | 'ORDER \s+ BY', |
6e9a377b |
658 | 'I?LIKE', |
bb54fcb4 |
659 | ); |
660 | my $unrollable_ops_re = join ' | ', @unrollable_ops; |
661 | $unrollable_ops_re = qr/$unrollable_ops_re/xi; |
662 | |
663 | sub _parenthesis_unroll { |
664 | my $self = shift; |
665 | my $ast = shift; |
666 | |
bb54fcb4 |
667 | return unless (ref $ast and ref $ast->[1]); |
668 | |
669 | my $changes; |
670 | do { |
671 | my @children; |
672 | $changes = 0; |
673 | |
674 | for my $child (@{$ast->[1]}) { |
007f0853 |
675 | |
bb54fcb4 |
676 | # the current node in this loop is *always* a PAREN |
6f2a5b66 |
677 | if (! ref $child or ! @$child or $child->[0] ne '-PAREN') { |
bb54fcb4 |
678 | push @children, $child; |
679 | next; |
680 | } |
681 | |
a4d17ff1 |
682 | my $parent_op = $ast->[0]; |
683 | |
bb54fcb4 |
684 | # unroll nested parenthesis |
a4d17ff1 |
685 | while ( $parent_op ne 'IN' and @{$child->[1]} == 1 and $child->[1][0][0] eq '-PAREN') { |
bb54fcb4 |
686 | $child = $child->[1][0]; |
687 | $changes++; |
688 | } |
689 | |
a4d17ff1 |
690 | # set to CHILD in the case of PARENT ( CHILD ) |
691 | # but NOT in the case of PARENT( CHILD1, CHILD2 ) |
692 | my $single_child_op = (@{$child->[1]} == 1) ? $child->[1][0][0] : ''; |
693 | |
694 | my $child_op_argc = $single_child_op ? scalar @{$child->[1][0][1]} : undef; |
695 | |
696 | my $single_grandchild_op |
697 | = ( $child_op_argc||0 == 1 and ref $child->[1][0][1][0] eq 'ARRAY' ) |
698 | ? $child->[1][0][1][0][0] |
699 | : '' |
700 | ; |
701 | |
0ec2e265 |
702 | # if the parent operator explicitly allows it AND the child isn't a subselect |
703 | # nuke the parenthesis |
a4d17ff1 |
704 | if ($parent_op =~ $unrollable_ops_re and $single_child_op ne 'SELECT') { |
6f2a5b66 |
705 | push @children, @{$child->[1]}; |
706 | $changes++; |
707 | } |
708 | |
bb54fcb4 |
709 | # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list |
6f2a5b66 |
710 | elsif ( |
a4d17ff1 |
711 | $single_child_op eq $parent_op |
712 | and |
713 | ( $parent_op eq 'AND' or $parent_op eq 'OR') |
bb54fcb4 |
714 | ) { |
715 | push @children, @{$child->[1][0][1]}; |
716 | $changes++; |
717 | } |
718 | |
bb54fcb4 |
719 | # only *ONE* LITERAL or placeholder element |
6e9a377b |
720 | # as an AND/OR/NOT argument |
bb54fcb4 |
721 | elsif ( |
a4d17ff1 |
722 | ( $single_child_op eq '-LITERAL' or $single_child_op eq '-PLACEHOLDER' ) |
723 | and |
724 | ( $parent_op eq 'AND' or $parent_op eq 'OR' or $parent_op eq 'NOT' ) |
bb54fcb4 |
725 | ) { |
6f2a5b66 |
726 | push @children, @{$child->[1]}; |
bb54fcb4 |
727 | $changes++; |
728 | } |
729 | |
007f0853 |
730 | # an AND/OR expression with only one binop in the parenthesis |
731 | # with exactly two grandchildren |
bb54fcb4 |
732 | # the only time when we can *not* unroll this is when both |
733 | # the parent and the child are mathops (in which case we'll |
734 | # break precedence) or when the child is BETWEEN (special |
735 | # case) |
736 | elsif ( |
a4d17ff1 |
737 | ($parent_op eq 'AND' or $parent_op eq 'OR') |
007f0853 |
738 | and |
a4d17ff1 |
739 | $single_child_op =~ $binary_op_re |
bb54fcb4 |
740 | and |
a4d17ff1 |
741 | $single_child_op ne 'BETWEEN' |
bb54fcb4 |
742 | and |
a4d17ff1 |
743 | $child_op_argc == 2 |
bb54fcb4 |
744 | and |
745 | ! ( |
a4d17ff1 |
746 | $single_child_op =~ $alphanum_cmp_op_re |
bb54fcb4 |
747 | and |
a4d17ff1 |
748 | $parent_op =~ $alphanum_cmp_op_re |
bb54fcb4 |
749 | ) |
750 | ) { |
6f2a5b66 |
751 | push @children, @{$child->[1]}; |
bb54fcb4 |
752 | $changes++; |
753 | } |
754 | |
755 | # a function binds tighter than a mathop - see if our ancestor is a |
756 | # mathop, and our content is: |
757 | # a single non-mathop child with a single PAREN grandchild which |
758 | # would indicate mathop ( nonmathop ( ... ) ) |
759 | # or a single non-mathop with a single LITERAL ( nonmathop foo ) |
760 | # or a single non-mathop with a single PLACEHOLDER ( nonmathop ? ) |
761 | elsif ( |
a4d17ff1 |
762 | $single_child_op |
bb54fcb4 |
763 | and |
a4d17ff1 |
764 | $parent_op =~ $alphanum_cmp_op_re |
bb54fcb4 |
765 | and |
a4d17ff1 |
766 | $single_child_op !~ $alphanum_cmp_op_re |
bb54fcb4 |
767 | and |
a4d17ff1 |
768 | $child_op_argc == 1 |
bb54fcb4 |
769 | and |
770 | ( |
a4d17ff1 |
771 | $single_grandchild_op eq '-PAREN' |
bb54fcb4 |
772 | or |
a4d17ff1 |
773 | $single_grandchild_op eq '-LITERAL' |
bb54fcb4 |
774 | or |
a4d17ff1 |
775 | $single_grandchild_op eq '-PLACEHOLDER' |
bb54fcb4 |
776 | ) |
777 | ) { |
6f2a5b66 |
778 | push @children, @{$child->[1]}; |
bb54fcb4 |
779 | $changes++; |
780 | } |
781 | |
1de1d085 |
782 | # a construct of ... ( somefunc ( ... ) ) ... can safely lose the outer parens |
783 | # except for the case of ( NOT ( ... ) ) which has already been handled earlier |
0ec2e265 |
784 | # and except for the case of RNO, where the double are explicit syntax |
1de1d085 |
785 | elsif ( |
a4d17ff1 |
786 | $parent_op ne 'ROW_NUMBER() OVER' |
1de1d085 |
787 | and |
a4d17ff1 |
788 | $single_child_op |
1de1d085 |
789 | and |
a4d17ff1 |
790 | $single_child_op ne 'NOT' |
1de1d085 |
791 | and |
a4d17ff1 |
792 | $child_op_argc == 1 |
1de1d085 |
793 | and |
a4d17ff1 |
794 | $single_grandchild_op eq '-PAREN' |
1de1d085 |
795 | ) { |
796 | push @children, @{$child->[1]}; |
797 | $changes++; |
798 | } |
799 | |
bb54fcb4 |
800 | |
801 | # otherwise no more mucking for this pass |
802 | else { |
803 | push @children, $child; |
804 | } |
805 | } |
806 | |
807 | $ast->[1] = \@children; |
808 | |
809 | } while ($changes); |
bb54fcb4 |
810 | } |
811 | |
0c2de280 |
812 | sub _strip_asc_from_order_by { |
813 | my ($self, $ast) = @_; |
814 | |
815 | return $ast if ( |
816 | ref $ast ne 'ARRAY' |
817 | or |
818 | $ast->[0] ne 'ORDER BY' |
819 | ); |
820 | |
821 | |
822 | my $to_replace; |
823 | |
824 | if (@{$ast->[1]} == 1 and $ast->[1][0][0] eq '-ASC') { |
825 | $to_replace = [ $ast->[1][0] ]; |
826 | } |
827 | elsif (@{$ast->[1]} == 1 and $ast->[1][0][0] eq '-LIST') { |
828 | $to_replace = [ grep { $_->[0] eq '-ASC' } @{$ast->[1][0][1]} ]; |
829 | } |
830 | |
831 | @$_ = @{$_->[1][0]} for @$to_replace; |
832 | |
833 | $ast; |
834 | } |
835 | |
fb272e73 |
836 | sub format { my $self = shift; $self->unparse($self->parse($_[0]), $_[1]) } |
01dd4e4f |
837 | |
838 | 1; |
839 | |
3be357b0 |
840 | =pod |
841 | |
b912ee1e |
842 | =head1 NAME |
843 | |
844 | SQL::Abstract::Tree - Represent SQL as an AST |
845 | |
3be357b0 |
846 | =head1 SYNOPSIS |
847 | |
848 | my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
849 | |
850 | print $sqla_tree->format('SELECT * FROM foo WHERE foo.a > 2'); |
851 | |
852 | # SELECT * |
853 | # FROM foo |
854 | # WHERE foo.a > 2 |
855 | |
6b1bf9f8 |
856 | =head1 METHODS |
857 | |
858 | =head2 new |
859 | |
860 | my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
861 | |
c22f502d |
862 | $args = { |
863 | profile => 'console', # predefined profile to use (default: 'none') |
864 | fill_in_placeholders => 1, # true for placeholder population |
9d11f0d4 |
865 | placeholder_surround => # The strings that will be wrapped around |
866 | [GREEN, RESET], # populated placeholders if the above is set |
c22f502d |
867 | indent_string => ' ', # the string used when indenting |
868 | indent_amount => 2, # how many of above string to use for a single |
869 | # indent level |
870 | newline => "\n", # string for newline |
871 | colormap => { |
872 | select => [RED, RESET], # a pair of strings defining what to surround |
873 | # the keyword with for colorization |
874 | # ... |
875 | }, |
876 | indentmap => { |
877 | select => 0, # A zero means that the keyword will start on |
878 | # a new line |
879 | from => 1, # Any other positive integer means that after |
880 | on => 2, # said newline it will get that many indents |
881 | # ... |
882 | }, |
883 | } |
884 | |
885 | Returns a new SQL::Abstract::Tree object. All arguments are optional. |
886 | |
887 | =head3 profiles |
888 | |
889 | There are four predefined profiles, C<none>, C<console>, C<console_monochrome>, |
890 | and C<html>. Typically a user will probably just use C<console> or |
891 | C<console_monochrome>, but if something about a profile bothers you, merely |
892 | use the profile and override the parts that you don't like. |
893 | |
6b1bf9f8 |
894 | =head2 format |
895 | |
c22f502d |
896 | $sqlat->format('SELECT * FROM bar WHERE x = ?', [1]) |
897 | |
898 | Takes C<$sql> and C<\@bindargs>. |
6b1bf9f8 |
899 | |
1a3cc911 |
900 | Returns a formatting string based on the string passed in |
ee4227a7 |
901 | |
902 | =head2 parse |
903 | |
904 | $sqlat->parse('SELECT * FROM bar WHERE x = ?') |
905 | |
906 | Returns a "tree" representing passed in SQL. Please do not depend on the |
907 | structure of the returned tree. It may be stable at some point, but not yet. |
908 | |
909 | =head2 unparse |
910 | |
90d0250b |
911 | $sqlat->unparse($tree_structure, \@bindargs) |
ee4227a7 |
912 | |
913 | Transform "tree" into SQL, applying various transforms on the way. |
914 | |
915 | =head2 format_keyword |
916 | |
917 | $sqlat->format_keyword('SELECT') |
918 | |
919 | Currently this just takes a keyword and puts the C<colormap> stuff around it. |
920 | Later on it may do more and allow for coderef based transforms. |
921 | |
f2ab166a |
922 | =head2 pad_keyword |
ee4227a7 |
923 | |
f2ab166a |
924 | my ($before, $after) = @{$sqlat->pad_keyword('SELECT')}; |
ee4227a7 |
925 | |
926 | Returns whitespace to be inserted around a keyword. |
9d11f0d4 |
927 | |
928 | =head2 fill_in_placeholder |
929 | |
930 | my $value = $sqlat->fill_in_placeholder(\@bindargs) |
931 | |
932 | Removes last arg from passed arrayref and returns it, surrounded with |
933 | the values in placeholder_surround, and then surrounded with single quotes. |
f2ab166a |
934 | |
935 | =head2 indent |
936 | |
937 | Returns as many indent strings as indent amounts times the first argument. |
938 | |
939 | =head1 ACCESSORS |
940 | |
941 | =head2 colormap |
942 | |
943 | See L</new> |
944 | |
945 | =head2 fill_in_placeholders |
946 | |
947 | See L</new> |
948 | |
949 | =head2 indent_amount |
950 | |
951 | See L</new> |
952 | |
953 | =head2 indent_string |
954 | |
955 | See L</new> |
956 | |
957 | =head2 indentmap |
958 | |
959 | See L</new> |
960 | |
961 | =head2 newline |
962 | |
963 | See L</new> |
964 | |
965 | =head2 placeholder_surround |
966 | |
967 | See L</new> |
968 | |