Commit | Line | Data |
01dd4e4f |
1 | package SQL::Abstract::Tree; |
2 | |
3 | use strict; |
4 | use warnings; |
b3b79607 |
5 | no warnings 'qw'; |
01dd4e4f |
6 | use Carp; |
7 | |
0769ac0e |
8 | use Hash::Merge qw//; |
9 | |
10 | use base 'Class::Accessor::Grouped'; |
11 | |
# Generate one "simple" accessor (via the Class::Accessor::Grouped base class)
# for each formatting option named in the list below.
12 | __PACKAGE__->mk_group_accessors( simple => $_ ) for qw( |
13 | newline indent_string indent_amount colormap indentmap fill_in_placeholders |
14 | placeholder_surround |
15 | ); |
2fed0b4b |
16 | |
bc482085 |
# Custom Hash::Merge behaviour, used by new() to combine a profile with the
# caller's arguments. Each handler receives the two values being merged as
# $_[0] and $_[1]. Almost every combination simply returns $_[1], i.e. the
# second operand of merge() replaces the first. The exceptions are the ARRAY
# targets (the first operand's scalar or hash values are prepended to the
# second operand's array) and HASH/HASH, which is merged recursively through
# Hash::Merge's private _merge_hashes() helper.
17 | my $merger = Hash::Merge->new; |
18 | |
19 | $merger->specify_behavior({ |
2fed0b4b |
20 | SCALAR => { |
21 | SCALAR => sub { $_[1] }, |
22 | ARRAY => sub { [ $_[0], @{$_[1]} ] }, |
23 | HASH => sub { $_[1] }, |
24 | }, |
25 | ARRAY => { |
26 | SCALAR => sub { $_[1] }, |
27 | ARRAY => sub { $_[1] }, |
28 | HASH => sub { $_[1] }, |
29 | }, |
30 | HASH => { |
31 | SCALAR => sub { $_[1] }, |
32 | ARRAY => sub { [ values %{$_[0]}, @{$_[1]} ] }, |
33 | HASH => sub { Hash::Merge::_merge_hashes( $_[0], $_[1] ) }, |
34 | }, |
0769ac0e |
35 | }, 'SQLA::Tree Behavior' ); |
1536de15 |
36 | |
0769ac0e |
# Building blocks for the tokenizer regexes below. The two look-around
# strings are written for /x and are interpolated into larger patterns:
# the look-ahead accepts whitespace, a parenthesis, a semicolon or end of
# string after a keyword; the look-behind accepts a comma, whitespace, a
# parenthesis or start of string before it.
37 | my $op_look_ahead = '(?: (?= [\s\)\(\;] ) | \z)'; |
b3b79607 |
38 | my $op_look_behind = '(?: (?<= [\,\s\)\(] ) | \A )'; |
39 | |
0769ac0e |
# Characters accepted as an opening / closing identifier or string quote
40 | my $quote_left = qr/[\`\'\"\[]/; |
41 | my $quote_right = qr/[\`\'\"\]]/; |
01dd4e4f |
42 | |
4e914a7c |
# A bind placeholder: either a bare "?" or a numbered "$1", "$2", ...
43 | my $placeholder_re = qr/(?: \? | \$\d+ )/x; |
44 | |
01dd4e4f |
45 | # These SQL keywords always signal end of the current expression (except inside |
46 | # of a parenthesized subexpression). |
0769ac0e |
47 | # Format: A list of strings that will be compiled to extended syntax (i.e. |
01dd4e4f |
48 | # /.../x) regexes, without capturing parentheses. They will be automatically |
0769ac0e |
49 | # anchored to op boundaries (excluding quotes) to match the whole token. |
50 | my @expression_start_keywords = ( |
01dd4e4f |
51 | 'SELECT', |
7853a177 |
52 | 'UPDATE', |
6c4d8eb8 |
53 | 'SET', |
7853a177 |
54 | 'INSERT \s+ INTO', |
55 | 'DELETE \s+ FROM', |
3d910890 |
56 | 'FROM', |
01dd4e4f |
57 | '(?: |
58 | (?: |
0769ac0e |
59 | (?: (?: LEFT | RIGHT | FULL ) \s+ )? |
60 | (?: (?: CROSS | INNER | OUTER ) \s+ )? |
01dd4e4f |
61 | )? |
62 | JOIN |
63 | )', |
64 | 'ON', |
65 | 'WHERE', |
efc991a0 |
66 | '(?: DEFAULT \s+ )? VALUES', |
6c4d8eb8 |
67 | '(?: NOT \s+)? EXISTS', |
01dd4e4f |
68 | 'GROUP \s+ BY', |
69 | 'HAVING', |
70 | 'ORDER \s+ BY', |
c0eaa9fd |
71 | 'SKIP', |
72 | 'FIRST', |
01dd4e4f |
73 | 'LIMIT', |
74 | 'OFFSET', |
75 | 'FOR', |
76 | 'UNION', |
77 | 'INTERSECT', |
78 | 'EXCEPT', |
820bb1f5 |
79 | 'BEGIN \s+ WORK', |
80 | 'COMMIT', |
81 | 'ROLLBACK \s+ TO \s+ SAVEPOINT', |
82 | 'ROLLBACK', |
83 | 'SAVEPOINT', |
84 | 'RELEASE \s+ SAVEPOINT', |
01dd4e4f |
85 | 'RETURNING', |
8d0dd7dc |
86 | 'ROW_NUMBER \s* \( \s* \) \s+ OVER', |
01dd4e4f |
87 | ); |
88 | |
b3b79607 |
# Join the keyword fragments into one alternation, then compile it
# case-insensitively between the op look-behind / look-ahead assertions.
89 | my $expr_start_re = join ("\n\t|\n", @expression_start_keywords ); |
90 | $expr_start_re = qr/ $op_look_behind (?i: $expr_start_re ) $op_look_ahead /x; |
0769ac0e |
91 | |
01dd4e4f |
92 | # These are binary operator keywords, always with a single LHS and RHS |
93 | # * AND/OR are handled separately as they are N-ary |
94 | # * so is NOT as being unary |
95 | # * BETWEEN without parentheses around the ANDed arguments (which |
96 | # makes it a non-binary op) is detected and accommodated in |
97 | # _recurse_parse() |
6c4d8eb8 |
98 | # * AS is not really an operator but is handled here as it's also LHS/RHS |
01dd4e4f |
99 | |
0769ac0e |
100 | # this will be included in the $binary_op_re, the distinction is interesting during |
101 | # testing as one is tighter than the other, plus mathops have different look |
102 | # ahead/behind (e.g. "x"="y" ) |
103 | my @math_op_keywords = (qw/ < > != <> = <= >= /); |
104 | my $math_re = join ("\n\t|\n", map |
105 | { "(?: (?<= [\\w\\s] | $quote_right ) | \\A )" . quotemeta ($_) . "(?: (?= [\\w\\s] | $quote_left ) | \\z )" } |
106 | @math_op_keywords |
01dd4e4f |
107 | ); |
b7b0f832 |
108 | $math_re = qr/$math_re/x; |
0769ac0e |
109 | |
# Accessor exposing the compiled math-operator regex (used by
# _parenthesis_unroll() through its fully qualified name)
110 | sub _math_op_re { $math_re } |
111 | |
112 | |
# Keyword operators (optionally preceded by NOT), AS, the math operators
# above, and IS / IS NOT are all combined into a single alternation.
113 | my $binary_op_re = '(?: NOT \s+)? (?:' . join ('|', qw/IN BETWEEN R?LIKE/) . ')'; |
b3b79607 |
114 | $binary_op_re = join "\n\t|\n", |
6c4d8eb8 |
115 | "$op_look_behind (?i: $binary_op_re | AS ) $op_look_ahead", |
b3b79607 |
116 | $math_re, |
117 | $op_look_behind . 'IS (?:\s+ NOT)?' . "(?= \\s+ NULL \\b | $op_look_ahead )", |
118 | ; |
b7b0f832 |
119 | $binary_op_re = qr/$binary_op_re/x; |
0769ac0e |
120 | |
# Accessor exposing the compiled binary-operator regex
121 | sub _binary_op_re { $binary_op_re } |
122 | |
b3b79607 |
# Everything the tokenizer recognises as a token of its own: expression
# starters, binary operators, AND/OR/NOT/*, the comma and both parentheses,
# and bind placeholders.
123 | my $all_known_re = join("\n\t|\n", |
124 | $expr_start_re, |
0769ac0e |
125 | $binary_op_re, |
257ecc8a |
126 | "$op_look_behind (?i: AND|OR|NOT|\\* ) $op_look_ahead", |
127 | (map { quotemeta $_ } qw/, ( )/), |
4e914a7c |
128 | $placeholder_re, |
0769ac0e |
129 | ); |
01dd4e4f |
130 | |
b3b79607 |
131 | $all_known_re = qr/$all_known_re/x; |
132 | |
133 | # this one *is* capturing for the split below |
134 | # splits on whitespace if all else fails |
135 | my $tokenizer_re = qr/ \s* ( $all_known_re ) \s* | \s+ /x; |
136 | |
137 | # Parser states for _recurse_parse() |
# Each state selects which lookahead token ends the current sub-expression:
#   PARSE_TOP_LEVEL - only running out of tokens
#   PARSE_IN_EXPR   - anything matching $expr_term_re
#   PARSE_IN_PARENS - a closing parenthesis
#   PARSE_IN_FUNC   - anything NOT matching $func_start_re
#   PARSE_RHS       - anything matching $rhs_term_re
138 | use constant PARSE_TOP_LEVEL => 0; |
139 | use constant PARSE_IN_EXPR => 1; |
140 | use constant PARSE_IN_PARENS => 2; |
141 | use constant PARSE_IN_FUNC => 3; |
142 | use constant PARSE_RHS => 4; |
143 | |
# Whole-token matchers used for the lookahead checks described above
144 | my $expr_term_re = qr/ ^ (?: $expr_start_re | \) ) $/x; |
145 | my $rhs_term_re = qr/ ^ (?: $expr_term_re | $binary_op_re | (?i: AND | OR | NOT | \, ) ) $/x; |
4e914a7c |
146 | my $func_start_re = qr/^ (?: \* | $placeholder_re | \( ) $/x; |
01dd4e4f |
147 | |
7e5600e9 |
# Default indentmap shared by the console, console_monochrome and html
# profiles. Keys are lower-cased keywords; pad_keyword() puts any keyword
# listed here on a new line, indented by (current depth + value) levels.
148 | my %indents = ( |
7853a177 |
149 | select => 0, |
150 | update => 0, |
151 | 'insert into' => 0, |
152 | 'delete from' => 0, |
3d910890 |
153 | from => 1, |
91916220 |
154 | where => 0, |
7853a177 |
155 | join => 1, |
156 | 'left join' => 1, |
157 | on => 2, |
2867f4f5 |
158 | having => 0, |
91916220 |
159 | 'group by' => 0, |
160 | 'order by' => 0, |
7853a177 |
161 | set => 1, |
162 | into => 1, |
91916220 |
163 | values => 1, |
c0eaa9fd |
164 | limit => 1, |
165 | offset => 1, |
166 | skip => 1, |
167 | first => 1, |
7e5600e9 |
168 | ); |
169 | |
75c3a063 |
# Predefined option sets selectable through new({ profile => ... }).
# Each value is a hashref of the accessor names generated at the top of the
# file; new() merges the caller's own arguments over the chosen profile.
170 | my %profiles = ( |
171 | console => { |
84c65032 |
172 | fill_in_placeholders => 1, |
9d11f0d4 |
173 | placeholder_surround => ['?/', ''], |
1536de15 |
174 | indent_string => ' ', |
75c3a063 |
175 | indent_amount => 2, |
1536de15 |
176 | newline => "\n", |
3be357b0 |
177 | colormap => {}, |
6d388c84 |
178 | indentmap => \%indents, |
aafbf833 |
179 | |
# When Term::ANSIColor can be loaded, the list returned by the do-block
# below adds placeholder_surround and colormap again; being later in the
# hash constructor, these replace the plain defaults above.
180 | eval { require Term::ANSIColor } |
181 | ? do { |
182 | my $c = \&Term::ANSIColor::color; |
6d388c84 |
183 | |
184 | my $red = [$c->('red') , $c->('reset')]; |
185 | my $cyan = [$c->('cyan') , $c->('reset')]; |
186 | my $green = [$c->('green') , $c->('reset')]; |
187 | my $yellow = [$c->('yellow') , $c->('reset')]; |
188 | my $blue = [$c->('blue') , $c->('reset')]; |
189 | my $magenta = [$c->('magenta'), $c->('reset')]; |
190 | my $b_o_w = [$c->('black on_white'), $c->('reset')]; |
aafbf833 |
191 | ( |
fb98df48 |
192 | placeholder_surround => [$c->('black on_magenta'), $c->('reset')], |
aafbf833 |
193 | colormap => { |
6d388c84 |
194 | 'begin work' => $b_o_w, |
195 | commit => $b_o_w, |
196 | rollback => $b_o_w, |
197 | savepoint => $b_o_w, |
198 | 'rollback to savepoint' => $b_o_w, |
199 | 'release savepoint' => $b_o_w, |
200 | |
201 | select => $red, |
202 | 'insert into' => $red, |
203 | update => $red, |
204 | 'delete from' => $red, |
205 | |
206 | set => $cyan, |
207 | from => $cyan, |
208 | |
209 | where => $green, |
210 | values => $yellow, |
211 | |
212 | join => $magenta, |
213 | 'left join' => $magenta, |
214 | on => $blue, |
215 | |
216 | 'group by' => $yellow, |
2867f4f5 |
217 | having => $yellow, |
6d388c84 |
218 | 'order by' => $yellow, |
219 | |
220 | skip => $green, |
221 | first => $green, |
222 | limit => $green, |
223 | offset => $green, |
aafbf833 |
224 | } |
225 | ); |
226 | } : (), |
3be357b0 |
227 | }, |
# Same layout as console, but never colorized
228 | console_monochrome => { |
84c65032 |
229 | fill_in_placeholders => 1, |
9d11f0d4 |
230 | placeholder_surround => ['?/', ''], |
3be357b0 |
231 | indent_string => ' ', |
232 | indent_amount => 2, |
233 | newline => "\n", |
234 | colormap => {}, |
6d388c84 |
235 | indentmap => \%indents, |
7e5600e9 |
236 | }, |
# Keywords and placeholders are wrapped in <span> elements carrying a CSS class
237 | html => { |
84c65032 |
238 | fill_in_placeholders => 1, |
9d11f0d4 |
239 | placeholder_surround => ['<span class="placeholder">', '</span>'], |
7e5600e9 |
240 | indent_string => ' ', |
241 | indent_amount => 2, |
242 | newline => "<br />\n", |
243 | colormap => { |
7853a177 |
244 | select => ['<span class="select">' , '</span>'], |
245 | 'insert into' => ['<span class="insert-into">' , '</span>'], |
# NOTE(review): 'update' reuses the "select" CSS class - possibly a
# copy/paste slip; confirm before changing, stylesheets may rely on it
246 | update => ['<span class="select">' , '</span>'], |
247 | 'delete from' => ['<span class="delete-from">' , '</span>'], |
c0eaa9fd |
248 | |
249 | set => ['<span class="set">', '</span>'], |
7853a177 |
250 | from => ['<span class="from">' , '</span>'], |
c0eaa9fd |
251 | |
252 | where => ['<span class="where">' , '</span>'], |
253 | values => ['<span class="values">', '</span>'], |
254 | |
7853a177 |
255 | join => ['<span class="join">' , '</span>'], |
c0eaa9fd |
256 | 'left join' => ['<span class="left-join">','</span>'], |
7853a177 |
257 | on => ['<span class="on">' , '</span>'], |
c0eaa9fd |
258 | |
7853a177 |
259 | 'group by' => ['<span class="group-by">', '</span>'], |
2867f4f5 |
260 | having => ['<span class="having">', '</span>'], |
7853a177 |
261 | 'order by' => ['<span class="order-by">', '</span>'], |
c0eaa9fd |
262 | |
263 | skip => ['<span class="skip">', '</span>'], |
264 | first => ['<span class="first">', '</span>'], |
265 | limit => ['<span class="limit">', '</span>'], |
266 | offset => ['<span class="offset">', '</span>'], |
820bb1f5 |
267 | |
268 | 'begin work' => ['<span class="begin-work">', '</span>'], |
269 | commit => ['<span class="commit">', '</span>'], |
270 | rollback => ['<span class="rollback">', '</span>'], |
271 | savepoint => ['<span class="savepoint">', '</span>'], |
272 | 'rollback to savepoint' => ['<span class="rollback-to-savepoint">', '</span>'], |
273 | 'release savepoint' => ['<span class="release-savepoint">', '</span>'], |
1536de15 |
274 | }, |
6d388c84 |
275 | indentmap => \%indents, |
75c3a063 |
276 | }, |
# The default profile: no colors and no keyword indentation
277 | none => { |
1536de15 |
278 | colormap => {}, |
279 | indentmap => {}, |
75c3a063 |
280 | }, |
281 | ); |
282 | |
# Constructor.
#
# Takes an optional hashref of options. The 'profile' key names one of the
# entries in %profiles (default: 'none'); every other key is merged over the
# chosen profile, the caller's values taking precedence, and the merged
# hash is blessed into $class.
#
# Croaks if the requested profile does not exist.
sub new {
  my $class = shift;

  # Work on a shallow copy so that removing 'profile' below does not
  # modify the hashref the caller handed us.
  my %args = %{ shift || {} };

  my $profile = delete $args{profile} || 'none';

  croak "No such profile '$profile'!" unless exists $profiles{$profile};

  my $data = $merger->merge( $profiles{$profile}, \%args );

  bless $data, $class
}
d695b0ad |
295 | |
01dd4e4f |
# Tokenize an SQL string and hand the tokens to the recursive parser,
# starting in the top-level state. Returns whatever _recurse_parse() returns.
sub parse {
  my ($self, $s) = @_;

  # split() on the capturing tokenizer regex yields the recognised tokens
  # as well as undefined, empty and whitespace-only fragments; keep only
  # the pieces that carry actual content
  my @tokens = grep {
    defined $_ and length $_ and $_ =~ /\S/
  } split $tokenizer_re, $s;

  $self->_recurse_parse(\@tokens, PARSE_TOP_LEVEL);
}
312 | |
0f9a26cb |
# Recursive core of parse().
#
#   $tokens - arrayref of tokens; consumed destructively (shift) while parsing
#   $state  - one of the PARSE_* constants, deciding which lookahead token
#             ends the current sub-expression
#
# Returns the tree built so far ($left), or an empty list when nothing was
# parsed. Operator nodes are arrayrefs of the form [ OP => [ args ] ]; two
# adjacent sub-trees without a joining operator are paired as [ left, right ].
313 | { |
314 | # this is temporary, lists can be parsed *without* recursing, but |
315 | # it requires a massive rewrite of the AST generator |
316 | no warnings qw/recursion/; |
01dd4e4f |
317 | sub _recurse_parse { |
d695b0ad |
318 | my ($self, $tokens, $state) = @_; |
01dd4e4f |
319 | |
320 | my $left; |
321 | while (1) { # left-associative parsing |
322 | |
# Peek at the next token without consuming it: when it terminates the
# current state, it is left in place for the caller to deal with
323 | my $lookahead = $tokens->[0]; |
324 | if ( not defined($lookahead) |
325 | or |
326 | ($state == PARSE_IN_PARENS && $lookahead eq ')') |
327 | or |
b3b79607 |
328 | ($state == PARSE_IN_EXPR && $lookahead =~ $expr_term_re ) |
0769ac0e |
329 | or |
b3b79607 |
330 | ($state == PARSE_RHS && $lookahead =~ $rhs_term_re ) |
01dd4e4f |
331 | or |
b3b79607 |
332 | ($state == PARSE_IN_FUNC && $lookahead !~ $func_start_re) # if there are multiple values - the parenthesis will switch the $state |
01dd4e4f |
333 | ) { |
0769ac0e |
334 | return $left||(); |
01dd4e4f |
335 | } |
336 | |
337 | my $token = shift @$tokens; |
338 | |
339 | # nested expression in () |
340 | if ($token eq '(' ) { |
d695b0ad |
341 | my $right = $self->_recurse_parse($tokens, PARSE_IN_PARENS); |
342 | $token = shift @$tokens or croak "missing closing ')' around block " . $self->unparse($right); |
343 | $token eq ')' or croak "unexpected token '$token' terminating block " . $self->unparse($right); |
01dd4e4f |
344 | |
0769ac0e |
345 | $left = $left ? [$left, [PAREN => [$right||()] ]] |
346 | : [PAREN => [$right||()] ]; |
01dd4e4f |
347 | } |
b3b79607 |
348 | # AND/OR and LIST (,) |
349 | elsif ($token =~ /^ (?: OR | AND | \, ) $/xi ) { |
350 | my $op = ($token eq ',') ? 'LIST' : uc $token; |
351 | |
7cc47319 |
352 | my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR) || []; |
01dd4e4f |
353 | |
354 | # Merge chunks if logic matches |
7cc47319 |
355 | if (ref $right and @$right and $op eq $right->[0]) { |
356 | $left = [ (shift @$right ), [$left||[], map { @$_ } @$right] ]; |
01dd4e4f |
357 | } |
358 | else { |
7cc47319 |
359 | $left = [$op => [ $left||[], $right ]]; |
01dd4e4f |
360 | } |
361 | } |
362 | # binary operator keywords |
a1e204f4 |
363 | elsif ( $token =~ /^ $binary_op_re $ /x ) { |
01dd4e4f |
364 | my $op = uc $token; |
d695b0ad |
365 | my $right = $self->_recurse_parse($tokens, PARSE_RHS); |
01dd4e4f |
366 | |
367 | # A between with a simple LITERAL for a 1st RHS argument needs a |
368 | # rerun of the search to (hopefully) find the proper AND construct |
# NOTE(review): $right is dereferenced here without checking that a
# right-hand side was actually parsed - confirm BETWEEN always has one
369 | if ($op eq 'BETWEEN' and $right->[0] eq 'LITERAL') { |
370 | unshift @$tokens, $right->[1][0]; |
d695b0ad |
371 | $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
372 | } |
373 | |
374 | $left = [$op => [$left, $right] ]; |
375 | } |
376 | # expression terminator keywords (as they start a new expression) |
b3b79607 |
377 | elsif ( $token =~ / ^ $expr_start_re $ /x ) { |
01dd4e4f |
378 | my $op = uc $token; |
d695b0ad |
379 | my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
efc991a0 |
380 | $left = $left ? [ $left, [$op => [$right||()] ]] |
381 | : [ $op => [$right||()] ]; |
01dd4e4f |
382 | } |
0769ac0e |
383 | # NOT |
384 | elsif ( $token =~ /^ NOT $/ix ) { |
01dd4e4f |
385 | my $op = uc $token; |
d695b0ad |
386 | my $right = $self->_recurse_parse ($tokens, PARSE_RHS); |
af75bd59 |
# NOTE(review): unlike the sibling branches this flattens @$left into the
# new node instead of nesting $left - confirm this is intentional
387 | $left = $left ? [ @$left, [$op => [$right||()] ]] |
388 | : [ $op => [$right||()] ]; |
01dd4e4f |
389 | |
390 | } |
4e914a7c |
# bind placeholder
391 | elsif ( $token =~ $placeholder_re) { |
392 | $left = $left ? [ $left, [ PLACEHOLDER => [ $token ] ] ] |
393 | : [ PLACEHOLDER => [ $token ] ]; |
394 | } |
b3b79607 |
395 | # we're now in "unknown token" land - start eating tokens until |
396 | # we see something familiar |
01dd4e4f |
397 | else { |
b3b79607 |
398 | my $right; |
399 | |
400 | # check if the current token is an unknown op-start |
401 | if (@$tokens and $tokens->[0] =~ $func_start_re) { |
402 | $right = [ $token => [ $self->_recurse_parse($tokens, PARSE_IN_FUNC) || () ] ]; |
403 | } |
404 | else { |
405 | $right = [ LITERAL => [ $token ] ]; |
406 | } |
407 | |
408 | $left = $left ? [ $left, $right ] |
409 | : $right; |
01dd4e4f |
410 | } |
411 | } |
412 | } |
0f9a26cb |
413 | } |
01dd4e4f |
414 | |
d695b0ad |
# Wrap $keyword in the pair of strings registered for it in the colormap
# (looked up by its lower-cased form). A keyword without a colormap entry
# is returned unchanged.
sub format_keyword {
  my ($self, $keyword) = @_;

  my $surround = $self->colormap->{lc $keyword}
    or return $keyword;

  return join '', $surround->[0], $keyword, $surround->[1];
}
424 | |
728f26a2 |
# Statement-opening keywords. At depth 0 these get no leading newline or
# indentation from pad_keyword().
my %starters = (
  select        => 1,
  update        => 1,
  'insert into' => 1,
  'delete from' => 1,
);

# Compute the whitespace to place around a keyword.
#
#   $keyword - the keyword being emitted (looked up case-insensitively)
#   $depth   - current nesting depth; optional, defaults to 0
#
# Returns an arrayref [ $before, $after ]. $before is the newline string
# followed by ($depth + indentmap entry) levels of indentation when the
# keyword is listed in the indentmap; it is '' otherwise, and also for a
# statement starter at depth 0. $after is always ''.
sub pad_keyword {
  my ($self, $keyword, $depth) = @_;

  # The documented call form omits $depth; treat that as the top level
  # instead of doing arithmetic and comparisons on undef.
  $depth = 0 unless defined $depth;

  my $key = lc $keyword;

  my $before = '';
  if (defined $self->indentmap->{$key}) {
    $before = $self->newline . $self->indent($depth + $self->indentmap->{$key});
  }
  $before = '' if $depth == 0 and defined $starters{$key};

  return [$before, ''];
}
442 | |
1536de15 |
# Build the indentation for a nesting depth: indent_string repeated
# (indent_amount * depth) times. An unset indent_string counts as '' and an
# unset indent_amount as 0.
sub indent {
  my ($self, $depth) = @_;

  my $unit  = $self->indent_string || '';
  my $width = $self->indent_amount || 0;

  # the parentheses around the left operand are deliberate: in list context
  # this is a list repetition, in scalar context a string repetition
  return ($unit) x ( $width * $depth );
}
a24cc3a0 |
444 | |
a97eb57c |
# True when the leftmost leaf of $tree is a keyword listed in the indentmap.
sub _is_key {
  my ($self, $tree) = @_;

  # follow the first element of each nested arrayref until a plain scalar
  # (or undef) is reached
  while (ref $tree) {
    $tree = $tree->[0];
  }

  return defined $tree && defined $self->indentmap->{lc $tree};
}
451 | |
9d11f0d4 |
# Produce the text that replaces one bind placeholder in the output.
#
#   $bindargs - arrayref of bind values; the first one is removed (shift)
#
# When fill_in_placeholders is off, returns '?' and leaves $bindargs alone.
# Otherwise the value has backslashes and single quotes escaped; if it
# arrived wrapped in matching single or double quotes it is re-wrapped in
# single quotes; the result is returned between the two
# placeholder_surround strings. A missing value is rendered as ''.
sub fill_in_placeholder {
  my ($self, $bindargs) = @_;

  return '?' unless $self->fill_in_placeholders;

  # Only a missing value becomes ''; defined-but-false binds such as 0
  # must be shown as they are.
  my $val = shift @{$bindargs};
  $val = '' unless defined $val;

  # strip one pair of matching outer quotes, remembering that we did
  my $quoted = $val =~ s/^(['"])(.*)\1$/$2/;
  my ($left, $right) = @{$self->placeholder_surround};
  $val =~ s/\\/\\\\/g;
  $val =~ s/'/\\'/g;
  $val = qq('$val') if $quoted;

  return qq($left$val$right);
}
466 | |
3a247d23 |
467 | # FIXME - terrible name for a user facing API |
01dd4e4f |
# Turn a parsed tree back into SQL text, starting at depth 0.
#
#   $tree     - tree as produced by parse()
#   $bindargs - optional arrayref of bind values
sub unparse {
  my ($self, $tree, $bindargs) = @_;

  # bind values are shifted off while placeholders are filled in, so work
  # on a private copy and leave the caller's array untouched
  my @binds = $bindargs ? @$bindargs : ();

  return $self->_unparse($tree, \@binds, 0);
}
a24cc3a0 |
472 | |
3a247d23 |
# Recursive worker behind unparse().
#
#   $tree     - node to render; a false or empty node renders as ''
#   $bindargs - arrayref of bind values, consumed as placeholders are filled
#   $depth    - current nesting depth, used for keyword indentation
#
# Returns the SQL text for $tree. Confesses (with a dump of the node) when
# the node has no first element, or a non-reference first element without
# a second one.
473 | sub _unparse { |
474 | my ($self, $tree, $bindargs, $depth) = @_; |
01dd4e4f |
475 | |
0769ac0e |
476 | if (not $tree or not @$tree) { |
01dd4e4f |
477 | return ''; |
478 | } |
a24cc3a0 |
479 | |
007f0853 |
480 | # FIXME - needs a config switch to disable |
# note: this rewrites $tree in place before it is rendered
c01ac648 |
481 | $self->_parenthesis_unroll($tree); |
007f0853 |
482 | |
0769ac0e |
483 | my ($car, $cdr) = @{$tree}[0,1]; |
484 | |
485 | if (! defined $car or (! ref $car and ! defined $cdr) ) { |
486 | require Data::Dumper; |
487 | Carp::confess( sprintf ( "Internal error - malformed branch at depth $depth:\n%s", |
488 | Data::Dumper::Dumper($tree) |
489 | ) ); |
490 | } |
a24cc3a0 |
491 | |
# a list of sub-trees rather than an [ OP => args ] node: render each
# element and join them with single spaces
492 | if (ref $car) { |
3a247d23 |
493 | return join (' ', map $self->_unparse($_, $bindargs, $depth), @$tree); |
01dd4e4f |
494 | } |
a24cc3a0 |
495 | elsif ($car eq 'LITERAL') { |
496 | return $cdr->[0]; |
01dd4e4f |
497 | } |
4e914a7c |
498 | elsif ($car eq 'PLACEHOLDER') { |
499 | return $self->fill_in_placeholder($bindargs); |
500 | } |
a24cc3a0 |
501 | elsif ($car eq 'PAREN') { |
# contents are rendered two levels deeper; when they start with an
# indentmap keyword the closing parenthesis is put on its own line
c4d7cfcf |
502 | return sprintf ('( %s )', |
e4570c8e |
503 | join (' ', map { $self->_unparse($_, $bindargs, $depth + 2) } @{$cdr} ) |
504 | . |
505 | ($self->_is_key($cdr) |
506 | ? ( $self->newline||'' ) . $self->indent($depth + 1) |
507 | : '' |
508 | ) |
509 | ); |
01dd4e4f |
510 | } |
0769ac0e |
511 | elsif ($car eq 'AND' or $car eq 'OR' or $car =~ / ^ $binary_op_re $ /x ) { |
3a247d23 |
512 | return join (" $car ", map $self->_unparse($_, $bindargs, $depth), @{$cdr}); |
01dd4e4f |
513 | } |
b3b79607 |
514 | elsif ($car eq 'LIST' ) { |
3a247d23 |
515 | return join (', ', map $self->_unparse($_, $bindargs, $depth), @{$cdr}); |
b3b79607 |
516 | } |
01dd4e4f |
517 | else { |
# any other keyword or function name: padding, the (possibly colorized)
# keyword, a separating space unless the argument is a PAREN, the argument
f2ab166a |
518 | my ($l, $r) = @{$self->pad_keyword($car, $depth)}; |
c4d7cfcf |
519 | |
520 | return sprintf "$l%s%s%s$r", |
521 | $self->format_keyword($car), |
522 | ( ref $cdr eq 'ARRAY' and ref $cdr->[0] eq 'ARRAY' and $cdr->[0][0] and $cdr->[0][0] eq 'PAREN' ) |
523 | ? '' # mysql-- |
524 | : ' ' |
525 | , |
526 | $self->_unparse($cdr, $bindargs, $depth), |
527 | ; |
01dd4e4f |
528 | } |
529 | } |
530 | |
bb54fcb4 |
# All of these keywords allow their parameters to be specified with or
# without parentheses without changing the semantics.
# Each entry is an extended-syntax (/x) regex fragment that has to match the
# *whole* node name.
my @unrollable_ops = (
  'ON',
  'WHERE',
  'GROUP \s+ BY',
  'HAVING',
  'ORDER \s+ BY',
  # every LIKE flavour the unanchored pattern used to accept
  '(?: NOT \s+ )? [RI]? LIKE',
);
my $unrollable_ops_re = join ' | ', @unrollable_ops;

# Anchor the alternation as a whole: unanchored, names that merely contain
# one of the keywords (CONCAT, CONVERT and UNION all contain "ON") matched
# too and had the parentheses around their arguments removed.
$unrollable_ops_re = qr/ \A (?: $unrollable_ops_re ) \z /xi;
542 | |
# Remove parentheses that do not change the meaning of the expression.
#
#   $ast - an [ OP => [ children ] ] node; modified in place
#
# Only the direct children of $ast are examined. Each PAREN child is either
# replaced by its content (or, for a matching AND/OR, by that node's
# arguments) or kept as is; passes are repeated until one makes no change.
# Nodes that are not references, or whose second element is not a
# reference, are left alone.
543 | sub _parenthesis_unroll { |
544 | my $self = shift; |
545 | my $ast = shift; |
546 | |
bb54fcb4 |
547 | return unless (ref $ast and ref $ast->[1]); |
548 | |
549 | my $changes; |
550 | do { |
551 | my @children; |
552 | $changes = 0; |
553 | |
554 | for my $child (@{$ast->[1]}) { |
007f0853 |
555 | |
bb54fcb4 |
556 | # the current node in this loop is *always* a PAREN |
d282bb50 |
557 | if (! ref $child or ! @$child or $child->[0] ne 'PAREN') { |
bb54fcb4 |
558 | push @children, $child; |
559 | next; |
560 | } |
561 | |
562 | # unroll nested parenthesis |
563 | while ( @{$child->[1]} && $child->[1][0][0] eq 'PAREN') { |
564 | $child = $child->[1][0]; |
565 | $changes++; |
566 | } |
567 | |
568 | # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list |
# NOTE(review): from here on $child->[1][0][0] is read without checking
# that the PAREN has any content, which autovivifies $child->[1][0] for
# empty parentheses - confirm an empty PAREN cannot reach this point
569 | if ( |
570 | ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR') |
571 | and |
572 | $child->[1][0][0] eq $ast->[0] |
573 | ) { |
574 | push @children, @{$child->[1][0][1]}; |
575 | $changes++; |
576 | } |
577 | |
578 | # if the parent operator explicitly allows it nuke the parenthesis |
579 | elsif ( $ast->[0] =~ $unrollable_ops_re ) { |
580 | push @children, $child->[1][0]; |
581 | $changes++; |
582 | } |
583 | |
584 | # only *ONE* LITERAL or placeholder element |
6e9a377b |
585 | # as an AND/OR/NOT argument |
bb54fcb4 |
586 | elsif ( |
587 | @{$child->[1]} == 1 && ( |
588 | $child->[1][0][0] eq 'LITERAL' |
589 | or |
590 | $child->[1][0][0] eq 'PLACEHOLDER' |
6e9a377b |
591 | ) && ( |
592 | $ast->[0] eq 'AND' or $ast->[0] eq 'OR' or $ast->[0] eq 'NOT' |
bb54fcb4 |
593 | ) |
594 | ) { |
595 | push @children, $child->[1][0]; |
596 | $changes++; |
597 | } |
598 | |
007f0853 |
599 | # an AND/OR expression with only one binop in the parenthesis |
600 | # with exactly two grandchildren |
bb54fcb4 |
601 | # the only time when we can *not* unroll this is when both |
602 | # the parent and the child are mathops (in which case we'll |
603 | # break precedence) or when the child is BETWEEN (special |
604 | # case) |
605 | elsif ( |
606 | @{$child->[1]} == 1 |
607 | and |
007f0853 |
608 | ($ast->[0] eq 'AND' or $ast->[0] eq 'OR') |
609 | and |
bb54fcb4 |
610 | $child->[1][0][0] =~ SQL::Abstract::Tree::_binary_op_re() |
611 | and |
612 | $child->[1][0][0] ne 'BETWEEN' |
613 | and |
614 | @{$child->[1][0][1]} == 2 |
615 | and |
616 | ! ( |
617 | $child->[1][0][0] =~ SQL::Abstract::Tree::_math_op_re() |
618 | and |
619 | $ast->[0] =~ SQL::Abstract::Tree::_math_op_re() |
620 | ) |
621 | ) { |
622 | push @children, $child->[1][0]; |
623 | $changes++; |
624 | } |
625 | |
626 | # a function binds tighter than a mathop - see if our ancestor is a |
627 | # mathop, and our content is: |
628 | # a single non-mathop child with a single PAREN grandchild which |
629 | # would indicate mathop ( nonmathop ( ... ) ) |
630 | # or a single non-mathop with a single LITERAL ( nonmathop foo ) |
631 | # or a single non-mathop with a single PLACEHOLDER ( nonmathop ? ) |
632 | elsif ( |
633 | @{$child->[1]} == 1 |
634 | and |
635 | @{$child->[1][0][1]} == 1 |
636 | and |
637 | $ast->[0] =~ SQL::Abstract::Tree::_math_op_re() |
638 | and |
639 | $child->[1][0][0] !~ SQL::Abstract::Tree::_math_op_re |
640 | and |
641 | ( |
642 | $child->[1][0][1][0][0] eq 'PAREN' |
643 | or |
644 | $child->[1][0][1][0][0] eq 'LITERAL' |
645 | or |
646 | $child->[1][0][1][0][0] eq 'PLACEHOLDER' |
647 | ) |
648 | ) { |
649 | push @children, $child->[1][0]; |
650 | $changes++; |
651 | } |
652 | |
653 | |
654 | # otherwise no more mucking for this pass |
655 | else { |
656 | push @children, $child; |
657 | } |
658 | } |
659 | |
# install the rewritten child list; loop again if anything was unrolled
660 | $ast->[1] = \@children; |
661 | |
662 | } while ($changes); |
663 | |
664 | } |
665 | |
fb272e73 |
# Convenience wrapper: parse an SQL string and immediately unparse the
# resulting tree.
#
#   $sql      - the SQL text to reformat
#   $bindargs - optional arrayref of bind values for the placeholders
#
# Returns the formatted SQL string.
sub format {
  my ($self, $sql, $bindargs) = @_;

  # Force scalar context: parse() returns an empty list when there is
  # nothing to parse, and inside an argument list that would shift
  # $bindargs into the tree position of unparse().
  my $tree = $self->parse($sql);

  return $self->unparse($tree, $bindargs);
}
01dd4e4f |
667 | |
668 | 1; |
669 | |
3be357b0 |
670 | =pod |
671 | |
b912ee1e |
672 | =head1 NAME |
673 | |
674 | SQL::Abstract::Tree - Represent SQL as an AST |
675 | |
3be357b0 |
676 | =head1 SYNOPSIS |
677 | |
678 | my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
679 | |
680 | print $sqla_tree->format('SELECT * FROM foo WHERE foo.a > 2'); |
681 | |
682 | # SELECT * |
683 | # FROM foo |
684 | # WHERE foo.a > 2 |
685 | |
6b1bf9f8 |
686 | =head1 METHODS |
687 | |
688 | =head2 new |
689 | |
690 | my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
691 | |
c22f502d |
692 | $args = { |
693 | profile => 'console', # predefined profile to use (default: 'none') |
694 | fill_in_placeholders => 1, # true for placeholder population |
9d11f0d4 |
695 | placeholder_surround => # The strings that will be wrapped around |
696 | [GREEN, RESET], # populated placeholders if the above is set |
c22f502d |
697 | indent_string => ' ', # the string used when indenting |
698 | indent_amount => 2, # how many of above string to use for a single |
699 | # indent level |
700 | newline => "\n", # string for newline |
701 | colormap => { |
702 | select => [RED, RESET], # a pair of strings defining what to surround |
703 | # the keyword with for colorization |
704 | # ... |
705 | }, |
706 | indentmap => { |
707 | select => 0, # A zero means that the keyword will start on |
708 | # a new line |
709 | from => 1, # Any other positive integer means that after |
710 | on => 2, # said newline it will get that many indents |
711 | # ... |
712 | }, |
713 | } |
714 | |
715 | Returns a new SQL::Abstract::Tree object. All arguments are optional. |
716 | |
717 | =head3 profiles |
718 | |
719 | There are four predefined profiles, C<none>, C<console>, C<console_monochrome>, |
720 | and C<html>. Typically a user will probably just use C<console> or |
721 | C<console_monochrome>, but if something about a profile bothers you, merely |
722 | use the profile and override the parts that you don't like. |
723 | |
6b1bf9f8 |
724 | =head2 format |
725 | |
c22f502d |
726 | $sqlat->format('SELECT * FROM bar WHERE x = ?', [1]) |
727 | |
728 | Takes C<$sql> and C<\@bindargs>. |
6b1bf9f8 |
729 | |
1a3cc911 |
730 | Returns a formatted version of the SQL string passed in. |
ee4227a7 |
731 | |
732 | =head2 parse |
733 | |
734 | $sqlat->parse('SELECT * FROM bar WHERE x = ?') |
735 | |
736 | Returns a "tree" representing passed in SQL. Please do not depend on the |
737 | structure of the returned tree. It may be stable at some point, but not yet. |
738 | |
739 | =head2 unparse |
740 | |
741 | $sqlat->unparse($tree_structure, \@bindargs) |
742 | |
743 | Transform "tree" into SQL, applying various transforms on the way. |
744 | |
745 | =head2 format_keyword |
746 | |
747 | $sqlat->format_keyword('SELECT') |
748 | |
749 | Currently this just takes a keyword and puts the C<colormap> stuff around it. |
750 | Later on it may do more and allow for coderef based transforms. |
751 | |
f2ab166a |
752 | =head2 pad_keyword |
ee4227a7 |
753 | |
f2ab166a |
754 | my ($before, $after) = @{$sqlat->pad_keyword('SELECT')}; |
ee4227a7 |
755 | |
756 | Returns whitespace to be inserted around a keyword. |
9d11f0d4 |
757 | |
758 | =head2 fill_in_placeholder |
759 | |
760 | my $value = $sqlat->fill_in_placeholder(\@bindargs) |
761 | |
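762 | Removes the first value from the passed arrayref, escapes backslashes and single quotes in it, re-wraps it in single quotes if it arrived quoted, |
763 | and returns it surrounded with the values in C<placeholder_surround>. Returns C<?> without touching the arrayref if C<fill_in_placeholders> is not set. |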
f2ab166a |
764 | |
765 | =head2 indent |
766 | |
767 | Returns C<indent_string> repeated C<indent_amount> times the first argument (the nesting depth). |
768 | |
769 | =head1 ACCESSORS |
770 | |
771 | =head2 colormap |
772 | |
773 | See L</new> |
774 | |
775 | =head2 fill_in_placeholders |
776 | |
777 | See L</new> |
778 | |
779 | =head2 indent_amount |
780 | |
781 | See L</new> |
782 | |
783 | =head2 indent_string |
784 | |
785 | See L</new> |
786 | |
787 | =head2 indentmap |
788 | |
789 | See L</new> |
790 | |
791 | =head2 newline |
792 | |
793 | See L</new> |
794 | |
795 | =head2 placeholder_surround |
796 | |
797 | See L</new> |
798 | |