Commit | Line | Data |
01dd4e4f |
1 | package SQL::Abstract::Tree; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Carp; |
6 | |
a97eb57c |
7 | use List::Util; |
bc482085 |
8 | use Hash::Merge; |
2fed0b4b |
9 | |
bc482085 |
10 | my $merger = Hash::Merge->new; |
11 | |
12 | $merger->specify_behavior({ |
2fed0b4b |
13 | SCALAR => { |
14 | SCALAR => sub { $_[1] }, |
15 | ARRAY => sub { [ $_[0], @{$_[1]} ] }, |
16 | HASH => sub { $_[1] }, |
17 | }, |
18 | ARRAY => { |
19 | SCALAR => sub { $_[1] }, |
20 | ARRAY => sub { $_[1] }, |
21 | HASH => sub { $_[1] }, |
22 | }, |
23 | HASH => { |
24 | SCALAR => sub { $_[1] }, |
25 | ARRAY => sub { [ values %{$_[0]}, @{$_[1]} ] }, |
26 | HASH => sub { Hash::Merge::_merge_hashes( $_[0], $_[1] ) }, |
27 | }, |
28 | }, 'My Behavior' ); |
1536de15 |
29 | |
1536de15 |
30 | use base 'Class::Accessor::Grouped'; |
31 | |
32 | __PACKAGE__->mk_group_accessors( simple => $_ ) for qw( |
fb272e73 |
33 | newline indent_string indent_amount colormap indentmap fill_in_placeholders |
9d11f0d4 |
34 | placeholder_surround |
1536de15 |
35 | ); |
36 | |
01dd4e4f |
37 | # Parser states for _recurse_parse() |
38 | use constant PARSE_TOP_LEVEL => 0; |
39 | use constant PARSE_IN_EXPR => 1; |
40 | use constant PARSE_IN_PARENS => 2; |
41 | use constant PARSE_RHS => 3; |
42 | |
43 | # These SQL keywords always signal end of the current expression (except inside |
44 | # of a parenthesized subexpression). |
45 | # Format: A list of strings that will be compiled to extended syntax (ie. |
46 | # /.../x) regexes, without capturing parentheses. They will be automatically |
47 | # anchored to word boundaries to match the whole token). |
48 | my @expression_terminator_sql_keywords = ( |
49 | 'SELECT', |
7853a177 |
50 | 'UPDATE', |
51 | 'INSERT \s+ INTO', |
52 | 'DELETE \s+ FROM', |
3d910890 |
53 | 'FROM', |
7853a177 |
54 | 'SET', |
01dd4e4f |
55 | '(?: |
56 | (?: |
57 | (?: \b (?: LEFT | RIGHT | FULL ) \s+ )? |
58 | (?: \b (?: CROSS | INNER | OUTER ) \s+ )? |
59 | )? |
60 | JOIN |
61 | )', |
62 | 'ON', |
63 | 'WHERE', |
7853a177 |
64 | 'VALUES', |
01dd4e4f |
65 | 'EXISTS', |
66 | 'GROUP \s+ BY', |
67 | 'HAVING', |
68 | 'ORDER \s+ BY', |
69 | 'LIMIT', |
70 | 'OFFSET', |
71 | 'FOR', |
72 | 'UNION', |
73 | 'INTERSECT', |
74 | 'EXCEPT', |
75 | 'RETURNING', |
8d0dd7dc |
76 | 'ROW_NUMBER \s* \( \s* \) \s+ OVER', |
01dd4e4f |
77 | ); |
78 | |
79 | # These are binary operator keywords always a single LHS and RHS |
80 | # * AND/OR are handled separately as they are N-ary |
81 | # * so is NOT as being unary |
82 | # * BETWEEN without paranthesis around the ANDed arguments (which |
83 | # makes it a non-binary op) is detected and accomodated in |
84 | # _recurse_parse() |
85 | my $stuff_around_mathops = qr/[\w\s\`\'\"\)]/; |
86 | my @binary_op_keywords = ( |
87 | ( map |
88 | { |
89 | ' ^ ' . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ", |
90 | " (?<= $stuff_around_mathops)" . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ", |
91 | } |
92 | (qw/< > != <> = <= >=/) |
93 | ), |
94 | ( map |
95 | { '\b (?: NOT \s+)?' . $_ . '\b' } |
96 | (qw/IN BETWEEN LIKE/) |
97 | ), |
98 | ); |
99 | |
100 | my $tokenizer_re_str = join("\n\t|\n", |
101 | ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR', 'NOT'), |
102 | @binary_op_keywords, |
103 | ); |
104 | |
105 | my $tokenizer_re = qr/ \s* ( $tokenizer_re_str | \( | \) | \? ) \s* /xi; |
106 | |
107 | sub _binary_op_keywords { @binary_op_keywords } |
108 | |
7e5600e9 |
109 | my %indents = ( |
7853a177 |
110 | select => 0, |
111 | update => 0, |
112 | 'insert into' => 0, |
113 | 'delete from' => 0, |
3d910890 |
114 | from => 1, |
91916220 |
115 | where => 0, |
7853a177 |
116 | join => 1, |
117 | 'left join' => 1, |
118 | on => 2, |
91916220 |
119 | 'group by' => 0, |
120 | 'order by' => 0, |
7853a177 |
121 | set => 1, |
122 | into => 1, |
91916220 |
123 | values => 1, |
7e5600e9 |
124 | ); |
125 | |
75c3a063 |
126 | my %profiles = ( |
127 | console => { |
84c65032 |
128 | fill_in_placeholders => 1, |
9d11f0d4 |
129 | placeholder_surround => ['?/', ''], |
1536de15 |
130 | indent_string => ' ', |
75c3a063 |
131 | indent_amount => 2, |
1536de15 |
132 | newline => "\n", |
3be357b0 |
133 | colormap => {}, |
7e5600e9 |
134 | indentmap => { %indents }, |
3be357b0 |
135 | }, |
136 | console_monochrome => { |
84c65032 |
137 | fill_in_placeholders => 1, |
9d11f0d4 |
138 | placeholder_surround => ['?/', ''], |
3be357b0 |
139 | indent_string => ' ', |
140 | indent_amount => 2, |
141 | newline => "\n", |
142 | colormap => {}, |
7e5600e9 |
143 | indentmap => { %indents }, |
144 | }, |
145 | html => { |
84c65032 |
146 | fill_in_placeholders => 1, |
9d11f0d4 |
147 | placeholder_surround => ['<span class="placeholder">', '</span>'], |
7e5600e9 |
148 | indent_string => ' ', |
149 | indent_amount => 2, |
150 | newline => "<br />\n", |
151 | colormap => { |
7853a177 |
152 | select => ['<span class="select">' , '</span>'], |
153 | 'insert into' => ['<span class="insert-into">' , '</span>'], |
154 | update => ['<span class="select">' , '</span>'], |
155 | 'delete from' => ['<span class="delete-from">' , '</span>'], |
156 | where => ['<span class="where">' , '</span>'], |
157 | from => ['<span class="from">' , '</span>'], |
158 | join => ['<span class="join">' , '</span>'], |
159 | on => ['<span class="on">' , '</span>'], |
160 | 'group by' => ['<span class="group-by">', '</span>'], |
161 | 'order by' => ['<span class="order-by">', '</span>'], |
162 | set => ['<span class="set">', '</span>'], |
163 | into => ['<span class="into">', '</span>'], |
164 | values => ['<span class="values">', '</span>'], |
1536de15 |
165 | }, |
7e5600e9 |
166 | indentmap => { %indents }, |
75c3a063 |
167 | }, |
168 | none => { |
1536de15 |
169 | colormap => {}, |
170 | indentmap => {}, |
75c3a063 |
171 | }, |
172 | ); |
173 | |
3be357b0 |
174 | eval { |
175 | require Term::ANSIColor; |
9d11f0d4 |
176 | |
177 | $profiles{console}->{placeholder_surround} = |
178 | [Term::ANSIColor::color('black on_cyan'), Term::ANSIColor::color('reset')]; |
179 | |
3be357b0 |
180 | $profiles{console}->{colormap} = { |
7853a177 |
181 | select => [Term::ANSIColor::color('red'), Term::ANSIColor::color('reset')], |
182 | 'insert into' => [Term::ANSIColor::color('red'), Term::ANSIColor::color('reset')], |
183 | update => [Term::ANSIColor::color('red'), Term::ANSIColor::color('reset')], |
184 | 'delete from' => [Term::ANSIColor::color('red'), Term::ANSIColor::color('reset')], |
185 | |
186 | set => [Term::ANSIColor::color('cyan'), Term::ANSIColor::color('reset')], |
c1b89c4f |
187 | from => [Term::ANSIColor::color('cyan'), Term::ANSIColor::color('reset')], |
7853a177 |
188 | |
189 | where => [Term::ANSIColor::color('green'), Term::ANSIColor::color('reset')], |
190 | values => [Term::ANSIColor::color('yellow'), Term::ANSIColor::color('reset')], |
191 | |
192 | join => [Term::ANSIColor::color('magenta'), Term::ANSIColor::color('reset')], |
193 | 'left join' => [Term::ANSIColor::color('magenta'), Term::ANSIColor::color('reset')], |
194 | on => [Term::ANSIColor::color('blue'), Term::ANSIColor::color('reset')], |
195 | |
196 | 'group by' => [Term::ANSIColor::color('yellow'), Term::ANSIColor::color('reset')], |
197 | 'order by' => [Term::ANSIColor::color('yellow'), Term::ANSIColor::color('reset')], |
3be357b0 |
198 | }; |
199 | }; |
200 | |
75c3a063 |
201 | sub new { |
2fed0b4b |
202 | my $class = shift; |
203 | my $args = shift || {}; |
75c3a063 |
204 | |
205 | my $profile = delete $args->{profile} || 'none'; |
bc482085 |
206 | my $data = $merger->merge( $profiles{$profile}, $args ); |
75c3a063 |
207 | |
208 | bless $data, $class |
209 | } |
d695b0ad |
210 | |
01dd4e4f |
211 | sub parse { |
d695b0ad |
212 | my ($self, $s) = @_; |
01dd4e4f |
213 | |
214 | # tokenize string, and remove all optional whitespace |
215 | my $tokens = []; |
216 | foreach my $token (split $tokenizer_re, $s) { |
217 | push @$tokens, $token if (length $token) && ($token =~ /\S/); |
218 | } |
219 | |
d695b0ad |
220 | my $tree = $self->_recurse_parse($tokens, PARSE_TOP_LEVEL); |
01dd4e4f |
221 | return $tree; |
222 | } |
223 | |
224 | sub _recurse_parse { |
d695b0ad |
225 | my ($self, $tokens, $state) = @_; |
01dd4e4f |
226 | |
227 | my $left; |
228 | while (1) { # left-associative parsing |
229 | |
230 | my $lookahead = $tokens->[0]; |
231 | if ( not defined($lookahead) |
232 | or |
233 | ($state == PARSE_IN_PARENS && $lookahead eq ')') |
234 | or |
235 | ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) ) |
236 | or |
237 | ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR', 'NOT' ) ) |
238 | ) { |
239 | return $left; |
240 | } |
241 | |
242 | my $token = shift @$tokens; |
243 | |
244 | # nested expression in () |
245 | if ($token eq '(' ) { |
d695b0ad |
246 | my $right = $self->_recurse_parse($tokens, PARSE_IN_PARENS); |
247 | $token = shift @$tokens or croak "missing closing ')' around block " . $self->unparse($right); |
248 | $token eq ')' or croak "unexpected token '$token' terminating block " . $self->unparse($right); |
01dd4e4f |
249 | |
250 | $left = $left ? [@$left, [PAREN => [$right] ]] |
251 | : [PAREN => [$right] ]; |
252 | } |
253 | # AND/OR |
254 | elsif ($token =~ /^ (?: OR | AND ) $/xi ) { |
255 | my $op = uc $token; |
d695b0ad |
256 | my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
257 | |
258 | # Merge chunks if logic matches |
259 | if (ref $right and $op eq $right->[0]) { |
260 | $left = [ (shift @$right ), [$left, map { @$_ } @$right] ]; |
261 | } |
262 | else { |
263 | $left = [$op => [$left, $right]]; |
264 | } |
265 | } |
266 | # binary operator keywords |
267 | elsif (grep { $token =~ /^ $_ $/xi } @binary_op_keywords ) { |
268 | my $op = uc $token; |
d695b0ad |
269 | my $right = $self->_recurse_parse($tokens, PARSE_RHS); |
01dd4e4f |
270 | |
271 | # A between with a simple LITERAL for a 1st RHS argument needs a |
272 | # rerun of the search to (hopefully) find the proper AND construct |
273 | if ($op eq 'BETWEEN' and $right->[0] eq 'LITERAL') { |
274 | unshift @$tokens, $right->[1][0]; |
d695b0ad |
275 | $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
276 | } |
277 | |
278 | $left = [$op => [$left, $right] ]; |
279 | } |
280 | # expression terminator keywords (as they start a new expression) |
281 | elsif (grep { $token =~ /^ $_ $/xi } @expression_terminator_sql_keywords ) { |
282 | my $op = uc $token; |
d695b0ad |
283 | my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
284 | $left = $left ? [ $left, [$op => [$right] ]] |
285 | : [ $op => [$right] ]; |
286 | } |
287 | # NOT (last as to allow all other NOT X pieces first) |
288 | elsif ( $token =~ /^ not $/ix ) { |
289 | my $op = uc $token; |
d695b0ad |
290 | my $right = $self->_recurse_parse ($tokens, PARSE_RHS); |
01dd4e4f |
291 | $left = $left ? [ @$left, [$op => [$right] ]] |
292 | : [ $op => [$right] ]; |
293 | |
294 | } |
295 | # literal (eat everything on the right until RHS termination) |
296 | else { |
d695b0ad |
297 | my $right = $self->_recurse_parse ($tokens, PARSE_RHS); |
298 | $left = $left ? [ $left, [LITERAL => [join ' ', $token, $self->unparse($right)||()] ] ] |
299 | : [ LITERAL => [join ' ', $token, $self->unparse($right)||()] ]; |
01dd4e4f |
300 | } |
301 | } |
302 | } |
303 | |
d695b0ad |
304 | sub format_keyword { |
305 | my ($self, $keyword) = @_; |
306 | |
1536de15 |
307 | if (my $around = $self->colormap->{lc $keyword}) { |
d695b0ad |
308 | $keyword = "$around->[0]$keyword$around->[1]"; |
309 | } |
310 | |
311 | return $keyword |
312 | } |
313 | |
728f26a2 |
314 | my %starters = ( |
315 | select => 1, |
316 | update => 1, |
317 | 'insert into' => 1, |
318 | 'delete from' => 1, |
319 | ); |
320 | |
f2ab166a |
321 | sub pad_keyword { |
a24cc3a0 |
322 | my ($self, $keyword, $depth) = @_; |
e171c446 |
323 | |
324 | my $before = ''; |
1536de15 |
325 | if (defined $self->indentmap->{lc $keyword}) { |
326 | $before = $self->newline . $self->indent($depth + $self->indentmap->{lc $keyword}); |
a24cc3a0 |
327 | } |
728f26a2 |
328 | $before = '' if $depth == 0 and defined $starters{lc $keyword}; |
b4e0e260 |
329 | return [$before, ' ']; |
a24cc3a0 |
330 | } |
331 | |
1536de15 |
332 | sub indent { ($_[0]->indent_string||'') x ( ( $_[0]->indent_amount || 0 ) * $_[1] ) } |
a24cc3a0 |
333 | |
a97eb57c |
334 | sub _is_key { |
335 | my ($self, $tree) = @_; |
0569a14f |
336 | $tree = $tree->[0] while ref $tree; |
337 | |
a97eb57c |
338 | defined $tree && defined $self->indentmap->{lc $tree}; |
0569a14f |
339 | } |
340 | |
9d11f0d4 |
341 | sub fill_in_placeholder { |
fb272e73 |
342 | my ($self, $bindargs) = @_; |
343 | |
344 | if ($self->fill_in_placeholders) { |
637bb22c |
345 | my $val = pop @{$bindargs} || ''; |
9d11f0d4 |
346 | my ($left, $right) = @{$self->placeholder_surround}; |
fb272e73 |
347 | $val =~ s/\\/\\\\/g; |
348 | $val =~ s/'/\\'/g; |
9d11f0d4 |
349 | return qq('$left$val$right') |
fb272e73 |
350 | } |
351 | return '?' |
352 | } |
353 | |
01dd4e4f |
354 | sub unparse { |
1a3cc911 |
355 | my ($self, $tree, $bindargs, $depth) = @_; |
a24cc3a0 |
356 | |
1a3cc911 |
357 | $depth ||= 0; |
01dd4e4f |
358 | |
359 | if (not $tree ) { |
360 | return ''; |
361 | } |
a24cc3a0 |
362 | |
363 | my $car = $tree->[0]; |
364 | my $cdr = $tree->[1]; |
365 | |
366 | if (ref $car) { |
1a3cc911 |
367 | return join ('', map $self->unparse($_, $bindargs, $depth), @$tree); |
01dd4e4f |
368 | } |
a24cc3a0 |
369 | elsif ($car eq 'LITERAL') { |
fb272e73 |
370 | if ($cdr->[0] eq '?') { |
9d11f0d4 |
371 | return $self->fill_in_placeholder($bindargs) |
fb272e73 |
372 | } |
a24cc3a0 |
373 | return $cdr->[0]; |
01dd4e4f |
374 | } |
a24cc3a0 |
375 | elsif ($car eq 'PAREN') { |
e171c446 |
376 | return '(' . |
a24cc3a0 |
377 | join(' ', |
1a3cc911 |
378 | map $self->unparse($_, $bindargs, $depth + 2), @{$cdr}) . |
379 | ($self->_is_key($cdr)?( $self->newline||'' ).$self->indent($depth + 1):'') . ') '; |
01dd4e4f |
380 | } |
a24cc3a0 |
381 | elsif ($car eq 'OR' or $car eq 'AND' or (grep { $car =~ /^ $_ $/xi } @binary_op_keywords ) ) { |
1a3cc911 |
382 | return join (" $car ", map $self->unparse($_, $bindargs, $depth), @{$cdr}); |
01dd4e4f |
383 | } |
384 | else { |
f2ab166a |
385 | my ($l, $r) = @{$self->pad_keyword($car, $depth)}; |
1a3cc911 |
386 | return sprintf "$l%s %s$r", $self->format_keyword($car), $self->unparse($cdr, $bindargs, $depth); |
01dd4e4f |
387 | } |
388 | } |
389 | |
fb272e73 |
390 | sub format { my $self = shift; $self->unparse($self->parse($_[0]), $_[1]) } |
01dd4e4f |
391 | |
392 | 1; |
393 | |
3be357b0 |
394 | =pod |
395 | |
396 | =head1 SYNOPSIS |
397 | |
398 | my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
399 | |
400 | print $sqla_tree->format('SELECT * FROM foo WHERE foo.a > 2'); |
401 | |
402 | # SELECT * |
403 | # FROM foo |
404 | # WHERE foo.a > 2 |
405 | |
6b1bf9f8 |
406 | =head1 METHODS |
407 | |
408 | =head2 new |
409 | |
410 | my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
411 | |
c22f502d |
412 | $args = { |
413 | profile => 'console', # predefined profile to use (default: 'none') |
414 | fill_in_placeholders => 1, # true for placeholder population |
9d11f0d4 |
415 | placeholder_surround => # The strings that will be wrapped around |
416 | [GREEN, RESET], # populated placeholders if the above is set |
c22f502d |
417 | indent_string => ' ', # the string used when indenting |
418 | indent_amount => 2, # how many of above string to use for a single |
419 | # indent level |
420 | newline => "\n", # string for newline |
421 | colormap => { |
422 | select => [RED, RESET], # a pair of strings defining what to surround |
423 | # the keyword with for colorization |
424 | # ... |
425 | }, |
426 | indentmap => { |
427 | select => 0, # A zero means that the keyword will start on |
428 | # a new line |
429 | from => 1, # Any other positive integer means that after |
430 | on => 2, # said newline it will get that many indents |
431 | # ... |
432 | }, |
433 | } |
434 | |
435 | Returns a new SQL::Abstract::Tree object. All arguments are optional. |
436 | |
437 | =head3 profiles |
438 | |
439 | There are four predefined profiles, C<none>, C<console>, C<console_monochrome>, |
440 | and C<html>. Typically a user will probably just use C<console> or |
441 | C<console_monochrome>, but if something about a profile bothers you, merely |
442 | use the profile and override the parts that you don't like. |
443 | |
6b1bf9f8 |
444 | =head2 format |
445 | |
c22f502d |
446 | $sqlat->format('SELECT * FROM bar WHERE x = ?', [1]) |
447 | |
448 | Takes C<$sql> and C<\@bindargs>. |
6b1bf9f8 |
449 | |
1a3cc911 |
450 | Returns a formatting string based on the string passed in |
ee4227a7 |
451 | |
452 | =head2 parse |
453 | |
454 | $sqlat->parse('SELECT * FROM bar WHERE x = ?') |
455 | |
456 | Returns a "tree" representing passed in SQL. Please do not depend on the |
457 | structure of the returned tree. It may be stable at some point, but not yet. |
458 | |
459 | =head2 unparse |
460 | |
461 | $sqlat->parse($tree_structure, \@bindargs) |
462 | |
463 | Transform "tree" into SQL, applying various transforms on the way. |
464 | |
465 | =head2 format_keyword |
466 | |
467 | $sqlat->format_keyword('SELECT') |
468 | |
469 | Currently this just takes a keyword and puts the C<colormap> stuff around it. |
470 | Later on it may do more and allow for coderef based transforms. |
471 | |
f2ab166a |
472 | =head2 pad_keyword |
ee4227a7 |
473 | |
f2ab166a |
474 | my ($before, $after) = @{$sqlat->pad_keyword('SELECT')}; |
ee4227a7 |
475 | |
476 | Returns whitespace to be inserted around a keyword. |
9d11f0d4 |
477 | |
478 | =head2 fill_in_placeholder |
479 | |
480 | my $value = $sqlat->fill_in_placeholder(\@bindargs) |
481 | |
482 | Removes last arg from passed arrayref and returns it, surrounded with |
483 | the values in placeholder_surround, and then surrounded with single quotes. |
f2ab166a |
484 | |
485 | =head2 indent |
486 | |
487 | Returns as many indent strings as indent amounts times the first argument. |
488 | |
489 | =head1 ACCESSORS |
490 | |
491 | =head2 colormap |
492 | |
493 | See L</new> |
494 | |
495 | =head2 fill_in_placeholders |
496 | |
497 | See L</new> |
498 | |
499 | =head2 indent_amount |
500 | |
501 | See L</new> |
502 | |
503 | =head2 indent_string |
504 | |
505 | See L</new> |
506 | |
507 | =head2 indentmap |
508 | |
509 | See L</new> |
510 | |
511 | =head2 newline |
512 | |
513 | See L</new> |
514 | |
515 | =head2 placeholder_surround |
516 | |
517 | See L</new> |
518 | |