Commit | Line | Data |
01dd4e4f |
1 | package SQL::Abstract::Tree; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Carp; |
6 | |
a97eb57c |
7 | use List::Util; |
1536de15 |
8 | |
1536de15 |
9 | use base 'Class::Accessor::Grouped'; |
10 | |
11 | __PACKAGE__->mk_group_accessors( simple => $_ ) for qw( |
fb272e73 |
12 | newline indent_string indent_amount colormap indentmap fill_in_placeholders |
1536de15 |
13 | ); |
14 | |
01dd4e4f |
15 | # Parser states for _recurse_parse() |
16 | use constant PARSE_TOP_LEVEL => 0; |
17 | use constant PARSE_IN_EXPR => 1; |
18 | use constant PARSE_IN_PARENS => 2; |
19 | use constant PARSE_RHS => 3; |
20 | |
21 | # These SQL keywords always signal end of the current expression (except inside |
22 | # of a parenthesized subexpression). |
23 | # Format: A list of strings that will be compiled to extended syntax (ie. |
24 | # /.../x) regexes, without capturing parentheses. They will be automatically |
25 | # anchored to word boundaries to match the whole token). |
26 | my @expression_terminator_sql_keywords = ( |
27 | 'SELECT', |
7853a177 |
28 | 'UPDATE', |
29 | 'INSERT \s+ INTO', |
30 | 'DELETE \s+ FROM', |
3d910890 |
31 | 'FROM', |
7853a177 |
32 | 'SET', |
01dd4e4f |
33 | '(?: |
34 | (?: |
35 | (?: \b (?: LEFT | RIGHT | FULL ) \s+ )? |
36 | (?: \b (?: CROSS | INNER | OUTER ) \s+ )? |
37 | )? |
38 | JOIN |
39 | )', |
40 | 'ON', |
41 | 'WHERE', |
7853a177 |
42 | 'VALUES', |
01dd4e4f |
43 | 'EXISTS', |
44 | 'GROUP \s+ BY', |
45 | 'HAVING', |
46 | 'ORDER \s+ BY', |
47 | 'LIMIT', |
48 | 'OFFSET', |
49 | 'FOR', |
50 | 'UNION', |
51 | 'INTERSECT', |
52 | 'EXCEPT', |
53 | 'RETURNING', |
8d0dd7dc |
54 | 'ROW_NUMBER \s* \( \s* \) \s+ OVER', |
01dd4e4f |
55 | ); |
56 | |
57 | # These are binary operator keywords always a single LHS and RHS |
58 | # * AND/OR are handled separately as they are N-ary |
59 | # * so is NOT as being unary |
60 | # * BETWEEN without paranthesis around the ANDed arguments (which |
61 | # makes it a non-binary op) is detected and accomodated in |
62 | # _recurse_parse() |
63 | my $stuff_around_mathops = qr/[\w\s\`\'\"\)]/; |
64 | my @binary_op_keywords = ( |
65 | ( map |
66 | { |
67 | ' ^ ' . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ", |
68 | " (?<= $stuff_around_mathops)" . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ", |
69 | } |
70 | (qw/< > != <> = <= >=/) |
71 | ), |
72 | ( map |
73 | { '\b (?: NOT \s+)?' . $_ . '\b' } |
74 | (qw/IN BETWEEN LIKE/) |
75 | ), |
76 | ); |
77 | |
78 | my $tokenizer_re_str = join("\n\t|\n", |
79 | ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR', 'NOT'), |
80 | @binary_op_keywords, |
81 | ); |
82 | |
83 | my $tokenizer_re = qr/ \s* ( $tokenizer_re_str | \( | \) | \? ) \s* /xi; |
84 | |
85 | sub _binary_op_keywords { @binary_op_keywords } |
86 | |
7e5600e9 |
87 | my %indents = ( |
7853a177 |
88 | select => 0, |
89 | update => 0, |
90 | 'insert into' => 0, |
91 | 'delete from' => 0, |
3d910890 |
92 | from => 1, |
91916220 |
93 | where => 0, |
7853a177 |
94 | join => 1, |
95 | 'left join' => 1, |
96 | on => 2, |
91916220 |
97 | 'group by' => 0, |
98 | 'order by' => 0, |
7853a177 |
99 | set => 1, |
100 | into => 1, |
91916220 |
101 | values => 1, |
7e5600e9 |
102 | ); |
103 | |
75c3a063 |
104 | my %profiles = ( |
105 | console => { |
84c65032 |
106 | fill_in_placeholders => 1, |
1536de15 |
107 | indent_string => ' ', |
75c3a063 |
108 | indent_amount => 2, |
1536de15 |
109 | newline => "\n", |
3be357b0 |
110 | colormap => {}, |
7e5600e9 |
111 | indentmap => { %indents }, |
3be357b0 |
112 | }, |
113 | console_monochrome => { |
84c65032 |
114 | fill_in_placeholders => 1, |
3be357b0 |
115 | indent_string => ' ', |
116 | indent_amount => 2, |
117 | newline => "\n", |
118 | colormap => {}, |
7e5600e9 |
119 | indentmap => { %indents }, |
120 | }, |
121 | html => { |
84c65032 |
122 | fill_in_placeholders => 1, |
7e5600e9 |
123 | indent_string => ' ', |
124 | indent_amount => 2, |
125 | newline => "<br />\n", |
126 | colormap => { |
7853a177 |
127 | select => ['<span class="select">' , '</span>'], |
128 | 'insert into' => ['<span class="insert-into">' , '</span>'], |
129 | update => ['<span class="select">' , '</span>'], |
130 | 'delete from' => ['<span class="delete-from">' , '</span>'], |
131 | where => ['<span class="where">' , '</span>'], |
132 | from => ['<span class="from">' , '</span>'], |
133 | join => ['<span class="join">' , '</span>'], |
134 | on => ['<span class="on">' , '</span>'], |
135 | 'group by' => ['<span class="group-by">', '</span>'], |
136 | 'order by' => ['<span class="order-by">', '</span>'], |
137 | set => ['<span class="set">', '</span>'], |
138 | into => ['<span class="into">', '</span>'], |
139 | values => ['<span class="values">', '</span>'], |
1536de15 |
140 | }, |
7e5600e9 |
141 | indentmap => { %indents }, |
75c3a063 |
142 | }, |
143 | none => { |
1536de15 |
144 | colormap => {}, |
145 | indentmap => {}, |
75c3a063 |
146 | }, |
147 | ); |
148 | |
3be357b0 |
149 | eval { |
150 | require Term::ANSIColor; |
151 | $profiles{console}->{colormap} = { |
7853a177 |
152 | select => [Term::ANSIColor::color('red'), Term::ANSIColor::color('reset')], |
153 | 'insert into' => [Term::ANSIColor::color('red'), Term::ANSIColor::color('reset')], |
154 | update => [Term::ANSIColor::color('red'), Term::ANSIColor::color('reset')], |
155 | 'delete from' => [Term::ANSIColor::color('red'), Term::ANSIColor::color('reset')], |
156 | |
157 | set => [Term::ANSIColor::color('cyan'), Term::ANSIColor::color('reset')], |
c1b89c4f |
158 | from => [Term::ANSIColor::color('cyan'), Term::ANSIColor::color('reset')], |
7853a177 |
159 | |
160 | where => [Term::ANSIColor::color('green'), Term::ANSIColor::color('reset')], |
161 | values => [Term::ANSIColor::color('yellow'), Term::ANSIColor::color('reset')], |
162 | |
163 | join => [Term::ANSIColor::color('magenta'), Term::ANSIColor::color('reset')], |
164 | 'left join' => [Term::ANSIColor::color('magenta'), Term::ANSIColor::color('reset')], |
165 | on => [Term::ANSIColor::color('blue'), Term::ANSIColor::color('reset')], |
166 | |
167 | 'group by' => [Term::ANSIColor::color('yellow'), Term::ANSIColor::color('reset')], |
168 | 'order by' => [Term::ANSIColor::color('yellow'), Term::ANSIColor::color('reset')], |
3be357b0 |
169 | }; |
170 | }; |
171 | |
75c3a063 |
172 | sub new { |
173 | my ($class, $args) = @_; |
174 | |
175 | my $profile = delete $args->{profile} || 'none'; |
176 | my $data = {%{$profiles{$profile}}, %{$args||{}}}; |
177 | |
178 | bless $data, $class |
179 | } |
d695b0ad |
180 | |
01dd4e4f |
181 | sub parse { |
d695b0ad |
182 | my ($self, $s) = @_; |
01dd4e4f |
183 | |
184 | # tokenize string, and remove all optional whitespace |
185 | my $tokens = []; |
186 | foreach my $token (split $tokenizer_re, $s) { |
187 | push @$tokens, $token if (length $token) && ($token =~ /\S/); |
188 | } |
189 | |
d695b0ad |
190 | my $tree = $self->_recurse_parse($tokens, PARSE_TOP_LEVEL); |
01dd4e4f |
191 | return $tree; |
192 | } |
193 | |
194 | sub _recurse_parse { |
d695b0ad |
195 | my ($self, $tokens, $state) = @_; |
01dd4e4f |
196 | |
197 | my $left; |
198 | while (1) { # left-associative parsing |
199 | |
200 | my $lookahead = $tokens->[0]; |
201 | if ( not defined($lookahead) |
202 | or |
203 | ($state == PARSE_IN_PARENS && $lookahead eq ')') |
204 | or |
205 | ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) ) |
206 | or |
207 | ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR', 'NOT' ) ) |
208 | ) { |
209 | return $left; |
210 | } |
211 | |
212 | my $token = shift @$tokens; |
213 | |
214 | # nested expression in () |
215 | if ($token eq '(' ) { |
d695b0ad |
216 | my $right = $self->_recurse_parse($tokens, PARSE_IN_PARENS); |
217 | $token = shift @$tokens or croak "missing closing ')' around block " . $self->unparse($right); |
218 | $token eq ')' or croak "unexpected token '$token' terminating block " . $self->unparse($right); |
01dd4e4f |
219 | |
220 | $left = $left ? [@$left, [PAREN => [$right] ]] |
221 | : [PAREN => [$right] ]; |
222 | } |
223 | # AND/OR |
224 | elsif ($token =~ /^ (?: OR | AND ) $/xi ) { |
225 | my $op = uc $token; |
d695b0ad |
226 | my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
227 | |
228 | # Merge chunks if logic matches |
229 | if (ref $right and $op eq $right->[0]) { |
230 | $left = [ (shift @$right ), [$left, map { @$_ } @$right] ]; |
231 | } |
232 | else { |
233 | $left = [$op => [$left, $right]]; |
234 | } |
235 | } |
236 | # binary operator keywords |
237 | elsif (grep { $token =~ /^ $_ $/xi } @binary_op_keywords ) { |
238 | my $op = uc $token; |
d695b0ad |
239 | my $right = $self->_recurse_parse($tokens, PARSE_RHS); |
01dd4e4f |
240 | |
241 | # A between with a simple LITERAL for a 1st RHS argument needs a |
242 | # rerun of the search to (hopefully) find the proper AND construct |
243 | if ($op eq 'BETWEEN' and $right->[0] eq 'LITERAL') { |
244 | unshift @$tokens, $right->[1][0]; |
d695b0ad |
245 | $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
246 | } |
247 | |
248 | $left = [$op => [$left, $right] ]; |
249 | } |
250 | # expression terminator keywords (as they start a new expression) |
251 | elsif (grep { $token =~ /^ $_ $/xi } @expression_terminator_sql_keywords ) { |
252 | my $op = uc $token; |
d695b0ad |
253 | my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
254 | $left = $left ? [ $left, [$op => [$right] ]] |
255 | : [ $op => [$right] ]; |
256 | } |
257 | # NOT (last as to allow all other NOT X pieces first) |
258 | elsif ( $token =~ /^ not $/ix ) { |
259 | my $op = uc $token; |
d695b0ad |
260 | my $right = $self->_recurse_parse ($tokens, PARSE_RHS); |
01dd4e4f |
261 | $left = $left ? [ @$left, [$op => [$right] ]] |
262 | : [ $op => [$right] ]; |
263 | |
264 | } |
265 | # literal (eat everything on the right until RHS termination) |
266 | else { |
d695b0ad |
267 | my $right = $self->_recurse_parse ($tokens, PARSE_RHS); |
268 | $left = $left ? [ $left, [LITERAL => [join ' ', $token, $self->unparse($right)||()] ] ] |
269 | : [ LITERAL => [join ' ', $token, $self->unparse($right)||()] ]; |
01dd4e4f |
270 | } |
271 | } |
272 | } |
273 | |
d695b0ad |
274 | sub format_keyword { |
275 | my ($self, $keyword) = @_; |
276 | |
1536de15 |
277 | if (my $around = $self->colormap->{lc $keyword}) { |
d695b0ad |
278 | $keyword = "$around->[0]$keyword$around->[1]"; |
279 | } |
280 | |
281 | return $keyword |
282 | } |
283 | |
728f26a2 |
284 | |
285 | my %starters = ( |
286 | select => 1, |
287 | update => 1, |
288 | 'insert into' => 1, |
289 | 'delete from' => 1, |
290 | ); |
291 | |
a24cc3a0 |
292 | sub whitespace { |
293 | my ($self, $keyword, $depth) = @_; |
e171c446 |
294 | |
295 | my $before = ''; |
1536de15 |
296 | if (defined $self->indentmap->{lc $keyword}) { |
297 | $before = $self->newline . $self->indent($depth + $self->indentmap->{lc $keyword}); |
a24cc3a0 |
298 | } |
728f26a2 |
299 | $before = '' if $depth == 0 and defined $starters{lc $keyword}; |
b4e0e260 |
300 | return [$before, ' ']; |
a24cc3a0 |
301 | } |
302 | |
1536de15 |
303 | sub indent { ($_[0]->indent_string||'') x ( ( $_[0]->indent_amount || 0 ) * $_[1] ) } |
a24cc3a0 |
304 | |
a97eb57c |
305 | sub _is_key { |
306 | my ($self, $tree) = @_; |
0569a14f |
307 | $tree = $tree->[0] while ref $tree; |
308 | |
a97eb57c |
309 | defined $tree && defined $self->indentmap->{lc $tree}; |
0569a14f |
310 | } |
311 | |
fb272e73 |
312 | sub _fill_in_placeholder { |
313 | my ($self, $bindargs) = @_; |
314 | |
315 | if ($self->fill_in_placeholders) { |
316 | my $val = pop @{$bindargs}; |
317 | $val =~ s/\\/\\\\/g; |
318 | $val =~ s/'/\\'/g; |
319 | return qq('$val') |
320 | } |
321 | return '?' |
322 | } |
323 | |
01dd4e4f |
324 | sub unparse { |
fb272e73 |
325 | my ($self, $tree, $bindargs, $depth) = @_; |
a24cc3a0 |
326 | |
e171c446 |
327 | $depth ||= 0; |
01dd4e4f |
328 | |
329 | if (not $tree ) { |
330 | return ''; |
331 | } |
a24cc3a0 |
332 | |
333 | my $car = $tree->[0]; |
334 | my $cdr = $tree->[1]; |
335 | |
336 | if (ref $car) { |
fb272e73 |
337 | return join ('', map $self->unparse($_, $bindargs, $depth), @$tree); |
01dd4e4f |
338 | } |
a24cc3a0 |
339 | elsif ($car eq 'LITERAL') { |
fb272e73 |
340 | if ($cdr->[0] eq '?') { |
341 | return $self->_fill_in_placeholder($bindargs) |
342 | } |
a24cc3a0 |
343 | return $cdr->[0]; |
01dd4e4f |
344 | } |
a24cc3a0 |
345 | elsif ($car eq 'PAREN') { |
e171c446 |
346 | return '(' . |
a24cc3a0 |
347 | join(' ', |
fb272e73 |
348 | map $self->unparse($_, $bindargs, $depth + 2), @{$cdr}) . |
a97eb57c |
349 | ($self->_is_key($cdr)?( $self->newline||'' ).$self->indent($depth + 1):'') . ') '; |
01dd4e4f |
350 | } |
a24cc3a0 |
351 | elsif ($car eq 'OR' or $car eq 'AND' or (grep { $car =~ /^ $_ $/xi } @binary_op_keywords ) ) { |
fb272e73 |
352 | return join (" $car ", map $self->unparse($_, $bindargs, $depth), @{$cdr}); |
01dd4e4f |
353 | } |
354 | else { |
a24cc3a0 |
355 | my ($l, $r) = @{$self->whitespace($car, $depth)}; |
fb272e73 |
356 | return sprintf "$l%s %s$r", $self->format_keyword($car), $self->unparse($cdr, $bindargs, $depth); |
01dd4e4f |
357 | } |
358 | } |
359 | |
fb272e73 |
360 | sub format { my $self = shift; $self->unparse($self->parse($_[0]), $_[1]) } |
01dd4e4f |
361 | |
362 | 1; |
363 | |
3be357b0 |
364 | =pod |
365 | |
366 | =head1 SYNOPSIS |
367 | |
368 | my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
369 | |
370 | print $sqla_tree->format('SELECT * FROM foo WHERE foo.a > 2'); |
371 | |
372 | # SELECT * |
373 | # FROM foo |
374 | # WHERE foo.a > 2 |
375 | |
6b1bf9f8 |
376 | =head1 METHODS |
377 | |
378 | =head2 new |
379 | |
380 | my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
381 | |
382 | =head2 format |
383 | |
384 | $sqlat->format('SELECT * FROM bar') |
385 | |
386 | Returns a formatting string based on wthe string passed in |