Commit | Line | Data |
01dd4e4f |
1 | package SQL::Abstract::Tree; |
2 | |
3 | use strict; |
4 | use warnings; |
5 | use Carp; |
6 | |
1536de15 |
7 | |
1536de15 |
8 | use base 'Class::Accessor::Grouped'; |
9 | |
10 | __PACKAGE__->mk_group_accessors( simple => $_ ) for qw( |
11 | newline indent_string indent_amount colormap indentmap |
12 | ); |
13 | |
01dd4e4f |
14 | # Parser states for _recurse_parse() |
15 | use constant PARSE_TOP_LEVEL => 0; |
16 | use constant PARSE_IN_EXPR => 1; |
17 | use constant PARSE_IN_PARENS => 2; |
18 | use constant PARSE_RHS => 3; |
19 | |
20 | # These SQL keywords always signal end of the current expression (except inside |
21 | # of a parenthesized subexpression). |
22 | # Format: A list of strings that will be compiled to extended syntax (ie. |
23 | # /.../x) regexes, without capturing parentheses. They will be automatically |
24 | # anchored to word boundaries to match the whole token). |
25 | my @expression_terminator_sql_keywords = ( |
26 | 'SELECT', |
27 | 'FROM', |
28 | '(?: |
29 | (?: |
30 | (?: \b (?: LEFT | RIGHT | FULL ) \s+ )? |
31 | (?: \b (?: CROSS | INNER | OUTER ) \s+ )? |
32 | )? |
33 | JOIN |
34 | )', |
35 | 'ON', |
36 | 'WHERE', |
37 | 'EXISTS', |
38 | 'GROUP \s+ BY', |
39 | 'HAVING', |
40 | 'ORDER \s+ BY', |
41 | 'LIMIT', |
42 | 'OFFSET', |
43 | 'FOR', |
44 | 'UNION', |
45 | 'INTERSECT', |
46 | 'EXCEPT', |
47 | 'RETURNING', |
48 | ); |
49 | |
50 | # These are binary operator keywords always a single LHS and RHS |
51 | # * AND/OR are handled separately as they are N-ary |
52 | # * so is NOT as being unary |
53 | # * BETWEEN without paranthesis around the ANDed arguments (which |
54 | # makes it a non-binary op) is detected and accomodated in |
55 | # _recurse_parse() |
56 | my $stuff_around_mathops = qr/[\w\s\`\'\"\)]/; |
57 | my @binary_op_keywords = ( |
58 | ( map |
59 | { |
60 | ' ^ ' . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ", |
61 | " (?<= $stuff_around_mathops)" . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ", |
62 | } |
63 | (qw/< > != <> = <= >=/) |
64 | ), |
65 | ( map |
66 | { '\b (?: NOT \s+)?' . $_ . '\b' } |
67 | (qw/IN BETWEEN LIKE/) |
68 | ), |
69 | ); |
70 | |
71 | my $tokenizer_re_str = join("\n\t|\n", |
72 | ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR', 'NOT'), |
73 | @binary_op_keywords, |
74 | ); |
75 | |
76 | my $tokenizer_re = qr/ \s* ( $tokenizer_re_str | \( | \) | \? ) \s* /xi; |
77 | |
78 | sub _binary_op_keywords { @binary_op_keywords } |
79 | |
75c3a063 |
80 | my %profiles = ( |
81 | console => { |
1536de15 |
82 | indent_string => ' ', |
75c3a063 |
83 | indent_amount => 2, |
1536de15 |
84 | newline => "\n", |
3be357b0 |
85 | colormap => {}, |
86 | indentmap => { |
87 | select => 0, |
88 | where => 1, |
89 | from => 1, |
1536de15 |
90 | }, |
3be357b0 |
91 | }, |
92 | console_monochrome => { |
93 | indent_string => ' ', |
94 | indent_amount => 2, |
95 | newline => "\n", |
96 | colormap => {}, |
97 | indentmap => { |
98 | select => 0, |
1536de15 |
99 | where => 1, |
100 | from => 1, |
101 | }, |
75c3a063 |
102 | }, |
103 | none => { |
1536de15 |
104 | colormap => {}, |
105 | indentmap => {}, |
75c3a063 |
106 | }, |
107 | ); |
108 | |
3be357b0 |
109 | eval { |
110 | require Term::ANSIColor; |
111 | $profiles{console}->{colormap} = { |
112 | select => [Term::ANSIColor::color('red'), Term::ANSIColor::color('reset')], |
113 | where => [Term::ANSIColor::color('green'), Term::ANSIColor::color('reset')], |
114 | from => [Term::ANSIColor::color('cyan'), Term::ANSIColor::color('reset')], |
115 | }; |
116 | }; |
117 | |
75c3a063 |
118 | sub new { |
119 | my ($class, $args) = @_; |
120 | |
121 | my $profile = delete $args->{profile} || 'none'; |
122 | my $data = {%{$profiles{$profile}}, %{$args||{}}}; |
123 | |
124 | bless $data, $class |
125 | } |
d695b0ad |
126 | |
01dd4e4f |
127 | sub parse { |
d695b0ad |
128 | my ($self, $s) = @_; |
01dd4e4f |
129 | |
130 | # tokenize string, and remove all optional whitespace |
131 | my $tokens = []; |
132 | foreach my $token (split $tokenizer_re, $s) { |
133 | push @$tokens, $token if (length $token) && ($token =~ /\S/); |
134 | } |
135 | |
d695b0ad |
136 | my $tree = $self->_recurse_parse($tokens, PARSE_TOP_LEVEL); |
01dd4e4f |
137 | return $tree; |
138 | } |
139 | |
140 | sub _recurse_parse { |
d695b0ad |
141 | my ($self, $tokens, $state) = @_; |
01dd4e4f |
142 | |
143 | my $left; |
144 | while (1) { # left-associative parsing |
145 | |
146 | my $lookahead = $tokens->[0]; |
147 | if ( not defined($lookahead) |
148 | or |
149 | ($state == PARSE_IN_PARENS && $lookahead eq ')') |
150 | or |
151 | ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) ) |
152 | or |
153 | ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR', 'NOT' ) ) |
154 | ) { |
155 | return $left; |
156 | } |
157 | |
158 | my $token = shift @$tokens; |
159 | |
160 | # nested expression in () |
161 | if ($token eq '(' ) { |
d695b0ad |
162 | my $right = $self->_recurse_parse($tokens, PARSE_IN_PARENS); |
163 | $token = shift @$tokens or croak "missing closing ')' around block " . $self->unparse($right); |
164 | $token eq ')' or croak "unexpected token '$token' terminating block " . $self->unparse($right); |
01dd4e4f |
165 | |
166 | $left = $left ? [@$left, [PAREN => [$right] ]] |
167 | : [PAREN => [$right] ]; |
168 | } |
169 | # AND/OR |
170 | elsif ($token =~ /^ (?: OR | AND ) $/xi ) { |
171 | my $op = uc $token; |
d695b0ad |
172 | my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
173 | |
174 | # Merge chunks if logic matches |
175 | if (ref $right and $op eq $right->[0]) { |
176 | $left = [ (shift @$right ), [$left, map { @$_ } @$right] ]; |
177 | } |
178 | else { |
179 | $left = [$op => [$left, $right]]; |
180 | } |
181 | } |
182 | # binary operator keywords |
183 | elsif (grep { $token =~ /^ $_ $/xi } @binary_op_keywords ) { |
184 | my $op = uc $token; |
d695b0ad |
185 | my $right = $self->_recurse_parse($tokens, PARSE_RHS); |
01dd4e4f |
186 | |
187 | # A between with a simple LITERAL for a 1st RHS argument needs a |
188 | # rerun of the search to (hopefully) find the proper AND construct |
189 | if ($op eq 'BETWEEN' and $right->[0] eq 'LITERAL') { |
190 | unshift @$tokens, $right->[1][0]; |
d695b0ad |
191 | $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
192 | } |
193 | |
194 | $left = [$op => [$left, $right] ]; |
195 | } |
196 | # expression terminator keywords (as they start a new expression) |
197 | elsif (grep { $token =~ /^ $_ $/xi } @expression_terminator_sql_keywords ) { |
198 | my $op = uc $token; |
d695b0ad |
199 | my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); |
01dd4e4f |
200 | $left = $left ? [ $left, [$op => [$right] ]] |
201 | : [ $op => [$right] ]; |
202 | } |
203 | # NOT (last as to allow all other NOT X pieces first) |
204 | elsif ( $token =~ /^ not $/ix ) { |
205 | my $op = uc $token; |
d695b0ad |
206 | my $right = $self->_recurse_parse ($tokens, PARSE_RHS); |
01dd4e4f |
207 | $left = $left ? [ @$left, [$op => [$right] ]] |
208 | : [ $op => [$right] ]; |
209 | |
210 | } |
211 | # literal (eat everything on the right until RHS termination) |
212 | else { |
d695b0ad |
213 | my $right = $self->_recurse_parse ($tokens, PARSE_RHS); |
214 | $left = $left ? [ $left, [LITERAL => [join ' ', $token, $self->unparse($right)||()] ] ] |
215 | : [ LITERAL => [join ' ', $token, $self->unparse($right)||()] ]; |
01dd4e4f |
216 | } |
217 | } |
218 | } |
219 | |
d695b0ad |
220 | sub format_keyword { |
221 | my ($self, $keyword) = @_; |
222 | |
1536de15 |
223 | if (my $around = $self->colormap->{lc $keyword}) { |
d695b0ad |
224 | $keyword = "$around->[0]$keyword$around->[1]"; |
225 | } |
226 | |
227 | return $keyword |
228 | } |
229 | |
a24cc3a0 |
230 | sub whitespace { |
231 | my ($self, $keyword, $depth) = @_; |
e171c446 |
232 | |
233 | my $before = ''; |
1536de15 |
234 | my $after = ' '; |
235 | if (defined $self->indentmap->{lc $keyword}) { |
236 | $before = $self->newline . $self->indent($depth + $self->indentmap->{lc $keyword}); |
a24cc3a0 |
237 | } |
1a67e3a0 |
238 | $before = '' if $depth == 0 and lc $keyword eq 'select'; |
e171c446 |
239 | return [$before, $after]; |
a24cc3a0 |
240 | } |
241 | |
1536de15 |
242 | sub indent { ($_[0]->indent_string||'') x ( ( $_[0]->indent_amount || 0 ) * $_[1] ) } |
a24cc3a0 |
243 | |
0569a14f |
244 | sub _is_select { |
245 | my $tree = shift; |
246 | $tree = $tree->[0] while ref $tree; |
247 | |
ca8aec12 |
248 | defined $tree && lc $tree eq 'select'; |
0569a14f |
249 | } |
250 | |
01dd4e4f |
251 | sub unparse { |
a24cc3a0 |
252 | my ($self, $tree, $depth) = @_; |
253 | |
e171c446 |
254 | $depth ||= 0; |
01dd4e4f |
255 | |
256 | if (not $tree ) { |
257 | return ''; |
258 | } |
a24cc3a0 |
259 | |
260 | my $car = $tree->[0]; |
261 | my $cdr = $tree->[1]; |
262 | |
263 | if (ref $car) { |
e171c446 |
264 | return join ('', map $self->unparse($_, $depth), @$tree); |
01dd4e4f |
265 | } |
a24cc3a0 |
266 | elsif ($car eq 'LITERAL') { |
267 | return $cdr->[0]; |
01dd4e4f |
268 | } |
a24cc3a0 |
269 | elsif ($car eq 'PAREN') { |
e171c446 |
270 | return '(' . |
a24cc3a0 |
271 | join(' ', |
0569a14f |
272 | map $self->unparse($_, $depth + 2), @{$cdr}) . |
1536de15 |
273 | (_is_select($cdr)?( $self->newline||'' ).$self->indent($depth + 1):'') . ')'; |
01dd4e4f |
274 | } |
a24cc3a0 |
275 | elsif ($car eq 'OR' or $car eq 'AND' or (grep { $car =~ /^ $_ $/xi } @binary_op_keywords ) ) { |
e171c446 |
276 | return join (" $car ", map $self->unparse($_, $depth), @{$cdr}); |
01dd4e4f |
277 | } |
278 | else { |
a24cc3a0 |
279 | my ($l, $r) = @{$self->whitespace($car, $depth)}; |
e171c446 |
280 | return sprintf "$l%s %s$r", $self->format_keyword($car), $self->unparse($cdr, $depth); |
01dd4e4f |
281 | } |
282 | } |
283 | |
d695b0ad |
284 | sub format { my $self = shift; $self->unparse($self->parse(@_)) } |
01dd4e4f |
285 | |
286 | 1; |
287 | |
3be357b0 |
288 | =pod |
289 | |
290 | =head1 SYNOPSIS |
291 | |
292 | my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' }); |
293 | |
294 | print $sqla_tree->format('SELECT * FROM foo WHERE foo.a > 2'); |
295 | |
296 | # SELECT * |
297 | # FROM foo |
298 | # WHERE foo.a > 2 |
299 | |