X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract%2FTree.pm;h=d490a83b19204fa69c4f71cd35da8116fe2b76c1;hb=6c4d8eb8d2b9a5deb84400fd35f72c23818f442e;hp=f6076ba3881988c6f4ae957097bd23966c76f4dd;hpb=1c33db5d0ccacdfc4d6028ec596b0f34a045471c;p=dbsrgits%2FSQL-Abstract.git diff --git a/lib/SQL/Abstract/Tree.pm b/lib/SQL/Abstract/Tree.pm index f6076ba..d490a83 100644 --- a/lib/SQL/Abstract/Tree.pm +++ b/lib/SQL/Abstract/Tree.pm @@ -50,10 +50,10 @@ my $placeholder_re = qr/(?: \? | \$\d+ )/x; my @expression_start_keywords = ( 'SELECT', 'UPDATE', + 'SET', 'INSERT \s+ INTO', 'DELETE \s+ FROM', 'FROM', - 'SET', '(?: (?: (?: (?: LEFT | RIGHT | FULL ) \s+ )? @@ -64,7 +64,7 @@ my @expression_start_keywords = ( 'ON', 'WHERE', '(?: DEFAULT \s+ )? VALUES', - 'EXISTS', + '(?: NOT \s+)? EXISTS', 'GROUP \s+ BY', 'HAVING', 'ORDER \s+ BY', @@ -95,6 +95,7 @@ $expr_start_re = qr/ $op_look_behind (?i: $expr_start_re ) $op_look_ahead /x; # * BETWEEN without paranthesis around the ANDed arguments (which # makes it a non-binary op) is detected and accomodated in # _recurse_parse() +# * AS is not really an operator but is handled here as it's also LHS/RHS # this will be included in the $binary_op_re, the distinction is interesting during # testing as one is tighter than the other, plus mathops have different look @@ -111,7 +112,7 @@ sub _math_op_re { $math_re } my $binary_op_re = '(?: NOT \s+)? (?:' . join ('|', qw/IN BETWEEN R?LIKE/) . ')'; $binary_op_re = join "\n\t|\n", - "$op_look_behind (?i: $binary_op_re ) $op_look_ahead", + "$op_look_behind (?i: $binary_op_re | AS ) $op_look_ahead", $math_re, $op_look_behind . 'IS (?:\s+ NOT)?' . "(?= \\s+ NULL \\b | $op_look_ahead )", ; @@ -122,8 +123,8 @@ sub _binary_op_re { $binary_op_re } my $all_known_re = join("\n\t|\n", $expr_start_re, $binary_op_re, - "$op_look_behind (?i: AND|OR|NOT ) $op_look_ahead", - (map { quotemeta $_ } qw/, ( ) */), + "$op_look_behind (?i: AND|OR|NOT|\\* ) $op_look_ahead", + (map { quotemeta $_ } qw/, ( )/), $placeholder_re, ); @@ -188,7 +189,7 @@ my %profiles = ( my $magenta = [$c->('magenta'), $c->('reset')]; my $b_o_w = [$c->('black on_white'), $c->('reset')]; ( - placeholder_surround => [q(') . $c->('black on_magenta'), $c->('reset') . q(')], + placeholder_surround => [$c->('black on_magenta'), $c->('reset')], colormap => { 'begin work' => $b_o_w, commit => $b_o_w, @@ -309,6 +310,10 @@ sub parse { $self->_recurse_parse($tokens, PARSE_TOP_LEVEL); } +{ +# this is temporary, lists can be parsed *without* recursing, but +# it requires a massive rewrite of the AST generator +no warnings qw/recursion/; sub _recurse_parse { my ($self, $tokens, $state) = @_; @@ -344,14 +349,14 @@ sub _recurse_parse { elsif ($token =~ /^ (?: OR | AND | \, ) $/xi ) { my $op = ($token eq ',') ? 'LIST' : uc $token; - my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); + my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR) || []; # Merge chunks if logic matches - if (ref $right and $op eq $right->[0]) { - $left = [ (shift @$right ), [$left||(), map { @$_ } @$right] ]; + if (ref $right and @$right and $op eq $right->[0]) { + $left = [ (shift @$right ), [$left||[], map { @$_ } @$right] ]; } else { - $left = [$op => [ $left||(), $right||() ]]; + $left = [$op => [ $left||[], $right ]]; } } # binary operator keywords @@ -379,8 +384,8 @@ sub _recurse_parse { elsif ( $token =~ /^ NOT $/ix ) { my $op = uc $token; my $right = $self->_recurse_parse ($tokens, PARSE_RHS); - $left = $left ? [ @$left, [$op => [$right] ]] - : [ $op => [$right] ]; + $left = $left ? [ @$left, [$op => [$right||()] ]] + : [ $op => [$right||()] ]; } elsif ( $token =~ $placeholder_re) { @@ -405,6 +410,7 @@ sub _recurse_parse { } } } +} sub format_keyword { my ($self, $keyword) = @_; @@ -448,9 +454,11 @@ sub fill_in_placeholder { if ($self->fill_in_placeholders) { my $val = shift @{$bindargs} || ''; + my $quoted = $val =~ s/^(['"])(.*)\1$/$2/; my ($left, $right) = @{$self->placeholder_surround}; $val =~ s/\\/\\\\/g; $val =~ s/'/\\'/g; + $val = qq('$val') if $quoted; return qq($left$val$right) } return '?' @@ -469,6 +477,7 @@ sub _unparse { return ''; } + $self->_parenthesis_unroll($tree); my ($car, $cdr) = @{$tree}[0,1]; if (! defined $car or (! ref $car and ! defined $cdr) ) { @@ -488,7 +497,7 @@ sub _unparse { return $self->fill_in_placeholder($bindargs); } elsif ($car eq 'PAREN') { - return sprintf ('(%s)', + return sprintf ('( %s )', join (' ', map { $self->_unparse($_, $bindargs, $depth + 2) } @{$cdr} ) . ($self->_is_key($cdr) @@ -505,16 +514,161 @@ sub _unparse { } else { my ($l, $r) = @{$self->pad_keyword($car, $depth)}; - return sprintf "$l%s %s$r", $self->format_keyword($car), $self->_unparse($cdr, $bindargs, $depth); + + return sprintf "$l%s%s%s$r", + $self->format_keyword($car), + ( ref $cdr eq 'ARRAY' and ref $cdr->[0] eq 'ARRAY' and $cdr->[0][0] and $cdr->[0][0] eq 'PAREN' ) + ? '' # mysql-- + : ' ' + , + $self->_unparse($cdr, $bindargs, $depth), + ; } } +# All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics +my @unrollable_ops = ( + 'ON', + 'WHERE', + 'GROUP \s+ BY', + 'HAVING', + 'ORDER \s+ BY', + 'I?LIKE', +); +my $unrollable_ops_re = join ' | ', @unrollable_ops; +$unrollable_ops_re = qr/$unrollable_ops_re/xi; + +sub _parenthesis_unroll { + my $self = shift; + my $ast = shift; + + #return if $self->parenthesis_significant; + return unless (ref $ast and ref $ast->[1]); + + my $changes; + do { + my @children; + $changes = 0; + + for my $child (@{$ast->[1]}) { + # the current node in this loop is *always* a PAREN + if (! ref $child or ! @$child or $child->[0] ne 'PAREN') { + push @children, $child; + next; + } + + # unroll nested parenthesis + while ( @{$child->[1]} && $child->[1][0][0] eq 'PAREN') { + $child = $child->[1][0]; + $changes++; + } + + # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list + if ( + ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR') + and + $child->[1][0][0] eq $ast->[0] + ) { + push @children, @{$child->[1][0][1]}; + $changes++; + } + + # if the parent operator explcitly allows it nuke the parenthesis + elsif ( $ast->[0] =~ $unrollable_ops_re ) { + push @children, $child->[1][0]; + $changes++; + } + + # only *ONE* LITERAL or placeholder element + # as an AND/OR/NOT argument + elsif ( + @{$child->[1]} == 1 && ( + $child->[1][0][0] eq 'LITERAL' + or + $child->[1][0][0] eq 'PLACEHOLDER' + ) && ( + $ast->[0] eq 'AND' or $ast->[0] eq 'OR' or $ast->[0] eq 'NOT' + ) + ) { + push @children, $child->[1][0]; + $changes++; + } + + # only one element in the parenthesis which is a binary op + # and has exactly two grandchildren + # the only time when we can *not* unroll this is when both + # the parent and the child are mathops (in which case we'll + # break precedence) or when the child is BETWEEN (special + # case) + elsif ( + @{$child->[1]} == 1 + and + $child->[1][0][0] =~ SQL::Abstract::Tree::_binary_op_re() + and + $child->[1][0][0] ne 'BETWEEN' + and + @{$child->[1][0][1]} == 2 + and + ! ( + $child->[1][0][0] =~ SQL::Abstract::Tree::_math_op_re() + and + $ast->[0] =~ SQL::Abstract::Tree::_math_op_re() + ) + ) { + push @children, $child->[1][0]; + $changes++; + } + + # a function binds tighter than a mathop - see if our ancestor is a + # mathop, and our content is: + # a single non-mathop child with a single PAREN grandchild which + # would indicate mathop ( nonmathop ( ... ) ) + # or a single non-mathop with a single LITERAL ( nonmathop foo ) + # or a single non-mathop with a single PLACEHOLDER ( nonmathop ? ) + elsif ( + @{$child->[1]} == 1 + and + @{$child->[1][0][1]} == 1 + and + $ast->[0] =~ SQL::Abstract::Tree::_math_op_re() + and + $child->[1][0][0] !~ SQL::Abstract::Tree::_math_op_re + and + ( + $child->[1][0][1][0][0] eq 'PAREN' + or + $child->[1][0][1][0][0] eq 'LITERAL' + or + $child->[1][0][1][0][0] eq 'PLACEHOLDER' + ) + ) { + push @children, $child->[1][0]; + $changes++; + } + + + # otherwise no more mucking for this pass + else { + push @children, $child; + } + } + + $ast->[1] = \@children; + + } while ($changes); + +} + sub format { my $self = shift; $self->unparse($self->parse($_[0]), $_[1]) } 1; =pod +=head1 NAME + +SQL::Abstract::Tree - Represent SQL as an AST + =head1 SYNOPSIS my $sqla_tree = SQL::Abstract::Tree->new({ profile => 'console' });