From: Peter Rabbitson Date: Thu, 21 Oct 2010 12:39:16 +0000 (+0000) Subject: Add list support, and various mini-fixes X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=scpubgit%2FQ-Branch.git;a=commitdiff_plain;h=b3b79607321d406a194b2aac205978d925b398c0 Add list support, and various mini-fixes --- diff --git a/Changes b/Changes index 7f620e4..775bf5d 100644 --- a/Changes +++ b/Changes @@ -5,6 +5,9 @@ Revision history for SQL::Abstract - Switch the tokenizer to precompiled regexes (massive speedup) - Rudimentary handling of quotes ( 'WHERE' vs WHERE ) - Fix extended argument parsing by IN/BETWEEN + - Add proper handling of lists (foo,bar,?) + - Better handling of generic -function's during AST construction + - Special handle IS NOT? NULL revision 1.68 2010-09-16 ---------------------------- diff --git a/lib/SQL/Abstract/Test.pm b/lib/SQL/Abstract/Test.pm index 3c01661..42491f9 100644 --- a/lib/SQL/Abstract/Test.pm +++ b/lib/SQL/Abstract/Test.pm @@ -269,18 +269,24 @@ sub _parenthesis_unroll { } # a function binds tighter than a mathop - see if our ancestor is a - # mathop, and our content is a single non-mathop child with a single - # PAREN grandchild which would indicate mathop ( nonmathop ( ... ) ) + # mathop, and our content is: + # a single non-mathop child with a single PAREN grandchild which + # would indicate mathop ( nonmathop ( ... ) ) + # or a single non-mathop with a single LITERAL ( nonmathop ? ) elsif ( @{$child->[1]} == 1 and @{$child->[1][0][1]} == 1 and - $child->[1][0][1][0][0] eq 'PAREN' - and $ast->[0] =~ SQL::Abstract::Tree::_math_op_re() and $child->[1][0][0] !~ SQL::Abstract::Tree::_math_op_re + and + ( + $child->[1][0][1][0][0] eq 'PAREN' + or + $child->[1][0][1][0][0] eq 'LITERAL' + ) ) { push @children, $child->[1][0]; $changes++; diff --git a/lib/SQL/Abstract/Tree.pm b/lib/SQL/Abstract/Tree.pm index 092d88a..7b97b29 100644 --- a/lib/SQL/Abstract/Tree.pm +++ b/lib/SQL/Abstract/Tree.pm @@ -2,6 +2,7 @@ package SQL::Abstract::Tree; use strict; use warnings; +no warnings 'qw'; use Carp; use Hash::Merge qw//; @@ -33,16 +34,9 @@ $merger->specify_behavior({ }, }, 'SQLA::Tree Behavior' ); - -# Parser states for _recurse_parse() -use constant PARSE_TOP_LEVEL => 0; -use constant PARSE_IN_EXPR => 1; -use constant PARSE_IN_PARENS => 2; -use constant PARSE_RHS => 3; -use constant PARSE_IN_FUNC => 4; - my $op_look_ahead = '(?: (?= [\s\)\(\;] ) | \z)'; -my $op_look_behind = '(?: (?<= [\s\)\(] ) | \A )'; +my $op_look_behind = '(?: (?<= [\,\s\)\(] ) | \A )'; + my $quote_left = qr/[\`\'\"\[]/; my $quote_right = qr/[\`\'\"\]]/; @@ -82,8 +76,8 @@ my @expression_start_keywords = ( 'ROW_NUMBER \s* \( \s* \) \s+ OVER', ); -my $exp_start_re = join ("\n\t|\n", @expression_start_keywords ); -$exp_start_re = qr/ $op_look_behind (?i: $exp_start_re ) $op_look_ahead /x; +my $expr_start_re = join ("\n\t|\n", @expression_start_keywords ); +$expr_start_re = qr/ $op_look_behind (?i: $expr_start_re ) $op_look_ahead /x; # These are binary operator keywords always a single LHS and RHS # * AND/OR are handled separately as they are N-ary @@ -106,21 +100,38 @@ sub _math_op_re { $math_re } my $binary_op_re = '(?: NOT \s+)? (?:' . join ('|', qw/IN BETWEEN R?LIKE/) . ')'; -$binary_op_re = "(?: $op_look_behind (?i: $binary_op_re ) $op_look_ahead ) \n\t|\n $math_re"; +$binary_op_re = join "\n\t|\n", + "$op_look_behind (?i: $binary_op_re ) $op_look_ahead", + $math_re, + $op_look_behind . 'IS (?:\s+ NOT)?' . "(?= \\s+ NULL \\b | $op_look_ahead )", +; $binary_op_re = qr/$binary_op_re/x; sub _binary_op_re { $binary_op_re } - -my $tokenizer_re = join("\n\t|\n", - $exp_start_re, +my $all_known_re = join("\n\t|\n", + $expr_start_re, $binary_op_re, "$op_look_behind (?i: AND|OR|NOT ) $op_look_ahead", - (map { quotemeta $_ } qw/( ) ? */), + (map { quotemeta $_ } qw/, ( ) */), ); -#this one *is* capturing -$tokenizer_re = qr/ \s* ( $tokenizer_re ) \s* /x; +$all_known_re = qr/$all_known_re/x; + +#this one *is* capturing for the split below +# splits on whitespace if all else fails +my $tokenizer_re = qr/ \s* ( $all_known_re ) \s* | \s+ /x; + +# Parser states for _recurse_parse() +use constant PARSE_TOP_LEVEL => 0; +use constant PARSE_IN_EXPR => 1; +use constant PARSE_IN_PARENS => 2; +use constant PARSE_IN_FUNC => 3; +use constant PARSE_RHS => 4; + +my $expr_term_re = qr/ ^ (?: $expr_start_re | \) ) $/x; +my $rhs_term_re = qr/ ^ (?: $expr_term_re | $binary_op_re | (?i: AND | OR | NOT | \, ) ) $/x; +my $func_start_re = qr/^ (?: \? | \$\d+ | \( ) $/x; my %indents = ( select => 0, @@ -230,11 +241,15 @@ sub parse { # tokenize string, and remove all optional whitespace my $tokens = []; foreach my $token (split $tokenizer_re, $s) { - push @$tokens, $token if (length $token) && ($token =~ /\S/); + push @$tokens, $token if ( + defined $token + and + length $token + and + $token =~ /\S/ + ); } - - my $tree = $self->_recurse_parse($tokens, PARSE_TOP_LEVEL); - return $tree; + $self->_recurse_parse($tokens, PARSE_TOP_LEVEL); } sub _recurse_parse { @@ -248,11 +263,11 @@ sub _recurse_parse { or ($state == PARSE_IN_PARENS && $lookahead eq ')') or - ($state == PARSE_IN_EXPR && $lookahead =~ qr/ ^ (?: $exp_start_re | \) ) $ /x ) + ($state == PARSE_IN_EXPR && $lookahead =~ $expr_term_re ) or - ($state == PARSE_RHS && $lookahead =~ qr/ ^ (?: $exp_start_re | $binary_op_re | (?i: AND | OR | NOT ) | \) ) $ /x ) + ($state == PARSE_RHS && $lookahead =~ $rhs_term_re ) or - ($state == PARSE_IN_FUNC && $lookahead ne '(') + ($state == PARSE_IN_FUNC && $lookahead !~ $func_start_re) # if there are multiple values - the parenthesis will switch the $state ) { return $left||(); } @@ -268,17 +283,18 @@ sub _recurse_parse { $left = $left ? [$left, [PAREN => [$right||()] ]] : [PAREN => [$right||()] ]; } - # AND/OR - elsif ($token =~ /^ (?: OR | AND ) $/xi ) { - my $op = uc $token; + # AND/OR and LIST (,) + elsif ($token =~ /^ (?: OR | AND | \, ) $/xi ) { + my $op = ($token eq ',') ? 'LIST' : uc $token; + my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); # Merge chunks if logic matches if (ref $right and $op eq $right->[0]) { - $left = [ (shift @$right ), [$left, map { @$_ } @$right] ]; + $left = [ (shift @$right ), [$left||(), map { @$_ } @$right] ]; } else { - $left = [$op => [$left, $right]]; + $left = [$op => [ $left||(), $right||() ]]; } } # binary operator keywords @@ -296,7 +312,7 @@ sub _recurse_parse { $left = [$op => [$left, $right] ]; } # expression terminator keywords (as they start a new expression) - elsif ( $token =~ / ^ $exp_start_re $ /x ) { + elsif ( $token =~ / ^ $expr_start_re $ /x ) { my $op = uc $token; my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); $left = $left ? [ $left, [$op => [$right] ]] @@ -310,18 +326,21 @@ sub _recurse_parse { : [ $op => [$right] ]; } - # generic function - elsif (@$tokens && $tokens->[0] eq '(') { - my $right = $self->_recurse_parse($tokens, PARSE_IN_FUNC); - - $left = $left ? [ $left, [ $token => [$right||()] ]] - : [ $token => [$right||()] ]; - } - # literal (eat everything on the right until RHS termination) + # we're now in "unknown token" land - start eating tokens until + # we see something familiar else { - my $right = $self->_recurse_parse ($tokens, PARSE_RHS); - $left = $left ? [ $left, [LITERAL => [join ' ', $token, $self->unparse($right)||()] ] ] - : [ LITERAL => [join ' ', $token, $self->unparse($right)||()] ]; + my $right; + + # check if the current token is an unknown op-start + if (@$tokens and $tokens->[0] =~ $func_start_re) { + $right = [ $token => [ $self->_recurse_parse($tokens, PARSE_IN_FUNC) || () ] ]; + } + else { + $right = [ LITERAL => [ $token ] ]; + } + + $left = $left ? [ $left, $right ] + : $right; } } } @@ -395,7 +414,7 @@ sub unparse { } if (ref $car) { - return join ('', map $self->unparse($_, $bindargs, $depth), @$tree); + return join (' ', map $self->unparse($_, $bindargs, $depth), @$tree); } elsif ($car eq 'LITERAL') { if ($cdr->[0] eq '?') { @@ -412,6 +431,9 @@ sub unparse { elsif ($car eq 'AND' or $car eq 'OR' or $car =~ / ^ $binary_op_re $ /x ) { return join (" $car ", map $self->unparse($_, $bindargs, $depth), @{$cdr}); } + elsif ($car eq 'LIST' ) { + return join (', ', map $self->unparse($_, $bindargs, $depth), @{$cdr}); + } else { my ($l, $r) = @{$self->pad_keyword($car, $depth)}; return sprintf "$l%s %s$r", $self->format_keyword($car), $self->unparse($cdr, $bindargs, $depth); diff --git a/t/05in_between.t b/t/05in_between.t index a40416c..79f8c29 100644 --- a/t/05in_between.t +++ b/t/05in_between.t @@ -75,7 +75,7 @@ my @in_between_tests = ( ] }, }, stmt => "WHERE ( - ( start0 BETWEEN ? AND upper ? ) + ( start0 BETWEEN ? AND UPPER ? ) AND ( start1 BETWEEN ? AND ? ) AND ( start2 BETWEEN lower(x) AND upper(y) ) AND ( start3 BETWEEN lower(x) AND upper(?) ) diff --git a/t/10test.t b/t/10test.t index d83dd65..aa140d4 100644 --- a/t/10test.t +++ b/t/10test.t @@ -282,6 +282,17 @@ my @sql_tests = ( ] }, + # IS NULL (special LHS-only op) + { + equal => 1, + statements => [ + q/WHERE a IS NOT NULL AND b IS NULL/, + q/WHERE (a IS NOT NULL) AND b IS NULL/, + q/WHERE a IS NOT NULL AND (b IS NULL)/, + q/WHERE (a IS NOT NULL) AND ((b IS NULL))/, + ], + }, + # JOIN condition - equal { equal => 1, @@ -614,7 +625,7 @@ my @sql_tests = ( 'SELECT count(1) FROM foo', ] }, - # func + # misc func { equal => 1, statements => [ @@ -632,6 +643,33 @@ my @sql_tests = ( 'SELECT foo FROM bar ()', ] }, + # single ? of unknown funcs can unroll + # (think ...LIKE ?...) + { + equal => 1, + statements => [ + 'SELECT foo FROM bar WHERE bar > foo ?', + 'SELECT foo FROM bar WHERE bar > foo (?)', + 'SELECT foo FROM bar WHERE bar > foo( ? )', + ] + }, + { + equal => 1, + statements => [ + 'SELECT foo FROM bar WHERE bar > (foo ?)', + 'SELECT foo FROM bar WHERE bar > (foo( ? ))', + 'SELECT foo FROM bar WHERE bar > (( foo (?) ))', + ] + }, + { + equal => 1, + statements => [ + 'SELECT foo FROM bar WHERE bar foo ?', + 'SELECT foo FROM bar WHERE bar foo (?)', + 'SELECT foo FROM bar WHERE bar foo( (?))', + ] + }, + # not so about multival { equal => 0, statements => [ @@ -888,13 +926,13 @@ for my $test (@sql_tests) { if ($equal ^ $test->{equal}) { my ($ast1, $ast2) = map { SQL::Abstract::Test::parse ($_) } ($sql1, $sql2); - $_ = Dumper $_ for ($ast1, $ast2); - diag("sql1: $sql1"); - diag("sql2: $sql2"); - note("ast1: $ast1"); - note("ast2: $ast2"); + diag "sql1: $sql1"; + diag "sql2: $sql2"; + note $SQL::Abstract::Test::sql_differ; + note "ast1: $ast1"; + note "ast2: $ast2"; } } }