From: Peter Rabbitson Date: Sat, 6 Aug 2011 20:22:28 +0000 (+0200) Subject: Massively refactor arbitrary sql parser code X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=6f2a5b668d6d34e8aee21c2b0cf51fdbf5dee991;p=scpubgit%2FQ-Branch.git Massively refactor arbitrary sql parser code Now it deals much better with malformedness, does not recurse to insanely deep evels, and reports much much better errors on test mismatches --- diff --git a/Changes b/Changes index e2463f6..c638bb4 100644 --- a/Changes +++ b/Changes @@ -4,6 +4,7 @@ Revision history for SQL::Abstract - Fix over-eager parenthesis unrolling - Fix deep recursion warnings while parsing obnoxiously long sql statements - Fix incorrect comparison of malformed lists + - Fix incorrect reporting of mismatch-members in SQLA::Test revision 1.72 2010-12-21 ---------------------------- diff --git a/Makefile.PL b/Makefile.PL index 0f430c2..d738560 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -20,7 +20,6 @@ requires 'Getopt::Long::Descriptive' => 0.086; requires 'Hash::Merge' => 0.12; test_requires "Test::More" => 0.92; -test_requires "Test::Deep" => 0.106; test_requires "Test::Exception" => 0; test_requires "Test::Warn" => 0; test_requires "Storable" => 0; # for cloning in tests diff --git a/lib/SQL/Abstract/Test.pm b/lib/SQL/Abstract/Test.pm index 2485006..2bc681e 100644 --- a/lib/SQL/Abstract/Test.pm +++ b/lib/SQL/Abstract/Test.pm @@ -118,6 +118,7 @@ sub eq_sql { my $tree1 = $sqlat->parse($sql1); my $tree2 = $sqlat->parse($sql2); + undef $sql_differ; return 1 if _eq_sql($tree1, $tree2); } @@ -126,59 +127,80 @@ sub _eq_sql { # one is defined the other not if ( (defined $left) xor (defined $right) ) { + $sql_differ = sprintf ("[%s] != [%s]\n", map { defined $_ ? $sqlat->unparse ($_) : 'N/A' } ($left, $right) ); return 0; } + # one is undefined, then so is the other elsif (not defined $left) { return 1; } - # different amount of elements - elsif (@$left != @$right) { - return 0; - } - # one is empty - so is the other - elsif (@$left == 0) { + + # both are empty + elsif (@$left == 0 and @$right == 0) { return 1; } + + # one is empty + if (@$left == 0 or @$right == 0) { + $sql_differ = sprintf ("left: %s\nright: %s\n", map { @$_ ? $sqlat->unparse ($_) : 'N/A'} ($left, $right) ); + return 0; + } + # one is a list, the other is an op with a list elsif (ref $left->[0] xor ref $right->[0]) { - $sql_differ = sprintf ("left: %s\nright: %s\n", map { $sqlat->unparse ($_) } ($left, $right) ); + $sql_differ = sprintf ("[%s] != [%s]\nleft: %s\nright: %s\n", map + { ref $_ ? $sqlat->unparse ($_) : $_ } + ($left->[0], $right->[0], $left, $right) + ); return 0; } - # one is a list, so is the other + + # both are lists elsif (ref $left->[0]) { for (my $i = 0; $i <= $#$left or $i <= $#$right; $i++ ) { - return 0 if (not _eq_sql ($left->[$i], $right->[$i]) ); + if (not _eq_sql ($left->[$i], $right->[$i]) ) { + if (! $sql_differ or $sql_differ !~ /left\:\s .+ right:\s/xs) { + $sql_differ ||= ''; + $sql_differ .= "\n" unless $sql_differ =~ /\n\z/; + $sql_differ .= sprintf ("left: %s\nright: %s\n", map { $sqlat->unparse ($_) } ($left, $right) ); + } + return 0; + } } return 1; } - # both are an op-list combo + + # both are ops else { # unroll parenthesis if possible/allowed - $parenthesis_significant || $sqlat->_parenthesis_unroll($_) for $left, $right; + unless ( $parenthesis_significant ) { + $sqlat->_parenthesis_unroll($_) for $left, $right; + } - # if operators are different if ( $left->[0] ne $right->[0] ) { $sql_differ = sprintf "OP [$left->[0]] != [$right->[0]] in\nleft: %s\nright: %s\n", $sqlat->unparse($left), - $sqlat->unparse($right); + $sqlat->unparse($right) + ; return 0; } - # elsif operators are identical, compare operands + + # literals have a different arg-sig + elsif ($left->[0] eq '-LITERAL') { + (my $l = " $left->[1][0] " ) =~ s/\s+/ /g; + (my $r = " $right->[1][0] ") =~ s/\s+/ /g; + my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r); + $sql_differ = "[$l] != [$r]\n" if not $eq; + return $eq; + } + + # if operators are identical, compare operands else { - if ($left->[0] eq 'LITERAL' ) { # unary - (my $l = " $left->[1][0] " ) =~ s/\s+/ /g; - (my $r = " $right->[1][0] ") =~ s/\s+/ /g; - my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r); - $sql_differ = "[$l] != [$r]\n" if not $eq; - return $eq; - } - else { - my $eq = _eq_sql($left->[1], $right->[1]); - $sql_differ ||= sprintf ("left: %s\nright: %s\n", map { $sqlat->unparse ($_) } ($left, $right) ) if not $eq; - return $eq; - } + my $eq = _eq_sql($left->[1], $right->[1]); + $sql_differ ||= sprintf ("left: %s\nright: %s\n", map { $sqlat->unparse ($_) } ($left, $right) ) if not $eq; + return $eq; } } } diff --git a/lib/SQL/Abstract/Tree.pm b/lib/SQL/Abstract/Tree.pm index 1a4560c..c4c9cdc 100644 --- a/lib/SQL/Abstract/Tree.pm +++ b/lib/SQL/Abstract/Tree.pm @@ -64,7 +64,6 @@ my @expression_start_keywords = ( 'ON', 'WHERE', '(?: DEFAULT \s+ )? VALUES', - '(?: NOT \s+)? EXISTS', 'GROUP \s+ BY', 'HAVING', 'ORDER \s+ BY', @@ -120,10 +119,19 @@ $binary_op_re = qr/$binary_op_re/x; sub _binary_op_re { $binary_op_re } +my $unary_op_re = '(?: NOT \s+ EXISTS | NOT )'; +$unary_op_re = join "\n\t|\n", + "$op_look_behind (?i: $unary_op_re ) $op_look_ahead", +; +$unary_op_re = qr/$unary_op_re/x; + +sub _unary_op_re { $unary_op_re } + my $all_known_re = join("\n\t|\n", $expr_start_re, $binary_op_re, - "$op_look_behind (?i: AND|OR|NOT|\\* ) $op_look_ahead", + $unary_op_re, + "$op_look_behind (?i: AND|OR|\\* ) $op_look_ahead", (map { quotemeta $_ } qw/, ( )/), $placeholder_re, ); @@ -140,10 +148,14 @@ use constant PARSE_IN_EXPR => 1; use constant PARSE_IN_PARENS => 2; use constant PARSE_IN_FUNC => 3; use constant PARSE_RHS => 4; +use constant PARSE_LIST_ELT => 5; +my $asc_desc_re = qr/^ (?: ASC | DESC ) $/xi; my $expr_term_re = qr/ ^ (?: $expr_start_re | \) ) $/x; -my $rhs_term_re = qr/ ^ (?: $expr_term_re | $binary_op_re | (?i: AND | OR | NOT | \, ) ) $/x; -my $func_start_re = qr/^ (?: \* | $placeholder_re | \( ) $/x; +my $rhs_term_re = qr/ ^ (?: $expr_term_re | $binary_op_re | $unary_op_re | $asc_desc_re | (?i: AND | OR | \, ) ) $/x; +my $common_single_args_re = qr/ ^ (?: \* | $placeholder_re ) $/x; +my $all_std_keywords_re = qr/^ (?: $rhs_term_re | \( | $common_single_args_re ) $/x; + my %indents = ( select => 0, @@ -307,110 +319,155 @@ sub parse { $token =~ /\S/ ); } - $self->_recurse_parse($tokens, PARSE_TOP_LEVEL); + + return [ $self->_recurse_parse($tokens, PARSE_TOP_LEVEL) ]; } -{ -# this is temporary, lists can be parsed *without* recursing, but -# it requires a massive rewrite of the AST generator -no warnings qw/recursion/; sub _recurse_parse { my ($self, $tokens, $state) = @_; - my $left; + my @left; while (1) { # left-associative parsing - my $lookahead = $tokens->[0]; - if ( not defined($lookahead) + if ( ! @$tokens or - ($state == PARSE_IN_PARENS && $lookahead eq ')') + ($state == PARSE_IN_PARENS && $tokens->[0] eq ')') or - ($state == PARSE_IN_EXPR && $lookahead =~ $expr_term_re ) + ($state == PARSE_IN_EXPR && $tokens->[0] =~ $expr_term_re ) or - ($state == PARSE_RHS && $lookahead =~ $rhs_term_re ) + ($state == PARSE_RHS && $tokens->[0] =~ $rhs_term_re ) or - ($state == PARSE_IN_FUNC && $lookahead !~ $func_start_re) # if there are multiple values - the parenthesis will switch the $state + ($state == PARSE_LIST_ELT && $tokens->[0] =~ qr/^ (?: $expr_term_re | \, ) $/x) ) { - return $left||(); + return @left; } my $token = shift @$tokens; # nested expression in () if ($token eq '(' ) { - my $right = $self->_recurse_parse($tokens, PARSE_IN_PARENS); - $token = shift @$tokens or croak "missing closing ')' around block " . $self->unparse($right); - $token eq ')' or croak "unexpected token '$token' terminating block " . $self->unparse($right); + my @right = $self->_recurse_parse($tokens, PARSE_IN_PARENS); + $token = shift @$tokens or croak "missing closing ')' around block " . $self->unparse(\@right); + $token eq ')' or croak "unexpected token '$token' terminating block " . $self->unparse(\@right); + + push @left, [ '-PAREN' => \@right ]; + } + + # AND/OR + elsif ($token =~ /^ (?: OR | AND ) $/ix ) { + my $op = uc $token; - $left = $left ? [$left, [PAREN => [$right||()] ]] - : [PAREN => [$right||()] ]; + my @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); + + # Merge chunks if "logic" matches + @left = [ $op => [ @left, (@right and $op eq $right[0][0]) + ? @{ $right[0][1] } + : @right + ] ]; } - # AND/OR and LIST (,) - elsif ($token =~ /^ (?: OR | AND | \, ) $/xi ) { - my $op = ($token eq ',') ? 'LIST' : uc $token; - my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR) || []; + # LIST (,) + elsif ($token eq ',') { + + my @right = $self->_recurse_parse($tokens, PARSE_LIST_ELT); + + # deal with malformed lists ( foo, bar, , baz ) + @right = [] unless @right; - # Merge chunks if logic matches - if (ref $right and @$right and $op eq $right->[0]) { - $left = [ (shift @$right ), [$left||[], map { @$_ } @$right] ]; + @right = [ -MISC => [ @right ] ] if @right > 1; + + if (!@left) { + @left = [ -LIST => [ [], @right ] ]; + } + elsif ($left[0][0] eq '-LIST') { + push @{$left[0][1]}, (@{$right[0]} and $right[0][0] eq '-LIST') + ? @{$right[0][1]} + : @right + ; } else { - $left = [$op => [ $left||[], $right ]]; + @left = [ -LIST => [ @left, @right ] ]; } } + # binary operator keywords - elsif ( $token =~ /^ $binary_op_re $ /x ) { + elsif ($token =~ $binary_op_re) { my $op = uc $token; - my $right = $self->_recurse_parse($tokens, PARSE_RHS); + + my @right = $self->_recurse_parse($tokens, PARSE_RHS); # A between with a simple LITERAL for a 1st RHS argument needs a # rerun of the search to (hopefully) find the proper AND construct - if ($op eq 'BETWEEN' and $right->[0] eq 'LITERAL') { - unshift @$tokens, $right->[1][0]; - $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); + if ($op eq 'BETWEEN' and $right[0] eq '-LITERAL') { + unshift @$tokens, $right[1][0]; + @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); } - $left = [$op => [$left, $right] ]; + @left = [$op => [ @left, @right ]]; } - # expression terminator keywords (as they start a new expression) - elsif ( $token =~ / ^ $expr_start_re $ /x ) { + + # unary op keywords + elsif ( $token =~ $unary_op_re ) { my $op = uc $token; - my $right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); - $left = $left ? [ $left, [$op => [$right||()] ]] - : [ $op => [$right||()] ]; + my @right = $self->_recurse_parse ($tokens, PARSE_RHS); + + push @left, [ $op => \@right ]; } - # NOT - elsif ( $token =~ /^ NOT $/ix ) { + + # expression terminator keywords + elsif ( $token =~ $expr_start_re ) { my $op = uc $token; - my $right = $self->_recurse_parse ($tokens, PARSE_RHS); - $left = $left ? [ @$left, [$op => [$right||()] ]] - : [ $op => [$right||()] ]; + my @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); + push @left, [ $op => \@right ]; } + + # a '?' elsif ( $token =~ $placeholder_re) { - $left = $left ? [ $left, [ PLACEHOLDER => [ $token ] ] ] - : [ PLACEHOLDER => [ $token ] ]; + push @left, [ -PLACEHOLDER => [ $token ] ]; + } + + # check if the current token is an unknown op-start + elsif (@$tokens and $tokens->[0] =~ qr/^ (?: \( | $common_single_args_re ) $/x ) { + push @left, [ $token => [ $self->_recurse_parse($tokens, PARSE_RHS) ] ]; } + # we're now in "unknown token" land - start eating tokens until # we see something familiar else { - my $right; + my @lits = [ -LITERAL => [$token] ]; - # check if the current token is an unknown op-start - if (@$tokens and $tokens->[0] =~ $func_start_re) { - $right = [ $token => [ $self->_recurse_parse($tokens, PARSE_IN_FUNC) || () ] ]; - } - else { - $right = [ LITERAL => [ $token ] ]; - } + while (@$tokens and $tokens->[0] !~ $all_std_keywords_re) { + push @lits, [ -LITERAL => [ shift @$tokens ] ]; + } + + if (@left == 1) { + unshift @lits, pop @left; + } + + @lits = [ -MISC => [ @lits ] ] if @lits > 1; + + push @left, @lits; + } - $left = $left ? [ $left, $right ] - : $right; + # deal with post-fix operators (only when sql is sane - i.e. we have one element to apply to) + if (@left == 1 and @$tokens) { + + # asc/desc + if ($tokens->[0] =~ $asc_desc_re) { + my $op = shift @$tokens; + + # if -MISC - this is a literal collection, do not promote asc/desc to an op + if ($left[0][0] eq '-MISC') { + push @{$left[0][1]}, [ -LITERAL => [ $op ] ]; + } + else { + @left = [ ('-' . uc ($op)) => [ @left ] ]; + } + } } } } -} sub format_keyword { my ($self, $keyword) = @_; @@ -480,50 +537,52 @@ sub _unparse { # FIXME - needs a config switch to disable $self->_parenthesis_unroll($tree); - my ($car, $cdr) = @{$tree}[0,1]; + my ($op, $args) = @{$tree}[0,1]; - if (! defined $car or (! ref $car and ! defined $cdr) ) { + if (! defined $op or (! ref $op and ! defined $args) ) { require Data::Dumper; Carp::confess( sprintf ( "Internal error - malformed branch at depth $depth:\n%s", Data::Dumper::Dumper($tree) ) ); } - if (ref $car) { + if (ref $op) { return join (' ', map $self->_unparse($_, $bindargs, $depth), @$tree); } - elsif ($car eq 'LITERAL') { - return $cdr->[0]; + elsif ($op eq '-LITERAL') { # literal has different sig + return $args->[0]; } - elsif ($car eq 'PLACEHOLDER') { + elsif ($op eq '-PLACEHOLDER') { return $self->fill_in_placeholder($bindargs); } - elsif ($car eq 'PAREN') { + elsif ($op eq '-PAREN') { return sprintf ('( %s )', - join (' ', map { $self->_unparse($_, $bindargs, $depth + 2) } @{$cdr} ) + join (' ', map { $self->_unparse($_, $bindargs, $depth + 2) } @{$args} ) . - ($self->_is_key($cdr) + ($self->_is_key($args) ? ( $self->newline||'' ) . $self->indent($depth + 1) : '' ) ); } - elsif ($car eq 'AND' or $car eq 'OR' or $car =~ / ^ $binary_op_re $ /x ) { - return join (" $car ", map $self->_unparse($_, $bindargs, $depth), @{$cdr}); + elsif ($op eq 'AND' or $op eq 'OR' or $op =~ / ^ $binary_op_re $ /x ) { + return join (" $op ", map $self->_unparse($_, $bindargs, $depth), @{$args}); + } + elsif ($op eq '-LIST' ) { + return join (', ', map $self->_unparse($_, $bindargs, $depth), @{$args}); } - elsif ($car eq 'LIST' ) { - return join (', ', map $self->_unparse($_, $bindargs, $depth), @{$cdr}); + elsif ($op eq '-MISC' ) { + return join (' ', map $self->_unparse($_, $bindargs, $depth), @{$args}); } else { - my ($l, $r) = @{$self->pad_keyword($car, $depth)}; - + my ($l, $r) = @{$self->pad_keyword($op, $depth)}; return sprintf "$l%s%s%s$r", - $self->format_keyword($car), - ( ref $cdr eq 'ARRAY' and ref $cdr->[0] eq 'ARRAY' and $cdr->[0][0] and $cdr->[0][0] eq 'PAREN' ) + $self->format_keyword($op), + ( ref $args eq 'ARRAY' and @{$args} == 1 and $args->[0][0] eq '-PAREN' ) ? '' # mysql-- : ' ' , - $self->_unparse($cdr, $bindargs, $depth), + $self->_unparse($args, $bindargs, $depth), ; } } @@ -554,45 +613,47 @@ sub _parenthesis_unroll { for my $child (@{$ast->[1]}) { # the current node in this loop is *always* a PAREN - if (! ref $child or ! @$child or $child->[0] ne 'PAREN') { + if (! ref $child or ! @$child or $child->[0] ne '-PAREN') { push @children, $child; next; } # unroll nested parenthesis - while ( @{$child->[1]} && $child->[1][0][0] eq 'PAREN') { + while ( @{$child->[1]} == 1 and $child->[1][0][0] eq '-PAREN') { $child = $child->[1][0]; $changes++; } + # if the parent operator explcitly allows it nuke the parenthesis + if ( $ast->[0] =~ $unrollable_ops_re ) { + push @children, @{$child->[1]}; + $changes++; + } + # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list - if ( + elsif ( + @{$child->[1]} == 1 + and ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR') and - $child->[1][0][0] eq $ast->[0] + $child->[1][0][0] eq $ast->[0] ) { push @children, @{$child->[1][0][1]}; $changes++; } - # if the parent operator explcitly allows it nuke the parenthesis - elsif ( $ast->[0] =~ $unrollable_ops_re ) { - push @children, $child->[1][0]; - $changes++; - } - # only *ONE* LITERAL or placeholder element # as an AND/OR/NOT argument elsif ( @{$child->[1]} == 1 && ( - $child->[1][0][0] eq 'LITERAL' + $child->[1][0][0] eq '-LITERAL' or - $child->[1][0][0] eq 'PLACEHOLDER' + $child->[1][0][0] eq '-PLACEHOLDER' ) && ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR' or $ast->[0] eq 'NOT' ) ) { - push @children, $child->[1][0]; + push @children, @{$child->[1]}; $changes++; } @@ -619,7 +680,7 @@ sub _parenthesis_unroll { $ast->[0] =~ SQL::Abstract::Tree::_math_op_re() ) ) { - push @children, $child->[1][0]; + push @children, @{$child->[1]}; $changes++; } @@ -639,14 +700,14 @@ sub _parenthesis_unroll { $child->[1][0][0] !~ SQL::Abstract::Tree::_math_op_re and ( - $child->[1][0][1][0][0] eq 'PAREN' + $child->[1][0][1][0][0] eq '-PAREN' or - $child->[1][0][1][0][0] eq 'LITERAL' + $child->[1][0][1][0][0] eq '-LITERAL' or - $child->[1][0][1][0][0] eq 'PLACEHOLDER' + $child->[1][0][1][0][0] eq '-PLACEHOLDER' ) ) { - push @children, $child->[1][0]; + push @children, @{$child->[1]}; $changes++; } @@ -660,7 +721,6 @@ sub _parenthesis_unroll { $ast->[1] = \@children; } while ($changes); - } sub format { my $self = shift; $self->unparse($self->parse($_[0]), $_[1]) } diff --git a/t/10test.t b/t/10test.t index b5d4829..9c69343 100644 --- a/t/10test.t +++ b/t/10test.t @@ -704,6 +704,14 @@ my @sql_tests = ( 'SELECT * FROM foo WHERE bar = (baz( buzz ))', ] }, + # oddballs + { + equal => 1, + statements => [ + 'WHERE ( foo GLOB ? )', + 'WHERE foo GLOB ?', + ], + } ); my @bind_tests = ( @@ -909,7 +917,7 @@ plan tests => 1 + map { scalar @{$_->{bindvals}} } @bind_tests ) + - 3; + 9; use_ok('SQL::Abstract::Test', import => [qw( eq_sql_bind eq_sql eq_bind is_same_sql_bind @@ -991,3 +999,36 @@ ok(!eq_sql_bind( ), "eq_sql_bind considers different SQL expressions and equal bind values different" ); + +# test diag string +ok (! eq_sql ( + 'SELECT owner_name FROM books me WHERE ( source = ? )', + 'SELECT owner_name FROM books me WHERE ( sUOrce = ? )', +)); +like( + $SQL::Abstract::Test::sql_differ, + qr/\Q[ source ] != [ sUOrce ]/, + 'expected debug of literal diff', +); + +ok (! eq_sql ( + 'SELECT owner_name FROM books me ORDER BY owner_name', + 'SELECT owner_name FROM books me GROUP BY owner_name', +)); +like( + $SQL::Abstract::Test::sql_differ, + qr/\QOP [ORDER BY] != [GROUP BY]/, + 'expected debug of op diff', +); + +ok (! eq_sql ( + 'SELECT owner_name FROM books WHERE ( source = ? )', + 'SELECT owner_name FROM books' +)); + +like( + $SQL::Abstract::Test::sql_differ, + qr|\Q[WHERE source = ?] != [N/A]|, + 'expected debug of missing branch', +); + diff --git a/t/11parser.t b/t/11parser.t index 7f4dae7..d6acd1d 100644 --- a/t/11parser.t +++ b/t/11parser.t @@ -2,50 +2,46 @@ use strict; use warnings; use Test::More; -use Test::Deep; use Test::Warn; use SQL::Abstract::Tree; my $sqlat = SQL::Abstract::Tree->new; - -cmp_deeply($sqlat->parse("SELECT a, b.*, * FROM foo WHERE foo.a =1 and foo.b LIKE 'station'"), [ +is_deeply($sqlat->parse("SELECT a, b.*, * FROM foo WHERE foo.a =1 and foo.b LIKE 'station'"), [ [ + "SELECT", [ - "SELECT", [ + "-LIST", [ - "LIST", [ + "-LITERAL", [ - "LITERAL", - [ - "a" - ] - ], + "a" + ] + ], + [ + "-LITERAL", [ - "LITERAL", - [ - "b.*" - ] - ], + "b.*" + ] + ], + [ + "-LITERAL", [ - "LITERAL", - [ - "*" - ] + "*" ] ] ] ] - ], + ] + ], + [ + "FROM", [ - "FROM", [ + "-LITERAL", [ - "LITERAL", - [ - "foo" - ] + "foo" ] ] ] @@ -60,13 +56,13 @@ cmp_deeply($sqlat->parse("SELECT a, b.*, * FROM foo WHERE foo.a =1 and foo.b LIK "=", [ [ - "LITERAL", + "-LITERAL", [ "foo.a" ] ], [ - "LITERAL", + "-LITERAL", [ 1 ] @@ -77,13 +73,13 @@ cmp_deeply($sqlat->parse("SELECT a, b.*, * FROM foo WHERE foo.a =1 and foo.b LIK "LIKE", [ [ - "LITERAL", + "-LITERAL", [ "foo.b" ] ], [ - "LITERAL", + "-LITERAL", [ "'station'" ] @@ -96,31 +92,32 @@ cmp_deeply($sqlat->parse("SELECT a, b.*, * FROM foo WHERE foo.a =1 and foo.b LIK ] ], 'simple statement parsed correctly'); -cmp_deeply($sqlat->parse( "SELECT * FROM (SELECT * FROM foobar) WHERE foo.a =1 and foo.b LIKE 'station'"), [ +is_deeply($sqlat->parse( "SELECT * FROM (SELECT * FROM foobar) foo WHERE foo.a =1 and foo.b LIKE 'station'"), [ [ + "SELECT", [ - "SELECT", [ + "-LITERAL", [ - "LITERAL", - [ - "*" - ] + "*" ] ] - ], + ] + ], + [ + "FROM", [ - "FROM", [ + "-MISC", [ - "PAREN", [ + "-PAREN", [ [ "SELECT", [ [ - "LITERAL", + "-LITERAL", [ "*" ] @@ -131,7 +128,7 @@ cmp_deeply($sqlat->parse( "SELECT * FROM (SELECT * FROM foobar) WHERE foo.a =1 a "FROM", [ [ - "LITERAL", + "-LITERAL", [ "foobar" ] @@ -139,6 +136,12 @@ cmp_deeply($sqlat->parse( "SELECT * FROM (SELECT * FROM foobar) WHERE foo.a =1 a ] ] ] + ], + [ + "-LITERAL", + [ + "foo" + ] ] ] ] @@ -154,13 +157,13 @@ cmp_deeply($sqlat->parse( "SELECT * FROM (SELECT * FROM foobar) WHERE foo.a =1 a "=", [ [ - "LITERAL", + "-LITERAL", [ "foo.a" ] ], [ - "LITERAL", + "-LITERAL", [ 1 ] @@ -171,13 +174,13 @@ cmp_deeply($sqlat->parse( "SELECT * FROM (SELECT * FROM foobar) WHERE foo.a =1 a "LIKE", [ [ - "LITERAL", + "-LITERAL", [ "foo.b" ] ], [ - "LITERAL", + "-LITERAL", [ "'station'" ] @@ -190,386 +193,145 @@ cmp_deeply($sqlat->parse( "SELECT * FROM (SELECT * FROM foobar) WHERE foo.a =1 a ] ], 'subquery statement parsed correctly'); -cmp_deeply($sqlat->parse("SELECT * FROM lolz WHERE ( foo.a =1 ) and foo.b LIKE 'station'"), [ +is_deeply($sqlat->parse( "SELECT [screen].[id], [screen].[name], [screen].[section_id], [screen].[xtype] FROM [users_roles] [me] JOIN [roles] [role] ON [role].[id] = [me].[role_id] JOIN [roles_permissions] [role_permissions] ON [role_permissions].[role_id] = [role].[id] JOIN [permissions] [permission] ON [permission].[id] = [role_permissions].[permission_id] JOIN [permissionscreens] [permission_screens] ON [permission_screens].[permission_id] = [permission].[id] JOIN [screens] [screen] ON [screen].[id] = [permission_screens].[screen_id] WHERE ( [me].[user_id] = ? ) GROUP BY [screen].[id], [screen].[name], [screen].[section_id], [screen].[xtype]"), [ [ + "SELECT", [ - "SELECT", [ + "-LIST", [ - "LITERAL", [ - "*" + "-LITERAL", + [ + "[screen].[id]" + ] + ], + [ + "-LITERAL", + [ + "[screen].[name]" + ] + ], + [ + "-LITERAL", + [ + "[screen].[section_id]" + ] + ], + [ + "-LITERAL", + [ + "[screen].[xtype]" + ] ] ] ] - ], + ] + ], + [ + "FROM", [ - "FROM", [ + "-MISC", [ - "LITERAL", [ - "lolz" + "-LITERAL", + [ + "[users_roles]" + ] + ], + [ + "-LITERAL", + [ + "[me]" + ] ] ] ] ] ], [ - "WHERE", + "JOIN", [ [ - "AND", + "-MISC", [ [ - "PAREN", + "-LITERAL", [ - [ - "=", - [ - [ - "LITERAL", - [ - "foo.a" - ] - ], - [ - "LITERAL", - [ - 1 - ] - ] - ] - ] + "[roles]" ] ], [ - "LIKE", + "-LITERAL", [ - [ - "LITERAL", - [ - "foo.b" - ] - ], - [ - "LITERAL", - [ - "'station'" - ] - ] + "[role]" ] ] ] ] ] - ] -], 'simple statement with parens in where parsed correctly'); - -cmp_deeply($sqlat->parse( "SELECT [screen].[id], [screen].[name], [screen].[section_id], [screen].[xtype] FROM [users_roles] [me] JOIN [roles] [role] ON [role].[id] = [me].[role_id] JOIN [roles_permissions] [role_permissions] ON [role_permissions].[role_id] = [role].[id] JOIN [permissions] [permission] ON [permission].[id] = [role_permissions].[permission_id] JOIN [permissionscreens] [permission_screens] ON [permission_screens].[permission_id] = [permission].[id] JOIN [screens] [screen] ON [screen].[id] = [permission_screens].[screen_id] WHERE ( [me].[user_id] = ? ) GROUP BY [screen].[id], [screen].[name], [screen].[section_id], [screen].[xtype]"), [ + ], [ + "ON", [ [ + "=", [ [ + "-LITERAL", [ - [ - [ - [ - [ - [ - [ - [ - "SELECT", - [ - [ - "LIST", - [ - [ - "LITERAL", - [ - "[screen].[id]" - ] - ], - [ - "LITERAL", - [ - "[screen].[name]" - ] - ], - [ - "LITERAL", - [ - "[screen].[section_id]" - ] - ], - [ - "LITERAL", - [ - "[screen].[xtype]" - ] - ] - ] - ] - ] - ], - [ - "FROM", - [ - [ - [ - "LITERAL", - [ - "[users_roles]" - ] - ], - [ - "LITERAL", - [ - "[me]" - ] - ] - ] - ] - ] - ], - [ - "JOIN", - [ - [ - [ - "LITERAL", - [ - "[roles]" - ] - ], - [ - "LITERAL", - [ - "[role]" - ] - ] - ] - ] - ] - ], - [ - "ON", - [ - [ - "=", - [ - [ - "LITERAL", - [ - "[role].[id]" - ] - ], - [ - "LITERAL", - [ - "[me].[role_id]" - ] - ] - ] - ] - ] - ] - ], - [ - "JOIN", - [ - [ - [ - "LITERAL", - [ - "[roles_permissions]" - ] - ], - [ - "LITERAL", - [ - "[role_permissions]" - ] - ] - ] - ] - ] - ], - [ - "ON", - [ - [ - "=", - [ - [ - "LITERAL", - [ - "[role_permissions].[role_id]" - ] - ], - [ - "LITERAL", - [ - "[role].[id]" - ] - ] - ] - ] - ] - ] - ], - [ - "JOIN", - [ - [ - [ - "LITERAL", - [ - "[permissions]" - ] - ], - [ - "LITERAL", - [ - "[permission]" - ] - ] - ] - ] - ] - ], - [ - "ON", - [ - [ - "=", - [ - [ - "LITERAL", - [ - "[permission].[id]" - ] - ], - [ - "LITERAL", - [ - "[role_permissions].[permission_id]" - ] - ] - ] - ] - ] - ] - ], - [ - "JOIN", - [ - [ - [ - "LITERAL", - [ - "[permissionscreens]" - ] - ], - [ - "LITERAL", - [ - "[permission_screens]" - ] - ] - ] - ] + "[role].[id]" ] ], [ - "ON", + "-LITERAL", [ - [ - "=", - [ - [ - "LITERAL", - [ - "[permission_screens].[permission_id]" - ] - ], - [ - "LITERAL", - [ - "[permission].[id]" - ] - ] - ] - ] + "[me].[role_id]" ] ] - ], + ] + ] + ] + ], + [ + "JOIN", + [ + [ + "-MISC", [ - "JOIN", [ + "-LITERAL", [ - [ - "LITERAL", - [ - "[screens]" - ] - ], - [ - "LITERAL", - [ - "[screen]" - ] - ] + "[roles_permissions]" ] - ] - ] - ], - [ - "ON", - [ + ], [ - "=", + "-LITERAL", [ - [ - "LITERAL", - [ - "[screen].[id]" - ] - ], - [ - "LITERAL", - [ - "[permission_screens].[screen_id]" - ] - ] + "[role_permissions]" ] ] ] ] - ], + ] + ], + [ + "ON", [ - "WHERE", [ + "=", [ - "PAREN", [ + "-LITERAL", [ - "=", - [ - [ - "LITERAL", - [ - "[me].[user_id]" - ] - ], - [ - "PLACEHOLDER", - [ - "?" - ] - ] - ] + "[role_permissions].[role_id]" + ] + ], + [ + "-LITERAL", + [ + "[role].[id]" ] ] ] @@ -577,31 +339,190 @@ cmp_deeply($sqlat->parse( "SELECT [screen].[id], [screen].[name], [screen].[sect ] ], [ - "GROUP BY", + "JOIN", [ [ - "LIST", + "-MISC", [ [ - "LITERAL", + "-LITERAL", [ - "[screen].[id]" + "[permissions]" ] ], [ - "LITERAL", + "-LITERAL", + [ + "[permission]" + ] + ] + ] + ] + ] + ], + [ + "ON", + [ + [ + "=", + [ + [ + "-LITERAL", + [ + "[permission].[id]" + ] + ], + [ + "-LITERAL", + [ + "[role_permissions].[permission_id]" + ] + ] + ] + ] + ] + ], + [ + "JOIN", + [ + [ + "-MISC", + [ + [ + "-LITERAL", + [ + "[permissionscreens]" + ] + ], + [ + "-LITERAL", + [ + "[permission_screens]" + ] + ] + ] + ] + ] + ], + [ + "ON", + [ + [ + "=", + [ + [ + "-LITERAL", + [ + "[permission_screens].[permission_id]" + ] + ], + [ + "-LITERAL", + [ + "[permission].[id]" + ] + ] + ] + ] + ] + ], + [ + "JOIN", + [ + [ + "-MISC", + [ + [ + "-LITERAL", + [ + "[screens]" + ] + ], + [ + "-LITERAL", + [ + "[screen]" + ] + ] + ] + ] + ] + ], + [ + "ON", + [ + [ + "=", + [ + [ + "-LITERAL", + [ + "[screen].[id]" + ] + ], + [ + "-LITERAL", + [ + "[permission_screens].[screen_id]" + ] + ] + ] + ] + ] + ], + [ + "WHERE", + [ + [ + "-PAREN", + [ + [ + "=", + [ + [ + "-LITERAL", + [ + "[me].[user_id]" + ] + ], + [ + "-PLACEHOLDER", + [ + "?" + ] + ] + ] + ] + ] + ] + ] + ], + [ + "GROUP BY", + [ + [ + "-LIST", + [ + [ + "-LITERAL", + [ + "[screen].[id]" + ] + ], + [ + "-LITERAL", [ "[screen].[name]" ] ], [ - "LITERAL", + "-LITERAL", [ "[screen].[section_id]" ] ], [ - "LITERAL", + "-LITERAL", [ "[screen].[xtype]" ] @@ -612,38 +533,36 @@ cmp_deeply($sqlat->parse( "SELECT [screen].[id], [screen].[name], [screen].[sect ] ], 'real life statement 1 parsed correctly'); -cmp_deeply($sqlat->parse("SELECT x, y FROM foo WHERE x IN (?, ?, ?, ?)"), [ +is_deeply($sqlat->parse("SELECT x, y FROM foo WHERE x IN (?, ?, ?, ?)"), [ [ + "SELECT", [ - "SELECT", [ + "-LIST", [ - "LIST", [ + "-LITERAL", [ - "LITERAL", - [ - "x" - ] - ], + "x" + ] + ], + [ + "-LITERAL", [ - "LITERAL", - [ - "y" - ] + "y" ] ] ] ] - ], + ] + ], + [ + "FROM", [ - "FROM", [ + "-LITERAL", [ - "LITERAL", - [ - "foo" - ] + "foo" ] ] ] @@ -655,37 +574,37 @@ cmp_deeply($sqlat->parse("SELECT x, y FROM foo WHERE x IN (?, ?, ?, ?)"), [ "IN", [ [ - "LITERAL", + "-LITERAL", [ "x" ] ], [ - "PAREN", + "-PAREN", [ [ - "LIST", + "-LIST", [ [ - "PLACEHOLDER", + "-PLACEHOLDER", [ "?" ] ], [ - "PLACEHOLDER", + "-PLACEHOLDER", [ "?" ] ], [ - "PLACEHOLDER", + "-PLACEHOLDER", [ "?" ] ], [ - "PLACEHOLDER", + "-PLACEHOLDER", [ "?" ] @@ -700,9 +619,332 @@ cmp_deeply($sqlat->parse("SELECT x, y FROM foo WHERE x IN (?, ?, ?, ?)"), [ ] ], 'Lists parsed correctly'); +is_deeply($sqlat->parse("SELECT * * FROM (SELECT *, FROM foobar baz buzz) foo bar WHERE NOT NOT NOT EXISTS (SELECT 'cr,ap') AND foo.a = ? and not (foo.b LIKE 'station') and x = y and a = b and GROUP BY , ORDER BY x x1 x2 y asc, max(y) desc x z desc"), [ + [ + "SELECT", + [ + [ + "*", + [ + [ + "-LITERAL", + [ + "*" + ] + ] + ] + ] + ] + ], + [ + "FROM", + [ + [ + "-MISC", + [ + [ + "-PAREN", + [ + [ + "SELECT", + [ + [ + "-LIST", + [ + [ + "-LITERAL", + [ + "*" + ] + ], + [] + ] + ] + ] + ], + [ + "FROM", + [ + [ + "-MISC", + [ + [ + "-LITERAL", + [ + "foobar" + ] + ], + [ + "-LITERAL", + [ + "baz" + ] + ], + [ + "-LITERAL", + [ + "buzz" + ] + ] + ] + ] + ] + ] + ] + ], + [ + "-LITERAL", + [ + "foo" + ] + ], + [ + "-LITERAL", + [ + "bar" + ] + ] + ] + ] + ] + ], + [ + "WHERE", + [ + [ + "AND", + [ + [ + "NOT", + [] + ], + [ + "NOT", + [] + ], + [ + "NOT EXISTS", + [ + [ + "-PAREN", + [ + [ + "SELECT", + [ + [ + "-LIST", + [ + [ + "-LITERAL", + [ + "'cr" + ] + ], + [ + "-LITERAL", + [ + "ap'" + ] + ] + ] + ] + ] + ] + ] + ] + ] + ], + [ + "=", + [ + [ + "-LITERAL", + [ + "foo.a" + ] + ], + [ + "-PLACEHOLDER", + [ + "?" + ] + ] + ] + ], + [ + "NOT", + [ + [ + "-PAREN", + [ + [ + "LIKE", + [ + [ + "-LITERAL", + [ + "foo.b" + ] + ], + [ + "-LITERAL", + [ + "'station'" + ] + ] + ] + ] + ] + ] + ] + ], + [ + "=", + [ + [ + "-LITERAL", + [ + "x" + ] + ], + [ + "-LITERAL", + [ + "y" + ] + ] + ] + ], + [ + "=", + [ + [ + "-LITERAL", + [ + "a" + ] + ], + [ + "-LITERAL", + [ + "b" + ] + ] + ] + ] + ] + ] + ] + ], + [ + "GROUP BY", + [ + [ + "-LIST", + [ + [], + [] + ] + ] + ] + ], + [ + "ORDER BY", + [ + [ + "-LIST", + [ + [ + "-MISC", + [ + [ + "-LITERAL", + [ + "x" + ] + ], + [ + "-LITERAL", + [ + "x1" + ] + ], + [ + "-LITERAL", + [ + "x2" + ] + ], + [ + "-LITERAL", + [ + "y" + ] + ], + [ + "-LITERAL", + [ + "asc" + ] + ] + ] + ], + [ + "max", + [ + [ + "-MISC", + [ + [ + "-DESC", + [ + [ + "-PAREN", + [ + [ + "-LITERAL", + [ + "y" + ] + ] + ] + ] + ] + ], + [ + "-LITERAL", + [ + "x" + ] + ], + [ + "-LITERAL", + [ + "z" + ] + ], + [ + "-LITERAL", + [ + "desc" + ] + ] + ] + ] + ] + ] + ] + ] + ] + ] +], 'Deliberately malformed SQL parsed "correctly"'); + + # test for recursion warnings on huge selectors +my @lst = ('XAA' .. 'XZZ'); +#@lst = ('XAAA' .. 'XZZZ'); # if you really want to wait a while warnings_are { - my $sql = sprintf 'SELECT %s FROM foo', join (', ', map { qq|"$_"| } 'aa' .. 'zz' ); + my $sql = sprintf 'SELECT %s FROM foo', join (', ', (map { "( $_ )" } @lst), (map { qq|"$_"| } @lst), (map { qq|"$_", ( $_ )| } @lst) ); my $tree = $sqlat->parse($sql); is_deeply( $tree, [ @@ -710,9 +952,11 @@ warnings_are { "SELECT", [ [ - "LIST", + "-LIST", [ - map { [ "LITERAL", [ qq|"$_"| ] ] } ('aa' .. 'zz') + (map { [ -PAREN => [ [ -LITERAL => [ $_ ] ] ] ] } @lst), + (map { [ -LITERAL => [ qq|"$_"| ] ] } @lst), + (map { [ -LITERAL => [ qq|"$_"| ] ], [ -PAREN => [ [ -LITERAL => [ $_ ] ] ] ] } @lst), ] ] ] @@ -721,7 +965,7 @@ warnings_are { "FROM", [ [ - "LITERAL", + "-LITERAL", [ "foo" ]