X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract%2FTest.pm;h=98277b9793960db71b68a3c0d92612cb64917980;hb=ec9af79ee98f3b1e692587c8e4f16084ecf8c265;hp=0500ffabeeb34f00c17cc23f587562e871946e32;hpb=351498952c2359349196da6dfbdce5818b59e2fd;p=scpubgit%2FQ-Branch.git diff --git a/lib/SQL/Abstract/Test.pm b/lib/SQL/Abstract/Test.pm index 0500ffa..98277b9 100644 --- a/lib/SQL/Abstract/Test.pm +++ b/lib/SQL/Abstract/Test.pm @@ -13,13 +13,15 @@ our @EXPORT_OK = qw/&is_same_sql_bind &is_same_sql &is_same_bind $case_sensitive $sql_differ/; our $case_sensitive = 0; +our $parenthesis_significant = 0; our $sql_differ; # keeps track of differing portion between SQLs our $tb = __PACKAGE__->builder; # Parser states for _recurse_parse() +use constant PARSE_TOP_LEVEL => 0; use constant PARSE_IN_EXPR => 1; use constant PARSE_IN_PARENS => 2; -use constant PARSE_TOP_LEVEL => 3; +use constant PARSE_RHS => 3; # These SQL keywords always signal end of the current expression (except inside # of a parenthesized subexpression). @@ -49,8 +51,26 @@ my @expression_terminator_sql_keywords = ( 'EXCEPT', ); +# These are binary operator keywords always a single LHS and RHS +# * AND/OR are handled separately as they are N-ary +# * BETWEEN without paranthesis around the ANDed arguments (which +# makes it a non-binary op) is detected and accomodated in +# _recurse_parse() +my @binary_op_keywords = ( + (map { "\Q$_\E" } (qw/< > != = <= >=/)), + '(?: NOT \s+)? LIKE', + '(?: NOT \s+)? BETWEEN', +); + +my $tokenizer_re_str = join("\n\t|\n", + ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR' ), + ( map { q! (?<= [\w\s\`\'\)] ) ! . $_ . q! (?= [\w\s\`\'\(] ) ! } @binary_op_keywords ), +); + +my $tokenizer_re = qr/ \s* ( \( | \) | \? | $tokenizer_re_str ) \s* /xi; + # All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics -my @unrollable_sql_keywords = ( +my @unrollable_ops = ( 'ON', 'WHERE', 'GROUP \s+ BY', @@ -58,27 +78,6 @@ my @unrollable_sql_keywords = ( 'ORDER \s+ BY', ); -my $tokenizer_re_str = join('|', - map { '\b' . $_ . '\b' } - @expression_terminator_sql_keywords, 'AND', 'OR' -); - -my $tokenizer_re = qr/ - \s* - ( - \( - | - \) - | - $tokenizer_re_str - ) - \s* -/xi; - -my $unrollable_re_str = join ('|', map { $_ } @unrollable_sql_keywords); -my $unrollable_re = qr/^ (?: $unrollable_re_str ) $/ix; - - sub is_same_sql_bind { my ($sql1, $bind_ref1, $sql2, $bind_ref2, $msg) = @_; @@ -205,54 +204,8 @@ sub _eq_sql { # both are an op-list combo else { - for ($left, $right) { - - next unless (ref $_->[1]); - - # unroll parenthesis in an elaborate loop - my $changes; - do { - - my @children; - $changes = 0; - - for my $child (@{$_->[1]}) { - if (not ref $child or not $child->[0] eq 'PAREN') { - push @children, $child; - next; - } - - # unroll nested parenthesis - while ($child->[1][0][0] eq 'PAREN') { - $child = $child->[1][0]; - $changes++; - } - - # if the parens are wrapped around an AND/OR matching the parent AND/OR - open the parens up and merge the list - if ( - ( $_->[0] eq 'AND' or $_->[0] eq 'OR') - and - $child->[1][0][0] eq $_->[0] - ) { - push @children, @{$child->[1][0][1]}; - $changes++; - } - - # if the parent operator explcitly allows it, or if parents are wrapped around an expression just nuke them - elsif ( $child->[1][0][0] eq 'EXPR' or $_->[0] =~ $unrollable_re ) { - push @children, $child->[1][0]; - $changes++; - } - - # otherwise no more mucking - else { - push @children, $child; - } - } - - $_->[1] = \@children; - } while ($changes); - } + # unroll parenthesis if possible/allowed + _parenthesis_unroll ($_) for ($left, $right); # if operators are different if ($left->[0] ne $right->[0]) { @@ -264,10 +217,10 @@ sub _eq_sql { # elsif operators are identical, compare operands else { if ($left->[0] eq 'EXPR' ) { # unary operator - (my $l = " $left->[1] " ) =~ s/\s+/ /g; - (my $r = " $right->[1] ") =~ s/\s+/ /g; + (my $l = " $left->[1][0] " ) =~ s/\s+/ /g; + (my $r = " $right->[1][0] ") =~ s/\s+/ /g; my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r); - $sql_differ = "[$left->[1]] != [$right->[1]]\n" if not $eq; + $sql_differ = "[$l] != [$r]\n" if not $eq; return $eq; } else { @@ -279,7 +232,6 @@ sub _eq_sql { } } - sub parse { my $s = shift; @@ -289,7 +241,7 @@ sub parse { $token =~ s/\s+/ /g; $token =~ s/\s+([^\w\s])/$1/g; $token =~ s/([^\w\s])\s+/$1/g; - push @$tokens, $token if $token !~ /^$/; + push @$tokens, $token if length $token; } my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL); @@ -307,14 +259,11 @@ sub _recurse_parse { or ($state == PARSE_IN_PARENS && $lookahead eq ')') or - ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords) ) + ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) ) + or + ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR' ) ) ) { - return $left; - return ($state == PARSE_TOP_LEVEL - ? $left->[0] - : $left - ); } my $token = shift @$tokens; @@ -322,12 +271,12 @@ sub _recurse_parse { # nested expression in () if ($token eq '(') { my $right = _recurse_parse($tokens, PARSE_IN_PARENS); - $token = shift @$tokens or croak "missing ')'"; - $token eq ')' or croak "unexpected token : $token"; + $token = shift @$tokens or croak "missing closing ')' around block " . unparse ($right); + $token eq ')' or croak "unexpected token '$token' terminating block " . unparse ($right); $left = $left ? [@$left, [PAREN => [$right] ]] : [PAREN => [$right] ]; } - # AND/OR + # AND/OR elsif ($token =~ /^ (?: OR | AND ) $/xi ) { my $op = uc $token; my $right = _recurse_parse($tokens, PARSE_IN_EXPR); @@ -340,22 +289,99 @@ sub _recurse_parse { $left = [$op => [$left, $right]]; } } + # binary operator keywords + elsif (grep { $token =~ /^ $_ $/xi } @binary_op_keywords ) { + my $op = uc $token; + my $right = _recurse_parse($tokens, PARSE_RHS); + + # A between with a simple EXPR for a 1st RHS argument needs a + # rerun of the search to (hopefully) find the proper AND construct + if ($op eq 'BETWEEN' and $right->[0] eq 'EXPR') { + unshift @$tokens, $right->[1][0]; + $right = _recurse_parse($tokens, PARSE_IN_EXPR); + } + + $left = [$op => [$left, $right] ]; + } # expression terminator keywords (as they start a new expression) elsif (grep { $token =~ /^ $_ $/xi } @expression_terminator_sql_keywords ) { my $op = uc $token; my $right = _recurse_parse($tokens, PARSE_IN_EXPR); $left = $left ? [@$left, [$op => [$right] ]] - : [ [$op => [$right] ] ]; + : [[ $op => [$right] ]]; } # leaf expression else { - $left = $left ? [@$left, [EXPR => $token] ] - : [ EXPR => $token ]; + $left = $left ? [@$left, [EXPR => [$token] ] ] + : [ EXPR => [$token] ]; } } } +sub _parenthesis_unroll { + my $ast = shift; + + return if $parenthesis_significant; + return unless (ref $ast and ref $ast->[1]); + + my $changes; + do { + my @children; + $changes = 0; + for my $child (@{$ast->[1]}) { + if (not ref $child or not $child->[0] eq 'PAREN') { + push @children, $child; + next; + } + + # unroll nested parenthesis + while ($child->[1][0][0] eq 'PAREN') { + $child = $child->[1][0]; + $changes++; + } + + # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list + if ( + ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR') + and + $child->[1][0][0] eq $ast->[0] + ) { + push @children, @{$child->[1][0][1]}; + $changes++; + } + + # if the parent operator explcitly allows it nuke the parenthesis + elsif ( grep { $ast->[0] =~ /^ $_ $/xi } @unrollable_ops ) { + push @children, $child->[1][0]; + $changes++; + } + + # only one element in the parenthesis which is a binary op with two EXPR sub-children + elsif ( + @{$child->[1]} == 1 + and + grep { $child->[1][0][0] =~ /^ $_ $/xi } (@binary_op_keywords) + and + $child->[1][0][1][0][0] eq 'EXPR' + and + $child->[1][0][1][1][0] eq 'EXPR' + ) { + push @children, $child->[1][0]; + $changes++; + } + + # otherwise no more mucking for this pass + else { + push @children, $child; + } + } + + $ast->[1] = \@children; + + } while ($changes); + +} sub unparse { my $tree = shift; @@ -367,12 +393,12 @@ sub unparse { return join (" ", map { unparse ($_) } @$tree); } elsif ($tree->[0] eq 'EXPR') { - return $tree->[1]; + return $tree->[1][0]; } elsif ($tree->[0] eq 'PAREN') { - return sprintf '( %s )', join (" ", map {unparse($_)} @{$tree->[1]}); + return sprintf '(%s)', join (" ", map {unparse($_)} @{$tree->[1]}); } - elsif ($tree->[0] eq 'OR' or $tree->[0] eq 'AND') { + elsif ($tree->[0] eq 'OR' or $tree->[0] eq 'AND' or (grep { $tree->[0] =~ /^ $_ $/xi } @binary_op_keywords ) ) { return join (" $tree->[0] ", map {unparse($_)} @{$tree->[1]}); } else { @@ -398,7 +424,7 @@ SQL::Abstract::Test - Helper function for testing SQL::Abstract is_same_sql_bind is_same_sql is_same_bind eq_sql_bind eq_sql eq_bind /]; - + my ($sql, @bind) = SQL::Abstract->new->select(%args); is_same_sql_bind($given_sql, \@given_bind, @@ -425,10 +451,14 @@ ignoring differences in spaces or in levels of parentheses. Therefore the tests will pass as long as the semantics is preserved, even if the surface syntax has changed. -B : this is only a half-cooked semantic equivalence; -parsing is simple-minded, and comparison of SQL abstract syntax trees -ignores commutativity or associativity of AND/OR operators, Morgan -laws, etc. +B : the semantic equivalence handling is pretty limited. +A lot of effort goes into distinguishing significant from +non-significant parenthesis, including AND/OR operator associativity. +Currently this module does not support commutativity and more +intelligent transformations like Morgan laws, etc. + +For a good overview of what this test framework is capable of refer +to C =head1 FUNCTIONS @@ -497,6 +527,11 @@ diagnostics or talk to L. If true, SQL comparisons will be case-sensitive. Default is false; +=head2 $parenthesis_significant + +If true, SQL comparison will preserve and report difference in nested +parenthesis. Useful for testing the C<-nest> modifier. Defaults to false; + =head2 $sql_differ When L returns false, the global variable @@ -514,6 +549,8 @@ Laurent Dami, Elaurent.dami AT etat geneve chE Norbert Buchmuller +Peter Rabbitson + =head1 COPYRIGHT AND LICENSE Copyright 2008 by Laurent Dami.