X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract%2FTest.pm;h=8951e36a7d3cb7d053dee53171787b3bc724092b;hb=e6ea82008ab0623fec0971d8cbc097903f88be73;hp=82eed8f4affd2d31b11858add8ff3ec2299ebb9f;hpb=09abf3a06717253ac4eb7859bf9145c1f9b1b5af;p=scpubgit%2FQ-Branch.git diff --git a/lib/SQL/Abstract/Test.pm b/lib/SQL/Abstract/Test.pm index 82eed8f..8951e36 100644 --- a/lib/SQL/Abstract/Test.pm +++ b/lib/SQL/Abstract/Test.pm @@ -13,6 +13,7 @@ our @EXPORT_OK = qw/&is_same_sql_bind &is_same_sql &is_same_bind $case_sensitive $sql_differ/; our $case_sensitive = 0; +our $parenthesis_significant = 0; our $sql_differ; # keeps track of differing portion between SQLs our $tb = __PACKAGE__->builder; @@ -39,6 +40,7 @@ my @expression_terminator_sql_keywords = ( )', 'ON', 'WHERE', + 'EXISTS', 'GROUP \s+ BY', 'HAVING', 'ORDER \s+ BY', @@ -48,25 +50,36 @@ my @expression_terminator_sql_keywords = ( 'UNION', 'INTERSECT', 'EXCEPT', + 'RETURNING', ); # These are binary operator keywords always a single LHS and RHS # * AND/OR are handled separately as they are N-ary +# * so is NOT as being unary # * BETWEEN without paranthesis around the ANDed arguments (which # makes it a non-binary op) is detected and accomodated in # _recurse_parse() +my $stuff_around_mathops = qr/[\w\s\`\'\"\)]/; my @binary_op_keywords = ( - (map { "\Q$_\E" } (qw/< > != = <= >=/)), - '(?: NOT \s+)? LIKE', - '(?: NOT \s+)? BETWEEN', + ( map + { + ' ^ ' . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ", + " (?<= $stuff_around_mathops)" . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ", + } + (qw/< > != <> = <= >=/) + ), + ( map + { '\b (?: NOT \s+)?' . $_ . '\b' } + (qw/IN BETWEEN LIKE/) + ), ); my $tokenizer_re_str = join("\n\t|\n", - ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR' ), - ( map { q! (?<= [\w\s\`\'\)] ) ! . $_ . q! (?= [\w\s\`\'\(] ) ! } @binary_op_keywords ), + ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR', 'NOT'), + @binary_op_keywords, ); -my $tokenizer_re = qr/ \s* ( \( | \) | \? | $tokenizer_re_str ) \s* /xi; +my $tokenizer_re = qr/ \s* ( $tokenizer_re_str | \( | \) | \? ) \s* /xi; # All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics my @unrollable_ops = ( @@ -203,71 +216,11 @@ sub _eq_sql { # both are an op-list combo else { - for my $ast ($left, $right) { - - next unless (ref $ast->[1]); - - # unroll parenthesis in an elaborate loop - my $changes; - do { - - my @children; - $changes = 0; - - for my $child (@{$ast->[1]}) { - if (not ref $child or not $child->[0] eq 'PAREN') { - push @children, $child; - next; - } - - # unroll nested parenthesis - while ($child->[1][0][0] eq 'PAREN') { - $child = $child->[1][0]; - $changes++; - } - - # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list - if ( - ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR') - and - $child->[1][0][0] eq $ast->[0] - ) { - push @children, @{$child->[1][0][1]}; - $changes++; - } - - # if the parent operator explcitly allows it nuke the parenthesis - elsif ( grep { $ast->[0] =~ /^ $_ $/xi } @unrollable_ops ) { - push @children, $child->[1][0]; - $changes++; - } - - # only one element in the parenthesis which is a binary op with two EXPR sub-children - elsif ( - @{$child->[1]} == 1 - and - grep { $child->[1][0][0] =~ /^ $_ $/xi } (@binary_op_keywords) - and - $child->[1][0][1][0][0] eq 'EXPR' - and - $child->[1][0][1][1][0] eq 'EXPR' - ) { - push @children, $child->[1][0]; - $changes++; - } - - # otherwise no more mucking for this pass - else { - push @children, $child; - } - } - - $ast->[1] = \@children; - } while ($changes); - } + # unroll parenthesis if possible/allowed + _parenthesis_unroll ($_) for ($left, $right); # if operators are different - if ($left->[0] ne $right->[0]) { + if ( $left->[0] ne $right->[0] ) { $sql_differ = sprintf "OP [$left->[0]] != [$right->[0]] in\nleft: %s\nright: %s\n", unparse($left), unparse($right); @@ -275,7 +228,7 @@ sub _eq_sql { } # elsif operators are identical, compare operands else { - if ($left->[0] eq 'EXPR' ) { # unary operator + if ($left->[0] eq 'LITERAL' ) { # unary (my $l = " $left->[1][0] " ) =~ s/\s+/ /g; (my $r = " $right->[1][0] ") =~ s/\s+/ /g; my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r); @@ -291,17 +244,13 @@ sub _eq_sql { } } - sub parse { my $s = shift; # tokenize string, and remove all optional whitespace my $tokens = []; foreach my $token (split $tokenizer_re, $s) { - $token =~ s/\s+/ /g; - $token =~ s/\s+([^\w\s])/$1/g; - $token =~ s/([^\w\s])\s+/$1/g; - push @$tokens, $token if length $token; + push @$tokens, $token if (length $token) && ($token =~ /\S/); } my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL); @@ -321,7 +270,7 @@ sub _recurse_parse { or ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) ) or - ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR' ) ) + ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR', 'NOT' ) ) ) { return $left; } @@ -329,7 +278,7 @@ sub _recurse_parse { my $token = shift @$tokens; # nested expression in () - if ($token eq '(') { + if ($token eq '(' ) { my $right = _recurse_parse($tokens, PARSE_IN_PARENS); $token = shift @$tokens or croak "missing closing ')' around block " . unparse ($right); $token eq ')' or croak "unexpected token '$token' terminating block " . unparse ($right); @@ -354,9 +303,9 @@ sub _recurse_parse { my $op = uc $token; my $right = _recurse_parse($tokens, PARSE_RHS); - # A between with a simple EXPR for a 1st RHS argument needs a + # A between with a simple LITERAL for a 1st RHS argument needs a # rerun of the search to (hopefully) find the proper AND construct - if ($op eq 'BETWEEN' and $right->[0] eq 'EXPR') { + if ($op eq 'BETWEEN' and $right->[0] eq 'LITERAL') { unshift @$tokens, $right->[1][0]; $right = _recurse_parse($tokens, PARSE_IN_EXPR); } @@ -370,15 +319,95 @@ sub _recurse_parse { $left = $left ? [@$left, [$op => [$right] ]] : [[ $op => [$right] ]]; } - # leaf expression + # NOT (last as to allow all other NOT X pieces first) + elsif ( $token =~ /^ not $/ix ) { + my $op = uc $token; + my $right = _recurse_parse ($tokens, PARSE_RHS); + $left = $left ? [ @$left, [$op => [$right] ]] + : [[ $op => [$right] ]]; + + } + # literal (eat everything on the right until RHS termination) else { - $left = $left ? [@$left, [EXPR => [$token] ] ] - : [ EXPR => [$token] ]; + my $right = _recurse_parse ($tokens, PARSE_RHS); + $left = $left ? [$left, [LITERAL => [join ' ', $token, unparse($right)||()] ] ] + : [ LITERAL => [join ' ', $token, unparse($right)||()] ]; } } } +sub _parenthesis_unroll { + my $ast = shift; + return if $parenthesis_significant; + return unless (ref $ast and ref $ast->[1]); + + my $changes; + do { + my @children; + $changes = 0; + + for my $child (@{$ast->[1]}) { + if (not ref $child or not $child->[0] eq 'PAREN') { + push @children, $child; + next; + } + + # unroll nested parenthesis + while ($child->[1][0][0] eq 'PAREN') { + $child = $child->[1][0]; + $changes++; + } + + # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list + if ( + ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR') + and + $child->[1][0][0] eq $ast->[0] + ) { + push @children, @{$child->[1][0][1]}; + $changes++; + } + + # if the parent operator explcitly allows it nuke the parenthesis + elsif ( grep { $ast->[0] =~ /^ $_ $/xi } @unrollable_ops ) { + push @children, $child->[1][0]; + $changes++; + } + + # only one LITERAL element in the parenthesis + elsif ( + @{$child->[1]} == 1 && $child->[1][0][0] eq 'LITERAL' + ) { + push @children, $child->[1][0]; + $changes++; + } + + # only one element in the parenthesis which is a binary op with two LITERAL sub-children + elsif ( + @{$child->[1]} == 1 + and + grep { $child->[1][0][0] =~ /^ $_ $/xi } (@binary_op_keywords) + and + $child->[1][0][1][0][0] eq 'LITERAL' + and + $child->[1][0][1][1][0] eq 'LITERAL' + ) { + push @children, $child->[1][0]; + $changes++; + } + + # otherwise no more mucking for this pass + else { + push @children, $child; + } + } + + $ast->[1] = \@children; + + } while ($changes); + +} sub unparse { my $tree = shift; @@ -389,7 +418,7 @@ sub unparse { elsif (ref $tree->[0]) { return join (" ", map { unparse ($_) } @$tree); } - elsif ($tree->[0] eq 'EXPR') { + elsif ($tree->[0] eq 'LITERAL') { return $tree->[1][0]; } elsif ($tree->[0] eq 'PAREN') { @@ -421,7 +450,7 @@ SQL::Abstract::Test - Helper function for testing SQL::Abstract is_same_sql_bind is_same_sql is_same_bind eq_sql_bind eq_sql eq_bind /]; - + my ($sql, @bind) = SQL::Abstract->new->select(%args); is_same_sql_bind($given_sql, \@given_bind, @@ -448,10 +477,14 @@ ignoring differences in spaces or in levels of parentheses. Therefore the tests will pass as long as the semantics is preserved, even if the surface syntax has changed. -B : this is only a half-cooked semantic equivalence; -parsing is simple-minded, and comparison of SQL abstract syntax trees -ignores commutativity or associativity of AND/OR operators, Morgan -laws, etc. +B : the semantic equivalence handling is pretty limited. +A lot of effort goes into distinguishing significant from +non-significant parenthesis, including AND/OR operator associativity. +Currently this module does not support commutativity and more +intelligent transformations like Morgan laws, etc. + +For a good overview of what this test framework is capable of refer +to C =head1 FUNCTIONS @@ -520,6 +553,11 @@ diagnostics or talk to L. If true, SQL comparisons will be case-sensitive. Default is false; +=head2 $parenthesis_significant + +If true, SQL comparison will preserve and report difference in nested +parenthesis. Useful for testing the C<-nest> modifier. Defaults to false; + =head2 $sql_differ When L returns false, the global variable @@ -537,6 +575,8 @@ Laurent Dami, Elaurent.dami AT etat geneve chE Norbert Buchmuller +Peter Rabbitson + =head1 COPYRIGHT AND LICENSE Copyright 2008 by Laurent Dami.