X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract%2FTest.pm;h=8f41167503f3fb326e8b5a309f32545b26bece09;hb=d4705371922bba7c32f98f62ed10ee7fd2007d51;hp=6da627cf069f418b94f8bc9bbfc4f4dd3e832dfd;hpb=1496d4ce1467676a04bacee2f67e34778990de27;p=scpubgit%2FQ-Branch.git diff --git a/lib/SQL/Abstract/Test.pm b/lib/SQL/Abstract/Test.pm index 6da627c..8f41167 100644 --- a/lib/SQL/Abstract/Test.pm +++ b/lib/SQL/Abstract/Test.pm @@ -8,19 +8,20 @@ use Carp; use Test::Builder; use Test::Deep qw(eq_deeply); -our @EXPORT_OK = qw/&is_same_sql_bind &eq_sql &eq_bind +our @EXPORT_OK = qw/&is_same_sql_bind &is_same_sql &is_same_bind + &eq_sql_bind &eq_sql &eq_bind $case_sensitive $sql_differ/; our $case_sensitive = 0; +our $parenthesis_significant = 0; our $sql_differ; # keeps track of differing portion between SQLs our $tb = __PACKAGE__->builder; # Parser states for _recurse_parse() -use constant { - PARSE_TOP_LEVEL => 0, - PARSE_IN_EXPR => 1, - PARSE_IN_PARENS => 2, -}; +use constant PARSE_TOP_LEVEL => 0; +use constant PARSE_IN_EXPR => 1; +use constant PARSE_IN_PARENS => 2; +use constant PARSE_RHS => 3; # These SQL keywords always signal end of the current expression (except inside # of a parenthesized subexpression). @@ -28,6 +29,7 @@ use constant { # /.../x) regexes, without capturing parentheses. They will be automatically # anchored to word boundaries to match the whole token). my @expression_terminator_sql_keywords = ( + 'SELECT', 'FROM', '(?: (?: @@ -49,23 +51,32 @@ my @expression_terminator_sql_keywords = ( 'EXCEPT', ); -my $tokenizer_re_str = join('|', - map { '\b' . $_ . '\b' } - @expression_terminator_sql_keywords, 'AND', 'OR' +# These are binary operator keywords always a single LHS and RHS +# * AND/OR are handled separately as they are N-ary +# * BETWEEN without paranthesis around the ANDed arguments (which +# makes it a non-binary op) is detected and accomodated in +# _recurse_parse() +my @binary_op_keywords = ( + (map { "\Q$_\E" } (qw/< > != = <= >=/)), + '(?: NOT \s+)? LIKE', + '(?: NOT \s+)? BETWEEN', ); -my $tokenizer_re = qr/ - \s* - ( - \( - | - \) - | - $tokenizer_re_str - ) - \s* -/xi; +my $tokenizer_re_str = join("\n\t|\n", + ( map { '\b' . $_ . '\b' } @expression_terminator_sql_keywords, 'AND', 'OR' ), + ( map { q! (?<= [\w\s\`\'\)] ) ! . $_ . q! (?= [\w\s\`\'\(] ) ! } @binary_op_keywords ), +); +my $tokenizer_re = qr/ \s* ( \( | \) | \? | $tokenizer_re_str ) \s* /xi; + +# All of these keywords allow their parameters to be specified with or without parenthesis without changing the semantics +my @unrollable_ops = ( + 'ON', + 'WHERE', + 'GROUP \s+ BY', + 'HAVING', + 'ORDER \s+ BY', +); sub is_same_sql_bind { my ($sql1, $bind_ref1, $sql2, $bind_ref2, $msg) = @_; @@ -75,24 +86,82 @@ sub is_same_sql_bind { my $same_bind = eq_bind($bind_ref1, $bind_ref2); # call Test::Builder::ok - $tb->ok($same_sql && $same_bind, $msg); + my $ret = $tb->ok($same_sql && $same_bind, $msg); # add debugging info if (!$same_sql) { - $tb->diag("SQL expressions differ\n" - ." got: $sql1\n" - ."expected: $sql2\n" - ."differing in :\n$sql_differ\n" - ); + _sql_differ_diag($sql1, $sql2); } if (!$same_bind) { - $tb->diag("BIND values differ\n" - ." got: " . Dumper($bind_ref1) - ."expected: " . Dumper($bind_ref2) - ); + _bind_differ_diag($bind_ref1, $bind_ref2); } + + # pass ok() result further + return $ret; +} + +sub is_same_sql { + my ($sql1, $sql2, $msg) = @_; + + # compare + my $same_sql = eq_sql($sql1, $sql2); + + # call Test::Builder::ok + my $ret = $tb->ok($same_sql, $msg); + + # add debugging info + if (!$same_sql) { + _sql_differ_diag($sql1, $sql2); + } + + # pass ok() result further + return $ret; +} + +sub is_same_bind { + my ($bind_ref1, $bind_ref2, $msg) = @_; + + # compare + my $same_bind = eq_bind($bind_ref1, $bind_ref2); + + # call Test::Builder::ok + my $ret = $tb->ok($same_bind, $msg); + + # add debugging info + if (!$same_bind) { + _bind_differ_diag($bind_ref1, $bind_ref2); + } + + # pass ok() result further + return $ret; +} + +sub _sql_differ_diag { + my ($sql1, $sql2) = @_; + + $tb->diag("SQL expressions differ\n" + ." got: $sql1\n" + ."expected: $sql2\n" + ."differing in :\n$sql_differ\n" + ); +} + +sub _bind_differ_diag { + my ($bind_ref1, $bind_ref2) = @_; + + $tb->diag("BIND values differ\n" + ." got: " . Dumper($bind_ref1) + ."expected: " . Dumper($bind_ref2) + ); } +sub eq_sql_bind { + my ($sql1, $bind_ref1, $sql2, $bind_ref2) = @_; + + return eq_sql($sql1, $sql2) && eq_bind($bind_ref1, $bind_ref2); +} + + sub eq_bind { my ($bind_ref1, $bind_ref2) = @_; @@ -106,40 +175,63 @@ sub eq_sql { my $tree1 = parse($sql1); my $tree2 = parse($sql2); - return _eq_sql($tree1, $tree2); + return 1 if _eq_sql($tree1, $tree2); } sub _eq_sql { my ($left, $right) = @_; - # ignore top-level parentheses - while ($left->[0] eq 'PAREN') {$left = $left->[1] } - while ($right->[0] eq 'PAREN') {$right = $right->[1]} - - # if operators are different - if ($left->[0] ne $right->[0]) { - $sql_differ = sprintf "OP [$left->[0]] != [$right->[0]] in\nleft: %s\nright: %s\n", - unparse($left), - unparse($right); + # one is defined the other not + if ( (defined $left) xor (defined $right) ) { return 0; } - # elsif operators are identical, compare operands - else { - if ($left->[0] eq 'EXPR' ) { # unary operator - (my $l = " $left->[1] " ) =~ s/\s+/ /g; - (my $r = " $right->[1] ") =~ s/\s+/ /g; - my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r); - $sql_differ = "[$left->[1]] != [$right->[1]]\n" if not $eq; - return $eq; + # one is undefined, then so is the other + elsif (not defined $left) { + return 1; + } + # one is a list, the other is an op with a list + elsif (ref $left->[0] xor ref $right->[0]) { + $sql_differ = sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) ); + return 0; + } + # one is a list, so is the other + elsif (ref $left->[0]) { + for (my $i = 0; $i <= $#$left or $i <= $#$right; $i++ ) { + return 0 if (not _eq_sql ($left->[$i], $right->[$i]) ); } - else { # binary operator - return _eq_sql($left->[1][0], $right->[1][0]) # left operand - && _eq_sql($left->[1][1], $right->[1][1]); # right operand + return 1; + } + # both are an op-list combo + else { + + # unroll parenthesis if possible/allowed + _parenthesis_unroll ($_) for ($left, $right); + + # if operators are different + if ($left->[0] ne $right->[0]) { + $sql_differ = sprintf "OP [$left->[0]] != [$right->[0]] in\nleft: %s\nright: %s\n", + unparse($left), + unparse($right); + return 0; + } + # elsif operators are identical, compare operands + else { + if ($left->[0] eq 'EXPR' ) { # unary operator + (my $l = " $left->[1][0] " ) =~ s/\s+/ /g; + (my $r = " $right->[1][0] ") =~ s/\s+/ /g; + my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r); + $sql_differ = "[$l] != [$r]\n" if not $eq; + return $eq; + } + else { + my $eq = _eq_sql($left->[1], $right->[1]); + $sql_differ ||= sprintf ("left: %s\nright: %s\n", map { unparse ($_) } ($left, $right) ) if not $eq; + return $eq; + } } } } - sub parse { my $s = shift; @@ -149,7 +241,7 @@ sub parse { $token =~ s/\s+/ /g; $token =~ s/\s+([^\w\s])/$1/g; $token =~ s/([^\w\s])\s+/$1/g; - push @$tokens, $token if $token !~ /^$/; + push @$tokens, $token if length $token; } my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL); @@ -163,53 +255,155 @@ sub _recurse_parse { while (1) { # left-associative parsing my $lookahead = $tokens->[0]; - return $left if !defined($lookahead) - || ($state == PARSE_IN_PARENS && $lookahead eq ')') - || ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^$_$/xi } - '\)', @expression_terminator_sql_keywords - ); + if ( not defined($lookahead) + or + ($state == PARSE_IN_PARENS && $lookahead eq ')') + or + ($state == PARSE_IN_EXPR && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords ) ) + or + ($state == PARSE_RHS && grep { $lookahead =~ /^ $_ $/xi } ('\)', @expression_terminator_sql_keywords, @binary_op_keywords, 'AND', 'OR' ) ) + ) { + return $left; + } my $token = shift @$tokens; # nested expression in () if ($token eq '(') { my $right = _recurse_parse($tokens, PARSE_IN_PARENS); - $token = shift @$tokens or croak "missing ')'"; - $token eq ')' or croak "unexpected token : $token"; - $left = $left ? [CONCAT => [$left, [PAREN => $right]]] - : [PAREN => $right]; + $token = shift @$tokens or croak "missing closing ')' around block " . unparse ($right); + $token eq ')' or croak "unexpected token '$token' terminating block " . unparse ($right); + $left = $left ? [@$left, [PAREN => [$right] ]] + : [PAREN => [$right] ]; } # AND/OR - elsif ($token eq 'AND' || $token eq 'OR') { + elsif ($token =~ /^ (?: OR | AND ) $/xi ) { + my $op = uc $token; my $right = _recurse_parse($tokens, PARSE_IN_EXPR); - $left = [$token => [$left, $right]]; + + # Merge chunks if logic matches + if (ref $right and $op eq $right->[0]) { + $left = [ (shift @$right ), [$left, map { @$_ } @$right] ]; + } + else { + $left = [$op => [$left, $right]]; + } + } + # binary operator keywords + elsif (grep { $token =~ /^ $_ $/xi } @binary_op_keywords ) { + my $op = uc $token; + my $right = _recurse_parse($tokens, PARSE_RHS); + + # A between with a simple EXPR for a 1st RHS argument needs a + # rerun of the search to (hopefully) find the proper AND construct + if ($op eq 'BETWEEN' and $right->[0] eq 'EXPR') { + unshift @$tokens, $right->[1][0]; + $right = _recurse_parse($tokens, PARSE_IN_EXPR); + } + + $left = [$op => [$left, $right] ]; } # expression terminator keywords (as they start a new expression) - elsif (grep { $token =~ /^$_$/xi } @expression_terminator_sql_keywords) { + elsif (grep { $token =~ /^ $_ $/xi } @expression_terminator_sql_keywords ) { + my $op = uc $token; my $right = _recurse_parse($tokens, PARSE_IN_EXPR); - $left = $left ? [CONCAT => [$left, [CONCAT => [[EXPR => $token], [PAREN => $right]]]]] - : [CONCAT => [[EXPR => $token], [PAREN => $right]]]; + $left = $left ? [@$left, [$op => [$right] ]] + : [[ $op => [$right] ]]; } # leaf expression else { - $left = $left ? [CONCAT => [$left, [EXPR => $token]]] - : [EXPR => $token]; + $left = $left ? [@$left, [EXPR => [$token] ] ] + : [ EXPR => [$token] ]; } } } +sub _parenthesis_unroll { + my $ast = shift; + + return if $parenthesis_significant; + return unless (ref $ast and ref $ast->[1]); + + my $changes; + do { + my @children; + $changes = 0; + + for my $child (@{$ast->[1]}) { + if (not ref $child or not $child->[0] eq 'PAREN') { + push @children, $child; + next; + } + + # unroll nested parenthesis + while ($child->[1][0][0] eq 'PAREN') { + $child = $child->[1][0]; + $changes++; + } + + # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list + if ( + ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR') + and + $child->[1][0][0] eq $ast->[0] + ) { + push @children, @{$child->[1][0][1]}; + $changes++; + } + + # if the parent operator explcitly allows it nuke the parenthesis + elsif ( grep { $ast->[0] =~ /^ $_ $/xi } @unrollable_ops ) { + push @children, $child->[1][0]; + $changes++; + } + + # only one element in the parenthesis which is a binary op with two EXPR sub-children + elsif ( + @{$child->[1]} == 1 + and + grep { $child->[1][0][0] =~ /^ $_ $/xi } (@binary_op_keywords) + and + $child->[1][0][1][0][0] eq 'EXPR' + and + $child->[1][0][1][1][0] eq 'EXPR' + ) { + push @children, $child->[1][0]; + $changes++; + } + + # otherwise no more mucking for this pass + else { + push @children, $child; + } + } + + $ast->[1] = \@children; + } while ($changes); + +} sub unparse { my $tree = shift; - my $dispatch = { - EXPR => sub {$tree->[1] }, - PAREN => sub {"(" . unparse($tree->[1]) . ")" }, - CONCAT => sub {join " ", map {unparse($_)} @{$tree->[1]}}, - AND => sub {join " AND ", map {unparse($_)} @{$tree->[1]}}, - OR => sub {join " OR ", map {unparse($_)} @{$tree->[1]}}, - }; - $dispatch->{$tree->[0]}->(); + + if (not $tree ) { + return ''; + } + elsif (ref $tree->[0]) { + return join (" ", map { unparse ($_) } @$tree); + } + elsif ($tree->[0] eq 'EXPR') { + return $tree->[1][0]; + } + elsif ($tree->[0] eq 'PAREN') { + return sprintf '(%s)', join (" ", map {unparse($_)} @{$tree->[1]}); + } + elsif ($tree->[0] eq 'OR' or $tree->[0] eq 'AND' or (grep { $tree->[0] =~ /^ $_ $/xi } @binary_op_keywords ) ) { + return join (" $tree->[0] ", map {unparse($_)} @{$tree->[1]}); + } + else { + return sprintf '%s %s', $tree->[0], unparse ($tree->[1]); + } } @@ -226,12 +420,25 @@ SQL::Abstract::Test - Helper function for testing SQL::Abstract use SQL::Abstract; use Test::More; - use SQL::Abstract::Test import => ['is_same_sql_bind']; + use SQL::Abstract::Test import => [qw/ + is_same_sql_bind is_same_sql is_same_bind + eq_sql_bind eq_sql eq_bind + /]; my ($sql, @bind) = SQL::Abstract->new->select(%args); + is_same_sql_bind($given_sql, \@given_bind, $expected_sql, \@expected_bind, $test_msg); + is_same_sql($given_sql, $expected_sql, $test_msg); + is_same_bind(\@given_bind, \@expected_bind, $test_msg); + + my $is_same = eq_sql_bind($given_sql, \@given_bind, + $expected_sql, \@expected_bind); + + my $sql_same = eq_sql($given_sql, $expected_sql); + my $bind_same = eq_bind(\@given_bind, \@expected_bind); + =head1 DESCRIPTION This module is only intended for authors of tests on @@ -257,34 +464,71 @@ laws, etc. $expected_sql, \@expected_bind, $test_msg); Compares given and expected pairs of C<($sql, \@bind)>, and calls -L on the result, with C<$test_msg> as message. If the -test fails, a detailed diagnostic is printed. For clients which use -L, this is the only function that needs to be -imported. +L on the result, with C<$test_msg> as message. If the test +fails, a detailed diagnostic is printed. For clients which use L, +this is the one of the three functions (L, L, +L) that needs to be imported. + +=head2 is_same_sql + + is_same_sql($given_sql, $expected_sql, $test_msg); + +Compares given and expected SQL statements, and calls L on +the result, with C<$test_msg> as message. If the test fails, a detailed +diagnostic is printed. For clients which use L, this is the one of +the three functions (L, L, L) +that needs to be imported. + +=head2 is_same_bind + + is_same_bind(\@given_bind, \@expected_bind, $test_msg); + +Compares given and expected bind values, and calls L on the +result, with C<$test_msg> as message. If the test fails, a detailed diagnostic +is printed. For clients which use L, this is the one of the three +functions (L, L, L) that needs +to be imported. + +=head2 eq_sql_bind + + my $is_same = eq_sql_bind($given_sql, \@given_bind, + $expected_sql, \@expected_bind); + +Compares given and expected pairs of C<($sql, \@bind)>. Similar to +L, but it just returns a boolean value and does not print +diagnostics or talk to L. =head2 eq_sql my $is_same = eq_sql($given_sql, $expected_sql); -Compares the abstract syntax of two SQL statements. If the result is -false, global variable L will contain the SQL portion -where a difference was encountered; this is useful for printing diagnostics. +Compares the abstract syntax of two SQL statements. Similar to L, +but it just returns a boolean value and does not print diagnostics or talk to +L. If the result is false, the global variable L +will contain the SQL portion where a difference was encountered; this is useful +for printing diagnostics. =head2 eq_bind my $is_same = eq_sql(\@given_bind, \@expected_bind); -Compares two lists of bind values, taking into account -the fact that some of the values may be -arrayrefs (see L). +Compares two lists of bind values, taking into account the fact that some of +the values may be arrayrefs (see L). Similar to +L, but it just returns a boolean value and does not print +diagnostics or talk to L. =head1 GLOBAL VARIABLES -=head2 case_sensitive +=head2 $case_sensitive If true, SQL comparisons will be case-sensitive. Default is false; -=head2 sql_differ +=head2 $parenthesis_significant + +If true, SQL comparison will preserve and report difference in nested +parenthesis. Useful for testing the C<-nest> modifier. Defaults to false; + +=head2 $sql_differ When L returns false, the global variable C<$sql_differ> contains the SQL portion @@ -301,6 +545,8 @@ Laurent Dami, Elaurent.dami AT etat geneve chE Norbert Buchmuller +Peter Rabbitson + =head1 COPYRIGHT AND LICENSE Copyright 2008 by Laurent Dami.