)',
'ON',
'WHERE',
+ 'EXISTS',
'GROUP \s+ BY',
'HAVING',
'ORDER \s+ BY',
# * BETWEEN without parentheses around the ANDed arguments (which
# makes it a non-binary op) is detected and accommodated in
# _recurse_parse()
-my $stuff_around_mathops = qr/[\w\s\`\'\)]/;
+my $stuff_around_mathops = qr/[\w\s\`\'\"\)]/;
my @binary_op_keywords = (
( map
- { " (?<= $stuff_around_mathops) " . quotemeta $_ . "(?= $stuff_around_mathops )" }
- (qw/< > != = <= >=/)
+ {
+ ' ^ ' . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ",
+ " (?<= $stuff_around_mathops)" . quotemeta ($_) . "(?= \$ | $stuff_around_mathops ) ",
+ }
+ (qw/< > != <> = <= >=/)
),
( map
{ '\b (?: NOT \s+)?' . $_ . '\b' }
@binary_op_keywords,
);
-my $tokenizer_re = qr/ \s* ( \( | \) | \? | $tokenizer_re_str ) \s* /xi;
+my $tokenizer_re = qr/ \s* ( $tokenizer_re_str | \( | \) | \? ) \s* /xi;
# All of these keywords allow their parameters to be specified with or without parentheses without changing the semantics
my @unrollable_ops = (
_parenthesis_unroll ($_) for ($left, $right);
# if operators are different
- if ($left->[0] ne $right->[0]) {
+ if ( $left->[0] ne $right->[0] ) {
$sql_differ = sprintf "OP [$left->[0]] != [$right->[0]] in\nleft: %s\nright: %s\n",
unparse($left),
unparse($right);
}
# elsif operators are identical, compare operands
else {
- if ($left->[0] eq 'EXPR' ) { # unary operator
+ if ($left->[0] eq 'LITERAL' ) { # unary
(my $l = " $left->[1][0] " ) =~ s/\s+/ /g;
(my $r = " $right->[1][0] ") =~ s/\s+/ /g;
my $eq = $case_sensitive ? $l eq $r : uc($l) eq uc($r);
# tokenize string, and remove all optional whitespace
my $tokens = [];
foreach my $token (split $tokenizer_re, $s) {
- $token =~ s/\s+/ /g;
- $token =~ s/\s+([^\w\s])/$1/g;
- $token =~ s/([^\w\s])\s+/$1/g;
- push @$tokens, $token if length $token;
+ push @$tokens, $token if (length $token) && ($token =~ /\S/);
}
my $tree = _recurse_parse($tokens, PARSE_TOP_LEVEL);
my $token = shift @$tokens;
# nested expression in ()
- if ($token eq '(') {
+ if ($token eq '(' ) {
my $right = _recurse_parse($tokens, PARSE_IN_PARENS);
$token = shift @$tokens or croak "missing closing ')' around block " . unparse ($right);
$token eq ')' or croak "unexpected token '$token' terminating block " . unparse ($right);
my $op = uc $token;
my $right = _recurse_parse($tokens, PARSE_RHS);
- # A between with a simple EXPR for a 1st RHS argument needs a
+ # A between with a simple LITERAL for a 1st RHS argument needs a
# rerun of the search to (hopefully) find the proper AND construct
- if ($op eq 'BETWEEN' and $right->[0] eq 'EXPR') {
+ if ($op eq 'BETWEEN' and $right->[0] eq 'LITERAL') {
unshift @$tokens, $right->[1][0];
$right = _recurse_parse($tokens, PARSE_IN_EXPR);
}
: [[ $op => [$right] ]];
}
- # leaf expression
+ # literal (eat everything on the right until RHS termination)
else {
- $left = $left ? [@$left, [EXPR => [$token] ] ]
- : [ EXPR => [$token] ];
+ my $right = _recurse_parse ($tokens, PARSE_RHS);
+ $left = $left ? [$left, [LITERAL => [join ' ', $token, unparse($right)||()] ] ]
+ : [ LITERAL => [join ' ', $token, unparse($right)||()] ];
}
}
}
$changes++;
}
- # only one EXPR element in the parenthesis
+ # only one LITERAL element in the parenthesis
elsif (
- @{$child->[1]} == 1 && $child->[1][0][0] eq 'EXPR'
+ @{$child->[1]} == 1 && $child->[1][0][0] eq 'LITERAL'
) {
push @children, $child->[1][0];
$changes++;
}
- # only one element in the parenthesis which is a binary op with two EXPR sub-children
+ # only one element in the parenthesis which is a binary op with two LITERAL sub-children
elsif (
@{$child->[1]} == 1
and
grep { $child->[1][0][0] =~ /^ $_ $/xi } (@binary_op_keywords)
and
- $child->[1][0][1][0][0] eq 'EXPR'
+ $child->[1][0][1][0][0] eq 'LITERAL'
and
- $child->[1][0][1][1][0] eq 'EXPR'
+ $child->[1][0][1][1][0] eq 'LITERAL'
) {
push @children, $child->[1][0];
$changes++;
elsif (ref $tree->[0]) {
return join (" ", map { unparse ($_) } @$tree);
}
- elsif ($tree->[0] eq 'EXPR') {
+ elsif ($tree->[0] eq 'LITERAL') {
return $tree->[1][0];
}
elsif ($tree->[0] eq 'PAREN') {
{
parenthesis_significant => 1,
where => { x => { -in => \'( 1,2,lower(y) )' } },
- stmt => "WHERE ( x IN (1, 2, lower(y) ) )",
+ stmt => "WHERE ( x IN ( 1,2,lower(y) ) )",
bind => [],
test => '-in with a literal scalarref',
},
{
parenthesis_significant => 1,
where => { x => { -in => \['( ( ?,?,lower(y) ) )', 1, 2] } },
- stmt => "WHERE ( x IN (?, ?, lower(y) ) )",
+ stmt => "WHERE ( x IN ( ?,?,lower(y) ) )", # note that outer parens are opened even though literal was requested (RIBASUSHI)
bind => [1, 2],
test => '-in with a literal arrayrefref',
},