X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract%2FTree.pm;h=d60e2369e1f0fabc67f81935b74d7acb14c2b0b6;hb=1f00df9fde149ba81f014d6fa1a7d304ac2e4b08;hp=8e743a64dd469f7b495d453ff43610f744164fae;hpb=1ec9b9e3261f37de1bd05b31fa3c88ab78ab1480;p=dbsrgits%2FSQL-Abstract.git diff --git a/lib/SQL/Abstract/Tree.pm b/lib/SQL/Abstract/Tree.pm index 8e743a6..d60e236 100644 --- a/lib/SQL/Abstract/Tree.pm +++ b/lib/SQL/Abstract/Tree.pm @@ -68,6 +68,7 @@ my @expression_start_keywords = ( 'HAVING', 'ORDER \s+ BY', 'SKIP', + 'FETCH', 'FIRST', 'LIMIT', 'OFFSET', @@ -91,8 +92,8 @@ $expr_start_re = qr/ $op_look_behind (?i: $expr_start_re ) $op_look_ahead /x; # These are binary operator keywords always a single LHS and RHS # * AND/OR are handled separately as they are N-ary # * so is NOT as being unary -# * BETWEEN without paranthesis around the ANDed arguments (which -# makes it a non-binary op) is detected and accomodated in +# * BETWEEN without parentheses around the ANDed arguments (which +# makes it a non-binary op) is detected and accommodated in # _recurse_parse() # * AS is not really an operator but is handled here as it's also LHS/RHS @@ -136,7 +137,7 @@ my $tokenizer_re = join("\n\t|\n", # this one *is* capturing for the split below # splits on whitespace if all else fails -# has to happen before the composiign qr's are anchored (below) +# has to happen before the composing qr's are anchored (below) $tokenizer_re = qr/ \s* ( $tokenizer_re ) \s* | \s+ /x; # Parser states for _recurse_parse() @@ -169,7 +170,8 @@ for ( $_ = qr/ \A $_ \z /x; } - +# what can be bunched together under one MISC in an AST +my $compressable_node_re = qr/^ \- (?: MISC | LITERAL | PLACEHOLDER ) $/x; my %indents = ( select => 0, @@ -417,7 +419,7 @@ sub _recurse_parse { @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); } - @left = [$op => [ @left, @right ]]; + push @left, [$op => [ (@left ? pop @left : ''), @right ]]; } # unary op keywords @@ -447,23 +449,50 @@ sub _recurse_parse { } # we're now in "unknown token" land - start eating tokens until - # we see something familiar + # we see something familiar, OR in the case of RHS (binop) stop + # after the first token + # Also stop processing when we could end up with an unknown func else { my @lits = [ -LITERAL => [$token] ]; - while (@$tokens and $tokens->[0] !~ $all_std_keywords_re) { - push @lits, [ -LITERAL => [ shift @$tokens ] ]; - } + unshift @lits, pop @left if @left == 1; - if (@left == 1) { - unshift @lits, pop @left; - } + unless ( $state == PARSE_RHS ) { + while ( + @$tokens + and + $tokens->[0] !~ $all_std_keywords_re + and + ! ( @$tokens > 1 and $tokens->[1] eq '(' ) + ) { + push @lits, [ -LITERAL => [ shift @$tokens ] ]; + } + } @lits = [ -MISC => [ @lits ] ] if @lits > 1; push @left, @lits; } + # compress -LITERAL -MISC and -PLACEHOLDER pieces into a single + # -MISC container + if (@left > 1) { + my $i = 0; + while ($#left > $i) { + if ($left[$i][0] =~ $compressable_node_re and $left[$i+1][0] =~ $compressable_node_re) { + splice @left, $i, 2, [ -MISC => [ + map { $_->[0] eq '-MISC' ? @{$_->[1]} : $_ } (@left[$i, $i+1]) + ]]; + } + else { + $i++; + } + } + } + + return @left if $state == PARSE_RHS; + + # deal with post-fix operators if (@$tokens) { # asc/desc if ($tokens->[0] =~ $asc_desc_re) { @@ -630,12 +659,12 @@ sub _parenthesis_unroll { } # unroll nested parenthesis - while ( @{$child->[1]} == 1 and $child->[1][0][0] eq '-PAREN') { + while ( $ast->[0] ne 'IN' and @{$child->[1]} == 1 and $child->[1][0][0] eq '-PAREN') { $child = $child->[1][0]; $changes++; } - # if the parent operator explcitly allows it nuke the parenthesis + # if the parent operator explicitly allows it nuke the parenthesis if ( $ast->[0] =~ $unrollable_ops_re ) { push @children, @{$child->[1]}; $changes++;