X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract%2FTree.pm;h=9edc1d722dd41ef7cebaf9ef386b686e312c965f;hb=a4d17ff1e4ca5f981aacbeab10b0efa93ac047d9;hp=60a1aa220f307e561b3b9ea94610463274bb9f0f;hpb=c84a43217912905d7fdab027f7ca8641003665ea;p=dbsrgits%2FSQL-Abstract.git diff --git a/lib/SQL/Abstract/Tree.pm b/lib/SQL/Abstract/Tree.pm index 60a1aa2..9edc1d7 100644 --- a/lib/SQL/Abstract/Tree.pm +++ b/lib/SQL/Abstract/Tree.pm @@ -68,6 +68,7 @@ my @expression_start_keywords = ( 'HAVING', 'ORDER \s+ BY', 'SKIP', + 'FETCH', 'FIRST', 'LIMIT', 'OFFSET', @@ -82,7 +83,6 @@ my @expression_start_keywords = ( 'SAVEPOINT', 'RELEASE \s+ SAVEPOINT', 'RETURNING', - 'ROW_NUMBER \s* \( \s* \) \s+ OVER', ); my $expr_start_re = join ("\n\t|\n", @expression_start_keywords ); @@ -91,8 +91,8 @@ $expr_start_re = qr/ $op_look_behind (?i: $expr_start_re ) $op_look_ahead /x; # These are binary operator keywords always a single LHS and RHS # * AND/OR are handled separately as they are N-ary # * so is NOT as being unary -# * BETWEEN without paranthesis around the ANDed arguments (which -# makes it a non-binary op) is detected and accomodated in +# * BETWEEN without parentheses around the ANDed arguments (which +# makes it a non-binary op) is detected and accommodated in # _recurse_parse() # * AS is not really an operator but is handled here as it's also LHS/RHS @@ -114,7 +114,9 @@ $binary_op_re = join "\n\t|\n", ; $binary_op_re = qr/$binary_op_re/x; -my $unary_op_re = '(?: NOT \s+ EXISTS | NOT )'; +my $rno_re = qr/ROW_NUMBER \s* \( \s* \) \s+ OVER/ix; + +my $unary_op_re = 'NOT \s+ EXISTS | NOT | ' . $rno_re; $unary_op_re = join "\n\t|\n", "$op_look_behind (?i: $unary_op_re ) $op_look_ahead", ; @@ -136,7 +138,7 @@ my $tokenizer_re = join("\n\t|\n", # this one *is* capturing for the split below # splits on whitespace if all else fails -# has to happen before the composiign qr's are anchored (below) +# has to happen before the composing qr's are anchored (below) $tokenizer_re = qr/ \s* ( $tokenizer_re ) \s* | \s+ /x; # Parser states for _recurse_parse() @@ -169,7 +171,8 @@ for ( $_ = qr/ \A $_ \z /x; } - +# what can be bunched together under one MISC in an AST +my $compressable_node_re = qr/^ \- (?: MISC | LITERAL | PLACEHOLDER ) $/x; my %indents = ( select => 0, @@ -417,12 +420,16 @@ sub _recurse_parse { @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); } - @left = [$op => [ @left, @right ]]; + push @left, [$op => [ (@left ? pop @left : ''), @right ]]; } # unary op keywords elsif ( $token =~ $unary_op_re ) { my $op = uc $token; + + # normalize RNO explicitly + $op = 'ROW_NUMBER() OVER' if $op =~ /^$rno_re$/; + my @right = $self->_recurse_parse ($tokens, PARSE_RHS); push @left, [ $op => \@right ]; @@ -453,6 +460,8 @@ sub _recurse_parse { else { my @lits = [ -LITERAL => [$token] ]; + unshift @lits, pop @left if @left == 1; + unless ( $state == PARSE_RHS ) { while ( @$tokens @@ -462,27 +471,38 @@ sub _recurse_parse { ! ( @$tokens > 1 and $tokens->[1] eq '(' ) ) { push @lits, [ -LITERAL => [ shift @$tokens ] ]; - } + } } - if (@left == 1) { - unshift @lits, pop @left; - } - @lits = [ -MISC => [ @lits ] ] if @lits > 1; push @left, @lits; } - if (@$tokens) { + # compress -LITERAL -MISC and -PLACEHOLDER pieces into a single + # -MISC container + if (@left > 1) { + my $i = 0; + while ($#left > $i) { + if ($left[$i][0] =~ $compressable_node_re and $left[$i+1][0] =~ $compressable_node_re) { + splice @left, $i, 2, [ -MISC => [ + map { $_->[0] eq '-MISC' ? @{$_->[1]} : $_ } (@left[$i, $i+1]) + ]]; + } + else { + $i++; + } + } + } - # deal with post-fix operators (asc/desc) + return @left if $state == PARSE_RHS; + + # deal with post-fix operators + if (@$tokens) { + # asc/desc if ($tokens->[0] =~ $asc_desc_re) { - return @left if $state == PARSE_RHS; @left = [ ('-' . uc (shift @$tokens)) => [ @left ] ]; } - - return @left if $state == PARSE_RHS and $left[-1][0] eq '-LITERAL'; } } } @@ -643,25 +663,38 @@ sub _parenthesis_unroll { next; } + my $parent_op = $ast->[0]; + # unroll nested parenthesis - while ( @{$child->[1]} == 1 and $child->[1][0][0] eq '-PAREN') { + while ( $parent_op ne 'IN' and @{$child->[1]} == 1 and $child->[1][0][0] eq '-PAREN') { $child = $child->[1][0]; $changes++; } - # if the parent operator explcitly allows it nuke the parenthesis - if ( $ast->[0] =~ $unrollable_ops_re ) { + # set to CHILD in the case of PARENT ( CHILD ) + # but NOT in the case of PARENT( CHILD1, CHILD2 ) + my $single_child_op = (@{$child->[1]} == 1) ? $child->[1][0][0] : ''; + + my $child_op_argc = $single_child_op ? scalar @{$child->[1][0][1]} : undef; + + my $single_grandchild_op + = ( $child_op_argc||0 == 1 and ref $child->[1][0][1][0] eq 'ARRAY' ) + ? $child->[1][0][1][0][0] + : '' + ; + + # if the parent operator explicitly allows it AND the child isn't a subselect + # nuke the parenthesis + if ($parent_op =~ $unrollable_ops_re and $single_child_op ne 'SELECT') { push @children, @{$child->[1]}; $changes++; } # if the parenthesis are wrapped around an AND/OR matching the parent AND/OR - open the parenthesis up and merge the list elsif ( - @{$child->[1]} == 1 - and - ( $ast->[0] eq 'AND' or $ast->[0] eq 'OR') - and - $child->[1][0][0] eq $ast->[0] + $single_child_op eq $parent_op + and + ( $parent_op eq 'AND' or $parent_op eq 'OR') ) { push @children, @{$child->[1][0][1]}; $changes++; @@ -670,13 +703,9 @@ sub _parenthesis_unroll { # only *ONE* LITERAL or placeholder element # as an AND/OR/NOT argument elsif ( - @{$child->[1]} == 1 && ( - $child->[1][0][0] eq '-LITERAL' - or - $child->[1][0][0] eq '-PLACEHOLDER' - ) && ( - $ast->[0] eq 'AND' or $ast->[0] eq 'OR' or $ast->[0] eq 'NOT' - ) + ( $single_child_op eq '-LITERAL' or $single_child_op eq '-PLACEHOLDER' ) + and + ( $parent_op eq 'AND' or $parent_op eq 'OR' or $parent_op eq 'NOT' ) ) { push @children, @{$child->[1]}; $changes++; @@ -689,20 +718,18 @@ sub _parenthesis_unroll { # break precedence) or when the child is BETWEEN (special # case) elsif ( - @{$child->[1]} == 1 - and - ($ast->[0] eq 'AND' or $ast->[0] eq 'OR') + ($parent_op eq 'AND' or $parent_op eq 'OR') and - $child->[1][0][0] =~ $binary_op_re + $single_child_op =~ $binary_op_re and - $child->[1][0][0] ne 'BETWEEN' + $single_child_op ne 'BETWEEN' and - @{$child->[1][0][1]} == 2 + $child_op_argc == 2 and ! ( - $child->[1][0][0] =~ $alphanum_cmp_op_re + $single_child_op =~ $alphanum_cmp_op_re and - $ast->[0] =~ $alphanum_cmp_op_re + $parent_op =~ $alphanum_cmp_op_re ) ) { push @children, @{$child->[1]}; @@ -716,20 +743,20 @@ sub _parenthesis_unroll { # or a single non-mathop with a single LITERAL ( nonmathop foo ) # or a single non-mathop with a single PLACEHOLDER ( nonmathop ? ) elsif ( - @{$child->[1]} == 1 + $single_child_op and - @{$child->[1][0][1]} == 1 + $parent_op =~ $alphanum_cmp_op_re and - $ast->[0] =~ $alphanum_cmp_op_re + $single_child_op !~ $alphanum_cmp_op_re and - $child->[1][0][0] !~ $alphanum_cmp_op_re + $child_op_argc == 1 and ( - $child->[1][0][1][0][0] eq '-PAREN' + $single_grandchild_op eq '-PAREN' or - $child->[1][0][1][0][0] eq '-LITERAL' + $single_grandchild_op eq '-LITERAL' or - $child->[1][0][1][0][0] eq '-PLACEHOLDER' + $single_grandchild_op eq '-PLACEHOLDER' ) ) { push @children, @{$child->[1]}; @@ -738,16 +765,17 @@ sub _parenthesis_unroll { # a construct of ... ( somefunc ( ... ) ) ... can safely lose the outer parens # except for the case of ( NOT ( ... ) ) which has already been handled earlier + # and except for the case of RNO, where the double are explicit syntax elsif ( - @{$child->[1]} == 1 + $parent_op ne 'ROW_NUMBER() OVER' and - @{$child->[1][0][1]} == 1 + $single_child_op and - $child->[1][0][0] ne 'NOT' + $single_child_op ne 'NOT' and - ref $child->[1][0][1][0] eq 'ARRAY' + $child_op_argc == 1 and - $child->[1][0][1][0][0] eq '-PAREN' + $single_grandchild_op eq '-PAREN' ) { push @children, @{$child->[1]}; $changes++;