X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract%2FTree.pm;h=8e743a64dd469f7b495d453ff43610f744164fae;hb=1ec9b9e3261f37de1bd05b31fa3c88ab78ab1480;hp=c4c9cdcf0592a82a2fc798cf925a892322185598;hpb=6f2a5b668d6d34e8aee21c2b0cf51fdbf5dee991;p=dbsrgits%2FSQL-Abstract.git diff --git a/lib/SQL/Abstract/Tree.pm b/lib/SQL/Abstract/Tree.pm index c4c9cdc..8e743a6 100644 --- a/lib/SQL/Abstract/Tree.pm +++ b/lib/SQL/Abstract/Tree.pm @@ -9,10 +9,10 @@ use Hash::Merge qw//; use base 'Class::Accessor::Grouped'; -__PACKAGE__->mk_group_accessors( simple => $_ ) for qw( +__PACKAGE__->mk_group_accessors( simple => qw( newline indent_string indent_amount colormap indentmap fill_in_placeholders placeholder_surround -); +)); my $merger = Hash::Merge->new; @@ -97,50 +97,47 @@ $expr_start_re = qr/ $op_look_behind (?i: $expr_start_re ) $op_look_ahead /x; # * AS is not really an operator but is handled here as it's also LHS/RHS # this will be included in the $binary_op_re, the distinction is interesting during -# testing as one is tighter than the other, plus mathops have different look -# ahead/behind (e.g. "x"="y" ) -my @math_op_keywords = (qw/ < > != <> = <= >= /); -my $math_re = join ("\n\t|\n", map +# testing as one is tighter than the other, plus alphanum cmp ops have different +# look ahead/behind (e.g. "x"="y" ) +my @alphanum_cmp_op_keywords = (qw/< > != <> = <= >= /); +my $alphanum_cmp_op_re = join ("\n\t|\n", map { "(?: (?<= [\\w\\s] | $quote_right ) | \\A )" . quotemeta ($_) . "(?: (?= [\\w\\s] | $quote_left ) | \\z )" } - @math_op_keywords + @alphanum_cmp_op_keywords ); -$math_re = qr/$math_re/x; - -sub _math_op_re { $math_re } - +$alphanum_cmp_op_re = qr/$alphanum_cmp_op_re/x; my $binary_op_re = '(?: NOT \s+)? (?:' . join ('|', qw/IN BETWEEN R?LIKE/) . ')'; $binary_op_re = join "\n\t|\n", "$op_look_behind (?i: $binary_op_re | AS ) $op_look_ahead", - $math_re, + $alphanum_cmp_op_re, $op_look_behind . 'IS (?:\s+ NOT)?' . "(?= \\s+ NULL \\b | $op_look_ahead )", ; $binary_op_re = qr/$binary_op_re/x; -sub _binary_op_re { $binary_op_re } - my $unary_op_re = '(?: NOT \s+ EXISTS | NOT )'; $unary_op_re = join "\n\t|\n", "$op_look_behind (?i: $unary_op_re ) $op_look_ahead", ; $unary_op_re = qr/$unary_op_re/x; -sub _unary_op_re { $unary_op_re } +my $asc_desc_re = qr/$op_look_behind (?i: ASC | DESC ) $op_look_ahead /x; +my $and_or_re = qr/$op_look_behind (?i: AND | OR ) $op_look_ahead /x; -my $all_known_re = join("\n\t|\n", +my $tokenizer_re = join("\n\t|\n", $expr_start_re, $binary_op_re, $unary_op_re, - "$op_look_behind (?i: AND|OR|\\* ) $op_look_ahead", + $asc_desc_re, + $and_or_re, + $op_look_behind . ' \* ' . $op_look_ahead, (map { quotemeta $_ } qw/, ( )/), $placeholder_re, ); -$all_known_re = qr/$all_known_re/x; - -#this one *is* capturing for the split below +# this one *is* capturing for the split below # splits on whitespace if all else fails -my $tokenizer_re = qr/ \s* ( $all_known_re ) \s* | \s+ /x; +# has to happen before the composiign qr's are anchored (below) +$tokenizer_re = qr/ \s* ( $tokenizer_re ) \s* | \s+ /x; # Parser states for _recurse_parse() use constant PARSE_TOP_LEVEL => 0; @@ -150,11 +147,28 @@ use constant PARSE_IN_FUNC => 3; use constant PARSE_RHS => 4; use constant PARSE_LIST_ELT => 5; -my $asc_desc_re = qr/^ (?: ASC | DESC ) $/xi; -my $expr_term_re = qr/ ^ (?: $expr_start_re | \) ) $/x; -my $rhs_term_re = qr/ ^ (?: $expr_term_re | $binary_op_re | $unary_op_re | $asc_desc_re | (?i: AND | OR | \, ) ) $/x; -my $common_single_args_re = qr/ ^ (?: \* | $placeholder_re ) $/x; -my $all_std_keywords_re = qr/^ (?: $rhs_term_re | \( | $common_single_args_re ) $/x; +my $expr_term_re = qr/$expr_start_re | \)/x; +my $rhs_term_re = qr/ $expr_term_re | $binary_op_re | $unary_op_re | $asc_desc_re | $and_or_re | \, /x; +my $all_std_keywords_re = qr/ $rhs_term_re | \( | $placeholder_re /x; + +# anchor everything - even though keywords are separated by the tokenizer, leakage may occur +for ( + $quote_left, + $quote_right, + $placeholder_re, + $expr_start_re, + $alphanum_cmp_op_re, + $binary_op_re, + $unary_op_re, + $asc_desc_re, + $and_or_re, + $expr_term_re, + $rhs_term_re, + $all_std_keywords_re, +) { + $_ = qr/ \A $_ \z /x; +} + my %indents = ( @@ -337,7 +351,7 @@ sub _recurse_parse { or ($state == PARSE_RHS && $tokens->[0] =~ $rhs_term_re ) or - ($state == PARSE_LIST_ELT && $tokens->[0] =~ qr/^ (?: $expr_term_re | \, ) $/x) + ($state == PARSE_LIST_ELT && ( $tokens->[0] eq ',' or $tokens->[0] =~ $expr_term_re ) ) ) { return @left; } @@ -354,7 +368,7 @@ sub _recurse_parse { } # AND/OR - elsif ($token =~ /^ (?: OR | AND ) $/ix ) { + elsif ($token =~ $and_or_re) { my $op = uc $token; my @right = $self->_recurse_parse($tokens, PARSE_IN_EXPR); @@ -428,7 +442,7 @@ sub _recurse_parse { } # check if the current token is an unknown op-start - elsif (@$tokens and $tokens->[0] =~ qr/^ (?: \( | $common_single_args_re ) $/x ) { + elsif (@$tokens and ($tokens->[0] eq '(' or $tokens->[0] =~ $placeholder_re ) ) { push @left, [ $token => [ $self->_recurse_parse($tokens, PARSE_RHS) ] ]; } @@ -450,20 +464,10 @@ sub _recurse_parse { push @left, @lits; } - # deal with post-fix operators (only when sql is sane - i.e. we have one element to apply to) - if (@left == 1 and @$tokens) { - + if (@$tokens) { # asc/desc if ($tokens->[0] =~ $asc_desc_re) { - my $op = shift @$tokens; - - # if -MISC - this is a literal collection, do not promote asc/desc to an op - if ($left[0][0] eq '-MISC') { - push @{$left[0][1]}, [ -LITERAL => [ $op ] ]; - } - else { - @left = [ ('-' . uc ($op)) => [ @left ] ]; - } + @left = [ ('-' . uc (shift @$tokens)) => [ @left ] ]; } } } @@ -565,7 +569,7 @@ sub _unparse { ) ); } - elsif ($op eq 'AND' or $op eq 'OR' or $op =~ / ^ $binary_op_re $ /x ) { + elsif ($op eq 'AND' or $op eq 'OR' or $op =~ $binary_op_re ) { return join (" $op ", map $self->_unparse($_, $bindargs, $depth), @{$args}); } elsif ($op eq '-LIST' ) { @@ -574,16 +578,23 @@ sub _unparse { elsif ($op eq '-MISC' ) { return join (' ', map $self->_unparse($_, $bindargs, $depth), @{$args}); } + elsif ($op =~ qr/^-(ASC|DESC)$/ ) { + my $dir = $1; + return join (' ', (map $self->_unparse($_, $bindargs, $depth), @{$args}), $dir); + } else { my ($l, $r) = @{$self->pad_keyword($op, $depth)}; - return sprintf "$l%s%s%s$r", - $self->format_keyword($op), + + my $rhs = $self->_unparse($args, $bindargs, $depth); + + return sprintf "$l%s$r", join( ( ref $args eq 'ARRAY' and @{$args} == 1 and $args->[0][0] eq '-PAREN' ) ? '' # mysql-- : ' ' , - $self->_unparse($args, $bindargs, $depth), - ; + $self->format_keyword($op), + (length $rhs ? $rhs : () ), + ); } } @@ -668,16 +679,16 @@ sub _parenthesis_unroll { and ($ast->[0] eq 'AND' or $ast->[0] eq 'OR') and - $child->[1][0][0] =~ SQL::Abstract::Tree::_binary_op_re() + $child->[1][0][0] =~ $binary_op_re and $child->[1][0][0] ne 'BETWEEN' and @{$child->[1][0][1]} == 2 and ! ( - $child->[1][0][0] =~ SQL::Abstract::Tree::_math_op_re() + $child->[1][0][0] =~ $alphanum_cmp_op_re and - $ast->[0] =~ SQL::Abstract::Tree::_math_op_re() + $ast->[0] =~ $alphanum_cmp_op_re ) ) { push @children, @{$child->[1]}; @@ -695,9 +706,9 @@ sub _parenthesis_unroll { and @{$child->[1][0][1]} == 1 and - $ast->[0] =~ SQL::Abstract::Tree::_math_op_re() + $ast->[0] =~ $alphanum_cmp_op_re and - $child->[1][0][0] !~ SQL::Abstract::Tree::_math_op_re + $child->[1][0][0] !~ $alphanum_cmp_op_re and ( $child->[1][0][1][0][0] eq '-PAREN' @@ -711,6 +722,23 @@ sub _parenthesis_unroll { $changes++; } + # a construct of ... ( somefunc ( ... ) ) ... can safely lose the outer parens + # except for the case of ( NOT ( ... ) ) which has already been handled earlier + elsif ( + @{$child->[1]} == 1 + and + @{$child->[1][0][1]} == 1 + and + $child->[1][0][0] ne 'NOT' + and + ref $child->[1][0][1][0] eq 'ARRAY' + and + $child->[1][0][1][0][0] eq '-PAREN' + ) { + push @children, @{$child->[1]}; + $changes++; + } + # otherwise no more mucking for this pass else { @@ -723,6 +751,30 @@ sub _parenthesis_unroll { } while ($changes); } +sub _strip_asc_from_order_by { + my ($self, $ast) = @_; + + return $ast if ( + ref $ast ne 'ARRAY' + or + $ast->[0] ne 'ORDER BY' + ); + + + my $to_replace; + + if (@{$ast->[1]} == 1 and $ast->[1][0][0] eq '-ASC') { + $to_replace = [ $ast->[1][0] ]; + } + elsif (@{$ast->[1]} == 1 and $ast->[1][0][0] eq '-LIST') { + $to_replace = [ grep { $_->[0] eq '-ASC' } @{$ast->[1][0][1]} ]; + } + + @$_ = @{$_->[1][0]} for @$to_replace; + + $ast; +} + sub format { my $self = shift; $self->unparse($self->parse($_[0]), $_[1]) } 1; @@ -798,7 +850,7 @@ structure of the returned tree. It may be stable at some point, but not yet. =head2 unparse - $sqlat->parse($tree_structure, \@bindargs) + $sqlat->unparse($tree_structure, \@bindargs) Transform "tree" into SQL, applying various transforms on the way.