X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=dbsrgits%2FSQL-Abstract.git;a=blobdiff_plain;f=lib%2FSQL%2FAbstract.pm;h=692816fa001c279174f941f887ccad971cd329af;hp=919101059f14b32a6b472a393eb617284e462bff;hb=1f490ae449fd0c9dc413b776cbc77557f79d6ab5;hpb=07936978f41bd6a48f2f8eaf38361a2e36ca71bd diff --git a/lib/SQL/Abstract.pm b/lib/SQL/Abstract.pm index 9191010..692816f 100644 --- a/lib/SQL/Abstract.pm +++ b/lib/SQL/Abstract.pm @@ -1,41 +1,58 @@ package SQL::Abstract; # see doc at end of file -# LDNOTE : this code is heavy refactoring from original SQLA. -# Several design decisions will need discussion during -# the test / diffusion / acceptance phase; those are marked with flag -# 'LDNOTE' (note by laurent.dami AT free.fr) - -use Carp; use strict; use warnings; +use Carp (); use List::Util (); use Scalar::Util (); +use Exporter 'import'; +our @EXPORT_OK = qw(is_plain_value is_literal_value); + +BEGIN { + if ($] < 5.009_005) { + require MRO::Compat; + } + else { + require mro; + } + + *SQL::Abstract::_ENV_::DETECT_AUTOGENERATED_STRINGIFICATION = $ENV{SQLA_ISVALUE_IGNORE_AUTOGENERATED_STRINGIFICATION} + ? sub () { 0 } + : sub () { 1 } + ; +} + #====================================================================== # GLOBALS #====================================================================== -our $VERSION = '1.61'; +our $VERSION = '1.78'; # This would confuse some packagers -#$VERSION = eval $VERSION; # numify for warning-free dev releases +$VERSION = eval $VERSION if $VERSION =~ /_/; # numify for warning-free dev releases our $AUTOLOAD; # special operators (-in, -between). May be extended/overridden by user. # See section WHERE: BUILTIN SPECIAL OPERATORS below for implementation my @BUILTIN_SPECIAL_OPS = ( - {regex => qr/^(not )?between$/i, handler => '_where_field_BETWEEN'}, - {regex => qr/^(not )?in$/i, handler => '_where_field_IN'}, + {regex => qr/^ (?: not \s )? between $/ix, handler => '_where_field_BETWEEN'}, + {regex => qr/^ (?: not \s )? in $/ix, handler => '_where_field_IN'}, + {regex => qr/^ ident $/ix, handler => '_where_op_IDENT'}, + {regex => qr/^ value $/ix, handler => '_where_op_VALUE'}, + {regex => qr/^ is (?: \s+ not )? $/ix, handler => '_where_field_IS'}, ); # unaryish operators - key maps to handler my @BUILTIN_UNARY_OPS = ( # the digits are backcompat stuff - { regex => qr/^and (?: \s? \d+ )? $/xi, handler => '_where_op_ANDOR' }, - { regex => qr/^or (?: \s? \d+ )? $/xi, handler => '_where_op_ANDOR' }, - { regex => qr/^nest (?: \s? \d+ )? $/xi, handler => '_where_op_NEST' }, - { regex => qr/^ (?: not \s )? bool $/xi, handler => '_where_op_BOOL' }, + { regex => qr/^ and (?: [_\s]? \d+ )? $/xi, handler => '_where_op_ANDOR' }, + { regex => qr/^ or (?: [_\s]? \d+ )? $/xi, handler => '_where_op_ANDOR' }, + { regex => qr/^ nest (?: [_\s]? \d+ )? $/xi, handler => '_where_op_NEST' }, + { regex => qr/^ (?: not \s )? bool $/xi, handler => '_where_op_BOOL' }, + { regex => qr/^ ident $/xi, handler => '_where_op_IDENT' }, + { regex => qr/^ value $/xi, handler => '_where_op_VALUE' }, ); #====================================================================== @@ -50,15 +67,78 @@ sub _debug { sub belch (@) { my($func) = (caller(1))[3]; - carp "[$func] Warning: ", @_; + Carp::carp "[$func] Warning: ", @_; } sub puke (@) { my($func) = (caller(1))[3]; - croak "[$func] Fatal: ", @_; + Carp::croak "[$func] Fatal: ", @_; +} + +sub is_literal_value ($) { + ref $_[0] eq 'SCALAR' ? [ ${$_[0]} ] + : ( ref $_[0] eq 'REF' and ref ${$_[0]} eq 'ARRAY' ) ? [ @${ $_[0] } ] + : ( + ref $_[0] eq 'HASH' and keys %{$_[0]} == 1 + and + defined $_[0]->{-ident} and ! length ref $_[0]->{-ident} + ) ? [ $_[0]->{-ident} ] + : undef; +} + +# FIXME XSify - this can be done so much more efficiently +sub is_plain_value ($) { + no strict 'refs'; + ! length ref $_[0] ? \($_[0]) + : ( + ref $_[0] eq 'HASH' and keys %{$_[0]} == 1 + and + exists $_[0]->{-value} + ) ? \($_[0]->{-value}) + : ( + # reuse @_ for even moar speedz + defined ( $_[1] = Scalar::Util::blessed $_[0] ) + and + # deliberately not using Devel::OverloadInfo - the checks we are + # intersted in are much more limited than the fullblown thing, and + # this is a very hot piece of code + ( + # simply using ->can('(""') can leave behind stub methods that + # break actually using the overload later (see L and the source of overload::mycan()) + # + # either has stringification which DBI SHOULD prefer out of the box + grep { *{ (qq[${_}::(""]) }{CODE} } @{ $_[2] = mro::get_linear_isa( $_[1] ) } + or + # has nummification or boolification, AND fallback is *not* disabled + ( + SQL::Abstract::_ENV_::DETECT_AUTOGENERATED_STRINGIFICATION + and + ( + grep { *{"${_}::(0+"}{CODE} } @{$_[2]} + or + grep { *{"${_}::(bool"}{CODE} } @{$_[2]} + ) + and + ( + # no fallback specified at all + ! ( ($_[3]) = grep { *{"${_}::()"}{CODE} } @{$_[2]} ) + or + # fallback explicitly undef + ! defined ${"$_[3]::()"} + or + # explicitly true + !! ${"$_[3]::()"} + ) + ) + ) + ) ? \($_[0]) + : undef; } + #====================================================================== # NEW #====================================================================== @@ -75,42 +155,59 @@ sub new { $opt{logic} = $opt{logic} ? uc $opt{logic} : 'OR'; # how to return bind vars - # LDNOTE: changed nwiger code : why this 'delete' ?? - # $opt{bindtype} ||= delete($opt{bind_type}) || 'normal'; $opt{bindtype} ||= 'normal'; # default comparison is "=", but can be overridden $opt{cmp} ||= '='; - # generic SQL comparison operators - my $anchored_cmp_ops = join ('|', map { '^' . $_ . '$' } ( - '(?:is \s+)? (?:not \s+)? like', - 'is', - (map { quotemeta($_) } (qw/ < > != <> = <= >= /) ), - )); - $opt{cmp_ops} = qr/$anchored_cmp_ops/ix; + # try to recognize which are the 'equality' and 'inequality' ops + # (temporary quickfix (in 2007), should go through a more seasoned API) + $opt{equality_op} = qr/^( \Q$opt{cmp}\E | \= )$/ix; + $opt{inequality_op} = qr/^( != | <> )$/ix; - # try to recognize which are the 'equality' and 'unequality' ops - # (temporary quickfix, should go through a more seasoned API) - $opt{equality_op} = qr/^(\Q$opt{cmp}\E|is|(is\s+)?like)$/i; - $opt{inequality_op} = qr/^(!=|<>|(is\s+)?not(\s+like)?)$/i; + $opt{like_op} = qr/^ (is\s+)? r?like $/xi; + $opt{not_like_op} = qr/^ (is\s+)? not \s+ r?like $/xi; # SQL booleans $opt{sqltrue} ||= '1=1'; $opt{sqlfalse} ||= '0=1'; - # special operators + # special operators $opt{special_ops} ||= []; + # regexes are applied in order, thus push after user-defines push @{$opt{special_ops}}, @BUILTIN_SPECIAL_OPS; - # unary operators + # unary operators $opt{unary_ops} ||= []; push @{$opt{unary_ops}}, @BUILTIN_UNARY_OPS; + # rudimentary sanity-check for user supplied bits treated as functions/operators + # If a purported function matches this regular expression, an exception is thrown. + # Literal SQL is *NOT* subject to this check, only functions (and column names + # when quoting is not in effect) + + # FIXME + # need to guard against ()'s in column names too, but this will break tons of + # hacks... ideas anyone? + $opt{injection_guard} ||= qr/ + \; + | + ^ \s* go \s + /xmi; + return bless \%opt, $class; } +sub _assert_pass_injection_guard { + if ($_[1] =~ $_[0]->{injection_guard}) { + my $class = ref $_[0]; + puke "Possible SQL injection attempt '$_[1]'. If this is indeed a part of the " + . "desired SQL use literal SQL ( \'...' or \[ '...' ] ) or supply your own " + . "{injection_guard} attribute to ${class}->new()" + } +} + #====================================================================== # INSERT methods @@ -126,22 +223,26 @@ sub insert { my ($sql, @bind) = $self->$method($data); $sql = join " ", $self->_sqlcase('insert into'), $table, $sql; - if (my $ret = $options->{returning}) { - $sql .= $self->_insert_returning ($ret); + if ($options->{returning}) { + my ($s, @b) = $self->_insert_returning ($options); + $sql .= $s; + push @bind, @b; } return wantarray ? ($sql, @bind) : $sql; } sub _insert_returning { - my ($self, $fields) = @_; + my ($self, $options) = @_; + + my $f = $options->{returning}; - my $f = $self->_SWITCH_refkind($fields, { - ARRAYREF => sub {join ', ', map { $self->_quote($_) } @$fields;}, - SCALAR => sub {$self->_quote($fields)}, - SCALARREF => sub {$$fields}, + my $fieldlist = $self->_SWITCH_refkind($f, { + ARRAYREF => sub {join ', ', map { $self->_quote($_) } @$f;}, + SCALAR => sub {$self->_quote($f)}, + SCALARREF => sub {$$f}, }); - return join (' ', $self->_sqlcase(' returning'), $f); + return $self->_sqlcase(' returning ') . $fieldlist; } sub _insert_HASHREF { # explicit list of fields and then values @@ -199,7 +300,7 @@ sub _insert_values { $self->_SWITCH_refkind($v, { - ARRAYREF => sub { + ARRAYREF => sub { if ($self->{array_datatypes}) { # if array datatype are activated push @values, '?'; push @all_bind, $self->_bindtype($column, $v); @@ -219,7 +320,7 @@ sub _insert_values { push @all_bind, @bind; }, - # THINK : anything useful to do with a HASHREF ? + # THINK : anything useful to do with a HASHREF ? HASHREF => sub { # (nothing, but old SQLA passed it through) #TODO in SQLA >= 2.0 it will die instead belch "HASH ref as bind value in insert is not supported"; @@ -268,7 +369,7 @@ sub update { my $label = $self->_quote($k); $self->_SWITCH_refkind($v, { - ARRAYREF => sub { + ARRAYREF => sub { if ($self->{array_datatypes}) { # array datatype push @set, "$label = ?"; push @all_bind, $self->_bindtype($k, $v); @@ -288,7 +389,19 @@ sub update { }, SCALARREF => sub { # literal SQL without bind push @set, "$label = $$v"; - }, + }, + HASHREF => sub { + my ($op, $arg, @rest) = %$v; + + puke 'Operator calls in update must be in the form { -op => $arg }' + if (@rest or not $op =~ /^\-(.+)/); + + local $self->{_nested_func_lhs} = $k; + my ($sql, @bind) = $self->_where_unary_op ($1, $arg); + + push @set, "$label = $sql"; + push @all_bind, @bind; + }, SCALAR_or_UNDEF => sub { push @set, "$label = ?"; push @all_bind, $self->_bindtype($k, $v); @@ -328,11 +441,11 @@ sub select { my $f = (ref $fields eq 'ARRAY') ? join ', ', map { $self->_quote($_) } @$fields : $fields; - my $sql = join(' ', $self->_sqlcase('select'), $f, + my $sql = join(' ', $self->_sqlcase('select'), $f, $self->_sqlcase('from'), $table) . $where_sql; - return wantarray ? ($sql, @bind) : $sql; + return wantarray ? ($sql, @bind) : $sql; } #====================================================================== @@ -349,7 +462,7 @@ sub delete { my($where_sql, @bind) = $self->where($where); my $sql = $self->_sqlcase('delete from') . " $table" . $where_sql; - return wantarray ? ($sql, @bind) : $sql; + return wantarray ? ($sql, @bind) : $sql; } @@ -372,7 +485,7 @@ sub where { $sql .= $self->_order_by($order); } - return wantarray ? ($sql, @bind) : $sql; + return wantarray ? ($sql, @bind) : $sql; } @@ -382,12 +495,17 @@ sub _recurse_where { # dispatch on appropriate method according to refkind of $where my $method = $self->_METHOD_FOR_refkind("_where", $where); + my ($sql, @bind) = $self->$method($where, $logic); - my ($sql, @bind) = $self->$method($where, $logic); - - # DBIx::Class directly calls _recurse_where in scalar context, so - # we must implement it, even if not in the official API - return wantarray ? ($sql, @bind) : $sql; + # DBIx::Class used to call _recurse_where in scalar context + # something else might too... + if (wantarray) { + return ($sql, @bind); + } + else { + belch "Calling _recurse_where in scalar context is deprecated and will go away before 2.0"; + return $sql; + } } @@ -407,7 +525,7 @@ sub _where_ARRAYREF { my (@sql_clauses, @all_bind); # need to use while() so can shift() for pairs - while (my $el = shift @clauses) { + while (my $el = shift @clauses) { # switch according to kind of $el and get corresponding ($sql, @bind) my ($sql, @bind) = $self->_SWITCH_refkind($el, { @@ -415,14 +533,13 @@ sub _where_ARRAYREF { # skip empty elements, otherwise get invalid trailing AND stuff ARRAYREF => sub {$self->_recurse_where($el) if @$el}, - ARRAYREFREF => sub { @{${$el}} if @{${$el}}}, + ARRAYREFREF => sub { + my ($s, @b) = @$$el; + $self->_assert_bindval_matches_bindtype(@b); + ($s, @b); + }, HASHREF => sub {$self->_recurse_where($el, 'and') if %$el}, - # LDNOTE : previous SQLA code for hashrefs was creating a dirty - # side-effect: the first hashref within an array would change - # the global logic to 'AND'. So [ {cond1, cond2}, [cond3, cond4] ] - # was interpreted as "(cond1 AND cond2) OR (cond3 AND cond4)", - # whereas it should be "(cond1 AND cond2) OR (cond3 OR cond4)". SCALARREF => sub { ($$el); }, @@ -447,8 +564,8 @@ sub _where_ARRAYREF { sub _where_ARRAYREFREF { my ($self, $where) = @_; - my ($sql, @bind) = @{${$where}}; - + my ($sql, @bind) = @$$where; + $self->_assert_bindval_matches_bindtype(@bind); return ($sql, @bind); } @@ -468,32 +585,24 @@ sub _where_HASHREF { if ($k =~ /^-./) { # put the operator in canonical form my $op = $k; - $op =~ s/^-//; # remove initial dash - $op =~ s/[_\t ]+/ /g; # underscores and whitespace become single spaces + $op = substr $op, 1; # remove initial dash $op =~ s/^\s+|\s+$//g;# remove leading/trailing space + $op =~ s/\s+/ /g; # compress whitespace - $self->_debug("Unary OP(-$op) within hashref, recursing..."); + # so that -not_foo works correctly + $op =~ s/^not_/NOT /i; - my $op_entry = List::Util::first {$op =~ $_->{regex}} @{$self->{unary_ops}}; - if (my $handler = $op_entry->{handler}) { - if (not ref $handler) { - if ($op =~ s/\s?\d+$//) { - belch 'Use of [and|or|nest]_N modifiers is deprecated and will be removed in SQLA v2.0. ' - . "You probably wanted ...-and => [ -$op => COND1, -$op => COND2 ... ]"; - } - $self->$handler ($op, $v); - } - elsif (ref $handler eq 'CODE') { - $handler->($self, $op, $v); - } - else { - puke "Illegal handler for operator $k - expecting a method name or a coderef"; - } - } - else { - $self->debug("Generic unary OP: $k - recursing as function"); - $self->_where_func_generic ($op, $v); - } + $self->_debug("Unary OP(-$op) within hashref, recursing..."); + my ($s, @b) = $self->_where_unary_op ($op, $v); + + # top level vs nested + # we assume that handled unary ops will take care of their ()s + $s = "($s)" unless ( + List::Util::first {$op =~ $_->{regex}} @{$self->{unary_ops}} + or + defined($self->{_nested_func_lhs}) && ($self->{_nested_func_lhs} eq $k) + ); + ($s, @b); } else { my $method = $self->_METHOD_FOR_refkind("_where_hashpair", $v); @@ -508,9 +617,31 @@ sub _where_HASHREF { return $self->_join_sql_clauses('and', \@sql_clauses, \@all_bind); } -sub _where_func_generic { +sub _where_unary_op { my ($self, $op, $rhs) = @_; + if (my $op_entry = List::Util::first {$op =~ $_->{regex}} @{$self->{unary_ops}}) { + my $handler = $op_entry->{handler}; + + if (not ref $handler) { + if ($op =~ s/ [_\s]? \d+ $//x ) { + belch 'Use of [and|or|nest]_N modifiers is deprecated and will be removed in SQLA v2.0. ' + . "You probably wanted ...-and => [ -$op => COND1, -$op => COND2 ... ]"; + } + return $self->$handler ($op, $rhs); + } + elsif (ref $handler eq 'CODE') { + return $handler->($self, $op, $rhs); + } + else { + puke "Illegal handler for operator $op - expecting a method name or a coderef"; + } + } + + $self->_debug("Generic unary OP: $op - recursing as function"); + + $self->_assert_pass_injection_guard($op); + my ($sql, @bind) = $self->_SWITCH_refkind ($rhs, { SCALAR => sub { puke "Illegal use of top-level '$op'" @@ -526,9 +657,9 @@ sub _where_func_generic { }, }); - $sql = sprintf ('%s%s', + $sql = sprintf ('%s %s', $self->_sqlcase($op), - ($op =~ $self->{cmp_ops}) ? " $sql" : "( $sql )", + $sql, ); return ($sql, @bind); @@ -548,16 +679,24 @@ sub _where_op_ANDOR { : $self->_where_HASHREF($v); }, - SCALARREF => sub { - puke "-$op => \\\$scalar not supported, use -nest => ..."; + SCALARREF => sub { + puke "-$op => \\\$scalar makes little sense, use " . + ($op =~ /^or/i + ? '[ \$scalar, \%rest_of_conditions ] instead' + : '-and => [ \$scalar, \%rest_of_conditions ] instead' + ); }, ARRAYREFREF => sub { - puke "-$op => \\[..] not supported, use -nest => ..."; + puke "-$op => \\[...] makes little sense, use " . + ($op =~ /^or/i + ? '[ \[...], \%rest_of_conditions ] instead' + : '-and => [ \[...], \%rest_of_conditions ] instead' + ); }, SCALAR => sub { # permissively interpreted as SQL - puke "-$op => 'scalar' not supported, use -nest => \\'scalar'"; + puke "-$op => \$value makes little sense, use -bool => \$value instead"; }, UNDEF => sub { @@ -567,14 +706,14 @@ sub _where_op_ANDOR { } sub _where_op_NEST { - my ($self, $op, $v) = @_; + my ($self, $op, $v) = @_; $self->_SWITCH_refkind($v, { SCALAR => sub { # permissively interpreted as SQL belch "literal SQL should be -nest => \\'scalar' " . "instead of -nest => 'scalar' "; - return ($v); + return ($v); }, UNDEF => sub { @@ -590,34 +729,78 @@ sub _where_op_NEST { sub _where_op_BOOL { - my ($self, $op, $v) = @_; + my ($self, $op, $v) = @_; - my ( $prefix, $suffix ) = ( $op =~ /\bnot\b/i ) - ? ( '(NOT ', ')' ) - : ( '', '' ); + my ($s, @b) = $self->_SWITCH_refkind($v, { + SCALAR => sub { # interpreted as SQL column + $self->_convert($self->_quote($v)); + }, - my ($sql, @bind) = do { - $self->_SWITCH_refkind($v, { - SCALAR => sub { # interpreted as SQL column - $self->_convert($self->_quote($v)); - }, + UNDEF => sub { + puke "-$op => undef not supported"; + }, - UNDEF => sub { - puke "-$op => undef not supported"; - }, + FALLBACK => sub { + $self->_recurse_where ($v); + }, + }); - FALLBACK => sub { - $self->_recurse_where ($v); - }, - }); - }; + $s = "(NOT $s)" if $op =~ /^not/i; + ($s, @b); +} - return ( - join ('', $prefix, $sql, $suffix), - @bind, - ); + +sub _where_op_IDENT { + my $self = shift; + my ($op, $rhs) = splice @_, -2; + if (! defined $rhs or length ref $rhs) { + puke "-$op requires a single plain scalar argument (a quotable identifier)"; + } + + # in case we are called as a top level special op (no '=') + my $lhs = shift; + + $_ = $self->_convert($self->_quote($_)) for ($lhs, $rhs); + + return $lhs + ? "$lhs = $rhs" + : $rhs + ; } +sub _where_op_VALUE { + my $self = shift; + my ($op, $rhs) = splice @_, -2; + + # in case we are called as a top level special op (no '=') + my $lhs = shift; + + # special-case NULL + if (! defined $rhs) { + return $lhs + ? $self->_convert($self->_quote($lhs)) . ' IS NULL' + : undef + ; + } + + my @bind = + $self->_bindtype ( + ($lhs || $self->{_nested_func_lhs}), + $rhs, + ) + ; + + return $lhs + ? ( + $self->_convert($self->_quote($lhs)) . ' = ' . $self->_convert('?'), + @bind + ) + : ( + $self->_convert('?'), + @bind, + ) + ; +} sub _where_hashpair_ARRAYREF { my ($self, $k, $v) = @_; @@ -642,9 +825,8 @@ sub _where_hashpair_ARRAYREF { my $logic = $op ? substr($op, 1) : ''; return $self->_recurse_where(\@distributed, $logic); - } + } else { - # LDNOTE : not sure of this one. What does "distribute over nothing" mean? $self->_debug("empty ARRAY($k) means 0=1"); return ($self->{sqlfalse}); } @@ -663,9 +845,24 @@ sub _where_hashpair_HASHREF { # put the operator in canonical form my $op = $orig_op; - $op =~ s/^-//; # remove initial dash - $op =~ s/[_\t ]+/ /g; # underscores and whitespace become single spaces + + # FIXME - we need to phase out dash-less ops + $op =~ s/^-//; # remove possible initial dash $op =~ s/^\s+|\s+$//g;# remove leading/trailing space + $op =~ s/\s+/ /g; # compress whitespace + + $self->_assert_pass_injection_guard($op); + + # fixup is_not + $op =~ s/^is_not/IS NOT/i; + + # so that -not_foo works correctly + $op =~ s/^not_/NOT /i; + + # another retarded special case: foo => { $op => { -value => undef } } + if (ref $val eq 'HASH' and keys %$val == 1 and exists $val->{-value} and ! defined $val->{-value} ) { + $val = undef; + } my ($sql, @bind); @@ -706,24 +903,28 @@ sub _where_hashpair_HASHREF { }, UNDEF => sub { # CASE: col => {op => undef} : sql "IS (NOT)? NULL" - my $is = ($op =~ $self->{equality_op}) ? 'is' : - ($op =~ $self->{inequality_op}) ? 'is not' : - puke "unexpected operator '$orig_op' with undef operand"; + my $is = + $op =~ /^not$/i ? 'is not' # legacy + : $op =~ $self->{equality_op} ? 'is' + : $op =~ $self->{like_op} ? belch("Supplying an undefined argument to '@{[ uc $op]}' is deprecated") && 'is' + : $op =~ $self->{inequality_op} ? 'is not' + : $op =~ $self->{not_like_op} ? belch("Supplying an undefined argument to '@{[ uc $op]}' is deprecated") && 'is not' + : puke "unexpected operator '$orig_op' with undef operand"; + $sql = $self->_quote($k) . $self->_sqlcase(" $is null"); }, FALLBACK => sub { # CASE: col => {op/func => $stuff} - # if we are starting to nest and the first func is not a cmp op - # assume equality - my $prefix; - unless ($self->{_nested_func_lhs}) { - $self->{_nested_func_lhs} = $k; - $prefix = $self->{cmp} unless $op =~ $self->{cmp_ops}; - } + # retain for proper column type bind + $self->{_nested_func_lhs} ||= $k; + + ($sql, @bind) = $self->_where_unary_op ($op, $val); - ($sql, @bind) = $self->_where_func_generic ($op, $val); - $sql = join ' ', $self->_convert($self->_quote($k)), $prefix||(), $sql; + $sql = join (' ', + $self->_convert($self->_quote($k)), + $self->{_nested_func_lhs} eq $k ? $sql : "($sql)", # top level vs nested + ); }, }); } @@ -734,7 +935,22 @@ sub _where_hashpair_HASHREF { return ($all_sql, @all_bind); } +sub _where_field_IS { + my ($self, $k, $op, $v) = @_; + + my ($s) = $self->_SWITCH_refkind($v, { + UNDEF => sub { + join ' ', + $self->_convert($self->_quote($k)), + map { $self->_sqlcase($_)} ($op, 'null') + }, + FALLBACK => sub { + puke "$op can only take undef as argument"; + }, + }); + $s; +} sub _where_field_op_ARRAYREF { my ($self, $k, $op, $vals) = @_; @@ -754,27 +970,35 @@ sub _where_field_op_ARRAYREF { shift @vals; } + # a long standing API wart - an attempt to change this behavior during + # the 1.50 series failed *spectacularly*. Warn instead and leave the + # behavior as is + if ( + @vals > 1 + and + (!$logic or $logic eq 'OR') + and + ( $op =~ $self->{inequality_op} or $op =~ $self->{not_like_op} ) + ) { + my $o = uc($op); + belch "A multi-element arrayref as an argument to the inequality op '$o' " + . 'is technically equivalent to an always-true 1=1 (you probably wanted ' + . "to say ...{ \$inequality_op => [ -and => \@values ] }... instead)" + ; + } + # distribute $op over each remaining member of @vals, append logic if exists return $self->_recurse_where([map { {$k => {$op, $_}} } @vals], $logic); - # LDNOTE : had planned to change the distribution logic when - # $op =~ $self->{inequality_op}, because of Morgan laws : - # with {field => {'!=' => [22, 33]}}, it would be ridiculous to generate - # WHERE field != 22 OR field != 33 : the user probably means - # WHERE field != 22 AND field != 33. - # To do this, replace the above to roughly : - # my $logic = ($op =~ $self->{inequality_op}) ? 'AND' : 'OR'; - # return $self->_recurse_where([map { {$k => {$op, $_}} } @vals], $logic); - - } + } else { - # try to DWIM on equality operators - # LDNOTE : not 100% sure this is the correct thing to do ... - return ($self->{sqlfalse}) if $op =~ $self->{equality_op}; - return ($self->{sqltrue}) if $op =~ $self->{inequality_op}; - - # otherwise - puke "operator '$op' applied on an empty array (field '$k')"; + # try to DWIM on equality operators + return + $op =~ $self->{equality_op} ? $self->{sqlfalse} + : $op =~ $self->{like_op} ? belch("Supplying an empty arrayref to '@{[ uc $op]}' is deprecated") && $self->{sqlfalse} + : $op =~ $self->{inequality_op} ? $self->{sqltrue} + : $op =~ $self->{not_like_op} ? belch("Supplying an empty arrayref to '@{[ uc $op]}' is deprecated") && $self->{sqltrue} + : puke "operator '$op' applied on an empty array (field '$k')"; } } @@ -790,7 +1014,7 @@ sub _where_hashpair_SCALARREF { sub _where_hashpair_ARRAYREFREF { my ($self, $k, $v) = @_; $self->_debug("REF($k) means literal SQL: @${$v}"); - my ($sql, @bind) = @${$v}; + my ($sql, @bind) = @$$v; $self->_assert_bindval_matches_bindtype(@bind); $sql = $self->_quote($k) . " " . $sql; return ($sql, @bind ); @@ -800,8 +1024,8 @@ sub _where_hashpair_ARRAYREFREF { sub _where_hashpair_SCALAR { my ($self, $k, $v) = @_; $self->_debug("NOREF($k) means simple key=val: $k $self->{cmp} $v"); - my $sql = join ' ', $self->_convert($self->_quote($k)), - $self->_sqlcase($self->{cmp}), + my $sql = join ' ', $self->_convert($self->_quote($k)), + $self->_sqlcase($self->{cmp}), $self->_convert('?'); my @bind = $self->_bindtype($k, $v); return ( $sql, @bind); @@ -858,29 +1082,43 @@ sub _where_field_BETWEEN { $placeholder = $self->_convert('?'); $op = $self->_sqlcase($op); + my $invalid_args = "Operator '$op' requires either an arrayref with two defined values or expressions, or a single literal scalarref/arrayref-ref"; + my ($clause, @bind) = $self->_SWITCH_refkind($vals, { ARRAYREFREF => sub { - return @$$vals; + my ($s, @b) = @$$vals; + $self->_assert_bindval_matches_bindtype(@b); + ($s, @b); }, SCALARREF => sub { return $$vals; }, ARRAYREF => sub { - puke "special op 'between' accepts an arrayref with exactly two values" - if @$vals != 2; + puke $invalid_args if @$vals != 2; my (@all_sql, @all_bind); foreach my $val (@$vals) { my ($sql, @bind) = $self->_SWITCH_refkind($val, { SCALAR => sub { - return ($placeholder, ($val)); + return ($placeholder, $self->_bindtype($k, $val) ); }, SCALARREF => sub { - return ($self->_convert($$val), ()); + return $$val; }, ARRAYREFREF => sub { my ($sql, @bind) = @$$val; - return ($self->_convert($sql), @bind); + $self->_assert_bindval_matches_bindtype(@bind); + return ($sql, @bind); + }, + HASHREF => sub { + my ($func, $arg, @rest) = %$val; + puke ("Only simple { -func => arg } functions accepted as sub-arguments to BETWEEN") + if (@rest or $func !~ /^ \- (.+)/x); + local $self->{_nested_func_lhs} = $k; + $self->_where_unary_op ($1 => $arg); + }, + FALLBACK => sub { + puke $invalid_args, }, }); push @all_sql, $sql; @@ -889,11 +1127,11 @@ sub _where_field_BETWEEN { return ( (join $and, @all_sql), - $self->_bindtype($k, @all_bind), + @all_bind ); }, FALLBACK => sub { - puke "special op 'between' accepts an arrayref with two values, or a single literal scalarref/arrayref-ref"; + puke $invalid_args, }, }); @@ -915,11 +1153,49 @@ sub _where_field_IN { my ($sql, @bind) = $self->_SWITCH_refkind($vals, { ARRAYREF => sub { # list of choices if (@$vals) { # nonempty list - my $placeholders = join ", ", (($placeholder) x @$vals); - my $sql = "$label $op ( $placeholders )"; - my @bind = $self->_bindtype($k, @$vals); + my (@all_sql, @all_bind); + + for my $val (@$vals) { + my ($sql, @bind) = $self->_SWITCH_refkind($val, { + SCALAR => sub { + return ($placeholder, $val); + }, + SCALARREF => sub { + return $$val; + }, + ARRAYREFREF => sub { + my ($sql, @bind) = @$$val; + $self->_assert_bindval_matches_bindtype(@bind); + return ($sql, @bind); + }, + HASHREF => sub { + my ($func, $arg, @rest) = %$val; + puke ("Only simple { -func => arg } functions accepted as sub-arguments to IN") + if (@rest or $func !~ /^ \- (.+)/x); + local $self->{_nested_func_lhs} = $k; + $self->_where_unary_op ($1 => $arg); + }, + UNDEF => sub { + puke( + 'SQL::Abstract before v1.75 used to generate incorrect SQL when the ' + . "-$op operator was given an undef-containing list: !!!AUDIT YOUR CODE " + . 'AND DATA!!! (the upcoming Data::Query-based version of SQL::Abstract ' + . 'will emit the logically correct SQL instead of raising this exception)' + ); + }, + }); + push @all_sql, $sql; + push @all_bind, @bind; + } - return ($sql, @bind); + return ( + sprintf ('%s %s ( %s )', + $label, + $op, + join (', ', @all_sql) + ), + $self->_bindtype($k, @all_bind), + ); } else { # empty list : some databases won't understand "IN ()", so DWIM my $sql = ($op =~ /\bnot\b/i) ? $self->{sqltrue} : $self->{sqlfalse}; @@ -938,8 +1214,12 @@ sub _where_field_IN { return ("$label $op ( $sql )", @bind); }, + UNDEF => sub { + puke "Argument passed to the '$op' operator can not be undefined"; + }, + FALLBACK => sub { - puke "special op 'in' requires an arrayref (or scalarref/arrayref-ref)"; + puke "special op $op requires an arrayref (or scalarref/arrayref-ref)"; }, }); @@ -991,7 +1271,11 @@ sub _order_by_chunks { map { $self->_order_by_chunks ($_ ) } @$arg; }, - ARRAYREFREF => sub { [ @$$arg ] }, + ARRAYREFREF => sub { + my ($s, @b) = @$$arg; + $self->_assert_bindval_matches_bindtype(@b); + [ $s, @b ]; + }, SCALAR => sub {$self->_quote($arg)}, @@ -1001,11 +1285,11 @@ sub _order_by_chunks { HASHREF => sub { # get first pair in hash - my ($key, $val) = each %$arg; + my ($key, $val, @rest) = %$arg; return () unless $key; - if ( (keys %$arg) > 1 or not $key =~ /^-(desc|asc)/i ) { + if ( @rest or not $key =~ /^-(desc|asc)/i ) { puke "hash passed to _order_by must have exactly one key (-desc or -asc)"; } @@ -1046,7 +1330,6 @@ sub _table { ARRAYREF => sub {join ', ', map { $self->_quote($_) } @$from;}, SCALAR => sub {$self->_quote($from)}, SCALARREF => sub {$$from}, - ARRAYREFREF => sub {join ', ', @$from;}, }); } @@ -1055,80 +1338,67 @@ sub _table { # UTILITY FUNCTIONS #====================================================================== +# highly optimized, as it's called way too often sub _quote { - my $self = shift; - my $label = shift; + # my ($self, $label) = @_; - $label or puke "can't quote an empty label"; + return '' unless defined $_[1]; + return ${$_[1]} if ref($_[1]) eq 'SCALAR'; - # left and right quote characters - my ($ql, $qr, @other) = $self->_SWITCH_refkind($self->{quote_char}, { - SCALAR => sub {($self->{quote_char}, $self->{quote_char})}, - ARRAYREF => sub {@{$self->{quote_char}}}, - UNDEF => sub {()}, - }); - not @other - or puke "quote_char must be an arrayref of 2 values"; - - # no quoting if no quoting chars - $ql or return $label; - - # no quoting for literal SQL - return $$label if ref($label) eq 'SCALAR'; - - # separate table / column (if applicable) - my $sep = $self->{name_sep} || ''; - my @to_quote = $sep ? split /\Q$sep\E/, $label : ($label); + unless ($_[0]->{quote_char}) { + $_[0]->_assert_pass_injection_guard($_[1]); + return $_[1]; + } - # do the quoting, except for "*" or for `table`.* - my @quoted = map { $_ eq '*' ? $_: $ql.$_.$qr} @to_quote; + my $qref = ref $_[0]->{quote_char}; + my ($l, $r); + if (!$qref) { + ($l, $r) = ( $_[0]->{quote_char}, $_[0]->{quote_char} ); + } + elsif ($qref eq 'ARRAY') { + ($l, $r) = @{$_[0]->{quote_char}}; + } + else { + puke "Unsupported quote_char format: $_[0]->{quote_char}"; + } + my $esc = $_[0]->{escape_char} || $r; - # reassemble and return. - return join $sep, @quoted; + # parts containing * are naturally unquoted + return join( $_[0]->{name_sep}||'', map + { $_ eq '*' ? $_ : do { (my $n = $_) =~ s/(\Q$esc\E|\Q$r\E)/$esc$1/g; $l . $n . $r } } + ( $_[0]->{name_sep} ? split (/\Q$_[0]->{name_sep}\E/, $_[1] ) : $_[1] ) + ); } # Conversion, if applicable sub _convert ($) { - my ($self, $arg) = @_; - -# LDNOTE : modified the previous implementation below because -# it was not consistent : the first "return" is always an array, -# the second "return" is context-dependent. Anyway, _convert -# seems always used with just a single argument, so make it a -# scalar function. -# return @_ unless $self->{convert}; -# my $conv = $self->_sqlcase($self->{convert}); -# my @ret = map { $conv.'('.$_.')' } @_; -# return wantarray ? @ret : $ret[0]; - if ($self->{convert}) { - my $conv = $self->_sqlcase($self->{convert}); - $arg = $conv.'('.$arg.')'; + #my ($self, $arg) = @_; + if ($_[0]->{convert}) { + return $_[0]->_sqlcase($_[0]->{convert}) .'(' . $_[1] . ')'; } - return $arg; + return $_[1]; } # And bindtype sub _bindtype (@) { - my $self = shift; - my($col, @vals) = @_; - - #LDNOTE : changed original implementation below because it did not make - # sense when bindtype eq 'columns' and @vals > 1. -# return $self->{bindtype} eq 'columns' ? [ $col, @vals ] : @vals; - - return $self->{bindtype} eq 'columns' ? map {[$col, $_]} @vals : @vals; + #my ($self, $col, @vals) = @_; + # called often - tighten code + return $_[0]->{bindtype} eq 'columns' + ? map {[$_[1], $_]} @_[2 .. $#_] + : @_[2 .. $#_] + ; } # Dies if any element of @bind is not in [colname => value] format # if bindtype is 'columns'. sub _assert_bindval_matches_bindtype { - my ($self, @bind) = @_; - +# my ($self, @bind) = @_; + my $self = shift; if ($self->{bindtype} eq 'columns') { - foreach my $val (@bind) { - if (!defined $val || ref($val) ne 'ARRAY' || @$val != 2) { - die "bindtype 'columns' selected, you need to pass: [column_name => bind_value]" + for (@_) { + if (!defined $_ || ref($_) ne 'ARRAY' || @$_ != 2) { + puke "bindtype 'columns' selected, you need to pass: [column_name => bind_value]" } } } @@ -1153,11 +1423,9 @@ sub _join_sql_clauses { # Fix SQL case, if so requested sub _sqlcase { - my $self = shift; - # LDNOTE: if $self->{case} is true, then it contains 'lower', so we # don't touch the argument ... crooked logic, but let's not change it! - return $self->{case} ? $_[0] : uc($_[0]); + return $_[0]->{case} ? $_[1] : uc($_[1]); } @@ -1167,38 +1435,37 @@ sub _sqlcase { sub _refkind { my ($self, $data) = @_; - my $suffix = ''; - my $ref; - my $n_steps = 0; - while (1) { - # blessed objects are treated like scalars - $ref = (Scalar::Util::blessed $data) ? '' : ref $data; - $n_steps += 1 if $ref; - last if $ref ne 'REF'; - $data = $$data; - } + return 'UNDEF' unless defined $data; - my $base = $ref || (defined $data ? 'SCALAR' : 'UNDEF'); + # blessed objects are treated like scalars + my $ref = (Scalar::Util::blessed $data) ? '' : ref $data; - return $base . ('REF' x $n_steps); -} + return 'SCALAR' unless $ref; + my $n_steps = 1; + while ($ref eq 'REF') { + $data = $$data; + $ref = (Scalar::Util::blessed $data) ? '' : ref $data; + $n_steps++ if $ref; + } + return ($ref||'SCALAR') . ('REF' x $n_steps); +} sub _try_refkind { my ($self, $data) = @_; my @try = ($self->_refkind($data)); push @try, 'SCALAR_or_UNDEF' if $try[0] eq 'SCALAR' || $try[0] eq 'UNDEF'; push @try, 'FALLBACK'; - return @try; + return \@try; } sub _METHOD_FOR_refkind { my ($self, $meth_prefix, $data) = @_; my $method; - for ($self->_try_refkind($data)) { + for (@{$self->_try_refkind($data)}) { $method = $self->can($meth_prefix."_".$_) and last; } @@ -1211,7 +1478,7 @@ sub _SWITCH_refkind { my ($self, $data, $dispatch_table) = @_; my $coderef; - for ($self->_try_refkind($data)) { + for (@{$self->_try_refkind($data)}) { $coderef = $dispatch_table->{$_} and last; } @@ -1243,7 +1510,7 @@ sub values { foreach my $k ( sort keys %$data ) { my $v = $data->{$k}; $self->_SWITCH_refkind($v, { - ARRAYREF => sub { + ARRAYREF => sub { if ($self->{array_datatypes}) { # array datatype push @all_bind, $self->_bindtype($k, $v); } @@ -1290,7 +1557,7 @@ sub generate { } elsif ($r eq 'SCALAR') { # literal SQL without bind push @sqlq, "$label = $$v"; - } else { + } else { push @sqlq, "$label = ?"; push @sqlv, $self->_bindtype($k, $v); } @@ -1308,7 +1575,7 @@ sub generate { } elsif ($r eq 'SCALAR') { # literal SQL without bind # embedded literal SQL push @sqlq, $$v; - } else { + } else { push @sqlq, '?'; push @sqlv, $v; } @@ -1363,7 +1630,7 @@ SQL::Abstract - Generate SQL from Perl data structures my $sql = SQL::Abstract->new; - my($stmt, @bind) = $sql->select($table, \@fields, \%where, \@order); + my($stmt, @bind) = $sql->select($source, \@fields, \%where, \@order); my($stmt, @bind) = $sql->insert($table, \%fieldvals || \@values); @@ -1431,14 +1698,14 @@ These are then used directly in your DBI code: If your database has array types (like for example Postgres), activate the special option C<< array_datatypes => 1 >> -when creating the C object. +when creating the C object. Then you may use an arrayref to insert and update database array types: my $sql = SQL::Abstract->new(array_datatypes => 1); my %data = ( planets => [qw/Mercury Venus Earth Mars/] ); - + my($stmt, @bind) = $sql->insert('solar_system', \%data); This results in: @@ -1457,8 +1724,8 @@ say something like this: my %data = ( name => 'Bill', - date_entered => \["to_date(?,'MM/DD/YYYY')", "03/02/2003"], - ); + date_entered => \[ "to_date(?,'MM/DD/YYYY')", "03/02/2003" ], + ); The first value in the array is the actual SQL. Any other values are optional and would be included in the bind values array. This gives @@ -1466,7 +1733,7 @@ you: my($stmt, @bind) = $sql->insert('people', \%data); - $stmt = "INSERT INTO people (name, date_entered) + $stmt = "INSERT INTO people (name, date_entered) VALUES (?, to_date(?,'MM/DD/YYYY'))"; @bind = ('Bill', '03/02/2003'); @@ -1509,16 +1776,13 @@ Which you could then use in DBI code like so: Easy, eh? -=head1 FUNCTIONS +=head1 METHODS -The functions are simple. There's one for each major SQL operation, +The methods are simple. There's one for each major SQL operation, and a constructor you use first. The arguments are specified in a -similar order to each function (table, then fields, then a where +similar order to each method (table, then fields, then a where clause) to try and simplify things. - - - =head2 new(option => 'value') The C function takes a list of options and values, and returns @@ -1552,7 +1816,7 @@ C to C you would get SQL such as: WHERE name like 'nwiger' AND email like 'nate@wiger.org' -You can also override the comparsion on an individual basis - see +You can also override the comparison on an individual basis - see the huge section on L at the bottom. =item sqltrue, sqlfalse @@ -1570,8 +1834,8 @@ for arrays, and "and" for hashes. This means that a WHERE array of the form: @where = ( - event_date => {'>=', '2/13/99'}, - event_date => {'<=', '4/24/03'}, + event_date => {'>=', '2/13/99'}, + event_date => {'<=', '4/24/03'}, ); will generate SQL like this: @@ -1590,7 +1854,7 @@ Which will change the above C to: The logic can also be changed locally by inserting a modifier in front of an arrayref : - @where = (-and => [event_date => {'>=', '2/13/99'}, + @where = (-and => [event_date => {'>=', '2/13/99'}, event_date => {'<=', '4/24/03'} ]); See the L section for explanations. @@ -1661,14 +1925,14 @@ are or are not included. You could wrap that above C loop in a simple sub called C or something and reuse it repeatedly. You still get a layer of abstraction over manual SQL specification. -Note that if you set L to C, the C<\[$sql, @bind]> +Note that if you set L to C, the C<\[ $sql, @bind ]> construct (see L) will expect the bind values in this format. =item quote_char This is the character that a table or column name will be quoted -with. By default this is an empty string, but you could set it to +with. By default this is an empty string, but you could set it to the character C<`>, to generate SQL like this: SELECT `a_field` FROM `a_table` WHERE `some_field` LIKE '%someval%' @@ -1680,9 +1944,24 @@ that generates SQL like this: SELECT [a_field] FROM [a_table] WHERE [some_field] LIKE '%someval%' -Quoting is useful if you have tables or columns names that are reserved +Quoting is useful if you have tables or columns names that are reserved words in your database's SQL dialect. +=item escape_char + +This is the character that will be used to escape Ls appearing +in an identifier before it has been quoted. + +The paramter default in case of a single L character is the quote +character itself. + +When opening-closing-style quoting is used (L is an arrayref) +this parameter defaults to the B L. Occurences +of the B L within the identifier are currently left +untouched. The default for opening-closing-style quotes may change in future +versions, thus you are B to specify the escape character +explicitly. + =item name_sep This is the character that separates a table and column name. It is @@ -1691,10 +1970,24 @@ so that tables and column names can be individually quoted like this: SELECT `table`.`one_field` FROM `table` WHERE `table`.`other_field` = 1 +=item injection_guard + +A regular expression C that is applied to any C<-function> and unquoted +column name specified in a query structure. This is a safety mechanism to avoid +injection attacks when mishandling user input e.g.: + + my %condition_as_column_value_pairs = get_values_from_user(); + $sqla->select( ... , \%condition_as_column_value_pairs ); + +If the expression matches an exception is thrown. Note that literal SQL +supplied via C<\'...'> or C<\['...']> is B checked in any way. + +Defaults to checking for C<;> and the C keyword (TransactSQL) + =item array_datatypes -When this option is true, arrayrefs in INSERT or UPDATE are -interpreted as array datatypes and are passed directly +When this option is true, arrayrefs in INSERT or UPDATE are +interpreted as array datatypes and are passed directly to the DBI layer. When this option is false, arrayrefs are interpreted as literal SQL, just like refs to arrayrefs @@ -1705,13 +1998,13 @@ for literal SQL). =item special_ops -Takes a reference to a list of "special operators" +Takes a reference to a list of "special operators" to extend the syntax understood by L. See section L for details. =item unary_ops -Takes a reference to a list of "unary operators" +Takes a reference to a list of "unary operators" to extend the syntax understood by L. See section L for details. @@ -1756,14 +2049,14 @@ with those data types. =head2 select($source, $fields, $where, $order) -This returns a SQL SELECT statement and associated list of bind values, as +This returns a SQL SELECT statement and associated list of bind values, as specified by the arguments : =over =item $source -Specification of the 'FROM' part of the statement. +Specification of the 'FROM' part of the statement. The argument can be either a plain scalar (interpreted as a table name, will be quoted), or an arrayref (interpreted as a list of table names, joined by commas, quoted), or a scalarref @@ -1772,25 +2065,25 @@ of table names, joined by commas, quoted), or a scalarref =item $fields -Specification of the list of fields to retrieve from +Specification of the list of fields to retrieve from the source. The argument can be either an arrayref (interpreted as a list -of field names, will be joined by commas and quoted), or a +of field names, will be joined by commas and quoted), or a plain scalar (literal SQL, not quoted). -Please observe that this API is not as flexible as for -the first argument C<$table>, for backwards compatibility reasons. +Please observe that this API is not as flexible as that of +the first argument C<$source>, for backwards compatibility reasons. =item $where Optional argument to specify the WHERE part of the query. The argument is most often a hashref, but can also be -an arrayref or plain scalar -- +an arrayref or plain scalar -- see section L for details. =item $order Optional argument to specify the ORDER BY part of the query. -The argument can be a scalar, a hashref or an arrayref +The argument can be a scalar, a hashref or an arrayref -- see section L for details. @@ -1851,8 +2144,87 @@ Might give you: You get the idea. Strings get their case twiddled, but everything else remains verbatim. +=head1 EXPORTABLE FUNCTIONS + +=head2 is_plain_value + +Determines if the supplied argument is a plain value as understood by this +module: + +=over + +=item * The value is C + +=item * The value is a non-reference + +=item * The value is an object with stringification overloading + +=item * The value is of the form C<< { -value => $anything } >> + +=back + +On failure returns C, on sucess returns a B reference +to the original supplied argument. + +=over + +=item * Note +The stringification overloading detection is rather advanced: it takes +into consideration not only the presence of a C<""> overload, but if that +fails also checks for enabled +L|overload/Magic Autogeneration>, based +on either C<0+> or C. +Unfortunately testing in the field indicates that this +detection B<< may tickle a latent bug in perl versions before 5.018 >>, +but only when very large numbers of stringifying objects are involved. +At the time of writing ( Sep 2014 ) there is no clear explanation of +the direct cause, nor is there a manageably small test case that reliably +reproduces the problem. + +If you encounter any of the following exceptions in B - this module may be to blame: + + Operation "ne": no method found, + left argument in overloaded package , + right argument in overloaded package + +or perhaps even + + Stub found while resolving method "???" overloading """" in package + +If you fall victim to the above - please attempt to reduce the problem +to something that could be sent to the L +(either publicly or privately). As a workaround in the meantime you can +set C<$ENV{SQLA_ISVALUE_IGNORE_AUTOGENERATED_STRINGIFICATION}> to a true +value, which will most likely eliminate your problem (at the expense of +not being able to properly detect exotic forms of stringification). + +This notice and environment variable will be removed in a future version, +as soon as the underlying problem is found and a reliable workaround is +devised. + +=back + +=head2 is_literal_value + +Determines if the supplied argument is a literal value as understood by this +module: + +=over + +=item * C<\$sql_string> + +=item * C<\[ $sql_string, @bind_values ]> + +=item * C<< { -ident => $plain_defined_string } >> + +=back + +On failure returns C, on sucess returns an B reference +containing the unpacked version of the supplied literal SQL and bind values. =head1 WHERE CLAUSES @@ -1895,13 +2267,34 @@ an arrayref: ); This simple code will create the following: - + $stmt = "WHERE user = ? AND ( status = ? OR status = ? OR status = ? )"; @bind = ('nwiger', 'assigned', 'in-progress', 'pending'); -A field associated to an empty arrayref will be considered a +A field associated to an empty arrayref will be considered a logical false and will generate 0=1. +=head2 Tests for NULL values + +If the value part is C then this is converted to SQL + + my %where = ( + user => 'nwiger', + status => undef, + ); + +becomes: + + $stmt = "WHERE user = ? AND status IS NULL"; + @bind = ('nwiger'); + +To test if a column IS NOT NULL: + + my %where = ( + user => 'nwiger', + status => { '!=', undef }, + ); + =head2 Specific comparison operators If you want to specify a different type of operator for your comparison, @@ -1948,13 +2341,13 @@ To get an OR instead, you can combine it with the arrayref idea: my %where => ( user => 'nwiger', - priority => [ {'=', 2}, {'!=', 1} ] + priority => [ { '=', 2 }, { '>', 5 } ] ); Which would generate: - $stmt = "WHERE user = ? AND priority = ? OR priority != ?"; - @bind = ('nwiger', '2', '1'); + $stmt = "WHERE ( priority = ? OR priority > ? ) AND user = ?"; + @bind = ('2', '5', 'nwiger'); If you want to include literal SQL (with or without bind values), just use a scalar reference or array reference as the value: @@ -1985,7 +2378,7 @@ Because, in Perl you I do this: As the second C key will obliterate the first. The solution is to use the special C<-modifier> form inside an arrayref: - priority => [ -and => {'!=', 2}, + priority => [ -and => {'!=', 2}, {'!=', 1} ] @@ -2026,7 +2419,7 @@ Which would generate: $stmt = "WHERE status = ? AND reportid IN (?,?,?)"; @bind = ('completed', '567', '2335', '2'); -The reverse operator C<-not_in> generates SQL C and is used in +The reverse operator C<-not_in> generates SQL C and is used in the same way. If the argument to C<-in> is an empty array, 'sqlfalse' is generated @@ -2052,7 +2445,8 @@ would generate: )"; @bind = ('2000'); - +Finally, if the argument to C<-in> is not a reference, it will be +treated as a single-element array. Another pair of operators is C<-between> and C<-not_between>, used with an arrayref of two values: @@ -2075,7 +2469,7 @@ are possible: start0 => { -between => [ 1, 2 ] }, start1 => { -between => \["? AND ?", 1, 2] }, start2 => { -between => \"lower(x) AND upper(y)" }, - start3 => { -between => [ + start3 => { -between => [ \"lower(x)", \["upper(?)", 'stuff' ], ] }, @@ -2092,7 +2486,7 @@ Would give you: @bind = (1, 2, 1, 2, 'stuff'); -These are the two builtin "special operators"; but the +These are the two builtin "special operators"; but the list can be expanded : see section L below. =head2 Unary operators: bool @@ -2100,7 +2494,7 @@ list can be expanded : see section L below. If you wish to test against boolean columns or functions within your database you can use the C<-bool> and C<-not_bool> operators. For example to test the column C being true and the column - being false you would use:- +C being false you would use:- my %where = ( -bool => 'is_user', @@ -2117,15 +2511,19 @@ then you should use the and/or operators:- my %where = ( -and => [ -bool => 'one', - -bool => 'two', - -bool => 'three', - -not_bool => 'four', + -not_bool => { two=> { -rlike => 'bar' } }, + -not_bool => { three => [ { '=', 2 }, { '>', 5 } ] }, ], ); Would give you: - WHERE one AND two AND three AND NOT four + WHERE + one + AND + (NOT two RLIKE ?) + AND + (NOT ( three = ? OR three > ? )) =head2 Nested conditions, -and/-or prefixes @@ -2152,43 +2550,27 @@ This data structure would create the following: @bind = ('nwiger', 'pending', 'dispatched', 'robot', 'unassigned'); -There is also a special C<-nest> -operator which adds an additional set of parens, to create a subquery. -For example, to get something like this: - - $stmt = "WHERE user = ? AND ( workhrs > ? OR geo = ? )"; - @bind = ('nwiger', '20', 'ASIA'); - -You would do: - - my %where = ( - user => 'nwiger', - -nest => [ workhrs => {'>', 20}, geo => 'ASIA' ], - ); - - -Finally, clauses in hashrefs or arrayrefs can be -prefixed with an C<-and> or C<-or> to change the logic -inside : +Clauses in hashrefs or arrayrefs can be prefixed with an C<-and> or C<-or> +to change the logic inside : my @where = ( -and => [ user => 'nwiger', - -nest => [ - -and => [workhrs => {'>', 20}, geo => 'ASIA' ], - -and => [workhrs => {'<', 50}, geo => 'EURO' ] + [ + -and => [ workhrs => {'>', 20}, geo => 'ASIA' ], + -or => { workhrs => {'<', 50}, geo => 'EURO' }, ], ], ); That would yield: - WHERE ( user = ? AND - ( ( workhrs > ? AND geo = ? ) - OR ( workhrs < ? AND geo = ? ) ) ) + WHERE ( user = ? AND ( + ( workhrs > ? AND geo = ? ) + OR ( workhrs < ? OR geo = ? ) + ) ) - -=head2 Algebraic inconsistency, for historical reasons +=head3 Algebraic inconsistency, for historical reasons C: when connecting several conditions, the C<-and->|C<-or> operator goes C of the nested structure; whereas when connecting @@ -2203,68 +2585,103 @@ C the arrayref. Here is an example combining both features : yielding - WHERE ( ( ( a = ? AND b = ? ) - OR ( c = ? OR d = ? ) + WHERE ( ( ( a = ? AND b = ? ) + OR ( c = ? OR d = ? ) OR ( e LIKE ? AND e LIKE ? ) ) ) This difference in syntax is unfortunate but must be preserved for historical reasons. So be careful : the two examples below would seem algebraically equivalent, but they are not - {col => [-and => {-like => 'foo%'}, {-like => '%bar'}]} + {col => [-and => {-like => 'foo%'}, {-like => '%bar'}]} # yields : WHERE ( ( col LIKE ? AND col LIKE ? ) ) - [-and => {col => {-like => 'foo%'}, {col => {-like => '%bar'}}]] + [-and => {col => {-like => 'foo%'}, {col => {-like => '%bar'}}]] # yields : WHERE ( ( col LIKE ? OR col LIKE ? ) ) -=head2 Literal SQL +=head2 Literal SQL and value type operators + +The basic premise of SQL::Abstract is that in WHERE specifications the "left +side" is a column name and the "right side" is a value (normally rendered as +a placeholder). This holds true for both hashrefs and arrayref pairs as you +see in the L examples above. Sometimes it is necessary to +alter this behavior. There are several ways of doing so. -Finally, sometimes only literal SQL will do. If you want to include -literal SQL verbatim, you can specify it as a scalar reference, namely: +=head3 -ident + +This is a virtual operator that signals the string to its right side is an +identifier (a column name) and not a value. For example to compare two +columns you would write: - my $inn = 'is Not Null'; my %where = ( priority => { '<', 2 }, - requestor => \$inn + requestor => { -ident => 'submitter' }, ); -This would create: +which creates: - $stmt = "WHERE priority < ? AND requestor is Not Null"; + $stmt = "WHERE priority < ? AND requestor = submitter"; @bind = ('2'); -Note that in this example, you only get one bind parameter back, since -the verbatim SQL is passed as part of the statement. +If you are maintaining legacy code you may see a different construct as +described in L, please use C<-ident> in new +code. + +=head3 -value -Of course, just to prove a point, the above can also be accomplished -with this: +This is a virtual operator that signals that the construct to its right side +is a value to be passed to DBI. This is for example necessary when you want +to write a where clause against an array (for RDBMS that support such +datatypes). For example: my %where = ( - priority => { '<', 2 }, - requestor => { '!=', undef }, + array => { -value => [1, 2, 3] } ); +will result in: -TMTOWTDI + $stmt = 'WHERE array = ?'; + @bind = ([1, 2, 3]); -Conditions on boolean columns can be expressed in the same way, passing -a reference to an empty string, however using liternal SQL in this way -is deprecated - the preferred method is to use the boolean operators - -see L : +Note that if you were to simply say: my %where = ( - priority => { '<', 2 }, - is_ready => \""; + array => [1, 2, 3] ); -which yields +the result would probably not be what you wanted: - $stmt = "WHERE priority < ? AND is_ready"; - @bind = ('2'); + $stmt = 'WHERE array = ? OR array = ? OR array = ?'; + @bind = (1, 2, 3); + +=head3 Literal SQL + +Finally, sometimes only literal SQL will do. To include a random snippet +of SQL verbatim, you specify it as a scalar reference. Consider this only +as a last resort. Usually there is a better way. For example: + + my %where = ( + priority => { '<', 2 }, + requestor => { -in => \'(SELECT name FROM hitmen)' }, + ); + +Would create: + $stmt = "WHERE priority < ? AND requestor IN (SELECT name FROM hitmen)" + @bind = (2); -=head2 Literal SQL with placeholders and bind values (subqueries) +Note that in this example, you only get one bind parameter back, since +the verbatim SQL is passed as part of the statement. + +=head4 CAVEAT + + Never use untrusted input as a literal SQL argument - this is a massive + security risk (there is no way to check literal snippets for SQL + injections and other nastyness). If you need to deal with untrusted input + use literal SQL with placeholders as described next. + +=head3 Literal SQL with placeholders and bind values (subqueries) If the literal SQL to be inserted has placeholders and bind values, use a reference to an arrayref (yes this is a double reference -- @@ -2272,7 +2689,7 @@ not so common, but perfectly legal Perl). For example, to find a date in Postgres you can use something like this: my %where = ( - date_column => \[q/= date '2008-09-30' - ?::integer/, 10/] + date_column => \[ "= date '2008-09-30' - ?::integer", 10 ] ) This would create: @@ -2281,15 +2698,16 @@ This would create: @bind = ('10'); Note that you must pass the bind values in the same format as they are returned -by L. That means that if you set L to C, you must -provide the bind values in the C<< [ column_meta => value ] >> format, where -C is an opaque scalar value; most commonly the column name, but -you can use any scalar value (including references and blessed references), -L will simply pass it through intact. So if C is set -to C the above example will look like: +by L. This means that if you set L +to C, you must provide the bind values in the +C<< [ column_meta => value ] >> format, where C is an opaque +scalar value; most commonly the column name, but you can use any scalar value +(including references and blessed references), L will simply +pass it through intact. So if C is set to C the above +example will look like: my %where = ( - date_column => \[q/= date '2008-09-30' - ?::integer/, [ dummy => 10 ]/] + date_column => \[ "= date '2008-09-30' - ?::integer", [ {} => 10 ] ] ) Literal SQL is especially useful for nesting parenthesized clauses in the @@ -2304,17 +2722,17 @@ main SQL query. Here is a first example : This yields : - $stmt = "WHERE (foo = ? AND bar IN (SELECT c1 FROM t1 + $stmt = "WHERE (foo = ? AND bar IN (SELECT c1 FROM t1 WHERE c2 < ? AND c3 LIKE ?))"; @bind = (1234, 100, "foo%"); -Other subquery operators, like for example C<"E ALL"> or C<"NOT IN">, +Other subquery operators, like for example C<"E ALL"> or C<"NOT IN">, are expressed in the same way. Of course the C<$sub_stmt> and -its associated bind values can be generated through a former call +its associated bind values can be generated through a former call to C : my ($sub_stmt, @sub_bind) - = $sql->select("t1", "c1", {c2 => {"<" => 100}, + = $sql->select("t1", "c1", {c2 => {"<" => 100}, c3 => {-like => "foo%"}}); my %where = ( foo => 1234, @@ -2322,48 +2740,39 @@ to C : ); In the examples above, the subquery was used as an operator on a column; -but the same principle also applies for a clause within the main C<%where> +but the same principle also applies for a clause within the main C<%where> hash, like an EXISTS subquery : - my ($sub_stmt, @sub_bind) + my ($sub_stmt, @sub_bind) = $sql->select("t1", "*", {c1 => 1, c2 => \"> t0.c0"}); - my %where = ( + my %where = ( -and => [ foo => 1234, - -nest => \["EXISTS ($sub_stmt)" => @sub_bind], - ); + \["EXISTS ($sub_stmt)" => @sub_bind], + ]); which yields - $stmt = "WHERE (foo = ? AND EXISTS (SELECT * FROM t1 + $stmt = "WHERE (foo = ? AND EXISTS (SELECT * FROM t1 WHERE c1 = ? AND c2 > t0.c0))"; @bind = (1234, 1); -Observe that the condition on C in the subquery refers to -column C of the main query : this is I a bind -value, so we have to express it through a scalar ref. +Observe that the condition on C in the subquery refers to +column C of the main query : this is I a bind +value, so we have to express it through a scalar ref. Writing C<< c2 => {">" => "t0.c0"} >> would have generated C<< c2 > ? >> with bind value C<"t0.c0"> ... not exactly what we wanted here. -Another use of the subquery technique is when some SQL clauses need -parentheses, as it often occurs with some proprietary SQL extensions -like for example fulltext expressions, geospatial expressions, -NATIVE clauses, etc. Here is an example of a fulltext query in MySQL : - - my %where = ( - -nest => \["MATCH (col1, col2) AGAINST (?)" => qw/apples/] - ); - Finally, here is an example where a subquery is used for expressing unary negation: - my ($sub_stmt, @sub_bind) + my ($sub_stmt, @sub_bind) = $sql->where({age => [{"<" => 10}, {">" => 20}]}); $sub_stmt =~ s/^ where //i; # don't want "WHERE" in the subclause my %where = ( lname => {like => '%son%'}, - -nest => \["NOT ($sub_stmt)" => @sub_bind], + \["NOT ($sub_stmt)" => @sub_bind], ); This yields @@ -2371,7 +2780,47 @@ This yields $stmt = "lname LIKE ? AND NOT ( age < ? OR age > ? )" @bind = ('%son%', 10, 20) +=head3 Deprecated usage of Literal SQL +Below are some examples of archaic use of literal SQL. It is shown only as +reference for those who deal with legacy code. Each example has a much +better, cleaner and safer alternative that users should opt for in new code. + +=over + +=item * + + my %where = ( requestor => \'IS NOT NULL' ) + + $stmt = "WHERE requestor IS NOT NULL" + +This used to be the way of generating NULL comparisons, before the handling +of C got formalized. For new code please use the superior syntax as +described in L. + +=item * + + my %where = ( requestor => \'= submitter' ) + + $stmt = "WHERE requestor = submitter" + +This used to be the only way to compare columns. Use the superior L +method for all new code. For example an identifier declared in such a way +will be properly quoted if L is properly set, while the legacy +form will remain as supplied. + +=item * + + my %where = ( is_ready => \"", completed => { '>', '2012-12-21' } ) + + $stmt = "WHERE completed > ? AND is_ready" + @bind = ('2012-12-21') + +Using an empty string literal used to be the only way to express a boolean. +For all new code please use the much more readable +L<-bool|/Unary operators: bool> operator. + +=back =head2 Conclusion @@ -2388,12 +2837,9 @@ knew everything ahead of time, you wouldn't have to worry about dynamically-generating SQL and could just hardwire it into your script. - - - =head1 ORDER BY CLAUSES -Some functions take an order by clause. This can either be a scalar (just a +Some functions take an order by clause. This can either be a scalar (just a column name,) a hash of C<< { -desc => 'col' } >> or C<< { -asc => 'col' } >>, or an array of either of the two previous forms. Examples: @@ -2412,7 +2858,7 @@ or an array of either of the two previous forms. Examples: | ['colA', {-asc => 'colB'}] | ORDER BY colA, colB ASC | - { -asc => [qw/colA colB] } | ORDER BY colA ASC, colB ASC + { -asc => [qw/colA colB/] } | ORDER BY colA ASC, colB ASC | [ | { -asc => 'colA' }, | ORDER BY colA ASC, colB DESC, @@ -2439,9 +2885,9 @@ or an array of either of the two previous forms. Examples: }, ]); -A "special operator" is a SQL syntactic clause that can be +A "special operator" is a SQL syntactic clause that can be applied to a field, instead of a usual binary operator. -For example : +For example : WHERE field IN (?, ?, ?) WHERE field BETWEEN ? AND ? @@ -2485,13 +2931,13 @@ When supplied with a coderef, it is called as: =back -For example, here is an implementation +For example, here is an implementation of the MATCH .. AGAINST syntax for MySQL my $sqlmaker = SQL::Abstract->new(special_ops => [ - + # special op for MySql MATCH (field) AGAINST(word1, word2, ...) - {regex => qr/^match$/i, + {regex => qr/^match$/i, handler => sub { my ($self, $field, $op, $arg) = @_; $arg = [$arg] if not ref $arg; @@ -2504,7 +2950,7 @@ of the MATCH .. AGAINST syntax for MySQL return ($sql, @bind); } }, - + ]); @@ -2524,7 +2970,7 @@ of the MATCH .. AGAINST syntax for MySQL }, ]); -A "unary operator" is a SQL syntactic clause that can be +A "unary operator" is a SQL syntactic clause that can be applied to a field - the operator goes before the field You can write your own operator handlers - supply a C @@ -2586,6 +3032,12 @@ the same structure, you only have to generate the SQL the first time around. On subsequent queries, simply use the C function provided by this module to return your values in the correct order. +However this depends on the values having the same type - if, for +example, the values of a where clause may either have values +(resulting in sql of the form C with a single bind +value), or alternatively the values might be C (resulting in +sql of the form C with no bind value) then the +caching technique suggested will not work. =head1 FORMBUILDER @@ -2595,6 +3047,9 @@ can be as simple as the following: #!/usr/bin/perl + use warnings; + use strict; + use CGI::FormBuilder; use SQL::Abstract; @@ -2612,29 +3067,38 @@ query, but the point is that if you make your form look like your table, the actual query script can be extremely simplistic. If you're B lazy (I am), check out C for -a fast interface to returning and formatting data. I frequently +a fast interface to returning and formatting data. I frequently use these three modules together to write complex database query apps in under 50 lines. +=head1 REPO + +=over + +=item * gitweb: L + +=item * git: L + +=back =head1 CHANGES Version 1.50 was a major internal refactoring of C. Great care has been taken to preserve the I behavior documented in previous versions in the 1.* family; however, -some features that were previously undocumented, or behaved +some features that were previously undocumented, or behaved differently from the documentation, had to be changed in order to clarify the semantics. Hence, client code that was relying -on some dark areas of C v1.* +on some dark areas of C v1.* B in v1.50. The main changes are : =over -=item * +=item * -support for literal SQL through the C<< \ [$sql, bind] >> syntax. +support for literal SQL through the C<< \ [ $sql, @bind ] >> syntax. =item * @@ -2648,7 +3112,7 @@ support for the { operator => \["...", @bind] } construct (to embed literal SQL optional support for L -=item * +=item * defensive programming : check arguments @@ -2666,7 +3130,7 @@ as C<< "(cond1 AND cond2) OR (cond3 OR cond4)" >>. fixed semantics of _bindtype on array args -=item * +=item * dropped the C<_anoncopy> of the %where tree. No longer necessary, we just avoid shifting arrays within that tree. @@ -2677,15 +3141,13 @@ dropped the C<_modlogic> function =back - - =head1 ACKNOWLEDGEMENTS There are a number of individuals that have really helped out with this module. Unfortunately, most of them submitted bugs via CPAN so I have no idea who they are! But the people I do know are: - Ash Berlin (order_by hash term support) + Ash Berlin (order_by hash term support) Matt Trout (DBIx::Class support) Mark Stosberg (benchmarking) Chas Owens (initial "IN" operator support) @@ -2694,7 +3156,7 @@ so I have no idea who they are! But the people I do know are: Mike Fragassi (enhancements to "BETWEEN" and "LIKE") Dan Kubb (support for "quote_char" and "name_sep") Guillermo Roditi (patch to cleanup "IN" and "BETWEEN", fix and tests for _order_by) - Laurent Dami (internal refactoring, multiple -nest, extensible list of special operators, literal SQL) + Laurent Dami (internal refactoring, extensible list of special operators, literal SQL) Norbert Buchmuller (support for literal SQL in hashpair, misc. fixes & tests) Peter Rabbitson (rewrite of SQLA::Test, misc. fixes & tests) Oliver Charles (support for "RETURNING" after "INSERT")