X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract.pm;h=f8518deeeeb2be216383e8624102ecf18d7bb96c;hb=2af21cb852b6c6c629ce0eda1116fd366cea666b;hp=38634253de4bf7e721192efb5a0ebb2881b0f6ff;hpb=718d7732a02fd3bc3e20c0dda439d52b817b6188;p=dbsrgits%2FSQL-Abstract.git diff --git a/lib/SQL/Abstract.pm b/lib/SQL/Abstract.pm index 3863425..f8518de 100644 --- a/lib/SQL/Abstract.pm +++ b/lib/SQL/Abstract.pm @@ -1,21 +1,33 @@ package SQL::Abstract; # see doc at end of file -# LDNOTE : this code is heavy refactoring from original SQLA. -# Several design decisions will need discussion during -# the test / diffusion / acceptance phase; those are marked with flag -# 'LDNOTE' (note by laurent.dami AT free.fr) - -use Carp; use strict; use warnings; +use Carp (); use List::Util (); use Scalar::Util (); +use Exporter 'import'; +our @EXPORT_OK = qw(is_plain_value is_literal_value); + +BEGIN { + if ($] < 5.009_005) { + require MRO::Compat; + } + else { + require mro; + } + + *SQL::Abstract::_ENV_::DETECT_AUTOGENERATED_STRINGIFICATION = $ENV{SQLA_ISVALUE_IGNORE_AUTOGENERATED_STRINGIFICATION} + ? sub () { 0 } + : sub () { 1 } + ; +} + #====================================================================== # GLOBALS #====================================================================== -our $VERSION = '1.65_02'; +our $VERSION = '1.87'; # This would confuse some packagers $VERSION = eval $VERSION if $VERSION =~ /_/; # numify for warning-free dev releases @@ -25,17 +37,9 @@ our $AUTOLOAD; # special operators (-in, -between). May be extended/overridden by user. # See section WHERE: BUILTIN SPECIAL OPERATORS below for implementation my @BUILTIN_SPECIAL_OPS = ( - {regex => qr/^(not )?between$/i, handler => '_where_field_BETWEEN'}, - {regex => qr/^(not )?in$/i, handler => '_where_field_IN'}, -); - -# unaryish operators - key maps to handler -my @BUILTIN_UNARY_OPS = ( - # the digits are backcompat stuff - { regex => qr/^and (?: \s? \d+ )? 
$/xi, handler => '_where_op_ANDOR' }, - { regex => qr/^or (?: \s? \d+ )? $/xi, handler => '_where_op_ANDOR' }, - { regex => qr/^nest (?: \s? \d+ )? $/xi, handler => '_where_op_NEST' }, - { regex => qr/^ (?: not \s )? bool $/xi, handler => '_where_op_BOOL' }, + {regex => qr/^ (?: not \s )? between $/ix, handler => sub { die "NOPE" }}, + {regex => qr/^ (?: not \s )? in $/ix, handler => sub { die "NOPE" }}, + {regex => qr/^ is (?: \s+ not )? $/ix, handler => sub { die "NOPE" }}, ); #====================================================================== @@ -50,14 +54,72 @@ sub _debug { sub belch (@) { my($func) = (caller(1))[3]; - carp "[$func] Warning: ", @_; + Carp::carp "[$func] Warning: ", @_; } sub puke (@) { my($func) = (caller(1))[3]; - croak "[$func] Fatal: ", @_; + Carp::croak "[$func] Fatal: ", @_; } +sub is_literal_value ($) { + ref $_[0] eq 'SCALAR' ? [ ${$_[0]} ] + : ( ref $_[0] eq 'REF' and ref ${$_[0]} eq 'ARRAY' ) ? [ @${ $_[0] } ] + : undef; +} + +# FIXME XSify - this can be done so much more efficiently +sub is_plain_value ($) { + no strict 'refs'; + ! length ref $_[0] ? \($_[0]) + : ( + ref $_[0] eq 'HASH' and keys %{$_[0]} == 1 + and + exists $_[0]->{-value} + ) ? 
\($_[0]->{-value}) + : ( + # reuse @_ for even moar speedz + defined ( $_[1] = Scalar::Util::blessed $_[0] ) + and + # deliberately not using Devel::OverloadInfo - the checks we are + # intersted in are much more limited than the fullblown thing, and + # this is a very hot piece of code + ( + # simply using ->can('(""') can leave behind stub methods that + # break actually using the overload later (see L and the source of overload::mycan()) + # + # either has stringification which DBI SHOULD prefer out of the box + grep { *{ (qq[${_}::(""]) }{CODE} } @{ $_[2] = mro::get_linear_isa( $_[1] ) } + or + # has nummification or boolification, AND fallback is *not* disabled + ( + SQL::Abstract::_ENV_::DETECT_AUTOGENERATED_STRINGIFICATION + and + ( + grep { *{"${_}::(0+"}{CODE} } @{$_[2]} + or + grep { *{"${_}::(bool"}{CODE} } @{$_[2]} + ) + and + ( + # no fallback specified at all + ! ( ($_[3]) = grep { *{"${_}::()"}{CODE} } @{$_[2]} ) + or + # fallback explicitly undef + ! defined ${"$_[3]::()"} + or + # explicitly true + !! ${"$_[3]::()"} + ) + ) + ) + ) ? \($_[0]) + : undef; +} + + #====================================================================== # NEW @@ -75,17 +137,18 @@ sub new { $opt{logic} = $opt{logic} ? uc $opt{logic} : 'OR'; # how to return bind vars - # LDNOTE: changed nwiger code : why this 'delete' ?? 
- # $opt{bindtype} ||= delete($opt{bind_type}) || 'normal'; $opt{bindtype} ||= 'normal'; # default comparison is "=", but can be overridden $opt{cmp} ||= '='; - # try to recognize which are the 'equality' and 'unequality' ops - # (temporary quickfix, should go through a more seasoned API) - $opt{equality_op} = qr/^(\Q$opt{cmp}\E|is|(is\s+)?like)$/i; - $opt{inequality_op} = qr/^(!=|<>|(is\s+)?not(\s+like)?)$/i; + # try to recognize which are the 'equality' and 'inequality' ops + # (temporary quickfix (in 2007), should go through a more seasoned API) + $opt{equality_op} = qr/^( \Q$opt{cmp}\E | \= )$/ix; + $opt{inequality_op} = qr/^( != | <> )$/ix; + + $opt{like_op} = qr/^ (is\s+)? r?like $/xi; + $opt{not_like_op} = qr/^ (is\s+)? not \s+ r?like $/xi; # SQL booleans $opt{sqltrue} ||= '1=1'; @@ -93,15 +156,48 @@ sub new { # special operators $opt{special_ops} ||= []; + + # regexes are applied in order, thus push after user-defines push @{$opt{special_ops}}, @BUILTIN_SPECIAL_OPS; # unary operators $opt{unary_ops} ||= []; - push @{$opt{unary_ops}}, @BUILTIN_UNARY_OPS; + + # rudimentary sanity-check for user supplied bits treated as functions/operators + # If a purported function matches this regular expression, an exception is thrown. + # Literal SQL is *NOT* subject to this check, only functions (and column names + # when quoting is not in effect) + + # FIXME + # need to guard against ()'s in column names too, but this will break tons of + # hacks... ideas anyone? + $opt{injection_guard} ||= qr/ + \; + | + ^ \s* go \s + /xmi; + + $opt{node_types} = +{ + map +("-$_" => '_render_'.$_), + qw(op func value bind ident literal) + }; + + $opt{expand_unary} = {}; return bless \%opt, $class; } +sub sqltrue { +{ -literal => [ $_[0]->{sqltrue} ] } } +sub sqlfalse { +{ -literal => [ $_[0]->{sqlfalse} ] } } + +sub _assert_pass_injection_guard { + if ($_[1] =~ $_[0]->{injection_guard}) { + my $class = ref $_[0]; + puke "Possible SQL injection attempt '$_[1]'. 
If this is indeed a part of the " + . "desired SQL use literal SQL ( \'...' or \[ '...' ] ) or supply your own " + . "{injection_guard} attribute to ${class}->new()" + } +} #====================================================================== @@ -118,22 +214,30 @@ sub insert { my ($sql, @bind) = $self->$method($data); $sql = join " ", $self->_sqlcase('insert into'), $table, $sql; - if (my $ret = $options->{returning}) { - $sql .= $self->_insert_returning ($ret); + if ($options->{returning}) { + my ($s, @b) = $self->_insert_returning($options); + $sql .= $s; + push @bind, @b; } return wantarray ? ($sql, @bind) : $sql; } -sub _insert_returning { - my ($self, $fields) = @_; +# So that subclasses can override INSERT ... RETURNING separately from +# UPDATE and DELETE (e.g. DBIx::Class::SQLMaker::Oracle does this) +sub _insert_returning { shift->_returning(@_) } + +sub _returning { + my ($self, $options) = @_; - my $f = $self->_SWITCH_refkind($fields, { - ARRAYREF => sub {join ', ', map { $self->_quote($_) } @$fields;}, - SCALAR => sub {$self->_quote($fields)}, - SCALARREF => sub {$$fields}, - }); - return join (' ', $self->_sqlcase(' returning'), $f); + my $f = $options->{returning}; + + my ($sql, @bind) = $self->_render_expr( + $self->_expand_maybe_list_expr($f, undef, -ident) + ); + return wantarray + ? $self->_sqlcase(' returning ') . $sql + : ($self->_sqlcase(' returning ').$sql, @bind); } sub _insert_HASHREF { # explicit list of fields and then values @@ -157,13 +261,14 @@ sub _insert_ARRAYREF { # just generate values(?,?) 
part (no list of fields) $self->{bindtype} ne 'columns' or belch "can't do 'columns' bindtype when called with arrayref"; - # fold the list of values into a hash of column name - value pairs - # (where the column names are artificially generated, and their - # lexicographical ordering keep the ordering of the original list) - my $i = "a"; # incremented values will be in lexicographical order - my $data_in_hash = { map { ($i++ => $_) } @$data }; - - return $self->_insert_values($data_in_hash); + my (@values, @all_bind); + foreach my $value (@$data) { + my ($values, @bind) = $self->_insert_value(undef, $value); + push @values, $values; + push @all_bind, @bind; + } + my $sql = $self->_sqlcase('values')." ( ".join(", ", @values)." )"; + return ($sql, @all_bind); } sub _insert_ARRAYREFREF { # literal SQL with bind @@ -187,53 +292,44 @@ sub _insert_values { my (@values, @all_bind); foreach my $column (sort keys %$data) { - my $v = $data->{$column}; - - $self->_SWITCH_refkind($v, { - - ARRAYREF => sub { - if ($self->{array_datatypes}) { # if array datatype are activated - push @values, '?'; - push @all_bind, $self->_bindtype($column, $v); - } - else { # else literal SQL with bind - my ($sql, @bind) = @$v; - $self->_assert_bindval_matches_bindtype(@bind); - push @values, $sql; - push @all_bind, @bind; - } - }, - - ARRAYREFREF => sub { # literal SQL with bind - my ($sql, @bind) = @${$v}; - $self->_assert_bindval_matches_bindtype(@bind); - push @values, $sql; - push @all_bind, @bind; - }, - - # THINK : anything useful to do with a HASHREF ? - HASHREF => sub { # (nothing, but old SQLA passed it through) - #TODO in SQLA >= 2.0 it will die instead - belch "HASH ref as bind value in insert is not supported"; - push @values, '?'; - push @all_bind, $self->_bindtype($column, $v); - }, + my ($values, @bind) = $self->_insert_value($column, $data->{$column}); + push @values, $values; + push @all_bind, @bind; + } + my $sql = $self->_sqlcase('values')." ( ".join(", ", @values)." 
)"; + return ($sql, @all_bind); +} - SCALARREF => sub { # literal SQL without bind - push @values, $$v; - }, +sub _insert_value { + my ($self, $column, $v) = @_; - SCALAR_or_UNDEF => sub { - push @values, '?'; - push @all_bind, $self->_bindtype($column, $v); - }, + return $self->_render_expr( + $self->_expand_insert_value($column, $v) + ); +} - }); +sub _expand_insert_value { + my ($self, $column, $v) = @_; + if (ref($v) eq 'ARRAY') { + if ($self->{array_datatypes}) { + return +{ -bind => [ $column, $v ] }; + } + my ($sql, @bind) = @$v; + $self->_assert_bindval_matches_bindtype(@bind); + return +{ -literal => $v }; } - - my $sql = $self->_sqlcase('values')." ( ".join(", ", @values)." )"; - return ($sql, @all_bind); + if (ref($v) eq 'HASH') { + if (grep !/^-/, keys %$v) { + belch "HASH ref as bind value in insert is not supported"; + return +{ -bind => [ $column, $v ] }; + } + } + if (!defined($v)) { + return +{ -bind => [ $column, undef ] }; + } + local our $Cur_Col_Meta = $column; + return $self->_expand_expr($v); } @@ -244,53 +340,19 @@ sub _insert_values { sub update { - my $self = shift; - my $table = $self->_table(shift); - my $data = shift || return; - my $where = shift; + my $self = shift; + my $table = $self->_table(shift); + my $data = shift || return; + my $where = shift; + my $options = shift; # first build the 'SET' part of the sql statement - my (@set, @all_bind); puke "Unsupported data type specified to \$sql->update" unless ref $data eq 'HASH'; - for my $k (sort keys %$data) { - my $v = $data->{$k}; - my $r = ref $v; - my $label = $self->_quote($k); - - $self->_SWITCH_refkind($v, { - ARRAYREF => sub { - if ($self->{array_datatypes}) { # array datatype - push @set, "$label = ?"; - push @all_bind, $self->_bindtype($k, $v); - } - else { # literal SQL with bind - my ($sql, @bind) = @$v; - $self->_assert_bindval_matches_bindtype(@bind); - push @set, "$label = $sql"; - push @all_bind, @bind; - } - }, - ARRAYREFREF => sub { # literal SQL with bind - my 
($sql, @bind) = @${$v}; - $self->_assert_bindval_matches_bindtype(@bind); - push @set, "$label = $sql"; - push @all_bind, @bind; - }, - SCALARREF => sub { # literal SQL without bind - push @set, "$label = $$v"; - }, - SCALAR_or_UNDEF => sub { - push @set, "$label = ?"; - push @all_bind, $self->_bindtype($k, $v); - }, - }); - } - - # generate sql - my $sql = $self->_sqlcase('update') . " $table " . $self->_sqlcase('set ') - . join ', ', @set; + my ($sql, @all_bind) = $self->_update_set_values($data); + $sql = $self->_sqlcase('update ') . $table . $self->_sqlcase(' set ') + . $sql; if ($where) { my($where_sql, @where_bind) = $self->where($where); @@ -298,9 +360,50 @@ sub update { push @all_bind, @where_bind; } + if ($options->{returning}) { + my ($returning_sql, @returning_bind) = $self->_update_returning($options); + $sql .= $returning_sql; + push @all_bind, @returning_bind; + } + return wantarray ? ($sql, @all_bind) : $sql; } +sub _update_set_values { + my ($self, $data) = @_; + + return $self->_render_expr( + $self->_expand_update_set_values($data), + ); +} + +sub _expand_update_set_values { + my ($self, $data) = @_; + $self->_expand_maybe_list_expr( [ + map { + my ($k, $set) = @$_; + $set = { -bind => $_ } unless defined $set; + +{ -op => [ '=', { -ident => $k }, $set ] }; + } + map { + my $k = $_; + my $v = $data->{$k}; + (ref($v) eq 'ARRAY' + ? ($self->{array_datatypes} + ? [ $k, +{ -bind => [ $k, $v ] } ] + : [ $k, +{ -literal => $v } ]) + : do { + local our $Cur_Col_Meta = $k; + [ $k, $self->_expand_expr($v) ] + } + ); + } sort keys %$data + ] ); +} + +# So that subclasses can override UPDATE ... RETURNING separately from +# INSERT and DELETE +sub _update_returning { shift->_returning(@_) } @@ -316,34 +419,54 @@ sub select { my $where = shift; my $order = shift; - my($where_sql, @bind) = $self->where($where, $order); + my ($fields_sql, @bind) = $self->_select_fields($fields); - my $f = (ref $fields eq 'ARRAY') ? 
join ', ', map { $self->_quote($_) } @$fields - : $fields; - my $sql = join(' ', $self->_sqlcase('select'), $f, + my ($where_sql, @where_bind) = $self->where($where, $order); + push @bind, @where_bind; + + my $sql = join(' ', $self->_sqlcase('select'), $fields_sql, $self->_sqlcase('from'), $table) . $where_sql; return wantarray ? ($sql, @bind) : $sql; } +sub _select_fields { + my ($self, $fields) = @_; + return $fields unless ref($fields); + return $self->_render_expr( + $self->_expand_maybe_list_expr($fields, undef, '-ident') + ); +} + #====================================================================== # DELETE #====================================================================== sub delete { - my $self = shift; - my $table = $self->_table(shift); - my $where = shift; - + my $self = shift; + my $table = $self->_table(shift); + my $where = shift; + my $options = shift; my($where_sql, @bind) = $self->where($where); - my $sql = $self->_sqlcase('delete from') . " $table" . $where_sql; + my $sql = $self->_sqlcase('delete from ') . $table . $where_sql; + + if ($options->{returning}) { + my ($returning_sql, @returning_bind) = $self->_delete_returning($options); + $sql .= $returning_sql; + push @bind, @returning_bind; + } return wantarray ? ($sql, @bind) : $sql; } +# So that subclasses can override DELETE ... RETURNING separately from +# INSERT and UPDATE +sub _delete_returning { shift->_returning(@_) } + + #====================================================================== # WHERE: entry point @@ -355,587 +478,536 @@ sub delete { sub where { my ($self, $where, $order) = @_; + local $self->{convert_where} = $self->{convert}; + # where ? - my ($sql, @bind) = $self->_recurse_where($where); - $sql = $sql ? $self->_sqlcase(' where ') . "( $sql )" : ''; + my ($sql, @bind) = defined($where) + ? $self->_recurse_where($where) + : (undef); + $sql = (defined $sql and length $sql) ? $self->_sqlcase(' where ') . "( $sql )" : ''; # order by? 
if ($order) { - $sql .= $self->_order_by($order); + my ($order_sql, @order_bind) = $self->_order_by($order); + $sql .= $order_sql; + push @bind, @order_bind; } return wantarray ? ($sql, @bind) : $sql; } - -sub _recurse_where { - my ($self, $where, $logic) = @_; - - # dispatch on appropriate method according to refkind of $where - my $method = $self->_METHOD_FOR_refkind("_where", $where); - - my ($sql, @bind) = $self->$method($where, $logic); - - # DBIx::Class directly calls _recurse_where in scalar context, so - # we must implement it, even if not in the official API - return wantarray ? ($sql, @bind) : $sql; -} - - - -#====================================================================== -# WHERE: top-level ARRAYREF -#====================================================================== - - -sub _where_ARRAYREF { - my ($self, $where, $logic) = @_; - - $logic = uc($logic || $self->{logic}); - $logic eq 'AND' or $logic eq 'OR' or puke "unknown logic: $logic"; - - my @clauses = @$where; - - my (@sql_clauses, @all_bind); - # need to use while() so can shift() for pairs - while (my $el = shift @clauses) { - - # switch according to kind of $el and get corresponding ($sql, @bind) - my ($sql, @bind) = $self->_SWITCH_refkind($el, { - - # skip empty elements, otherwise get invalid trailing AND stuff - ARRAYREF => sub {$self->_recurse_where($el) if @$el}, - - ARRAYREFREF => sub { @{${$el}} if @{${$el}}}, - - HASHREF => sub {$self->_recurse_where($el, 'and') if %$el}, - # LDNOTE : previous SQLA code for hashrefs was creating a dirty - # side-effect: the first hashref within an array would change - # the global logic to 'AND'. So [ {cond1, cond2}, [cond3, cond4] ] - # was interpreted as "(cond1 AND cond2) OR (cond3 AND cond4)", - # whereas it should be "(cond1 AND cond2) OR (cond3 OR cond4)". 
- - SCALARREF => sub { ($$el); }, - - SCALAR => sub {# top-level arrayref with scalars, recurse in pairs - $self->_recurse_where({$el => shift(@clauses)})}, - - UNDEF => sub {puke "not supported : UNDEF in arrayref" }, - }); - - if ($sql) { - push @sql_clauses, $sql; - push @all_bind, @bind; +sub _expand_expr { + my ($self, $expr, $logic, $default_scalar_to) = @_; + local our $Default_Scalar_To = $default_scalar_to if $default_scalar_to; + return undef unless defined($expr); + if (ref($expr) eq 'HASH') { + if (keys %$expr > 1) { + $logic ||= 'and'; + return +{ -op => [ + $logic, + map $self->_expand_expr_hashpair($_ => $expr->{$_}, $logic), + sort keys %$expr + ] }; } + return unless %$expr; + return $self->_expand_expr_hashpair(%$expr, $logic); } - - return $self->_join_sql_clauses($logic, \@sql_clauses, \@all_bind); -} - -#====================================================================== -# WHERE: top-level ARRAYREFREF -#====================================================================== - -sub _where_ARRAYREFREF { - my ($self, $where) = @_; - my ($sql, @bind) = @{${$where}}; - - return ($sql, @bind); -} - -#====================================================================== -# WHERE: top-level HASHREF -#====================================================================== - -sub _where_HASHREF { - my ($self, $where) = @_; - my (@sql_clauses, @all_bind); - - for my $k (sort keys %$where) { - my $v = $where->{$k}; - - # ($k => $v) is either a special unary op or a regular hashpair - my ($sql, @bind) = do { - if ($k =~ /^-./) { - # put the operator in canonical form - my $op = $k; - $op =~ s/^-//; # remove initial dash - $op =~ s/[_\t ]+/ /g; # underscores and whitespace become single spaces - $op =~ s/^\s+|\s+$//g;# remove leading/trailing space - - $self->_debug("Unary OP(-$op) within hashref, recursing..."); - - my $op_entry = List::Util::first {$op =~ $_->{regex}} @{$self->{unary_ops}}; - if (my $handler = $op_entry->{handler}) { - if (not ref 
$handler) { - if ($op =~ s/\s?\d+$//) { - belch 'Use of [and|or|nest]_N modifiers is deprecated and will be removed in SQLA v2.0. ' - . "You probably wanted ...-and => [ -$op => COND1, -$op => COND2 ... ]"; - } - $self->$handler ($op, $v); - } - elsif (ref $handler eq 'CODE') { - $handler->($self, $op, $v); - } - else { - puke "Illegal handler for operator $k - expecting a method name or a coderef"; - } - } - else { - $self->debug("Generic unary OP: $k - recursing as function"); - my ($sql, @bind) = $self->_where_func_generic ($op, $v); - $sql = "($sql)" unless (defined($self->{_nested_func_lhs}) && ($self->{_nested_func_lhs} eq $k)); # top level vs nested - ($sql, @bind); - } + if (ref($expr) eq 'ARRAY') { + my $logic = lc($logic || $self->{logic}); + $logic eq 'and' or $logic eq 'or' or puke "unknown logic: $logic"; + + my @expr = @$expr; + + my @res; + + while (my ($el) = splice @expr, 0, 1) { + puke "Supplying an empty left hand side argument is not supported in array-pairs" + unless defined($el) and length($el); + my $elref = ref($el); + if (!$elref) { + push(@res, $self->_expand_expr({ $el, shift(@expr) })); + } elsif ($elref eq 'ARRAY') { + push(@res, $self->_expand_expr($el)) if @$el; + } elsif (my $l = is_literal_value($el)) { + push @res, { -literal => $l }; + } elsif ($elref eq 'HASH') { + push @res, $self->_expand_expr($el); + } else { + die "notreached"; } - else { - my $method = $self->_METHOD_FOR_refkind("_where_hashpair", $v); - $self->$method($k, $v); - } - }; - - push @sql_clauses, $sql; - push @all_bind, @bind; - } - - return $self->_join_sql_clauses('and', \@sql_clauses, \@all_bind); -} - -sub _where_func_generic { - my ($self, $op, $rhs) = @_; - - my ($sql, @bind) = $self->_SWITCH_refkind ($rhs, { - SCALAR => sub { - puke "Illegal use of top-level '$op'" - unless $self->{_nested_func_lhs}; - - return ( - $self->_convert('?'), - $self->_bindtype($self->{_nested_func_lhs}, $rhs) - ); - }, - FALLBACK => sub { - $self->_recurse_where ($rhs) - }, - 
}); - - $sql = sprintf ('%s %s', - $self->_sqlcase($op), - $sql, - ); - - return ($sql, @bind); -} - -sub _where_op_ANDOR { - my ($self, $op, $v) = @_; - - $self->_SWITCH_refkind($v, { - ARRAYREF => sub { - return $self->_where_ARRAYREF($v, $op); - }, - - HASHREF => sub { - return ( $op =~ /^or/i ) - ? $self->_where_ARRAYREF( [ map { $_ => $v->{$_} } ( sort keys %$v ) ], $op ) - : $self->_where_HASHREF($v); - }, - - SCALARREF => sub { - puke "-$op => \\\$scalar not supported, use -nest => ..."; - }, - - ARRAYREFREF => sub { - puke "-$op => \\[..] not supported, use -nest => ..."; - }, - - SCALAR => sub { # permissively interpreted as SQL - puke "-$op => 'scalar' not supported, use -nest => \\'scalar'"; - }, - - UNDEF => sub { - puke "-$op => undef not supported"; - }, - }); -} - -sub _where_op_NEST { - my ($self, $op, $v) = @_; - - $self->_SWITCH_refkind($v, { - - SCALAR => sub { # permissively interpreted as SQL - belch "literal SQL should be -nest => \\'scalar' " - . "instead of -nest => 'scalar' "; - return ($v); - }, - - UNDEF => sub { - puke "-$op => undef not supported"; - }, - - FALLBACK => sub { - $self->_recurse_where ($v); - }, - - }); -} - - -sub _where_op_BOOL { - my ($self, $op, $v) = @_; - - my ( $prefix, $suffix ) = ( $op =~ /\bnot\b/i ) - ? ( '(NOT ', ')' ) - : ( '', '' ); - - my ($sql, @bind) = do { - $self->_SWITCH_refkind($v, { - SCALAR => sub { # interpreted as SQL column - $self->_convert($self->_quote($v)); - }, - - UNDEF => sub { - puke "-$op => undef not supported"; - }, - - FALLBACK => sub { - $self->_recurse_where ($v); - }, - }); - }; - - return ( - join ('', $prefix, $sql, $suffix), - @bind, - ); -} - - -sub _where_hashpair_ARRAYREF { - my ($self, $k, $v) = @_; - - if( @$v ) { - my @v = @$v; # need copy because of shift below - $self->_debug("ARRAY($k) means distribute over elements"); - - # put apart first element if it is an operator (-and, -or) - my $op = ( - (defined $v[0] && $v[0] =~ /^ - (?: AND|OR ) $/ix) - ? 
shift @v - : '' - ); - my @distributed = map { {$k => $_} } @v; - - if ($op) { - $self->_debug("OP($op) reinjected into the distributed array"); - unshift @distributed, $op; } - - my $logic = $op ? substr($op, 1) : ''; - - return $self->_recurse_where(\@distributed, $logic); + return { -op => [ $logic, @res ] }; } - else { - # LDNOTE : not sure of this one. What does "distribute over nothing" mean? - $self->_debug("empty ARRAY($k) means 0=1"); - return ($self->{sqlfalse}); + if (my $literal = is_literal_value($expr)) { + return +{ -literal => $literal }; } + if (!ref($expr) or Scalar::Util::blessed($expr)) { + if (my $d = $Default_Scalar_To) { + return +{ $d => $expr }; + } + if (my $m = our $Cur_Col_Meta) { + return +{ -bind => [ $m, $expr ] }; + } + return +{ -value => $expr }; + } + die "notreached"; } -sub _where_hashpair_HASHREF { +sub _expand_expr_hashpair { my ($self, $k, $v, $logic) = @_; - $logic ||= 'and'; - - local $self->{_nested_func_lhs} = $self->{_nested_func_lhs}; - - my ($all_sql, @all_bind); - - for my $orig_op (sort keys %$v) { - my $val = $v->{$orig_op}; - - # put the operator in canonical form - my $op = $orig_op; - $op =~ s/^-//; # remove initial dash - $op =~ s/[_\t ]+/ /g; # underscores and whitespace become single spaces - $op =~ s/^\s+|\s+$//g;# remove leading/trailing space - - my ($sql, @bind); - - # CASE: col-value logic modifiers - if ( $orig_op =~ /^ \- (and|or) $/xi ) { - ($sql, @bind) = $self->_where_hashpair_HASHREF($k, $val, $1); + unless (defined($k) and length($k)) { + if (defined($k) and my $literal = is_literal_value($v)) { + belch 'Hash-pairs consisting of an empty string with a literal are deprecated, and will be removed in 2.0: use -and => [ $literal ] instead'; + return { -literal => $literal }; + } + puke "Supplying an empty left hand side argument is not supported"; + } + if ($k =~ /^-/) { + $self->_assert_pass_injection_guard($k =~ /^-(.*)$/s); + if ($k =~ s/ [_\s]? 
\d+ $//x ) { + belch 'Use of [and|or|nest]_N modifiers is deprecated and will be removed in SQLA v2.0. ' + . "You probably wanted ...-and => [ $k => COND1, $k => COND2 ... ]"; } - # CASE: special operators like -in or -between - elsif ( my $special_op = List::Util::first {$op =~ $_->{regex}} @{$self->{special_ops}} ) { - my $handler = $special_op->{handler}; - if (! $handler) { - puke "No handler supplied for special operator $orig_op"; + if ($k eq '-nest') { + return $self->_expand_expr($v); + } + if ($k eq '-bool') { + if (ref($v)) { + return $self->_expand_expr($v); } - elsif (not ref $handler) { - ($sql, @bind) = $self->$handler ($k, $op, $val); + puke "-bool => undef not supported" unless defined($v); + return { -ident => $v }; + } + if ($k eq '-not') { + return { -op => [ 'not', $self->_expand_expr($v) ] }; + } + if (my ($rest) = $k =~/^-not[_ ](.*)$/) { + return +{ -op => [ + 'not', + $self->_expand_expr_hashpair("-${rest}", $v, $logic) + ] }; + } + if (my ($logic) = $k =~ /^-(and|or)$/i) { + if (ref($v) eq 'HASH') { + return $self->_expand_expr($v, $logic); } - elsif (ref $handler eq 'CODE') { - ($sql, @bind) = $handler->($self, $k, $op, $val); + if (ref($v) eq 'ARRAY') { + return $self->_expand_expr($v, $logic); } - else { - puke "Illegal handler for special operator $orig_op - expecting a method name or a coderef"; + } + { + my $op = $k; + $op =~ s/^-// if length($op) > 1; + + # top level special ops are illegal in general + puke "Illegal use of top-level '-$op'" + if List::Util::first { $op =~ $_->{regex} } @{$self->{special_ops}}; + if (my $us = List::Util::first { $op =~ $_->{regex} } @{$self->{unary_ops}}) { + return { -op => [ $op, $v ] }; } } - else { - $self->_SWITCH_refkind($val, { - - ARRAYREF => sub { # CASE: col => {op => \@vals} - ($sql, @bind) = $self->_where_field_op_ARRAYREF($k, $op, $val); - }, - - ARRAYREFREF => sub { # CASE: col => {op => \[$sql, @bind]} (literal SQL with bind) - my ($sub_sql, @sub_bind) = @$$val; - 
$self->_assert_bindval_matches_bindtype(@sub_bind); - $sql = join ' ', $self->_convert($self->_quote($k)), - $self->_sqlcase($op), - $sub_sql; - @bind = @sub_bind; - }, - - UNDEF => sub { # CASE: col => {op => undef} : sql "IS (NOT)? NULL" - my $is = ($op =~ $self->{equality_op}) ? 'is' : - ($op =~ $self->{inequality_op}) ? 'is not' : - puke "unexpected operator '$orig_op' with undef operand"; - $sql = $self->_quote($k) . $self->_sqlcase(" $is null"); - }, - - FALLBACK => sub { # CASE: col => {op/func => $stuff} - - # retain for proper column type bind - $self->{_nested_func_lhs} ||= $k; - - ($sql, @bind) = $self->_where_func_generic ($op, $val); - - $sql = join (' ', - $self->_convert($self->_quote($k)), - $self->{_nested_func_lhs} eq $k ? $sql : "($sql)", # top level vs nested - ); - }, - }); + if ($k eq '-value' and my $m = our $Cur_Col_Meta) { + return +{ -bind => [ $m, $v ] }; + } + if (my $custom = $self->{expand_unary}{$k}) { + return $self->$custom($v); + } + if ($self->{node_types}{$k}) { + return { $k => $v }; + } + if ( + ref($v) eq 'HASH' + and keys %$v == 1 + and (keys %$v)[0] =~ /^-/ + ) { + my ($func) = $k =~ /^-(.*)$/; + return +{ -func => [ $func, $self->_expand_expr($v) ] }; + } + if (!ref($v) or is_literal_value($v)) { + return +{ -op => [ $k =~ /^-(.*)$/, $self->_expand_expr($v) ] }; } - - ($all_sql) = (defined $all_sql and $all_sql) ? $self->_join_sql_clauses($logic, [$all_sql, $sql], []) : $sql; - push @all_bind, @bind; } - return ($all_sql, @all_bind); -} - - - -sub _where_field_op_ARRAYREF { - my ($self, $k, $op, $vals) = @_; - - my @vals = @$vals; #always work on a copy - - if(@vals) { - $self->_debug(sprintf '%s means multiple elements: [ %s ]', - $vals, - join (', ', map { defined $_ ? 
"'$_'" : 'NULL' } @vals ), - ); - - # see if the first element is an -and/-or op - my $logic; - if (defined $vals[0] && $vals[0] =~ /^ - ( AND|OR ) $/ix) { - $logic = uc $1; - shift @vals; + if ( + !defined($v) + or ( + ref($v) eq 'HASH' + and exists $v->{-value} + and not defined $v->{-value} + ) + ) { + return $self->_expand_expr_hashpair($k => { $self->{cmp} => undef }); + } + if (!ref($v) or Scalar::Util::blessed($v)) { + my $d = our $Default_Scalar_To; + return +{ + -op => [ + $self->{cmp}, + { -ident => $k }, + ($d ? { $d => $v } : { -bind => [ $k, $v ] }) + ] + }; + } + if (ref($v) eq 'HASH') { + if (keys %$v > 1) { + return { -op => [ + 'and', + map $self->_expand_expr_hashpair($k => { $_ => $v->{$_} }), + sort keys %$v + ] }; } - - # distribute $op over each remaining member of @vals, append logic if exists - return $self->_recurse_where([map { {$k => {$op, $_}} } @vals], $logic); - - # LDNOTE : had planned to change the distribution logic when - # $op =~ $self->{inequality_op}, because of Morgan laws : - # with {field => {'!=' => [22, 33]}}, it would be ridiculous to generate - # WHERE field != 22 OR field != 33 : the user probably means - # WHERE field != 22 AND field != 33. - # To do this, replace the above to roughly : - # my $logic = ($op =~ $self->{inequality_op}) ? 'AND' : 'OR'; - # return $self->_recurse_where([map { {$k => {$op, $_}} } @vals], $logic); - + my ($vk, $vv) = %$v; + $vk =~ s/^-//; + $vk = lc($vk); + $self->_assert_pass_injection_guard($vk); + if ($vk =~ s/ [_\s]? \d+ $//x ) { + belch 'Use of [and|or|nest]_N modifiers is deprecated and will be removed in SQLA v2.0. ' + . "You probably wanted ...-and => [ -$vk => COND1, -$vk => COND2 ... ]"; + } + if ($vk =~ /^(?:not[ _])?between$/) { + local our $Cur_Col_Meta = $k; + my @rhs = map $self->_expand_expr($_), + ref($vv) eq 'ARRAY' ? 
@$vv : $vv; + unless ( + (@rhs == 1 and ref($rhs[0]) eq 'HASH' and $rhs[0]->{-literal}) + or + (@rhs == 2 and defined($rhs[0]) and defined($rhs[1])) + ) { + puke "Operator '${\uc($vk)}' requires either an arrayref with two defined values or expressions, or a single literal scalarref/arrayref-ref"; + } + return +{ -op => [ + join(' ', split '_', $vk), + { -ident => $k }, + @rhs + ] } + } + if ($vk =~ /^(?:not[ _])?in$/) { + if (my $literal = is_literal_value($vv)) { + my ($sql, @bind) = @$literal; + my $opened_sql = $self->_open_outer_paren($sql); + return +{ -op => [ + $vk, { -ident => $k }, + [ { -literal => [ $opened_sql, @bind ] } ] + ] }; + } + my $undef_err = + 'SQL::Abstract before v1.75 used to generate incorrect SQL when the ' + . "-${\uc($vk)} operator was given an undef-containing list: !!!AUDIT YOUR CODE " + . 'AND DATA!!! (the upcoming Data::Query-based version of SQL::Abstract ' + . 'will emit the logically correct SQL instead of raising this exception)' + ; + puke("Argument passed to the '${\uc($vk)}' operator can not be undefined") + if !defined($vv); + my @rhs = map $self->_expand_expr($_), + map { ref($_) ? $_ : { -bind => [ $k, $_ ] } } + map { defined($_) ? $_: puke($undef_err) } + (ref($vv) eq 'ARRAY' ? @$vv : $vv); + return $self->${\($vk =~ /^not/ ? 'sqltrue' : 'sqlfalse')} unless @rhs; + + return +{ -op => [ + join(' ', split '_', $vk), + { -ident => $k }, + \@rhs + ] }; + } + if ($vk eq 'ident') { + if (! 
defined $vv or (ref($vv) and ref($vv) eq 'ARRAY')) { + puke "-$vk requires a single plain scalar argument (a quotable identifier) or an arrayref of identifier parts"; + } + return +{ -op => [ + $self->{cmp}, + { -ident => $k }, + { -ident => $vv } + ] }; + } + if ($vk eq 'value') { + return $self->_expand_expr_hashpair($k, undef) unless defined($vv); + return +{ -op => [ + $self->{cmp}, + { -ident => $k }, + { -bind => [ $k, $vv ] } + ] }; + } + if ($vk =~ /^is(?:[ _]not)?$/) { + puke "$vk can only take undef as argument" + if defined($vv) + and not ( + ref($vv) eq 'HASH' + and exists($vv->{-value}) + and !defined($vv->{-value}) + ); + $vk =~ s/_/ /g; + return +{ -op => [ $vk.' null', { -ident => $k } ] }; + } + if ($vk =~ /^(and|or)$/) { + if (ref($vv) eq 'HASH') { + return +{ -op => [ + $vk, + map $self->_expand_expr_hashpair($k, { $_ => $vv->{$_} }), + sort keys %$vv + ] }; + } + } + if (my $us = List::Util::first { $vk =~ $_->{regex} } @{$self->{special_ops}}) { + return { -op => [ $vk, { -ident => $k }, $vv ] }; + } + if (my $us = List::Util::first { $vk =~ $_->{regex} } @{$self->{unary_ops}}) { + return { -op => [ + $self->{cmp}, + { -ident => $k }, + { -op => [ $vk, $vv ] } + ] }; + } + if (ref($vv) eq 'ARRAY') { + my ($logic, @values) = ( + (defined($vv->[0]) and $vv->[0] =~ /^-(and|or)$/i) + ? @$vv + : (-or => @$vv) + ); + if ( + $vk =~ $self->{inequality_op} + or join(' ', split '_', $vk) =~ $self->{not_like_op} + ) { + if (lc($logic) eq '-or' and @values > 1) { + my $op = uc join ' ', split '_', $vk; + belch "A multi-element arrayref as an argument to the inequality op '$op' " + . 'is technically equivalent to an always-true 1=1 (you probably wanted ' + . "to say ...{ \$inequality_op => [ -and => \@values ] }... instead)" + ; + } + } + unless (@values) { + # try to DWIM on equality operators + my $op = join ' ', split '_', $vk; + return + $op =~ $self->{equality_op} ? $self->sqlfalse + : $op =~ $self->{like_op} ? 
belch("Supplying an empty arrayref to '@{[ uc $op]}' is deprecated") && $self->sqlfalse + : $op =~ $self->{inequality_op} ? $self->sqltrue + : $op =~ $self->{not_like_op} ? belch("Supplying an empty arrayref to '@{[ uc $op]}' is deprecated") && $self->sqltrue + : puke "operator '$op' applied on an empty array (field '$k')"; + } + return +{ -op => [ + $logic =~ /^-(.*)$/, + map $self->_expand_expr_hashpair($k => { $vk => $_ }), + @values + ] }; + } + if ( + !defined($vv) + or ( + ref($vv) eq 'HASH' + and exists $vv->{-value} + and not defined $vv->{-value} + ) + ) { + my $op = join ' ', split '_', $vk; + my $is = + $op =~ /^not$/i ? 'is not' # legacy + : $op =~ $self->{equality_op} ? 'is' + : $op =~ $self->{like_op} ? belch("Supplying an undefined argument to '@{[ uc $op]}' is deprecated") && 'is' + : $op =~ $self->{inequality_op} ? 'is not' + : $op =~ $self->{not_like_op} ? belch("Supplying an undefined argument to '@{[ uc $op]}' is deprecated") && 'is not' + : puke "unexpected operator '$op' with undef operand"; + return +{ -op => [ $is.' null', { -ident => $k } ] }; + } + local our $Cur_Col_Meta = $k; + return +{ -op => [ + $vk, + { -ident => $k }, + $self->_expand_expr($vv) + ] }; } - else { - # try to DWIM on equality operators - # LDNOTE : not 100% sure this is the correct thing to do ... - return ($self->{sqlfalse}) if $op =~ $self->{equality_op}; - return ($self->{sqltrue}) if $op =~ $self->{inequality_op}; - - # otherwise - puke "operator '$op' applied on an empty array (field '$k')"; + if (ref($v) eq 'ARRAY') { + return $self->sqlfalse unless @$v; + $self->_debug("ARRAY($k) means distribute over elements"); + my $this_logic = ( + $v->[0] =~ /^-((?:and|or))$/i + ? 
($v = [ @{$v}[1..$#$v] ], $1) + : ($self->{logic} || 'or') + ); + return +{ -op => [ + $this_logic, + map $self->_expand_expr({ $k => $_ }, $this_logic), @$v + ] }; + } + if (my $literal = is_literal_value($v)) { + unless (length $k) { + belch 'Hash-pairs consisting of an empty string with a literal are deprecated, and will be removed in 2.0: use -and => [ $literal ] instead'; + return \$literal; + } + my ($sql, @bind) = @$literal; + if ($self->{bindtype} eq 'columns') { + for (@bind) { + if (!defined $_ || ref($_) ne 'ARRAY' || @$_ != 2) { + puke "bindtype 'columns' selected, you need to pass: [column_name => bind_value]" + } + } + } + return +{ -literal => [ $self->_quote($k).' '.$sql, @bind ] }; } + die "notreached"; } - -sub _where_hashpair_SCALARREF { - my ($self, $k, $v) = @_; - $self->_debug("SCALAR($k) means literal SQL: $$v"); - my $sql = $self->_quote($k) . " " . $$v; - return ($sql); +sub _render_expr { + my ($self, $expr) = @_; + my ($k, $v, @rest) = %$expr; + die "No" if @rest; + if (my $meth = $self->{node_types}{$k}) { + return $self->$meth($v); + } + die "notreached: $k"; } -# literal SQL with bind -sub _where_hashpair_ARRAYREFREF { - my ($self, $k, $v) = @_; - $self->_debug("REF($k) means literal SQL: @${$v}"); - my ($sql, @bind) = @${$v}; - $self->_assert_bindval_matches_bindtype(@bind); - $sql = $self->_quote($k) . " " . 
$sql; - return ($sql, @bind ); -} +sub _recurse_where { + my ($self, $where, $logic) = @_; -# literal SQL without bind -sub _where_hashpair_SCALAR { - my ($self, $k, $v) = @_; - $self->_debug("NOREF($k) means simple key=val: $k $self->{cmp} $v"); - my $sql = join ' ', $self->_convert($self->_quote($k)), - $self->_sqlcase($self->{cmp}), - $self->_convert('?'); - my @bind = $self->_bindtype($k, $v); - return ( $sql, @bind); -} +#print STDERR Data::Dumper::Concise::Dumper([ $where, $logic ]); + my $where_exp = $self->_expand_expr($where, $logic); -sub _where_hashpair_UNDEF { - my ($self, $k, $v) = @_; - $self->_debug("UNDEF($k) means IS NULL"); - my $sql = $self->_quote($k) . $self->_sqlcase(' is null'); - return ($sql); -} +#print STDERR Data::Dumper::Concise::Dumper([ EXP => $where_exp ]); -#====================================================================== -# WHERE: TOP-LEVEL OTHERS (SCALARREF, SCALAR, UNDEF) -#====================================================================== + # dispatch on appropriate method according to refkind of $where +# my $method = $self->_METHOD_FOR_refkind("_where", $where_exp); +# my ($sql, @bind) = $self->$method($where_exp, $logic); -sub _where_SCALARREF { - my ($self, $where) = @_; + my ($sql, @bind) = defined($where_exp) ? $self->_render_expr($where_exp) : (undef); - # literal sql - $self->_debug("SCALAR(*top) means literal SQL: $$where"); - return ($$where); + # DBIx::Class used to call _recurse_where in scalar context + # something else might too... 
+ if (wantarray) { + return ($sql, @bind); + } + else { + belch "Calling _recurse_where in scalar context is deprecated and will go away before 2.0"; + return $sql; + } } +sub _render_ident { + my ($self, $ident) = @_; -sub _where_SCALAR { - my ($self, $where) = @_; - - # literal sql - $self->_debug("NOREF(*top) means literal SQL: $where"); - return ($where); + return $self->_convert($self->_quote($ident)); } +sub _render_value { + my ($self, $value) = @_; -sub _where_UNDEF { - my ($self) = @_; - return (); + return ($self->_convert('?'), $self->_bindtype(undef, $value)); } - -#====================================================================== -# WHERE: BUILTIN SPECIAL OPERATORS (-in, -between) -#====================================================================== - - -sub _where_field_BETWEEN { - my ($self, $k, $op, $vals) = @_; - - my ($label, $and, $placeholder); - $label = $self->_convert($self->_quote($k)); - $and = ' ' . $self->_sqlcase('and') . ' '; - $placeholder = $self->_convert('?'); - $op = $self->_sqlcase($op); - - my ($clause, @bind) = $self->_SWITCH_refkind($vals, { - ARRAYREFREF => sub { - return @$$vals; - }, - SCALARREF => sub { - return $$vals; - }, - ARRAYREF => sub { - puke "special op 'between' accepts an arrayref with exactly two values" - if @$vals != 2; - - my (@all_sql, @all_bind); - foreach my $val (@$vals) { - my ($sql, @bind) = $self->_SWITCH_refkind($val, { - SCALAR => sub { - return ($placeholder, ($val)); - }, - SCALARREF => sub { - return ($self->_convert($$val), ()); - }, - ARRAYREFREF => sub { - my ($sql, @bind) = @$$val; - return ($self->_convert($sql), @bind); - }, - }); - push @all_sql, $sql; - push @all_bind, @bind; - } - +my %unop_postfix = map +($_ => 1), + 'is null', 'is not null', + 'asc', 'desc', +; + +my %special = ( + (map +($_ => do { + my $op = $_; + sub { + my ($self, $args) = @_; + my ($left, $low, $high) = @$args; + my ($rhsql, @rhbind) = do { + if (@$args == 2) { + puke "Single arg to between must be a 
literal" + unless $low->{-literal}; + @{$low->{-literal}} + } else { + my ($l, $h) = map [ $self->_render_expr($_) ], $low, $high; + (join(' ', $l->[0], $self->_sqlcase('and'), $h->[0]), + @{$l}[1..$#$l], @{$h}[1..$#$h]) + } + }; + my ($lhsql, @lhbind) = $self->_render_expr($left); return ( - (join $and, @all_sql), - $self->_bindtype($k, @all_bind), + join(' ', '(', $lhsql, $self->_sqlcase($op), $rhsql, ')'), + @lhbind, @rhbind ); - }, - FALLBACK => sub { - puke "special op 'between' accepts an arrayref with two values, or a single literal scalarref/arrayref-ref"; - }, - }); - - my $sql = "( $label $op $clause )"; - return ($sql, @bind) -} - - -sub _where_field_IN { - my ($self, $k, $op, $vals) = @_; - - # backwards compatibility : if scalar, force into an arrayref - $vals = [$vals] if defined $vals && ! ref $vals; + } + }), 'between', 'not between'), + (map +($_ => do { + my $op = $_; + sub { + my ($self, $args) = @_; + my ($lhs, $rhs) = @$args; + my @in_bind; + my @in_sql = map { + my ($sql, @bind) = $self->_render_expr($_); + push @in_bind, @bind; + $sql; + } @$rhs; + my ($lhsql, @lbind) = $self->_render_expr($lhs); + return ( + $lhsql.' '.$self->_sqlcase($op).' ( ' + .join(', ', @in_sql) + .' 
)', + @lbind, @in_bind + ); + } + }), 'in', 'not in'), +); - my ($label) = $self->_convert($self->_quote($k)); - my ($placeholder) = $self->_convert('?'); - $op = $self->_sqlcase($op); +sub _render_op { + my ($self, $v) = @_; + my ($op, @args) = @$v; + $op =~ s/^-// if length($op) > 1; + $op = lc($op); + if (my $h = $special{$op}) { + return $self->$h(\@args); + } + if (my $us = List::Util::first { $op =~ $_->{regex} } @{$self->{special_ops}}) { + puke "Special op '${op}' requires first value to be identifier" + unless my ($k) = map $_->{-ident}, grep ref($_) eq 'HASH', $args[0]; + return $self->${\($us->{handler})}($k, $op, $args[1]); + } + if (my $us = List::Util::first { $op =~ $_->{regex} } @{$self->{unary_ops}}) { + return $self->${\($us->{handler})}($op, $args[0]); + } + my $final_op = $op =~ /^(?:is|not)_/ ? join(' ', split '_', $op) : $op; + if (@args == 1 and $op !~ /^(and|or)$/) { + my ($expr_sql, @bind) = $self->_render_expr($args[0]); + my $op_sql = $self->_sqlcase($final_op); + my $final_sql = ( + $unop_postfix{lc($final_op)} + ? "${expr_sql} ${op_sql}" + : "${op_sql} ${expr_sql}" + ); + return (($op eq 'not' ? '('.$final_sql.')' : $final_sql), @bind); + } else { + my @parts = map [ $self->_render_expr($_) ], @args; + my ($final_sql) = map +($op =~ /^(and|or)$/ ? "(${_})" : $_), join( + ($final_op eq ',' ? '' : ' ').$self->_sqlcase($final_op).' 
', + map $_->[0], @parts + ); + return ( + $final_sql, + map @{$_}[1..$#$_], @parts + ); + } + die "unhandled"; +} - my ($sql, @bind) = $self->_SWITCH_refkind($vals, { - ARRAYREF => sub { # list of choices - if (@$vals) { # nonempty list - my $placeholders = join ", ", (($placeholder) x @$vals); - my $sql = "$label $op ( $placeholders )"; - my @bind = $self->_bindtype($k, @$vals); +sub _render_func { + my ($self, $rest) = @_; + my ($func, @args) = @$rest; + my @arg_sql; + my @bind = map { + my @x = @$_; + push @arg_sql, shift @x; + @x + } map [ $self->_render_expr($_) ], @args; + return ($self->_sqlcase($func).'('.join(', ', @arg_sql).')', @bind); +} - return ($sql, @bind); - } - else { # empty list : some databases won't understand "IN ()", so DWIM - my $sql = ($op =~ /\bnot\b/i) ? $self->{sqltrue} : $self->{sqlfalse}; - return ($sql); - } - }, - - SCALARREF => sub { # literal SQL - my $sql = $self->_open_outer_paren ($$vals); - return ("$label $op ( $sql )"); - }, - ARRAYREFREF => sub { # literal SQL with bind - my ($sql, @bind) = @$$vals; - $self->_assert_bindval_matches_bindtype(@bind); - $sql = $self->_open_outer_paren ($sql); - return ("$label $op ( $sql )", @bind); - }, - - FALLBACK => sub { - puke "special op 'in' requires an arrayref (or scalarref/arrayref-ref)"; - }, - }); +sub _render_bind { + my ($self, $bind) = @_; + return ($self->_convert('?'), $self->_bindtype(@$bind)); +} - return ($sql, @bind); +sub _render_literal { + my ($self, $literal) = @_; + $self->_assert_bindval_matches_bindtype(@{$literal}[1..$#$literal]); + return @$literal; } # Some databases (SQLite) treat col IN (1, 2) different from @@ -943,90 +1015,93 @@ sub _where_field_IN { # adding them back in the corresponding method sub _open_outer_paren { my ($self, $sql) = @_; - $sql = $1 while $sql =~ /^ \s* \( (.*) \) \s* $/xs; - return $sql; -} + while (my ($inner) = $sql =~ /^ \s* \( (.*) \) \s* $/xs) { -#====================================================================== -# ORDER BY 
-#====================================================================== + # there are closing parens inside, need the heavy duty machinery + # to reevaluate the extraction starting from $sql (full reevaluation) + if ($inner =~ /\)/) { + require Text::Balanced; -sub _order_by { - my ($self, $arg) = @_; + my (undef, $remainder) = do { + # idiotic design - writes to $@ but *DOES NOT* throw exceptions + local $@; + Text::Balanced::extract_bracketed($sql, '()', qr/\s*/); + }; - my (@sql, @bind); - for my $c ($self->_order_by_chunks ($arg) ) { - $self->_SWITCH_refkind ($c, { - SCALAR => sub { push @sql, $c }, - ARRAYREF => sub { push @sql, shift @$c; push @bind, @$c }, - }); - } + # the entire expression needs to be a balanced bracketed thing + # (after an extract no remainder sans trailing space) + last if defined $remainder and $remainder =~ /\S/; + } - my $sql = @sql - ? sprintf ('%s %s', - $self->_sqlcase(' order by'), - join (', ', @sql) - ) - : '' - ; + $sql = $inner; + } - return wantarray ? ($sql, @bind) : $sql; + $sql; } -sub _order_by_chunks { - my ($self, $arg) = @_; - - return $self->_SWITCH_refkind($arg, { - ARRAYREF => sub { - map { $self->_order_by_chunks ($_ ) } @$arg; - }, +#====================================================================== +# ORDER BY +#====================================================================== - ARRAYREFREF => sub { [ @$$arg ] }, +sub _expand_order_by { + my ($self, $arg) = @_; - SCALAR => sub {$self->_quote($arg)}, + return unless defined($arg) and not (ref($arg) eq 'ARRAY' and !@$arg); + + my $expander = sub { + my ($self, $dir, $expr) = @_; + my @to_expand = ref($expr) eq 'ARRAY' ? @$expr : $expr; + foreach my $arg (@to_expand) { + if ( + ref($arg) eq 'HASH' + and keys %$arg > 1 + and grep /^-(asc|desc)$/, keys %$arg + ) { + puke "ordering direction hash passed to order by must have exactly one key (-asc or -desc)"; + } + } + my @exp = map +(defined($dir) ? 
{ -op => [ $dir => $_ ] } : $_), + map $self->_expand_expr($_, undef, -ident), @to_expand; + return (@exp > 1 ? { -op => [ ',', @exp ] } : $exp[0]); + }; - UNDEF => sub {return () }, + local @{$self->{expand_unary}}{qw(-asc -desc)} = ( + sub { shift->$expander(asc => @_) }, + sub { shift->$expander(desc => @_) }, + ); - SCALARREF => sub {$$arg}, # literal SQL, no quoting + return $self->$expander(undef, $arg); +} - HASHREF => sub { - # get first pair in hash - my ($key, $val) = each %$arg; +sub _order_by { + my ($self, $arg) = @_; - return () unless $key; + return '' unless defined(my $expanded = $self->_expand_order_by($arg)); - if ( (keys %$arg) > 1 or not $key =~ /^-(desc|asc)/i ) { - puke "hash passed to _order_by must have exactly one key (-desc or -asc)"; - } + my ($sql, @bind) = $self->_render_expr($expanded); - my $direction = $1; + my $final_sql = $self->_sqlcase(' order by ').$sql; - my @ret; - for my $c ($self->_order_by_chunks ($val)) { - my ($sql, @bind); + return wantarray ? ($final_sql, @bind) : $final_sql; +} - $self->_SWITCH_refkind ($c, { - SCALAR => sub { - $sql = $c; - }, - ARRAYREF => sub { - ($sql, @bind) = @$c; - }, - }); +sub _order_by_chunks { + my ($self, $arg) = @_; - $sql = $sql . ' ' . 
$self->_sqlcase($direction); + return () unless defined(my $expanded = $self->_expand_order_by($arg)); - push @ret, [ $sql, @bind]; + for ($expanded) { + if (ref() eq 'HASH' and my $op = $_->{-op}) { + if ($op->[0] eq ',') { + return map [ $self->_render_expr($_) ], @{$op}[1..$#$op]; } - - return @ret; - }, - }); + } + return [ $self->_render_expr($_) ]; + } } - #====================================================================== # DATASOURCE (FOR NOW, JUST PLAIN TABLE OR LIST OF TABLES) #====================================================================== @@ -1034,12 +1109,9 @@ sub _order_by_chunks { sub _table { my $self = shift; my $from = shift; - $self->_SWITCH_refkind($from, { - ARRAYREF => sub {join ', ', map { $self->_quote($_) } @$from;}, - SCALAR => sub {$self->_quote($from)}, - SCALARREF => sub {$$from}, - ARRAYREFREF => sub {join ', ', @$from;}, - }); + ($self->_render_expr( + $self->_expand_maybe_list_expr($from, undef, -ident) + ))[0]; } @@ -1047,62 +1119,78 @@ sub _table { # UTILITY FUNCTIONS #====================================================================== +sub _expand_maybe_list_expr { + my ($self, $expr, $logic, $default) = @_; + my $e = do { + if (ref($expr) eq 'ARRAY') { + return { -op => [ + ',', map $self->_expand_expr($_, $logic, $default), @$expr + ] } if @$expr > 1; + $expr->[0] + } else { + $expr + } + }; + return $self->_expand_expr($e, $logic, $default); +} + # highly optimized, as it's called way too often sub _quote { # my ($self, $label) = @_; return '' unless defined $_[1]; return ${$_[1]} if ref($_[1]) eq 'SCALAR'; + puke 'Identifier cannot be hashref' if ref($_[1]) eq 'HASH'; - return $_[1] unless $_[0]->{quote_char}; + unless ($_[0]->{quote_char}) { + if (ref($_[1]) eq 'ARRAY') { + return join($_[0]->{name_sep}||'.', @{$_[1]}); + } else { + $_[0]->_assert_pass_injection_guard($_[1]); + return $_[1]; + } + } my $qref = ref $_[0]->{quote_char}; - my ($l, $r); - if (!$qref) { - ($l, $r) = ( $_[0]->{quote_char}, 
$_[0]->{quote_char} ); - } - elsif ($qref eq 'ARRAY') { - ($l, $r) = @{$_[0]->{quote_char}}; - } - else { - puke "Unsupported quote_char format: $_[0]->{quote_char}"; - } + my ($l, $r) = + !$qref ? ($_[0]->{quote_char}, $_[0]->{quote_char}) + : ($qref eq 'ARRAY') ? @{$_[0]->{quote_char}} + : puke "Unsupported quote_char format: $_[0]->{quote_char}"; + + my $esc = $_[0]->{escape_char} || $r; # parts containing * are naturally unquoted - return join( $_[0]->{name_sep}||'', map - { $_ eq '*' ? $_ : $l . $_ . $r } - ( $_[0]->{name_sep} ? split (/\Q$_[0]->{name_sep}\E/, $_[1] ) : $_[1] ) + return join( + $_[0]->{name_sep}||'', + map +( + $_ eq '*' + ? $_ + : do { (my $n = $_) =~ s/(\Q$esc\E|\Q$r\E)/$esc$1/g; $l . $n . $r } + ), + (ref($_[1]) eq 'ARRAY' + ? @{$_[1]} + : ( + $_[0]->{name_sep} + ? split (/\Q$_[0]->{name_sep}\E/, $_[1] ) + : $_[1] + ) + ) ); } # Conversion, if applicable -sub _convert ($) { +sub _convert { #my ($self, $arg) = @_; - -# LDNOTE : modified the previous implementation below because -# it was not consistent : the first "return" is always an array, -# the second "return" is context-dependent. Anyway, _convert -# seems always used with just a single argument, so make it a -# scalar function. -# return @_ unless $self->{convert}; -# my $conv = $self->_sqlcase($self->{convert}); -# my @ret = map { $conv.'('.$_.')' } @_; -# return wantarray ? @ret : $ret[0]; - if ($_[0]->{convert}) { - return $_[0]->_sqlcase($_[0]->{convert}) .'(' . $_[1] . ')'; + if ($_[0]->{convert_where}) { + return $_[0]->_sqlcase($_[0]->{convert_where}) .'(' . $_[1] . ')'; } return $_[1]; } # And bindtype -sub _bindtype (@) { +sub _bindtype { #my ($self, $col, @vals) = @_; - - #LDNOTE : changed original implementation below because it did not make - # sense when bindtype eq 'columns' and @vals > 1. -# return $self->{bindtype} eq 'columns' ? [ $col, @vals ] : @vals; - # called often - tighten code return $_[0]->{bindtype} eq 'columns' ? map {[$_[1], $_]} @_[2 .. 
$#_] @@ -1113,12 +1201,12 @@ sub _bindtype (@) { # Dies if any element of @bind is not in [colname => value] format # if bindtype is 'columns'. sub _assert_bindval_matches_bindtype { - my ($self, @bind) = @_; - +# my ($self, @bind) = @_; + my $self = shift; if ($self->{bindtype} eq 'columns') { - foreach my $val (@bind) { - if (!defined $val || ref($val) ne 'ARRAY' || @$val != 2) { - die "bindtype 'columns' selected, you need to pass: [column_name => bind_value]" + for (@_) { + if (!defined $_ || ref($_) ne 'ARRAY' || @$_ != 2) { + puke "bindtype 'columns' selected, you need to pass: [column_name => bind_value]" } } } @@ -1227,7 +1315,7 @@ sub values { unless ref $data eq 'HASH'; my @all_bind; - foreach my $k ( sort keys %$data ) { + foreach my $k (sort keys %$data) { my $v = $data->{$k}; $self->_SWITCH_refkind($v, { ARRAYREF => sub { @@ -1350,7 +1438,7 @@ SQL::Abstract - Generate SQL from Perl data structures my $sql = SQL::Abstract->new; - my($stmt, @bind) = $sql->select($table, \@fields, \%where, \@order); + my($stmt, @bind) = $sql->select($source, \@fields, \%where, $order); my($stmt, @bind) = $sql->insert($table, \%fieldvals || \@values); @@ -1363,7 +1451,7 @@ SQL::Abstract - Generate SQL from Perl data structures $sth->execute(@bind); # Just generate the WHERE clause - my($stmt, @bind) = $sql->where(\%where, \@order); + my($stmt, @bind) = $sql->where(\%where, $order); # Return values in the same order, for hashed queries # See PERFORMANCE section for more details @@ -1444,7 +1532,7 @@ say something like this: my %data = ( name => 'Bill', - date_entered => \["to_date(?,'MM/DD/YYYY')", "03/02/2003"], + date_entered => \[ "to_date(?,'MM/DD/YYYY')", "03/02/2003" ], ); The first value in the array is the actual SQL. Any other values are @@ -1496,16 +1584,13 @@ Which you could then use in DBI code like so: Easy, eh? -=head1 FUNCTIONS +=head1 METHODS -The functions are simple. There's one for each major SQL operation, +The methods are simple. 
There's one for every major SQL operation, and a constructor you use first. The arguments are specified in a -similar order to each function (table, then fields, then a where +similar order for each method (table, then fields, then a where clause) to try and simplify things. - - - =head2 new(option => 'value') The C<new()> function takes a list of options and values, and returns @@ -1539,7 +1624,7 @@ C<cmp> to C<like> you would get SQL such as: WHERE name like 'nwiger' AND email like 'nate@wiger.org' -You can also override the comparsion on an individual basis - see +You can also override the comparison on an individual basis - see the huge section on L</"WHERE CLAUSES"> at the bottom. =item sqltrue, sqlfalse @@ -1575,7 +1660,7 @@ Which will change the above C<WHERE> to: WHERE event_date >= '2/13/99' AND event_date <= '4/24/03' The logic can also be changed locally by inserting -a modifier in front of an arrayref : +a modifier in front of an arrayref: @where = (-and => [event_date => {'>=', '2/13/99'}, event_date => {'<=', '4/24/03'} ]); @@ -1648,7 +1733,7 @@ are or are not included. You could wrap that above C<for> loop in a simple sub called C<bind_fields()> or something and reuse it repeatedly. You still get a layer of abstraction over manual SQL specification. -Note that if you set L</bindtype> to C<columns>, the C<\[$sql, @bind]> +Note that if you set L</bindtype> to C<columns>, the C<\[ $sql, @bind ]> construct (see L</Literal SQL with placeholders and bind values (subqueries)>) will expect the bind values in this format. @@ -1670,6 +1755,21 @@ that generates SQL like this: Quoting is useful if you have tables or columns names that are reserved words in your database's SQL dialect. +=item escape_char + +This is the character that will be used to escape L</quote_char>s appearing +in an identifier before it has been quoted. + +The parameter default in case of a single L</quote_char> character is the quote +character itself. + +When opening-closing-style quoting is used (L</quote_char> is an arrayref) +this parameter defaults to the B<closing (right)> L</quote_char>. Occurrences +of the B<opening (left)> L</quote_char> within the identifier are currently left +untouched.
The default for opening-closing-style quotes may change in future +versions, thus you are B<strongly encouraged> to specify the escape character +explicitly. + =item name_sep This is the character that separates a table and column name. It is @@ -1678,6 +1778,20 @@ so that tables and column names can be individually quoted like this: SELECT `table`.`one_field` FROM `table` WHERE `table`.`other_field` = 1 +=item injection_guard + +A regular expression C<qr/.../> that is applied to any C<-function> and unquoted +column name specified in a query structure. This is a safety mechanism to avoid +injection attacks when mishandling user input e.g.: + + my %condition_as_column_value_pairs = get_values_from_user(); + $sqla->select( ... , \%condition_as_column_value_pairs ); + +If the expression matches an exception is thrown. Note that literal SQL +supplied via C<\'...'> or C<\['...']> is B<not> checked in any way. + +Defaults to checking for C<;> and the C<GO> keyword (TransactSQL) + =item array_datatypes When this option is true, arrayrefs in INSERT or UPDATE are @@ -1732,7 +1846,7 @@ be supported by all database engines. =back -=head2 update($table, \%fieldvals, \%where) +=head2 update($table, \%fieldvals, \%where, \%options) This takes a table, hashref of field/value pairs, and an optional hashref L<WHERE clause|/WHERE CLAUSES>. It returns an SQL UPDATE function and a list @@ -1741,10 +1855,23 @@ See the sections on L</"Inserting and Updating Arrays"> and L</"Inserting and Updating SQL"> for information on how to insert with those data types. +The optional C<\%options> hash reference may contain additional +options to generate the update SQL. Currently supported options +are: + +=over 4 + +=item returning + +See the C<returning> option to +L<insert|/insert($table, \@values || \%fieldvals, \%options)>. + +=back + =head2 select($source, $fields, $where, $order) This returns a SQL SELECT statement and associated list of bind values, as -specified by the arguments : +specified by the arguments: =over @@ -1754,8 +1881,7 @@ Specification of the 'FROM' part of the statement.
The argument can be either a plain scalar (interpreted as a table name, will be quoted), or an arrayref (interpreted as a list of table names, joined by commas, quoted), or a scalarref -(literal table name, not quoted), or a ref to an arrayref -(list of literal table names, joined by commas, not quoted). +(literal SQL, not quoted). =item $fields @@ -1764,8 +1890,8 @@ the source. The argument can be either an arrayref (interpreted as a list of field names, will be joined by commas and quoted), or a plain scalar (literal SQL, not quoted). -Please observe that this API is not as flexible as for -the first argument C<$table>, for backwards compatibility reasons. +Please observe that this API is not as flexible as that of +the first argument C<$source>, for backwards compatibility reasons. =item $where @@ -1784,12 +1910,25 @@ for details. =back -=head2 delete($table, \%where) +=head2 delete($table, \%where, \%options) This takes a table name and optional hashref L<WHERE clause|/WHERE CLAUSES>. It returns an SQL DELETE statement and list of bind values. -=head2 where(\%where, \@order) +The optional C<\%options> hash reference may contain additional +options to generate the delete SQL. Currently supported options +are: + +=over 4 + +=item returning + +See the C<returning> option to +L<insert|/insert($table, \@values || \%fieldvals, \%options)>. + +=back + +=head2 where(\%where, $order) This is used to generate just the WHERE clause. For example, if you have an arbitrary data structure and know what the @@ -1838,8 +1977,85 @@ Might give you: You get the idea. Strings get their case twiddled, but everything else remains verbatim.
+=head1 EXPORTABLE FUNCTIONS + +=head2 is_plain_value + +Determines if the supplied argument is a plain value as understood by this +module: + +=over + +=item * The value is C<undef> + +=item * The value is a non-reference + +=item * The value is an object with stringification overloading + +=item * The value is of the form C<< { -value => $anything } >> + +=back + +On failure returns C<undef>, on success returns a B<scalar> reference +to the original supplied argument. + +=over + +=item * Note + +The stringification overloading detection is rather advanced: it takes +into consideration not only the presence of a C<""> overload, but if that +fails also checks for enabled +L<autogenerated versions of C<"">|overload/Magic Autogeneration>, based +on either C<0+> or C<bool>. + +Unfortunately testing in the field indicates that this +detection B<< may tickle a latent bug in perl versions before 5.018 >>, +but only when very large numbers of stringifying objects are involved. +At the time of writing ( Sep 2014 ) there is no clear explanation of +the direct cause, nor is there a manageably small test case that reliably +reproduces the problem. + +If you encounter any of the following exceptions in B<random places within your application stack> - this module may be to blame: + + Operation "ne": no method found, + left argument in overloaded package <something>, + right argument in overloaded package <something> + +or perhaps even + + Stub found while resolving method "???" overloading """" in package <something> + +If you fall victim to the above - please attempt to reduce the problem +to something that could be sent to the L<SQL::Abstract developers|DBIx::Class/GETTING HELP/SUPPORT> +(either publicly or privately). As a workaround in the meantime you can +set C<$ENV{SQLA_ISVALUE_IGNORE_AUTOGENERATED_STRINGIFICATION}> to a true +value, which will most likely eliminate your problem (at the expense of +not being able to properly detect exotic forms of stringification). + +This notice and environment variable will be removed in a future version, +as soon as the underlying problem is found and a reliable workaround is +devised.
+ +=back + +=head2 is_literal_value + +Determines if the supplied argument is a literal value as understood by this +module: + +=over + +=item * C<\$sql_string> +=item * C<\[ $sql_string, @bind_values ]> + +=back +On failure returns C<undef>, on success returns an B<ARRAY> reference +containing the unpacked version of the supplied literal SQL and bind values. =head1 WHERE CLAUSES @@ -1889,6 +2105,27 @@ This simple code will create the following: A field associated to an empty arrayref will be considered a logical false and will generate 0=1. +=head2 Tests for NULL values + +If the value part is C<undef> then this is converted to SQL C<IS NULL> + + my %where = ( + user => 'nwiger', + status => undef, + ); + +becomes: + + $stmt = "WHERE user = ? AND status IS NULL"; + @bind = ('nwiger'); + +To test if a column IS NOT NULL: + + my %where = ( + user => 'nwiger', + status => { '!=', undef }, + ); + =head2 Specific comparison operators If you want to specify a different type of operator for your comparison, @@ -1935,16 +2172,16 @@ To get an OR instead, you can combine it with the arrayref idea: my %where => ( user => 'nwiger', - priority => [ {'=', 2}, {'!=', 1} ] + priority => [ { '=', 2 }, { '>', 5 } ] ); Which would generate: - $stmt = "WHERE user = ? AND priority = ? OR priority != ?"; - @bind = ('nwiger', '2', '1'); + $stmt = "WHERE ( priority = ? OR priority > ?
) AND user = ?"; + @bind = ('2', '5', 'nwiger'); If you want to include literal SQL (with or without bind values), just use a -scalar reference or array reference as the value: +scalar reference or reference to an arrayref as the value: my %where = ( date_entered => { '>' => \["to_date(?, 'MM/DD/YYYY')", "11/26/2008"] }, @@ -1953,7 +2190,7 @@ scalar reference or array reference as the value: Which would generate: - $stmt = "WHERE date_entered > "to_date(?, 'MM/DD/YYYY') AND date_expires < now()"; + $stmt = "WHERE date_entered > to_date(?, 'MM/DD/YYYY') AND date_expires < now()"; @bind = ('11/26/2008'); @@ -1967,7 +2204,7 @@ this (notice the C<and>): Because, in Perl you I<can't> do this: - priority => { '!=', 2, '!=', 1 } + priority => { '!=' => 2, '!=' => 1 } As the second C<!=> key will obliterate the first. The solution is to use the special C<-modifier> form inside an arrayref: @@ -1998,7 +2235,7 @@ Here is a quick list of equivalencies, since there is some overlap: -=head2 Special operators : IN, BETWEEN, etc. +=head2 Special operators: IN, BETWEEN, etc. You can also use the hashref format to compare a list of fields using the C<IN> comparison operator, by specifying the list as an arrayref: @@ -2017,8 +2254,8 @@ The reverse operator C<-not_in> generates SQL C<NOT IN> and is used in the same way. If the argument to C<-in> is an empty array, 'sqlfalse' is generated -(by default : C<1=0>). Similarly, C<< -not_in => [] >> generates -'sqltrue' (by default : C<1=1>). +(by default: C<1=0>). Similarly, C<< -not_in => [] >> generates +'sqltrue' (by default: C<1=1>). In addition to the array you can supply a chunk of literal sql or literal sql with bind: @@ -2039,7 +2276,8 @@ would generate: )"; @bind = ('2000'); - +Finally, if the argument to C<-in> is not a reference, it will be +treated as a single-element array.
Another pair of operators is C<-between> and C<-not_between>, used with an arrayref of two values: @@ -2080,14 +2318,14 @@ Would give you: These are the two builtin "special operators"; but the -list can be expanded : see section L below. +list can be expanded: see section L below. =head2 Unary operators: bool If you wish to test against boolean columns or functions within your database you can use the C<-bool> and C<-not_bool> operators. For example to test the column C being true and the column - being false you would use:- +C being false you would use:- my %where = ( -bool => 'is_user', @@ -2104,15 +2342,19 @@ then you should use the and/or operators:- my %where = ( -and => [ -bool => 'one', - -bool => 'two', - -bool => 'three', - -not_bool => 'four', + -not_bool => { two=> { -rlike => 'bar' } }, + -not_bool => { three => [ { '=', 2 }, { '>', 5 } ] }, ], ); Would give you: - WHERE one AND two AND three AND NOT four + WHERE + one + AND + (NOT two RLIKE ?) + AND + (NOT ( three = ? OR three > ? )) =head2 Nested conditions, -and/-or prefixes @@ -2139,48 +2381,32 @@ This data structure would create the following: @bind = ('nwiger', 'pending', 'dispatched', 'robot', 'unassigned'); -There is also a special C<-nest> -operator which adds an additional set of parens, to create a subquery. -For example, to get something like this: - - $stmt = "WHERE user = ? AND ( workhrs > ? OR geo = ? 
)"; - @bind = ('nwiger', '20', 'ASIA'); - -You would do: - - my %where = ( - user => 'nwiger', - -nest => [ workhrs => {'>', 20}, geo => 'ASIA' ], - ); - - -Finally, clauses in hashrefs or arrayrefs can be -prefixed with an C<-and> or C<-or> to change the logic -inside : +Clauses in hashrefs or arrayrefs can be prefixed with an C<-and> or C<-or> +to change the logic inside: my @where = ( -and => [ user => 'nwiger', - -nest => [ - -and => [workhrs => {'>', 20}, geo => 'ASIA' ], - -and => [workhrs => {'<', 50}, geo => 'EURO' ] + [ + -and => [ workhrs => {'>', 20}, geo => 'ASIA' ], + -or => { workhrs => {'<', 50}, geo => 'EURO' }, ], ], ); That would yield: - WHERE ( user = ? AND - ( ( workhrs > ? AND geo = ? ) - OR ( workhrs < ? AND geo = ? ) ) ) + $stmt = "WHERE ( user = ? + AND ( ( workhrs > ? AND geo = ? ) + OR ( workhrs < ? OR geo = ? ) ) )"; + @bind = ('nwiger', '20', 'ASIA', '50', 'EURO'); - -=head2 Algebraic inconsistency, for historical reasons +=head3 Algebraic inconsistency, for historical reasons C<Important note>: when connecting several conditions, the C<-and->|C<-or> operator goes C<outside> of the nested structure; whereas when connecting several constraints on one column, the C<-and> operator goes -C<inside> the arrayref. Here is an example combining both features : +C<inside> the arrayref. Here is an example combining both features: my @where = ( -and => [a => 1, b => 2], @@ -2195,63 +2421,104 @@ yielding OR ( e LIKE ? AND e LIKE ? ) ) ) This difference in syntax is unfortunate but must be preserved for -historical reasons. So be careful : the two examples below would +historical reasons. So be careful: the two examples below would seem algebraically equivalent, but they are not - {col => [-and => {-like => 'foo%'}, {-like => '%bar'}]} - # yields : WHERE ( ( col LIKE ? AND col LIKE ? ) ) + { col => [ -and => + { -like => 'foo%' }, + { -like => '%bar' }, + ] } + # yields: WHERE ( ( col LIKE ? AND col LIKE ?
) ) + + [ -and => + { col => { -like => 'foo%' } }, + { col => { -like => '%bar' } }, + ] + # yields: WHERE ( ( col LIKE ? OR col LIKE ? ) ) - [-and => {col => {-like => 'foo%'}, {col => {-like => '%bar'}}]] - # yields : WHERE ( ( col LIKE ? OR col LIKE ? ) ) +=head2 Literal SQL and value type operators -=head2 Literal SQL +The basic premise of SQL::Abstract is that in WHERE specifications the "left +side" is a column name and the "right side" is a value (normally rendered as +a placeholder). This holds true for both hashrefs and arrayref pairs as you +see in the L examples above. Sometimes it is necessary to +alter this behavior. There are several ways of doing so. -Finally, sometimes only literal SQL will do. If you want to include -literal SQL verbatim, you can specify it as a scalar reference, namely: +=head3 -ident + +This is a virtual operator that signals the string to its right side is an +identifier (a column name) and not a value. For example to compare two +columns you would write: - my $inn = 'is Not Null'; my %where = ( priority => { '<', 2 }, - requestor => \$inn + requestor => { -ident => 'submitter' }, ); -This would create: +which creates: - $stmt = "WHERE priority < ? AND requestor is Not Null"; + $stmt = "WHERE priority < ? AND requestor = submitter"; @bind = ('2'); -Note that in this example, you only get one bind parameter back, since -the verbatim SQL is passed as part of the statement. +If you are maintaining legacy code you may see a different construct as +described in L, please use C<-ident> in new +code. -Of course, just to prove a point, the above can also be accomplished -with this: +=head3 -value + +This is a virtual operator that signals that the construct to its right side +is a value to be passed to DBI. This is for example necessary when you want +to write a where clause against an array (for RDBMS that support such +datatypes). 
For example: my %where = ( - priority => { '<', 2 }, - requestor => { '!=', undef }, + array => { -value => [1, 2, 3] } ); +will result in: -TMTOWTDI + $stmt = 'WHERE array = ?'; + @bind = ([1, 2, 3]); -Conditions on boolean columns can be expressed in the same way, passing -a reference to an empty string, however using liternal SQL in this way -is deprecated - the preferred method is to use the boolean operators - -see L : +Note that if you were to simply say: my %where = ( - priority => { '<', 2 }, - is_ready => \""; + array => [1, 2, 3] ); -which yields +the result would probably not be what you wanted: - $stmt = "WHERE priority < ? AND is_ready"; - @bind = ('2'); + $stmt = 'WHERE array = ? OR array = ? OR array = ?'; + @bind = (1, 2, 3); + +=head3 Literal SQL + +Finally, sometimes only literal SQL will do. To include a random snippet +of SQL verbatim, you specify it as a scalar reference. Consider this only +as a last resort. Usually there is a better way. For example: + + my %where = ( + priority => { '<', 2 }, + requestor => { -in => \'(SELECT name FROM hitmen)' }, + ); + +Would create: + + $stmt = "WHERE priority < ? AND requestor IN (SELECT name FROM hitmen)" + @bind = (2); + +Note that in this example, you only get one bind parameter back, since +the verbatim SQL is passed as part of the statement. +=head4 CAVEAT -=head2 Literal SQL with placeholders and bind values (subqueries) + Never use untrusted input as a literal SQL argument - this is a massive + security risk (there is no way to check literal snippets for SQL + injections and other nastyness). If you need to deal with untrusted input + use literal SQL with placeholders as described next. + +=head3 Literal SQL with placeholders and bind values (subqueries) If the literal SQL to be inserted has placeholders and bind values, use a reference to an arrayref (yes this is a double reference -- @@ -2259,7 +2526,7 @@ not so common, but perfectly legal Perl). 
For example, to find a date in Postgres you can use something like this: my %where = ( - date_column => \[q/= date '2008-09-30' - ?::integer/, 10/] + date_column => \[ "= date '2008-09-30' - ?::integer", 10 ] ) This would create: @@ -2268,19 +2535,20 @@ This would create: @bind = ('10'); Note that you must pass the bind values in the same format as they are returned -by L. That means that if you set L to C, you must -provide the bind values in the C<< [ column_meta => value ] >> format, where -C is an opaque scalar value; most commonly the column name, but -you can use any scalar value (including references and blessed references), -L will simply pass it through intact. So if C is set -to C the above example will look like: +by L. This means that if you set L +to C, you must provide the bind values in the +C<< [ column_meta => value ] >> format, where C is an opaque +scalar value; most commonly the column name, but you can use any scalar value +(including references and blessed references), L will simply +pass it through intact. So if C is set to C the above +example will look like: my %where = ( - date_column => \[q/= date '2008-09-30' - ?::integer/, [ dummy => 10 ]/] + date_column => \[ "= date '2008-09-30' - ?::integer", [ {} => 10 ] ] ) Literal SQL is especially useful for nesting parenthesized clauses in the -main SQL query. Here is a first example : +main SQL query. Here is a first example: my ($sub_stmt, @sub_bind) = ("SELECT c1 FROM t1 WHERE c2 < ? AND c3 LIKE ?", 100, "foo%"); @@ -2289,7 +2557,7 @@ main SQL query. Here is a first example : bar => \["IN ($sub_stmt)" => @sub_bind], ); -This yields : +This yields: $stmt = "WHERE (foo = ? AND bar IN (SELECT c1 FROM t1 WHERE c2 < ? 
AND c3 LIKE ?))"; @@ -2310,14 +2578,14 @@ to C<select()> : In the examples above, the subquery was used as an operator on a column; but the same principle also applies for a clause within the main C<%where> -hash, like an EXISTS subquery : +hash, like an EXISTS subquery: my ($sub_stmt, @sub_bind) = $sql->select("t1", "*", {c1 => 1, c2 => \"> t0.c0"}); - my %where = ( + my %where = ( -and => [ foo => 1234, - -nest => \["EXISTS ($sub_stmt)" => @sub_bind], - ); + \["EXISTS ($sub_stmt)" => @sub_bind], + ]); which yields @@ -2327,21 +2595,12 @@ which yields Observe that the condition on C<c2> in the subquery refers to -column C<t0.c0> of the main query : this is I<not> a bind +column C<t0.c0> of the main query: this is I<not> a bind value, so we have to express it through a scalar ref. Writing C<< c2 => {">" => "t0.c0"} >> would have generated C<< c2 > ? >> with bind value C<"t0.c0"> ... not exactly what we wanted here. -Another use of the subquery technique is when some SQL clauses need -parentheses, as it often occurs with some proprietary SQL extensions -like for example fulltext expressions, geospatial expressions, -NATIVE clauses, etc. Here is an example of a fulltext query in MySQL : - - my %where = ( - -nest => \["MATCH (col1, col2) AGAINST (?)" => qw/apples/] - ); - Finally, here is an example where a subquery is used for expressing unary negation: @@ -2350,7 +2609,7 @@ for expressing unary negation: $sub_stmt =~ s/^ where //i; # don't want "WHERE" in the subclause my %where = ( lname => {like => '%son%'}, - -nest => \["NOT ($sub_stmt)" => @sub_bind], + \["NOT ($sub_stmt)" => @sub_bind], ); This yields @@ -2358,7 +2617,47 @@ This yields $stmt = "lname LIKE ? AND NOT ( age < ? OR age > ? )" @bind = ('%son%', 10, 20) +=head3 Deprecated usage of Literal SQL + +Below are some examples of archaic use of literal SQL. It is shown only as +reference for those who deal with legacy code. Each example has a much +better, cleaner and safer alternative that users should opt for in new code.
+ +=over + +=item * + + my %where = ( requestor => \'IS NOT NULL' ) + $stmt = "WHERE requestor IS NOT NULL" + +This used to be the way of generating NULL comparisons, before the handling +of C<undef> got formalized. For new code please use the superior syntax as +described in L</Tests for NULL values>. + +=item * + + my %where = ( requestor => \'= submitter' ) + + $stmt = "WHERE requestor = submitter" + +This used to be the only way to compare columns. Use the superior L</-ident> +method for all new code. For example an identifier declared in such a way +will be properly quoted if L</quote_char> is properly set, while the legacy +form will remain as supplied. + +=item * + + my %where = ( is_ready => \"", completed => { '>', '2012-12-21' } ) + + $stmt = "WHERE completed > ? AND is_ready" + @bind = ('2012-12-21') + +Using an empty string literal used to be the only way to express a boolean. +For all new code please use the much more readable +L<-bool|/Unary operators: bool> operator. + +=back =head2 Conclusion @@ -2375,38 +2674,41 @@ knew everything ahead of time, you wouldn't have to worry about dynamically-generating SQL and could just hardwire it into your script. - - - =head1 ORDER BY CLAUSES Some functions take an order by clause. This can either be a scalar (just a -column name,) a hash of C<< { -desc => 'col' } >> or C<< { -asc => 'col' } >>, -or an array of either of the two previous forms.
Examples: - - Given | Will Generate - ---------------------------------------------------------- - | - \'colA DESC' | ORDER BY colA DESC - | - 'colA' | ORDER BY colA - | - [qw/colA colB/] | ORDER BY colA, colB - | - {-asc => 'colA'} | ORDER BY colA ASC - | - {-desc => 'colB'} | ORDER BY colB DESC - | - ['colA', {-asc => 'colB'}] | ORDER BY colA, colB ASC - | - { -asc => [qw/colA colB/] } | ORDER BY colA ASC, colB ASC - | - [ | - { -asc => 'colA' }, | ORDER BY colA ASC, colB DESC, - { -desc => [qw/colB/], | colC ASC, colD ASC - { -asc => [qw/colC colD/],| - ] | - =========================================================== +column name), a hashref of C<< { -desc => 'col' } >> or C<< { -asc => 'col' } +>>, a scalarref, an arrayref-ref, or an arrayref of any of the previous +forms. Examples: + + Given | Will Generate + --------------------------------------------------------------- + | + 'colA' | ORDER BY colA + | + [qw/colA colB/] | ORDER BY colA, colB + | + {-asc => 'colA'} | ORDER BY colA ASC + | + {-desc => 'colB'} | ORDER BY colB DESC + | + ['colA', {-asc => 'colB'}] | ORDER BY colA, colB ASC + | + { -asc => [qw/colA colB/] } | ORDER BY colA ASC, colB ASC + | + \'colA DESC' | ORDER BY colA DESC + | + \[ 'FUNC(colA, ?)', $x ] | ORDER BY FUNC(colA, ?) + | /* ...with $x bound to ? */ + | + [ | ORDER BY + { -asc => 'colA' }, | colA ASC, + { -desc => [qw/colB/] }, | colB DESC, + { -asc => [qw/colC colD/] },| colC ASC, colD ASC, + \'colE DESC', | colE DESC, + \[ 'FUNC(colF, ?)', $x ], | FUNC(colF, ?) + ] | /* ...with $x bound to ? */ + =============================================================== @@ -2428,7 +2730,7 @@ or an array of either of the two previous forms. Examples: A "special operator" is a SQL syntactic clause that can be applied to a field, instead of a usual binary operator. -For example : +For example: WHERE field IN (?, ?, ?) WHERE field BETWEEN ? AND ? @@ -2455,14 +2757,14 @@ Either a coderef or a plain scalar method name. 
In both cases the expected return is C<< ($sql, @bind) >>. When supplied with a method name, it is simply called on the -L<SQL::Abstract/> object as: +L<SQL::Abstract> object as: - $self->$method_name ($field, $op, $arg) + $self->$method_name($field, $op, $arg) Where: - $op is the part that matched the handler regex $field is the LHS of the operator + $op is the part that matched the handler regex $arg is the RHS When supplied with a coderef, it is called as: @@ -2531,9 +2833,9 @@ Either a coderef or a plain scalar method name. In both cases the expected return is C<< $sql >>. When supplied with a method name, it is simply called on the -L<SQL::Abstract/> object as: +L<SQL::Abstract> object as: - $self->$method_name ($op, $arg) + $self->$method_name($op, $arg) Where: @@ -2573,6 +2875,12 @@ the same structure, you only have to generate the SQL the first time around. On subsequent queries, simply use the C<values()> function provided by this module to return your values in the correct order. +However this depends on the values having the same type - if, for +example, the values of a where clause may either have values +(resulting in sql of the form C<column = ?> with a single bind +value), or alternatively the values might be C<undef> (resulting in +sql of the form C<column IS NULL> with no bind value) then the +caching technique suggested will not work. =head1 FORMBUILDER @@ -2582,6 +2890,9 @@ can be as simple as the following: #!/usr/bin/perl + use warnings; + use strict; + use CGI::FormBuilder; use SQL::Abstract; @@ -2603,6 +2914,29 @@ a fast interface to returning and formatting data. I frequently use these three modules together to write complex database query apps in under 50 lines. +=head1 HOW TO CONTRIBUTE + +Contributions are always welcome, in all usable forms (we especially +welcome documentation improvements). The delivery methods include git- +or unified-diff formatted patches, GitHub pull requests, or plain bug +reports either via RT or the Mailing list.
Contributors are generally +granted full access to the official repository after their first several +patches pass successful review. + +This project is maintained in a git repository. The code and related tools are +accessible at the following locations: + +=over + +=item * Official repo: L + +=item * Official gitweb: L + +=item * GitHub mirror: L + +=item * Authorized committers: L + +=back =head1 CHANGES @@ -2615,13 +2949,13 @@ to clarify the semantics. Hence, client code that was relying on some dark areas of C v1.* B in v1.50. -The main changes are : +The main changes are: =over =item * -support for literal SQL through the C<< \ [$sql, bind] >> syntax. +support for literal SQL through the C<< \ [ $sql, @bind ] >> syntax. =item * @@ -2637,7 +2971,7 @@ optional support for L =item * -defensive programming : check arguments +defensive programming: check arguments =item * @@ -2664,8 +2998,6 @@ dropped the C<_modlogic> function =back - - =head1 ACKNOWLEDGEMENTS There are a number of individuals that have really helped out with @@ -2681,7 +3013,7 @@ so I have no idea who they are! But the people I do know are: Mike Fragassi (enhancements to "BETWEEN" and "LIKE") Dan Kubb (support for "quote_char" and "name_sep") Guillermo Roditi (patch to cleanup "IN" and "BETWEEN", fix and tests for _order_by) - Laurent Dami (internal refactoring, multiple -nest, extensible list of special operators, literal SQL) + Laurent Dami (internal refactoring, extensible list of special operators, literal SQL) Norbert Buchmuller (support for literal SQL in hashpair, misc. fixes & tests) Peter Rabbitson (rewrite of SQLA::Test, misc. fixes & tests) Oliver Charles (support for "RETURNING" after "INSERT") @@ -2710,4 +3042,3 @@ terms as perl itself (either the GNU General Public License or the Artistic License) =cut -