X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract.pm;h=3940881780655186d44386811ff3e5205b8fa939;hb=3cb8f017c8746fa9c688540f3e8bda09363bda09;hp=05c2271f482b0b47fba8778a3c2e9c34a2bad1d7;hpb=54871ee983acf1261c22289912a642db6fc18707;p=dbsrgits%2FSQL-Abstract.git diff --git a/lib/SQL/Abstract.pm b/lib/SQL/Abstract.pm index 05c2271..3940881 100644 --- a/lib/SQL/Abstract.pm +++ b/lib/SQL/Abstract.pm @@ -5,28 +5,41 @@ package SQL::Abstract; # see doc at end of file # the test / diffusion / acceptance phase; those are marked with flag # 'LDNOTE' (note by laurent.dami AT free.fr) -use Carp; use strict; use warnings; -use List::Util qw/first/; -use Scalar::Util qw/blessed/; +use Carp (); +use List::Util (); +use Scalar::Util (); #====================================================================== # GLOBALS #====================================================================== -our $VERSION = '1.55'; +our $VERSION = '1.74'; # This would confuse some packagers -#$VERSION = eval $VERSION; # numify for warning-free dev releases +$VERSION = eval $VERSION if $VERSION =~ /_/; # numify for warning-free dev releases our $AUTOLOAD; # special operators (-in, -between). May be extended/overridden by user. # See section WHERE: BUILTIN SPECIAL OPERATORS below for implementation my @BUILTIN_SPECIAL_OPS = ( - {regex => qr/^(not )?between$/i, handler => '_where_field_BETWEEN'}, - {regex => qr/^(not )?in$/i, handler => '_where_field_IN'}, + {regex => qr/^ (?: not \s )? between $/ix, handler => '_where_field_BETWEEN'}, + {regex => qr/^ (?: not \s )? in $/ix, handler => '_where_field_IN'}, + {regex => qr/^ ident $/ix, handler => '_where_op_IDENT'}, + {regex => qr/^ value $/ix, handler => '_where_op_VALUE'}, +); + +# unaryish operators - key maps to handler +my @BUILTIN_UNARY_OPS = ( + # the digits are backcompat stuff + { regex => qr/^ and (?: [_\s]? \d+ )? $/xi, handler => '_where_op_ANDOR' }, + { regex => qr/^ or (?: [_\s]? \d+ )? $/xi, handler => '_where_op_ANDOR' }, + { regex => qr/^ nest (?: [_\s]? \d+ )? $/xi, handler => '_where_op_NEST' }, + { regex => qr/^ (?: not \s )? bool $/xi, handler => '_where_op_BOOL' }, + { regex => qr/^ ident $/xi, handler => '_where_op_IDENT' }, + { regex => qr/^ value $/ix, handler => '_where_op_VALUE' }, ); #====================================================================== @@ -41,12 +54,12 @@ sub _debug { sub belch (@) { my($func) = (caller(1))[3]; - carp "[$func] Warning: ", @_; + Carp::carp "[$func] Warning: ", @_; } sub puke (@) { my($func) = (caller(1))[3]; - croak "[$func] Fatal: ", @_; + Carp::croak "[$func] Fatal: ", @_; } @@ -75,37 +88,86 @@ sub new { # try to recognize which are the 'equality' and 'unequality' ops # (temporary quickfix, should go through a more seasoned API) - $opt{equality_op} = qr/^(\Q$opt{cmp}\E|is|(is\s+)?like)$/i; - $opt{inequality_op} = qr/^(!=|<>|(is\s+)?not(\s+like)?)$/i; + $opt{equality_op} = qr/^(\Q$opt{cmp}\E|is|(is\s+)?like)$/i; + $opt{inequality_op} = qr/^(!=|<>|(is\s+)?not(\s+like)?)$/i; # SQL booleans $opt{sqltrue} ||= '1=1'; $opt{sqlfalse} ||= '0=1'; - # special operators + # special operators $opt{special_ops} ||= []; + # regexes are applied in order, thus push after user-defines push @{$opt{special_ops}}, @BUILTIN_SPECIAL_OPS; + # unary operators + $opt{unary_ops} ||= []; + push @{$opt{unary_ops}}, @BUILTIN_UNARY_OPS; + + # rudimentary saniy-check for user supplied bits treated as functions/operators + # If a purported function matches this regular expression, an exception is thrown. + # Literal SQL is *NOT* subject to this check, only functions (and column names + # when quoting is not in effect) + + # FIXME + # need to guard against ()'s in column names too, but this will break tons of + # hacks... ideas anyone? + $opt{injection_guard} ||= qr/ + \; + | + ^ \s* go \s + /xmi; + return bless \%opt, $class; } +sub _assert_pass_injection_guard { + if ($_[1] =~ $_[0]->{injection_guard}) { + my $class = ref $_[0]; + puke "Possible SQL injection attempt '$_[1]'. If this is indeed a part of the " + . "desired SQL use literal SQL ( \'...' or \[ '...' ] ) or supply your own " + . "{injection_guard} attribute to ${class}->new()" + } +} + #====================================================================== # INSERT methods #====================================================================== sub insert { - my $self = shift; - my $table = $self->_table(shift); - my $data = shift || return; + my $self = shift; + my $table = $self->_table(shift); + my $data = shift || return; + my $options = shift; my $method = $self->_METHOD_FOR_refkind("_insert", $data); - my ($sql, @bind) = $self->$method($data); + my ($sql, @bind) = $self->$method($data); $sql = join " ", $self->_sqlcase('insert into'), $table, $sql; + + if ($options->{returning}) { + my ($s, @b) = $self->_insert_returning ($options); + $sql .= $s; + push @bind, @b; + } + return wantarray ? ($sql, @bind) : $sql; } +sub _insert_returning { + my ($self, $options) = @_; + + my $f = $options->{returning}; + + my $fieldlist = $self->_SWITCH_refkind($f, { + ARRAYREF => sub {join ', ', map { $self->_quote($_) } @$f;}, + SCALAR => sub {$self->_quote($f)}, + SCALARREF => sub {$$f}, + }); + return $self->_sqlcase(' returning ') . $fieldlist; +} + sub _insert_HASHREF { # explicit list of fields and then values my ($self, $data) = @_; @@ -161,7 +223,7 @@ sub _insert_values { $self->_SWITCH_refkind($v, { - ARRAYREF => sub { + ARRAYREF => sub { if ($self->{array_datatypes}) { # if array datatype are activated push @values, '?'; push @all_bind, $self->_bindtype($column, $v); @@ -181,7 +243,7 @@ sub _insert_values { push @all_bind, @bind; }, - # THINK : anything useful to do with a HASHREF ? + # THINK : anything useful to do with a HASHREF ? HASHREF => sub { # (nothing, but old SQLA passed it through) #TODO in SQLA >= 2.0 it will die instead belch "HASH ref as bind value in insert is not supported"; @@ -230,7 +292,7 @@ sub update { my $label = $self->_quote($k); $self->_SWITCH_refkind($v, { - ARRAYREF => sub { + ARRAYREF => sub { if ($self->{array_datatypes}) { # array datatype push @set, "$label = ?"; push @all_bind, $self->_bindtype($k, $v); @@ -250,7 +312,19 @@ sub update { }, SCALARREF => sub { # literal SQL without bind push @set, "$label = $$v"; - }, + }, + HASHREF => sub { + my ($op, $arg, @rest) = %$v; + + puke 'Operator calls in update must be in the form { -op => $arg }' + if (@rest or not $op =~ /^\-(.+)/); + + local $self->{_nested_func_lhs} = $k; + my ($sql, @bind) = $self->_where_unary_op ($1, $arg); + + push @set, "$label = $sql"; + push @all_bind, @bind; + }, SCALAR_or_UNDEF => sub { push @set, "$label = ?"; push @all_bind, $self->_bindtype($k, $v); @@ -290,11 +364,11 @@ sub select { my $f = (ref $fields eq 'ARRAY') ? join ', ', map { $self->_quote($_) } @$fields : $fields; - my $sql = join(' ', $self->_sqlcase('select'), $f, + my $sql = join(' ', $self->_sqlcase('select'), $f, $self->_sqlcase('from'), $table) . $where_sql; - return wantarray ? ($sql, @bind) : $sql; + return wantarray ? ($sql, @bind) : $sql; } #====================================================================== @@ -311,7 +385,7 @@ sub delete { my($where_sql, @bind) = $self->where($where); my $sql = $self->_sqlcase('delete from') . " $table" . $where_sql; - return wantarray ? ($sql, @bind) : $sql; + return wantarray ? ($sql, @bind) : $sql; } @@ -334,7 +408,7 @@ sub where { $sql .= $self->_order_by($order); } - return wantarray ? ($sql, @bind) : $sql; + return wantarray ? ($sql, @bind) : $sql; } @@ -344,12 +418,11 @@ sub _recurse_where { # dispatch on appropriate method according to refkind of $where my $method = $self->_METHOD_FOR_refkind("_where", $where); + my ($sql, @bind) = $self->$method($where, $logic); - my ($sql, @bind) = $self->$method($where, $logic); - - # DBIx::Class directly calls _recurse_where in scalar context, so + # DBIx::Class directly calls _recurse_where in scalar context, so # we must implement it, even if not in the official API - return wantarray ? ($sql, @bind) : $sql; + return wantarray ? ($sql, @bind) : $sql; } @@ -369,7 +442,7 @@ sub _where_ARRAYREF { my (@sql_clauses, @all_bind); # need to use while() so can shift() for pairs - while (my $el = shift @clauses) { + while (my $el = shift @clauses) { # switch according to kind of $el and get corresponding ($sql, @bind) my ($sql, @bind) = $self->_SWITCH_refkind($el, { @@ -377,13 +450,17 @@ sub _where_ARRAYREF { # skip empty elements, otherwise get invalid trailing AND stuff ARRAYREF => sub {$self->_recurse_where($el) if @$el}, - ARRAYREFREF => sub { @{${$el}} if @{${$el}}}, + ARRAYREFREF => sub { + my ($s, @b) = @$$el; + $self->_assert_bindval_matches_bindtype(@b); + ($s, @b); + }, HASHREF => sub {$self->_recurse_where($el, 'and') if %$el}, # LDNOTE : previous SQLA code for hashrefs was creating a dirty # side-effect: the first hashref within an array would change # the global logic to 'AND'. So [ {cond1, cond2}, [cond3, cond4] ] - # was interpreted as "(cond1 AND cond2) OR (cond3 AND cond4)", + # was interpreted as "(cond1 AND cond2) OR (cond3 AND cond4)", # whereas it should be "(cond1 AND cond2) OR (cond3 OR cond4)". SCALARREF => sub { ($$el); }, @@ -409,8 +486,8 @@ sub _where_ARRAYREF { sub _where_ARRAYREFREF { my ($self, $where) = @_; - my ($sql, @bind) = @{${$where}}; - + my ($sql, @bind) = @$$where; + $self->_assert_bindval_matches_bindtype(@bind); return ($sql, @bind); } @@ -422,15 +499,38 @@ sub _where_HASHREF { my ($self, $where) = @_; my (@sql_clauses, @all_bind); - for my $k (sort keys %$where) { + for my $k (sort keys %$where) { my $v = $where->{$k}; - # ($k => $v) is either a special op or a regular hashpair - my ($sql, @bind) = ($k =~ /^-(.+)/) ? $self->_where_op_in_hash($1, $v) - : do { - my $method = $self->_METHOD_FOR_refkind("_where_hashpair", $v); - $self->$method($k, $v); - }; + # ($k => $v) is either a special unary op or a regular hashpair + my ($sql, @bind) = do { + if ($k =~ /^-./) { + # put the operator in canonical form + my $op = $k; + $op = substr $op, 1; # remove initial dash + $op =~ s/^\s+|\s+$//g;# remove leading/trailing space + $op =~ s/\s+/ /g; # compress whitespace + + # so that -not_foo works correctly + $op =~ s/^not_/NOT /i; + + $self->_debug("Unary OP(-$op) within hashref, recursing..."); + my ($s, @b) = $self->_where_unary_op ($op, $v); + + # top level vs nested + # we assume that handled unary ops will take care of their ()s + $s = "($s)" unless ( + List::Util::first {$op =~ $_->{regex}} @{$self->{unary_ops}} + or + defined($self->{_nested_func_lhs}) && ($self->{_nested_func_lhs} eq $k) + ); + ($s, @b); + } + else { + my $method = $self->_METHOD_FOR_refkind("_where_hashpair", $v); + $self->$method($k, $v); + } + }; push @sql_clauses, $sql; push @all_bind, @bind; @@ -439,63 +539,183 @@ sub _where_HASHREF { return $self->_join_sql_clauses('and', \@sql_clauses, \@all_bind); } +sub _where_unary_op { + my ($self, $op, $rhs) = @_; -sub _where_op_in_hash { - my ($self, $op_str, $v) = @_; + if (my $op_entry = List::Util::first {$op =~ $_->{regex}} @{$self->{unary_ops}}) { + my $handler = $op_entry->{handler}; - $op_str =~ /^ (AND|OR|NEST) ( \_? \d* ) $/xi - or puke "unknown operator: -$op_str"; - - my $op = uc($1); # uppercase, remove trailing digits - if ($2) { - belch 'Use of [and|or|nest]_N modifiers is deprecated and will be removed in SQLA v2.0. ' - . "You probably wanted ...-and => [ $op_str => COND1, $op_str => COND2 ... ]"; + if (not ref $handler) { + if ($op =~ s/ [_\s]? \d+ $//x ) { + belch 'Use of [and|or|nest]_N modifiers is deprecated and will be removed in SQLA v2.0. ' + . "You probably wanted ...-and => [ -$op => COND1, -$op => COND2 ... ]"; + } + return $self->$handler ($op, $rhs); + } + elsif (ref $handler eq 'CODE') { + return $handler->($self, $op, $rhs); + } + else { + puke "Illegal handler for operator $op - expecting a method name or a coderef"; + } } - $self->_debug("OP(-$op) within hashref, recursing..."); + $self->_debug("Generic unary OP: $op - recursing as function"); - $self->_SWITCH_refkind($v, { + $self->_assert_pass_injection_guard($op); + + my ($sql, @bind) = $self->_SWITCH_refkind ($rhs, { + SCALAR => sub { + puke "Illegal use of top-level '$op'" + unless $self->{_nested_func_lhs}; + return ( + $self->_convert('?'), + $self->_bindtype($self->{_nested_func_lhs}, $rhs) + ); + }, + FALLBACK => sub { + $self->_recurse_where ($rhs) + }, + }); + + $sql = sprintf ('%s %s', + $self->_sqlcase($op), + $sql, + ); + + return ($sql, @bind); +} + +sub _where_op_ANDOR { + my ($self, $op, $v) = @_; + + $self->_SWITCH_refkind($v, { ARRAYREF => sub { - return $self->_where_ARRAYREF($v, $op eq 'NEST' ? '' : $op); + return $self->_where_ARRAYREF($v, $op); }, HASHREF => sub { - if ($op eq 'OR') { - return $self->_where_ARRAYREF([ map { $_ => $v->{$_} } (sort keys %$v) ], 'OR'); - } - else { # NEST | AND - return $self->_where_HASHREF($v); - } + return ( $op =~ /^or/i ) + ? $self->_where_ARRAYREF( [ map { $_ => $v->{$_} } ( sort keys %$v ) ], $op ) + : $self->_where_HASHREF($v); + }, + + SCALARREF => sub { + puke "-$op => \\\$scalar makes little sense, use " . + ($op =~ /^or/i + ? '[ \$scalar, \%rest_of_conditions ] instead' + : '-and => [ \$scalar, \%rest_of_conditions ] instead' + ); + }, + + ARRAYREFREF => sub { + puke "-$op => \\[...] makes little sense, use " . + ($op =~ /^or/i + ? '[ \[...], \%rest_of_conditions ] instead' + : '-and => [ \[...], \%rest_of_conditions ] instead' + ); }, - SCALARREF => sub { # literal SQL - $op eq 'NEST' - or puke "-$op => \\\$scalar not supported, use -nest => ..."; - return ($$v); + SCALAR => sub { # permissively interpreted as SQL + puke "-$op => \$value makes little sense, use -bool => \$value instead"; }, - ARRAYREFREF => sub { # literal SQL - $op eq 'NEST' - or puke "-$op => \\[..] not supported, use -nest => ..."; - return @{${$v}}; + UNDEF => sub { + puke "-$op => undef not supported"; }, + }); +} + +sub _where_op_NEST { + my ($self, $op, $v) = @_; + + $self->_SWITCH_refkind($v, { SCALAR => sub { # permissively interpreted as SQL - $op eq 'NEST' - or puke "-$op => 'scalar' not supported, use -nest => \\'scalar'"; belch "literal SQL should be -nest => \\'scalar' " . "instead of -nest => 'scalar' "; - return ($v); + return ($v); }, UNDEF => sub { puke "-$op => undef not supported"; }, + + FALLBACK => sub { + $self->_recurse_where ($v); + }, + }); } +sub _where_op_BOOL { + my ($self, $op, $v) = @_; + + my ($s, @b) = $self->_SWITCH_refkind($v, { + SCALAR => sub { # interpreted as SQL column + $self->_convert($self->_quote($v)); + }, + + UNDEF => sub { + puke "-$op => undef not supported"; + }, + + FALLBACK => sub { + $self->_recurse_where ($v); + }, + }); + + $s = "(NOT $s)" if $op =~ /^not/i; + ($s, @b); +} + + +sub _where_op_IDENT { + my $self = shift; + my ($op, $rhs) = splice @_, -2; + if (ref $rhs) { + puke "-$op takes a single scalar argument (a quotable identifier)"; + } + + # in case we are called as a top level special op (no '=') + my $lhs = shift; + + $_ = $self->_convert($self->_quote($_)) for ($lhs, $rhs); + + return $lhs + ? "$lhs = $rhs" + : $rhs + ; +} + +sub _where_op_VALUE { + my $self = shift; + my ($op, $rhs) = splice @_, -2; + + # in case we are called as a top level special op (no '=') + my $lhs = shift; + + my @bind = + $self->_bindtype ( + ($lhs || $self->{_nested_func_lhs}), + $rhs, + ) + ; + + return $lhs + ? ( + $self->_convert($self->_quote($lhs)) . ' = ' . $self->_convert('?'), + @bind + ) + : ( + $self->_convert('?'), + @bind, + ) + ; +} + sub _where_hashpair_ARRAYREF { my ($self, $k, $v) = @_; @@ -519,7 +739,7 @@ sub _where_hashpair_ARRAYREF { my $logic = $op ? substr($op, 1) : ''; return $self->_recurse_where(\@distributed, $logic); - } + } else { # LDNOTE : not sure of this one. What does "distribute over nothing" mean? $self->_debug("empty ARRAY($k) means 0=1"); @@ -531,26 +751,37 @@ sub _where_hashpair_HASHREF { my ($self, $k, $v, $logic) = @_; $logic ||= 'and'; + local $self->{_nested_func_lhs} = $self->{_nested_func_lhs}; + my ($all_sql, @all_bind); - for my $op (sort keys %$v) { - my $val = $v->{$op}; + for my $orig_op (sort keys %$v) { + my $val = $v->{$orig_op}; # put the operator in canonical form - $op =~ s/^-//; # remove initial dash - $op =~ tr/_/ /; # underscores become spaces - $op =~ s/^\s+//; # no initial space - $op =~ s/\s+$//; # no final space - $op =~ s/\s+/ /; # multiple spaces become one + my $op = $orig_op; + + # FIXME - we need to phase out dash-less ops + $op =~ s/^-//; # remove possible initial dash + $op =~ s/^\s+|\s+$//g;# remove leading/trailing space + $op =~ s/\s+/ /g; # compress whitespace + + $self->_assert_pass_injection_guard($op); + + # so that -not_foo works correctly + $op =~ s/^not_/NOT /i; my ($sql, @bind); + # CASE: col-value logic modifiers + if ( $orig_op =~ /^ \- (and|or) $/xi ) { + ($sql, @bind) = $self->_where_hashpair_HASHREF($k, $val, $1); + } # CASE: special operators like -in or -between - my $special_op = first {$op =~ $_->{regex}} @{$self->{special_ops}}; - if ($special_op) { + elsif ( my $special_op = List::Util::first {$op =~ $_->{regex}} @{$self->{special_ops}} ) { my $handler = $special_op->{handler}; if (! $handler) { - puke "No handler supplied for special operator matching $special_op->{regex}"; + puke "No handler supplied for special operator $orig_op"; } elsif (not ref $handler) { ($sql, @bind) = $self->$handler ($k, $op, $val); @@ -559,7 +790,7 @@ sub _where_hashpair_HASHREF { ($sql, @bind) = $handler->($self, $k, $op, $val); } else { - puke "Illegal handler for special operator matching $special_op->{regex} - expecting a method name or a coderef"; + puke "Illegal handler for special operator $orig_op - expecting a method name or a coderef"; } } else { @@ -569,12 +800,6 @@ sub _where_hashpair_HASHREF { ($sql, @bind) = $self->_where_field_op_ARRAYREF($k, $op, $val); }, - SCALARREF => sub { # CASE: col => {op => \$scalar} (literal SQL without bind) - $sql = join ' ', $self->_convert($self->_quote($k)), - $self->_sqlcase($op), - $$val; - }, - ARRAYREFREF => sub { # CASE: col => {op => \[$sql, @bind]} (literal SQL with bind) my ($sub_sql, @sub_bind) = @$$val; $self->_assert_bindval_matches_bindtype(@sub_bind); @@ -584,22 +809,24 @@ sub _where_hashpair_HASHREF { @bind = @sub_bind; }, - HASHREF => sub { - ($sql, @bind) = $self->_where_hashpair_HASHREF($k, $val, $op); - }, - UNDEF => sub { # CASE: col => {op => undef} : sql "IS (NOT)? NULL" my $is = ($op =~ $self->{equality_op}) ? 'is' : ($op =~ $self->{inequality_op}) ? 'is not' : - puke "unexpected operator '$op' with undef operand"; + puke "unexpected operator '$orig_op' with undef operand"; $sql = $self->_quote($k) . $self->_sqlcase(" $is null"); }, - - FALLBACK => sub { # CASE: col => {op => $scalar} - $sql = join ' ', $self->_convert($self->_quote($k)), - $self->_sqlcase($op), - $self->_convert('?'); - @bind = $self->_bindtype($k, $val); + + FALLBACK => sub { # CASE: col => {op/func => $stuff} + + # retain for proper column type bind + $self->{_nested_func_lhs} ||= $k; + + ($sql, @bind) = $self->_where_unary_op ($op, $val); + + $sql = join (' ', + $self->_convert($self->_quote($k)), + $self->{_nested_func_lhs} eq $k ? $sql : "($sql)", # top level vs nested + ); }, }); } @@ -618,11 +845,14 @@ sub _where_field_op_ARRAYREF { my @vals = @$vals; #always work on a copy if(@vals) { - $self->_debug("ARRAY($vals) means multiple elements: [ @vals ]"); + $self->_debug(sprintf '%s means multiple elements: [ %s ]', + $vals, + join (', ', map { defined $_ ? "'$_'" : 'NULL' } @vals ), + ); # see if the first element is an -and/-or op my $logic; - if ($vals[0] =~ /^ - ( AND|OR ) $/ix) { + if (defined $vals[0] && $vals[0] =~ /^ - ( AND|OR ) $/ix) { $logic = uc $1; shift @vals; } @@ -630,18 +860,18 @@ sub _where_field_op_ARRAYREF { # distribute $op over each remaining member of @vals, append logic if exists return $self->_recurse_where([map { {$k => {$op, $_}} } @vals], $logic); - # LDNOTE : had planned to change the distribution logic when - # $op =~ $self->{inequality_op}, because of Morgan laws : + # LDNOTE : had planned to change the distribution logic when + # $op =~ $self->{inequality_op}, because of Morgan laws : # with {field => {'!=' => [22, 33]}}, it would be ridiculous to generate - # WHERE field != 22 OR field != 33 : the user probably means + # WHERE field != 22 OR field != 33 : the user probably means # WHERE field != 22 AND field != 33. # To do this, replace the above to roughly : # my $logic = ($op =~ $self->{inequality_op}) ? 'AND' : 'OR'; # return $self->_recurse_where([map { {$k => {$op, $_}} } @vals], $logic); - } + } else { - # try to DWIM on equality operators + # try to DWIM on equality operators # LDNOTE : not 100% sure this is the correct thing to do ... return ($self->{sqlfalse}) if $op =~ $self->{equality_op}; return ($self->{sqltrue}) if $op =~ $self->{inequality_op}; @@ -663,7 +893,7 @@ sub _where_hashpair_SCALARREF { sub _where_hashpair_ARRAYREFREF { my ($self, $k, $v) = @_; $self->_debug("REF($k) means literal SQL: @${$v}"); - my ($sql, @bind) = @${$v}; + my ($sql, @bind) = @$$v; $self->_assert_bindval_matches_bindtype(@bind); $sql = $self->_quote($k) . " " . $sql; return ($sql, @bind ); @@ -673,8 +903,8 @@ sub _where_hashpair_ARRAYREFREF { sub _where_hashpair_SCALAR { my ($self, $k, $v) = @_; $self->_debug("NOREF($k) means simple key=val: $k $self->{cmp} $v"); - my $sql = join ' ', $self->_convert($self->_quote($k)), - $self->_sqlcase($self->{cmp}), + my $sql = join ' ', $self->_convert($self->_quote($k)), + $self->_sqlcase($self->{cmp}), $self->_convert('?'); my @bind = $self->_bindtype($k, $v); return ( $sql, @bind); @@ -725,38 +955,61 @@ sub _where_UNDEF { sub _where_field_BETWEEN { my ($self, $k, $op, $vals) = @_; - (ref $vals eq 'ARRAY' && @$vals == 2) or - (ref $vals eq 'REF' && (@$$vals == 1 || @$$vals == 2 || @$$vals == 3)) - or puke "special op 'between' requires an arrayref of two values (or a scalarref or arrayrefref for literal SQL)"; - - my ($clause, @bind, $label, $and, $placeholder); + my ($label, $and, $placeholder); $label = $self->_convert($self->_quote($k)); $and = ' ' . $self->_sqlcase('and') . ' '; $placeholder = $self->_convert('?'); $op = $self->_sqlcase($op); - if (ref $vals eq 'REF') { - ($clause, @bind) = @$$vals; - } - else { - my (@all_sql, @all_bind); - - foreach my $val (@$vals) { - my ($sql, @bind) = $self->_SWITCH_refkind($val, { - SCALAR => sub { - return ($placeholder, ($val)); - }, - SCALARREF => sub { - return ($self->_convert($$val), ()); - }, - }); - push @all_sql, $sql; - push @all_bind, @bind; - } + my ($clause, @bind) = $self->_SWITCH_refkind($vals, { + ARRAYREFREF => sub { + my ($s, @b) = @$$vals; + $self->_assert_bindval_matches_bindtype(@b); + ($s, @b); + }, + SCALARREF => sub { + return $$vals; + }, + ARRAYREF => sub { + puke "special op 'between' accepts an arrayref with exactly two values" + if @$vals != 2; + + my (@all_sql, @all_bind); + foreach my $val (@$vals) { + my ($sql, @bind) = $self->_SWITCH_refkind($val, { + SCALAR => sub { + return ($placeholder, $self->_bindtype($k, $val) ); + }, + SCALARREF => sub { + return $$val; + }, + ARRAYREFREF => sub { + my ($sql, @bind) = @$$val; + $self->_assert_bindval_matches_bindtype(@bind); + return ($sql, @bind); + }, + HASHREF => sub { + my ($func, $arg, @rest) = %$val; + puke ("Only simple { -func => arg } functions accepted as sub-arguments to BETWEEN") + if (@rest or $func !~ /^ \- (.+)/x); + local $self->{_nested_func_lhs} = $k; + $self->_where_unary_op ($1 => $arg); + } + }); + push @all_sql, $sql; + push @all_bind, @bind; + } + + return ( + (join $and, @all_sql), + @all_bind + ); + }, + FALLBACK => sub { + puke "special op 'between' accepts an arrayref with two values, or a single literal scalarref/arrayref-ref"; + }, + }); - $clause = (join $and, @all_sql); - @bind = $self->_bindtype($k, @all_bind); - } my $sql = "( $label $op $clause )"; return ($sql, @bind) } @@ -775,11 +1028,44 @@ sub _where_field_IN { my ($sql, @bind) = $self->_SWITCH_refkind($vals, { ARRAYREF => sub { # list of choices if (@$vals) { # nonempty list - my $placeholders = join ", ", (($placeholder) x @$vals); - my $sql = "$label $op ( $placeholders )"; - my @bind = $self->_bindtype($k, @$vals); + my (@all_sql, @all_bind); + + for my $val (@$vals) { + my ($sql, @bind) = $self->_SWITCH_refkind($val, { + SCALAR => sub { + return ($placeholder, $val); + }, + SCALARREF => sub { + return $$val; + }, + ARRAYREFREF => sub { + my ($sql, @bind) = @$$val; + $self->_assert_bindval_matches_bindtype(@bind); + return ($sql, @bind); + }, + HASHREF => sub { + my ($func, $arg, @rest) = %$val; + puke ("Only simple { -func => arg } functions accepted as sub-arguments to IN") + if (@rest or $func !~ /^ \- (.+)/x); + local $self->{_nested_func_lhs} = $k; + $self->_where_unary_op ($1 => $arg); + }, + UNDEF => sub { + return $self->_sqlcase('null'); + }, + }); + push @all_sql, $sql; + push @all_bind, @bind; + } - return ($sql, @bind); + return ( + sprintf ('%s %s ( %s )', + $label, + $op, + join (', ', @all_sql) + ), + $self->_bindtype($k, @all_bind), + ); } else { # empty list : some databases won't understand "IN ()", so DWIM my $sql = ($op =~ /\bnot\b/i) ? $self->{sqltrue} : $self->{sqlfalse}; @@ -787,23 +1073,33 @@ sub _where_field_IN { } }, + SCALARREF => sub { # literal SQL + my $sql = $self->_open_outer_paren ($$vals); + return ("$label $op ( $sql )"); + }, ARRAYREFREF => sub { # literal SQL with bind my ($sql, @bind) = @$$vals; $self->_assert_bindval_matches_bindtype(@bind); + $sql = $self->_open_outer_paren ($sql); return ("$label $op ( $sql )", @bind); }, FALLBACK => sub { - puke "special op 'in' requires an arrayref (or arrayref-ref)"; + puke "special op 'in' requires an arrayref (or scalarref/arrayref-ref)"; }, }); return ($sql, @bind); } - - - +# Some databases (SQLite) treat col IN (1, 2) different from +# col IN ( (1, 2) ). Use this to strip all outer parens while +# adding them back in the corresponding method +sub _open_outer_paren { + my ($self, $sql) = @_; + $sql = $1 while $sql =~ /^ \s* \( (.*) \) \s* $/xs; + return $sql; +} #====================================================================== @@ -813,48 +1109,80 @@ sub _where_field_IN { sub _order_by { my ($self, $arg) = @_; - # construct list of ordering instructions - my @order = $self->_SWITCH_refkind($arg, { + my (@sql, @bind); + for my $c ($self->_order_by_chunks ($arg) ) { + $self->_SWITCH_refkind ($c, { + SCALAR => sub { push @sql, $c }, + ARRAYREF => sub { push @sql, shift @$c; push @bind, @$c }, + }); + } + + my $sql = @sql + ? sprintf ('%s %s', + $self->_sqlcase(' order by'), + join (', ', @sql) + ) + : '' + ; + + return wantarray ? ($sql, @bind) : $sql; +} + +sub _order_by_chunks { + my ($self, $arg) = @_; + + return $self->_SWITCH_refkind($arg, { ARRAYREF => sub { - map {$self->_SWITCH_refkind($_, { - SCALAR => sub {$self->_quote($_)}, - UNDEF => sub {}, - SCALARREF => sub {$$_}, # literal SQL, no quoting - HASHREF => sub {$self->_order_by_hash($_)} - }) } @$arg; + map { $self->_order_by_chunks ($_ ) } @$arg; + }, + + ARRAYREFREF => sub { + my ($s, @b) = @$$arg; + $self->_assert_bindval_matches_bindtype(@b); + [ $s, @b ]; }, SCALAR => sub {$self->_quote($arg)}, - UNDEF => sub {}, + + UNDEF => sub {return () }, + SCALARREF => sub {$$arg}, # literal SQL, no quoting - HASHREF => sub {$self->_order_by_hash($arg)}, - }); + HASHREF => sub { + # get first pair in hash + my ($key, $val, @rest) = %$arg; - # build SQL - my $order = join ', ', @order; - return $order ? $self->_sqlcase(' order by')." $order" : ''; -} + return () unless $key; + if ( @rest or not $key =~ /^-(desc|asc)/i ) { + puke "hash passed to _order_by must have exactly one key (-desc or -asc)"; + } -sub _order_by_hash { - my ($self, $hash) = @_; + my $direction = $1; - # get first pair in hash - my ($key, $val) = each %$hash; + my @ret; + for my $c ($self->_order_by_chunks ($val)) { + my ($sql, @bind); - # check if one pair was found and no other pair in hash - $key && !(each %$hash) - or puke "hash passed to _order_by must have exactly one key (-desc or -asc)"; + $self->_SWITCH_refkind ($c, { + SCALAR => sub { + $sql = $c; + }, + ARRAYREF => sub { + ($sql, @bind) = @$c; + }, + }); - my ($order) = ($key =~ /^-(desc|asc)/i) - or puke "invalid key in _order_by hash : $key"; + $sql = $sql . ' ' . $self->_sqlcase($direction); - $val = ref $val eq 'ARRAY' ? $val : [$val]; - return join ', ', map { $self->_quote($_) . ' ' . $self->_sqlcase($order) } @$val; -} + push @ret, [ $sql, @bind]; + } + return @ret; + }, + }); +} #====================================================================== @@ -868,7 +1196,6 @@ sub _table { ARRAYREF => sub {join ', ', map { $self->_quote($_) } @$from;}, SCALAR => sub {$self->_quote($from)}, SCALARREF => sub {$$from}, - ARRAYREFREF => sub {join ', ', @$from;}, }); } @@ -877,80 +1204,81 @@ sub _table { # UTILITY FUNCTIONS #====================================================================== +# highly optimized, as it's called way too often sub _quote { - my $self = shift; - my $label = shift; - - $label or puke "can't quote an empty label"; - - # left and right quote characters - my ($ql, $qr, @other) = $self->_SWITCH_refkind($self->{quote_char}, { - SCALAR => sub {($self->{quote_char}, $self->{quote_char})}, - ARRAYREF => sub {@{$self->{quote_char}}}, - UNDEF => sub {()}, - }); - not @other - or puke "quote_char must be an arrayref of 2 values"; - - # no quoting if no quoting chars - $ql or return $label; + # my ($self, $label) = @_; - # no quoting for literal SQL - return $$label if ref($label) eq 'SCALAR'; + return '' unless defined $_[1]; + return ${$_[1]} if ref($_[1]) eq 'SCALAR'; - # separate table / column (if applicable) - my $sep = $self->{name_sep} || ''; - my @to_quote = $sep ? split /\Q$sep\E/, $label : ($label); + unless ($_[0]->{quote_char}) { + $_[0]->_assert_pass_injection_guard($_[1]); + return $_[1]; + } - # do the quoting, except for "*" or for `table`.* - my @quoted = map { $_ eq '*' ? $_: $ql.$_.$qr} @to_quote; + my $qref = ref $_[0]->{quote_char}; + my ($l, $r); + if (!$qref) { + ($l, $r) = ( $_[0]->{quote_char}, $_[0]->{quote_char} ); + } + elsif ($qref eq 'ARRAY') { + ($l, $r) = @{$_[0]->{quote_char}}; + } + else { + puke "Unsupported quote_char format: $_[0]->{quote_char}"; + } - # reassemble and return. - return join $sep, @quoted; + # parts containing * are naturally unquoted + return join( $_[0]->{name_sep}||'', map + { $_ eq '*' ? $_ : $l . $_ . $r } + ( $_[0]->{name_sep} ? split (/\Q$_[0]->{name_sep}\E/, $_[1] ) : $_[1] ) + ); } # Conversion, if applicable sub _convert ($) { - my ($self, $arg) = @_; + #my ($self, $arg) = @_; # LDNOTE : modified the previous implementation below because # it was not consistent : the first "return" is always an array, # the second "return" is context-dependent. Anyway, _convert -# seems always used with just a single argument, so make it a +# seems always used with just a single argument, so make it a # scalar function. # return @_ unless $self->{convert}; # my $conv = $self->_sqlcase($self->{convert}); # my @ret = map { $conv.'('.$_.')' } @_; # return wantarray ? @ret : $ret[0]; - if ($self->{convert}) { - my $conv = $self->_sqlcase($self->{convert}); - $arg = $conv.'('.$arg.')'; + if ($_[0]->{convert}) { + return $_[0]->_sqlcase($_[0]->{convert}) .'(' . $_[1] . ')'; } - return $arg; + return $_[1]; } # And bindtype sub _bindtype (@) { - my $self = shift; - my($col, @vals) = @_; + #my ($self, $col, @vals) = @_; - #LDNOTE : changed original implementation below because it did not make + #LDNOTE : changed original implementation below because it did not make # sense when bindtype eq 'columns' and @vals > 1. # return $self->{bindtype} eq 'columns' ? [ $col, @vals ] : @vals; - return $self->{bindtype} eq 'columns' ? map {[$col, $_]} @vals : @vals; + # called often - tighten code + return $_[0]->{bindtype} eq 'columns' + ? map {[$_[1], $_]} @_[2 .. $#_] + : @_[2 .. $#_] + ; } # Dies if any element of @bind is not in [colname => value] format # if bindtype is 'columns'. sub _assert_bindval_matches_bindtype { - my ($self, @bind) = @_; - +# my ($self, @bind) = @_; + my $self = shift; if ($self->{bindtype} eq 'columns') { - foreach my $val (@bind) { - if (!defined $val || ref($val) ne 'ARRAY' || @$val != 2) { - die "bindtype 'columns' selected, you need to pass: [column_name => bind_value]" + for (@_) { + if (!defined $_ || ref($_) ne 'ARRAY' || @$_ != 2) { + puke "bindtype 'columns' selected, you need to pass: [column_name => bind_value]" } } } @@ -975,11 +1303,9 @@ sub _join_sql_clauses { # Fix SQL case, if so requested sub _sqlcase { - my $self = shift; - # LDNOTE: if $self->{case} is true, then it contains 'lower', so we # don't touch the argument ... crooked logic, but let's not change it! - return $self->{case} ? $_[0] : uc($_[0]); + return $_[0]->{case} ? $_[1] : uc($_[1]); } @@ -989,48 +1315,57 @@ sub _sqlcase { sub _refkind { my ($self, $data) = @_; - my $suffix = ''; - my $ref; - my $n_steps = 0; - - while (1) { - # blessed objects are treated like scalars - $ref = (blessed $data) ? '' : ref $data; - $n_steps += 1 if $ref; - last if $ref ne 'REF'; - $data = $$data; - } - my $base = $ref || (defined $data ? 'SCALAR' : 'UNDEF'); + return 'UNDEF' unless defined $data; - return $base . ('REF' x $n_steps); -} + # blessed objects are treated like scalars + my $ref = (Scalar::Util::blessed $data) ? '' : ref $data; + + return 'SCALAR' unless $ref; + my $n_steps = 1; + while ($ref eq 'REF') { + $data = $$data; + $ref = (Scalar::Util::blessed $data) ? '' : ref $data; + $n_steps++ if $ref; + } + return ($ref||'SCALAR') . ('REF' x $n_steps); +} sub _try_refkind { my ($self, $data) = @_; my @try = ($self->_refkind($data)); push @try, 'SCALAR_or_UNDEF' if $try[0] eq 'SCALAR' || $try[0] eq 'UNDEF'; push @try, 'FALLBACK'; - return @try; + return \@try; } sub _METHOD_FOR_refkind { my ($self, $meth_prefix, $data) = @_; - my $method = first {$_} map {$self->can($meth_prefix."_".$_)} - $self->_try_refkind($data) - or puke "cannot dispatch on '$meth_prefix' for ".$self->_refkind($data); - return $method; + + my $method; + for (@{$self->_try_refkind($data)}) { + $method = $self->can($meth_prefix."_".$_) + and last; + } + + return $method || puke "cannot dispatch on '$meth_prefix' for ".$self->_refkind($data); } sub _SWITCH_refkind { my ($self, $data, $dispatch_table) = @_; - my $coderef = first {$_} map {$dispatch_table->{$_}} - $self->_try_refkind($data) - or puke "no dispatch entry for ".$self->_refkind($data); + my $coderef; + for (@{$self->_try_refkind($data)}) { + $coderef = $dispatch_table->{$_} + and last; + } + + puke "no dispatch entry for ".$self->_refkind($data) + unless $coderef; + $coderef->(); } @@ -1055,7 +1390,7 @@ sub values { foreach my $k ( sort keys %$data ) { my $v = $data->{$k}; $self->_SWITCH_refkind($v, { - ARRAYREF => sub { + ARRAYREF => sub { if ($self->{array_datatypes}) { # array datatype push @all_bind, $self->_bindtype($k, $v); } @@ -1102,7 +1437,7 @@ sub generate { } elsif ($r eq 'SCALAR') { # literal SQL without bind push @sqlq, "$label = $$v"; - } else { + } else { push @sqlq, "$label = ?"; push @sqlv, $self->_bindtype($k, $v); } @@ -1120,7 +1455,7 @@ sub generate { } elsif ($r eq 'SCALAR') { # literal SQL without bind # embedded literal SQL push @sqlq, $$v; - } else { + } else { push @sqlq, '?'; push @sqlv, $v; } @@ -1175,7 +1510,7 @@ SQL::Abstract - Generate SQL from Perl data structures my $sql = SQL::Abstract->new; - my($stmt, @bind) = $sql->select($table, \@fields, \%where, \@order); + my($stmt, @bind) = $sql->select($source, \@fields, \%where, \@order); my($stmt, @bind) = $sql->insert($table, \%fieldvals || \@values); @@ -1243,14 +1578,14 @@ These are then used directly in your DBI code: If your database has array types (like for example Postgres), activate the special option C<< array_datatypes => 1 >> -when creating the C object. +when creating the C object. Then you may use an arrayref to insert and update database array types: my $sql = SQL::Abstract->new(array_datatypes => 1); my %data = ( planets => [qw/Mercury Venus Earth Mars/] ); - + my($stmt, @bind) = $sql->insert('solar_system', \%data); This results in: @@ -1270,7 +1605,7 @@ say something like this: my %data = ( name => 'Bill', date_entered => \["to_date(?,'MM/DD/YYYY')", "03/02/2003"], - ); + ); The first value in the array is the actual SQL. Any other values are optional and would be included in the bind values array. This gives @@ -1278,7 +1613,7 @@ you: my($stmt, @bind) = $sql->insert('people', \%data); - $stmt = "INSERT INTO people (name, date_entered) + $stmt = "INSERT INTO people (name, date_entered) VALUES (?, to_date(?,'MM/DD/YYYY'))"; @bind = ('Bill', '03/02/2003'); @@ -1325,7 +1660,7 @@ Easy, eh? The functions are simple. There's one for each major SQL operation, and a constructor you use first. The arguments are specified in a -similar order to each function (table, then fields, then a where +similar order to each function (table, then fields, then a where clause) to try and simplify things. @@ -1382,8 +1717,8 @@ for arrays, and "and" for hashes. This means that a WHERE array of the form: @where = ( - event_date => {'>=', '2/13/99'}, - event_date => {'<=', '4/24/03'}, + event_date => {'>=', '2/13/99'}, + event_date => {'<=', '4/24/03'}, ); will generate SQL like this: @@ -1402,7 +1737,7 @@ Which will change the above C to: The logic can also be changed locally by inserting a modifier in front of an arrayref : - @where = (-and => [event_date => {'>=', '2/13/99'}, + @where = (-and => [event_date => {'>=', '2/13/99'}, event_date => {'<=', '4/24/03'} ]); See the L section for explanations. @@ -1480,7 +1815,7 @@ will expect the bind values in this format. =item quote_char This is the character that a table or column name will be quoted -with. By default this is an empty string, but you could set it to +with. By default this is an empty string, but you could set it to the character C<`>, to generate SQL like this: SELECT `a_field` FROM `a_table` WHERE `some_field` LIKE '%someval%' @@ -1492,7 +1827,7 @@ that generates SQL like this: SELECT [a_field] FROM [a_table] WHERE [some_field] LIKE '%someval%' -Quoting is useful if you have tables or columns names that are reserved +Quoting is useful if you have tables or columns names that are reserved words in your database's SQL dialect. =item name_sep @@ -1503,10 +1838,24 @@ so that tables and column names can be individually quoted like this: SELECT `table`.`one_field` FROM `table` WHERE `table`.`other_field` = 1 +=item injection_guard + +A regular expression C that is applied to any C<-function> and unquoted +column name specified in a query structure. This is a safety mechanism to avoid +injection attacks when mishandling user input e.g.: + + my %condition_as_column_value_pairs = get_values_from_user(); + $sqla->select( ... , \%condition_as_column_value_pairs ); + +If the expression matches an exception is thrown. Note that literal SQL +supplied via C<\'...'> or C<\['...']> is B checked in any way. + +Defaults to checking for C<;> and the C keyword (TransactSQL) + =item array_datatypes -When this option is true, arrayrefs in INSERT or UPDATE are -interpreted as array datatypes and are passed directly +When this option is true, arrayrefs in INSERT or UPDATE are +interpreted as array datatypes and are passed directly to the DBI layer. When this option is false, arrayrefs are interpreted as literal SQL, just like refs to arrayrefs @@ -1517,15 +1866,21 @@ for literal SQL). =item special_ops -Takes a reference to a list of "special operators" +Takes a reference to a list of "special operators" to extend the syntax understood by L. See section L for details. +=item unary_ops + +Takes a reference to a list of "unary operators" +to extend the syntax understood by L. +See section L for details. + =back -=head2 insert($table, \@values || \%fieldvals) +=head2 insert($table, \@values || \%fieldvals, \%options) This is the simplest function. You simply give it a table name and either an arrayref of values or hashref of field/value pairs. @@ -1534,6 +1889,23 @@ See the sections on L and L for information on how to insert with those data types. +The optional C<\%options> hash reference may contain additional +options to generate the insert SQL. Currently supported options +are: + +=over 4 + +=item returning + +Takes either a scalar of raw SQL fields, or an array reference of +field names, and adds on an SQL C statement at the end. +This allows you to return data generated by the insert statement +(such as row IDs) without performing another C