X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FSQL%2FAbstract.pm;h=eb50d9826d1a66c691476b5ac448442578ef0362;hb=049338a019b51c01fc7e10a7c0493fab4f3c70d5;hp=f6467905e8bc2cd10694ca48176d8e6b229ec037;hpb=b8db59b809f637a299f69e7718edc831b6192373;p=dbsrgits%2FSQL-Abstract.git diff --git a/lib/SQL/Abstract.pm b/lib/SQL/Abstract.pm index f646790..eb50d98 100644 --- a/lib/SQL/Abstract.pm +++ b/lib/SQL/Abstract.pm @@ -15,7 +15,7 @@ use Scalar::Util (); # GLOBALS #====================================================================== -our $VERSION = '1.68'; +our $VERSION = '1.72'; # This would confuse some packagers $VERSION = eval $VERSION if $VERSION =~ /_/; # numify for warning-free dev releases @@ -27,6 +27,8 @@ our $AUTOLOAD; my @BUILTIN_SPECIAL_OPS = ( {regex => qr/^ (?: not \s )? between $/ix, handler => '_where_field_BETWEEN'}, {regex => qr/^ (?: not \s )? in $/ix, handler => '_where_field_IN'}, + {regex => qr/^ func $/ix, handler => '_where_field_FUNC'}, + {regex => qr/^ op $/ix, handler => '_where_op_OP'}, ); # unaryish operators - key maps to handler @@ -36,6 +38,8 @@ my @BUILTIN_UNARY_OPS = ( { regex => qr/^ or (?: [_\s]? \d+ )? $/xi, handler => '_where_op_ANDOR' }, { regex => qr/^ nest (?: [_\s]? \d+ )? $/xi, handler => '_where_op_NEST' }, { regex => qr/^ (?: not \s )? bool $/xi, handler => '_where_op_BOOL' }, + { regex => qr/^ func $/ix, handler => '_where_op_FUNC' }, + { regex => qr/^ op $/ix, handler => '_where_op_OP' }, ); #====================================================================== @@ -93,16 +97,40 @@ sub new { # special operators $opt{special_ops} ||= []; + # regexes are applied in order, thus push after user-defines push @{$opt{special_ops}}, @BUILTIN_SPECIAL_OPS; # unary operators $opt{unary_ops} ||= []; push @{$opt{unary_ops}}, @BUILTIN_UNARY_OPS; + # rudimentary saniy-check for user supplied bits treated as functions/operators + # If a purported function matches this regular expression, an exception is thrown. + # Literal SQL is *NOT* subject to this check, only functions (and column names + # when quoting is not in effect) + + # FIXME + # need to guard against ()'s in column names too, but this will break tons of + # hacks... ideas anyone? + $opt{injection_guard} ||= qr/ + \; + | + ^ \s* go \s + /xmi; + return bless \%opt, $class; } +sub _assert_pass_injection_guard { + if ($_[1] =~ $_[0]->{injection_guard}) { + my $class = ref $_[0]; + puke "Possible SQL injection attempt '$_[1]'. If this is indeed a part of the " + . "desired SQL use literal SQL ( \'...' or \[ '...' ] ) or supply your own " + . "{injection_guard} attribute to ${class}->new()" + } +} + #====================================================================== # INSERT methods @@ -118,22 +146,26 @@ sub insert { my ($sql, @bind) = $self->$method($data); $sql = join " ", $self->_sqlcase('insert into'), $table, $sql; - if (my $ret = $options->{returning}) { - $sql .= $self->_insert_returning ($ret); + if ($options->{returning}) { + my ($s, @b) = $self->_insert_returning ($options); + $sql .= $s; + push @bind, @b; } return wantarray ? ($sql, @bind) : $sql; } sub _insert_returning { - my ($self, $fields) = @_; + my ($self, $options) = @_; + + my $f = $options->{returning}; - my $f = $self->_SWITCH_refkind($fields, { - ARRAYREF => sub {join ', ', map { $self->_quote($_) } @$fields;}, - SCALAR => sub {$self->_quote($fields)}, - SCALARREF => sub {$$fields}, + my $fieldlist = $self->_SWITCH_refkind($f, { + ARRAYREF => sub {join ', ', map { $self->_quote($_) } @$f;}, + SCALAR => sub {$self->_quote($f)}, + SCALARREF => sub {$$f}, }); - return join (' ', $self->_sqlcase(' returning'), $f); + return $self->_sqlcase(' returning ') . $fieldlist; } sub _insert_HASHREF { # explicit list of fields and then values @@ -280,7 +312,19 @@ sub update { }, SCALARREF => sub { # literal SQL without bind push @set, "$label = $$v"; - }, + }, + HASHREF => sub { + my ($op, $arg, @rest) = %$v; + + puke 'Operator calls in update must be in the form { -op => $arg }' + if (@rest or not $op =~ /^\-(.+)/); + + local $self->{_nested_func_lhs} = $k; + my ($sql, @bind) = $self->_where_unary_op ($1, $arg); + + push @set, "$label = $sql"; + push @all_bind, @bind; + }, SCALAR_or_UNDEF => sub { push @set, "$label = ?"; push @all_bind, $self->_bindtype($k, $v); @@ -471,29 +515,16 @@ sub _where_HASHREF { $op =~ s/^not_/NOT /i; $self->_debug("Unary OP(-$op) within hashref, recursing..."); - - my $op_entry = List::Util::first {$op =~ $_->{regex}} @{$self->{unary_ops}}; - if (my $handler = $op_entry->{handler}) { - if (not ref $handler) { - if ($op =~ s/ [_\s]? \d+ $//x ) { - belch 'Use of [and|or|nest]_N modifiers is deprecated and will be removed in SQLA v2.0. ' - . "You probably wanted ...-and => [ -$op => COND1, -$op => COND2 ... ]"; - } - $self->$handler ($op, $v); - } - elsif (ref $handler eq 'CODE') { - $handler->($self, $op, $v); - } - else { - puke "Illegal handler for operator $k - expecting a method name or a coderef"; - } - } - else { - $self->debug("Generic unary OP: $k - recursing as function"); - my ($s, @b) = $self->_where_func_generic ($op, $v); - $s = "($s)" unless (defined($self->{_nested_func_lhs}) && ($self->{_nested_func_lhs} eq $k)); # top level vs nested - ($s, @b); - } + my ($s, @b) = $self->_where_unary_op ($op, $v); + + # top level vs nested + # we assume that handled unary ops will take care of their ()s + $s = "($s)" unless ( + List::Util::first {$op =~ $_->{regex}} @{$self->{unary_ops}} + or + defined($self->{_nested_func_lhs}) && ($self->{_nested_func_lhs} eq $k) + ); + ($s, @b); } else { my $method = $self->_METHOD_FOR_refkind("_where_hashpair", $v); @@ -508,9 +539,31 @@ sub _where_HASHREF { return $self->_join_sql_clauses('and', \@sql_clauses, \@all_bind); } -sub _where_func_generic { +sub _where_unary_op { my ($self, $op, $rhs) = @_; + if (my $op_entry = List::Util::first {$op =~ $_->{regex}} @{$self->{unary_ops}}) { + my $handler = $op_entry->{handler}; + + if (not ref $handler) { + if ($op =~ s/ [_\s]? \d+ $//x ) { + belch 'Use of [and|or|nest]_N modifiers is deprecated and will be removed in SQLA v2.0. ' + . "You probably wanted ...-and => [ -$op => COND1, -$op => COND2 ... ]"; + } + return $self->$handler ($op, $rhs); + } + elsif (ref $handler eq 'CODE') { + return $handler->($self, $op, $rhs); + } + else { + puke "Illegal handler for operator $op - expecting a method name or a coderef"; + } + } + + $self->debug("Generic unary OP: $op - recursing as function"); + + $self->_assert_pass_injection_guard($op); + my ($sql, @bind) = $self->_SWITCH_refkind ($rhs, { SCALAR => sub { puke "Illegal use of top-level '$op'" @@ -549,15 +602,23 @@ sub _where_op_ANDOR { }, SCALARREF => sub { - puke "-$op => \\\$scalar not supported, use -nest => ..."; + puke "-$op => \\\$scalar makes little sense, use " . + ($op =~ /^or/i + ? '[ \$scalar, \%rest_of_conditions ] instead' + : '-and => [ \$scalar, \%rest_of_conditions ] instead' + ); }, ARRAYREFREF => sub { - puke "-$op => \\[..] not supported, use -nest => ..."; + puke "-$op => \\[...] makes little sense, use " . + ($op =~ /^or/i + ? '[ \[...], \%rest_of_conditions ] instead' + : '-and => [ \[...], \%rest_of_conditions ] instead' + ); }, SCALAR => sub { # permissively interpreted as SQL - puke "-$op => 'scalar' not supported, use -nest => \\'scalar'"; + puke "-$op => \$value makes little sense, use -bool => \$value instead"; }, UNDEF => sub { @@ -661,6 +722,8 @@ sub _where_hashpair_HASHREF { $op =~ s/^\s+|\s+$//g;# remove leading/trailing space $op =~ s/\s+/ /g; # compress whitespace + $self->_assert_pass_injection_guard($op); + # so that -not_foo works correctly $op =~ s/^not_/NOT /i; @@ -714,7 +777,7 @@ sub _where_hashpair_HASHREF { # retain for proper column type bind $self->{_nested_func_lhs} ||= $k; - ($sql, @bind) = $self->_where_func_generic ($op, $val); + ($sql, @bind) = $self->_where_unary_op ($op, $val); $sql = join (' ', $self->_convert($self->_quote($k)), @@ -871,7 +934,7 @@ sub _where_field_BETWEEN { foreach my $val (@$vals) { my ($sql, @bind) = $self->_SWITCH_refkind($val, { SCALAR => sub { - return ($placeholder, $val); + return ($placeholder, $self->_bindtype($k, $val) ); }, SCALARREF => sub { return $$val; @@ -886,7 +949,7 @@ sub _where_field_BETWEEN { puke ("Only simple { -func => arg } functions accepted as sub-arguments to BETWEEN") if (@rest or $func !~ /^ \- (.+)/x); local $self->{_nested_func_lhs} = $k; - $self->_where_func_generic ($1 => $arg); + $self->_where_unary_op ($1 => $arg); } }); push @all_sql, $sql; @@ -895,7 +958,7 @@ sub _where_field_BETWEEN { return ( (join $and, @all_sql), - $self->_bindtype($k, @all_bind), + @all_bind ); }, FALLBACK => sub { @@ -907,6 +970,113 @@ sub _where_field_BETWEEN { return ($sql, @bind) } +sub _where_field_FUNC { + my ($self, $k, $op, $vals) = @_; + + return $self->_where_generic_FUNC($k,$vals); +} + +sub _where_op_FUNC { + my ($self, $k, $vals) = @_; + + return $self->_where_generic_FUNC('', $vals); +} + +sub _where_generic_FUNC { + my ($self, $k, $vals) = @_; + + my $label = $self->_convert($self->_quote($k)); + my $placeholder = $self->_convert('?'); + + puke '-func must be an array' unless ref $vals eq 'ARRAY'; + puke 'first arg for -func must be a scalar' unless !ref $vals->[0]; + + my ($func,@rest_of_vals) = @$vals; + + $self->_assert_pass_injection_guard($func); + + my (@all_sql, @all_bind); + foreach my $val (@rest_of_vals) { + my ($sql, @bind) = $self->_SWITCH_refkind($val, { + SCALAR => sub { + return ($placeholder, $self->_bindtype($k, $val) ); + }, + SCALARREF => sub { + return $$val; + }, + ARRAYREFREF => sub { + my ($sql, @bind) = @$$val; + $self->_assert_bindval_matches_bindtype(@bind); + return ($sql, @bind); + }, + HASHREF => sub { + my $method = $self->_METHOD_FOR_refkind("_where_hashpair", $val); + $self->$method('', $val); + } + }); + push @all_sql, $sql; + push @all_bind, @bind; + } + + my ($clause, @bind) = ("$func(" . (join ",", @all_sql) . ")", @all_bind); + + my $sql = $k ? "( $label = $clause )" : "( $clause )"; + return ($sql, @bind) +} + +sub _where_op_OP { + my ($self) = @_; + + my ($k, $vals); + + if (@_ == 3) { + # $_[1] gets set to "op" + $vals = $_[2]; + $k = ''; + } elsif (@_ == 4) { + $k = $_[1]; + # $_[2] gets set to "op" + $vals = $_[3]; + } + + my $label = $self->_convert($self->_quote($k)); + my $placeholder = $self->_convert('?'); + + puke 'argument to -op must be an arrayref' unless ref $vals eq 'ARRAY'; + puke 'first arg for -op must be a scalar' unless !ref $vals->[0]; + + my ($op, @rest_of_vals) = @$vals; + + $self->_assert_pass_injection_guard($op); + + my (@all_sql, @all_bind); + foreach my $val (@rest_of_vals) { + my ($sql, @bind) = $self->_SWITCH_refkind($val, { + SCALAR => sub { + return ($placeholder, $self->_bindtype($k, $val) ); + }, + SCALARREF => sub { + return $$val; + }, + ARRAYREFREF => sub { + my ($sql, @bind) = @$$val; + $self->_assert_bindval_matches_bindtype(@bind); + return ($sql, @bind); + }, + HASHREF => sub { + my $method = $self->_METHOD_FOR_refkind("_where_hashpair", $val); + $self->$method('', $val); + } + }); + push @all_sql, $sql; + push @all_bind, @bind; + } + + my ($clause, @bind) = ((join " $op ", @all_sql), @all_bind); + + my $sql = $k ? "( $label = $clause )" : "( $clause )"; + return ($sql, @bind) +} sub _where_field_IN { my ($self, $k, $op, $vals) = @_; @@ -941,8 +1111,11 @@ sub _where_field_IN { puke ("Only simple { -func => arg } functions accepted as sub-arguments to IN") if (@rest or $func !~ /^ \- (.+)/x); local $self->{_nested_func_lhs} = $k; - $self->_where_func_generic ($1 => $arg); - } + $self->_where_unary_op ($1 => $arg); + }, + UNDEF => sub { + return $self->_sqlcase('null'); + }, }); push @all_sql, $sql; push @all_bind, @bind; @@ -1102,7 +1275,10 @@ sub _quote { return '' unless defined $_[1]; return ${$_[1]} if ref($_[1]) eq 'SCALAR'; - return $_[1] unless $_[0]->{quote_char}; + unless ($_[0]->{quote_char}) { + $_[0]->_assert_pass_injection_guard($_[1]); + return $_[1]; + } my $qref = ref $_[0]->{quote_char}; my ($l, $r); @@ -1726,6 +1902,20 @@ so that tables and column names can be individually quoted like this: SELECT `table`.`one_field` FROM `table` WHERE `table`.`other_field` = 1 +=item injection_guard + +A regular expression C that is applied to any C<-function> and unquoted +column name specified in a query structure. This is a safety mechanism to avoid +injection attacks when mishandling user input e.g.: + + my %condition_as_column_value_pairs = get_values_from_user(); + $sqla->select( ... , \%condition_as_column_value_pairs ); + +If the expression matches an exception is thrown. Note that literal SQL +supplied via C<\'...'> or C<\['...']> is B checked in any way. + +Defaults to checking for C<;> and the C keyword (TransactSQL) + =item array_datatypes When this option is true, arrayrefs in INSERT or UPDATE are @@ -2144,6 +2334,32 @@ Would give you: These are the two builtin "special operators"; but the list can be expanded : see section L below. +Another operator is C<-func> that allows you to call SQL functions with +arguments. It receives an array reference containing the function name +as the 0th argument and the other arguments being its parameters. For example: + + my %where = { + -func => ['substr', 'Hello', 50, 5], + }; + +Would give you: + + $stmt = "WHERE (substr(?,?,?))"; + @bind = ("Hello", 50, 5); + +Yet another operator is C<-op> that allows you to use SQL operators. It +receives an array reference containing the operator 0th argument and the other +arguments being its operands. For example: + + my %where = { + foo => { -op => ['+', \'bar', 50, 5] }, + }; + +Would give you: + + $stmt = "WHERE (foo = bar + ? + ?)"; + @bind = (50, 5); + =head2 Unary operators: bool If you wish to test against boolean columns or functions within your @@ -2201,41 +2417,25 @@ This data structure would create the following: @bind = ('nwiger', 'pending', 'dispatched', 'robot', 'unassigned'); -There is also a special C<-nest> -operator which adds an additional set of parens, to create a subquery. -For example, to get something like this: - - $stmt = "WHERE user = ? AND ( workhrs > ? OR geo = ? )"; - @bind = ('nwiger', '20', 'ASIA'); - -You would do: - - my %where = ( - user => 'nwiger', - -nest => [ workhrs => {'>', 20}, geo => 'ASIA' ], - ); - - -Finally, clauses in hashrefs or arrayrefs can be -prefixed with an C<-and> or C<-or> to change the logic -inside : +Clauses in hashrefs or arrayrefs can be prefixed with an C<-and> or C<-or> +to change the logic inside : my @where = ( -and => [ user => 'nwiger', - -nest => [ - -and => [workhrs => {'>', 20}, geo => 'ASIA' ], - -and => [workhrs => {'<', 50}, geo => 'EURO' ] + [ + -and => [ workhrs => {'>', 20}, geo => 'ASIA' ], + -or => { workhrs => {'<', 50}, geo => 'EURO' }, ], ], ); That would yield: - WHERE ( user = ? AND - ( ( workhrs > ? AND geo = ? ) - OR ( workhrs < ? AND geo = ? ) ) ) - + WHERE ( user = ? AND ( + ( workhrs > ? AND geo = ? ) + OR ( workhrs < ? OR geo = ? ) + ) ) =head2 Algebraic inconsistency, for historical reasons @@ -2387,10 +2587,10 @@ hash, like an EXISTS subquery : my ($sub_stmt, @sub_bind) = $sql->select("t1", "*", {c1 => 1, c2 => \"> t0.c0"}); - my %where = ( + my %where = ( -and => [ foo => 1234, - -nest => \["EXISTS ($sub_stmt)" => @sub_bind], - ); + \["EXISTS ($sub_stmt)" => @sub_bind], + ]); which yields @@ -2406,15 +2606,6 @@ Writing C<< c2 => {">" => "t0.c0"} >> would have generated C<< c2 > ? >> with bind value C<"t0.c0"> ... not exactly what we wanted here. -Another use of the subquery technique is when some SQL clauses need -parentheses, as it often occurs with some proprietary SQL extensions -like for example fulltext expressions, geospatial expressions, -NATIVE clauses, etc. Here is an example of a fulltext query in MySQL : - - my %where = ( - -nest => \["MATCH (col1, col2) AGAINST (?)" => qw/apples/] - ); - Finally, here is an example where a subquery is used for expressing unary negation: @@ -2423,7 +2614,7 @@ for expressing unary negation: $sub_stmt =~ s/^ where //i; # don't want "WHERE" in the subclause my %where = ( lname => {like => '%son%'}, - -nest => \["NOT ($sub_stmt)" => @sub_bind], + \["NOT ($sub_stmt)" => @sub_bind], ); This yields @@ -2682,6 +2873,15 @@ a fast interface to returning and formatting data. I frequently use these three modules together to write complex database query apps in under 50 lines. +=head1 REPO + +=over + +=item * gitweb: L + +=item * git: L + +=back =head1 CHANGES @@ -2760,7 +2960,7 @@ so I have no idea who they are! But the people I do know are: Mike Fragassi (enhancements to "BETWEEN" and "LIKE") Dan Kubb (support for "quote_char" and "name_sep") Guillermo Roditi (patch to cleanup "IN" and "BETWEEN", fix and tests for _order_by) - Laurent Dami (internal refactoring, multiple -nest, extensible list of special operators, literal SQL) + Laurent Dami (internal refactoring, extensible list of special operators, literal SQL) Norbert Buchmuller (support for literal SQL in hashpair, misc. fixes & tests) Peter Rabbitson (rewrite of SQLA::Test, misc. fixes & tests) Oliver Charles (support for "RETURNING" after "INSERT")