From: Peter Rabbitson Date: Mon, 31 May 2010 22:26:39 +0000 (+0000) Subject: Majorly cleanup $rs->update/delete (no $rs-aware code should be in ::Storages) X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=be64931c710bcde7abe7334349b4c8a123645332;p=dbsrgits%2FDBIx-Class-Historic.git Majorly cleanup $rs->update/delete (no $rs-aware code should be in ::Storages) --- diff --git a/Changes b/Changes index 2b8aac3..85dc399 100644 --- a/Changes +++ b/Changes @@ -6,11 +6,17 @@ Revision history for DBIx::Class going directly through execute_for_fetch bypassing execute_array * Fixes + - Fix update()/delete() on complex resultsets to no longer fall back + to silly row-by-row deletion, construct a massive OR statement + instead - Fix SkipFirst and FirstSkip limit dialects (Informix and Firebird) - A number of corner case fixes of void context populate() with \[] * Misc - Codebase is now trailing-whitespace-free + - Cleanup of complex resultset update/delete oprations - storage + specific code moved back to ResultSet and replaced by checks + of storage capabilities 0.08196 2011-11-29 05:35 (UTC) * Fixes diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm index 551427d..c8c89c5 100644 --- a/lib/DBIx/Class/ResultSet.pm +++ b/lib/DBIx/Class/ResultSet.pm @@ -1737,35 +1737,116 @@ sub first { sub _rs_update_delete { my ($self, $op, $values) = @_; + my $cond = $self->{cond}; my $rsrc = $self->result_source; + my $storage = $rsrc->schema->storage; + + my $attrs = { %{$self->_resolved_attrs} }; + + # "needs" is a strong word here - if the subquery is part of an IN clause - no point of + # even adding the group_by. It will really be used only when composing a poor-man's + # multicolumn-IN equivalent OR set + my $needs_group_by_subq = defined $attrs->{group_by}; + + # simplify the joinmap and maybe decide if a grouping (and thus subquery) is necessary + my $relation_classifications; + if (ref($attrs->{from}) eq 'ARRAY') { + $attrs->{from} = $storage->_prune_unused_joins ($attrs->{from}, $attrs->{select}, $cond, $attrs); + + $relation_classifications = $storage->_resolve_aliastypes_from_select_args ( + [ @{$attrs->{from}}[1 .. $#{$attrs->{from}}] ], + $attrs->{select}, + $cond, + $attrs + ) unless $needs_group_by_subq; # we already know we need a group, no point of resolving them + } + else { + $needs_group_by_subq ||= 1; # if {from} is unparseable assume the worst + } - my $needs_group_by_subq = $self->_has_resolved_attr (qw/collapse group_by -join/); - my $needs_subq = $needs_group_by_subq || $self->_has_resolved_attr(qw/rows offset/); + $needs_group_by_subq ||= exists $relation_classifications->{multiplying}; - if ($needs_group_by_subq or $needs_subq) { + # if no subquery - life is easy-ish + unless ( + $needs_group_by_subq + or + keys %$relation_classifications # if any joins at all - need to wrap a subq + or + $self->_has_resolved_attr(qw/rows offset/) # limits call for a subq + ) { + # Most databases do not allow aliasing of tables in UPDATE/DELETE. Thus + # a condition containing 'me' or other table prefixes will not work + # at all. What this code tries to do (badly) is to generate a condition + # with the qualifiers removed, by exploiting the quote mechanism of sqla + # + # this is atrocious and should be replaced by normal sqla introspection + # one sunny day + my ($sql, @bind) = do { + my $sqla = $rsrc->storage->sql_maker; + local $sqla->{_dequalify_idents} = 1; + $sqla->_recurse_where($self->{cond}); + } if $self->{cond}; + + return $rsrc->storage->$op( + $rsrc, + $op eq 'update' ? $values : (), + $self->{cond} ? \[$sql, @bind] : (), + ); + } - # make a new $rs selecting only the PKs (that's all we really need) - my $attrs = $self->_resolved_attrs_copy; + # we got this far - means it is time to wrap a subquery + my $pcols = [ $rsrc->_pri_cols ]; + my $existing_group_by = delete $attrs->{group_by}; + # make a new $rs selecting only the PKs (that's all we really need for the subq) + delete $attrs->{$_} for qw/collapse _collapse_order_by select _prefetch_selector_range as/; + $attrs->{columns} = [ map { "$attrs->{alias}.$_" } @$pcols ]; + $attrs->{group_by} = \ ''; # FIXME - this is an evil hack, it causes the optimiser to kick in and throw away the LEFT joins + my $subrs = (ref $self)->new($rsrc, $attrs); - delete $attrs->{$_} for qw/collapse _collapse_order_by select _prefetch_selector_range as/; - $attrs->{columns} = [ map { "$attrs->{alias}.$_" } ($self->result_source->_pri_cols) ]; + if (@$pcols == 1) { + return $storage->$op ( + $rsrc, + $op eq 'update' ? $values : (), + { $pcols->[0] => { -in => $subrs->as_query } }, + ); + } + elsif ($storage->_use_multicolumn_in) { + # This is hideously ugly, but SQLA does not understand multicol IN expressions + my $sql_maker = $storage->sql_maker; + my ($sql, @bind) = @${$subrs->as_query}; + $sql = sprintf ('(%s) IN %s', # the as_query already comes with a set of parenthesis + join (', ', map { $sql_maker->_quote ($_) } @$pcols), + $sql, + ); + return $storage->$op ( + $rsrc, + $op eq 'update' ? $values : (), + \[$sql, @bind], + ); + } + else { + # if all else fails - get all primary keys and operate over a ORed set + # wrap in a transaction for consistency + # this is where the group_by starts to matter + my $subq_group_by; if ($needs_group_by_subq) { - # make sure no group_by was supplied, or if there is one - make sure it matches - # the columns compiled above perfectly. Anything else can not be sanely executed - # on most databases so croak right then and there + $subq_group_by = $attrs->{columns}; - if (my $g = $attrs->{group_by}) { + # make sure if there is a supplied group_by it matches the columns compiled above + # perfectly. Anything else can not be sanely executed on most databases so croak + # right then and there + if ($existing_group_by) { my @current_group_by = map { $_ =~ /\./ ? $_ : "$attrs->{alias}.$_" } - @$g + @$existing_group_by ; if ( join ("\x00", sort @current_group_by) ne - join ("\x00", sort @{$attrs->{columns}} ) + join ("\x00", sort @$subq_group_by ) ) { $self->throw_exception ( "You have just attempted a $op operation on a resultset which does group_by" @@ -1776,33 +1857,27 @@ sub _rs_update_delete { ); } } - else { - $attrs->{group_by} = $attrs->{columns}; - } } - my $subrs = (ref $self)->new($rsrc, $attrs); - return $self->result_source->storage->_subq_update_delete($subrs, $op, $values); - } - else { - # Most databases do not allow aliasing of tables in UPDATE/DELETE. Thus - # a condition containing 'me' or other table prefixes will not work - # at all. What this code tries to do (badly) is to generate a condition - # with the qualifiers removed, by exploiting the quote mechanism of sqla - # - # this is atrocious and should be replaced by normal sqla introspection - # one sunny day - my ($sql, @bind) = do { - my $sqla = $rsrc->storage->sql_maker; - local $sqla->{_dequalify_idents} = 1; - $sqla->_recurse_where($self->{cond}); - } if $self->{cond}; + my $guard = $storage->txn_scope_guard; - return $rsrc->storage->$op( + my @op_condition; + for my $row ($subrs->search({}, { group_by => $subq_group_by })->cursor->all) { + push @op_condition, { map + { $pcols->[$_] => $row->[$_] } + (0 .. $#$pcols) + }; + } + + my $res = $storage->$op ( $rsrc, $op eq 'update' ? $values : (), - $self->{cond} ? \[$sql, @bind] : (), + \@op_condition, ); + + $guard->commit; + + return $res; } } diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm index e9fab0b..8abcc6e 100644 --- a/lib/DBIx/Class/Storage/DBI.pm +++ b/lib/DBIx/Class/Storage/DBI.pm @@ -62,8 +62,12 @@ __PACKAGE__->mk_group_accessors('simple' => @storage_options); my @capabilities = (qw/ insert_returning insert_returning_bound + + multicolumn_in + placeholders typeless_placeholders + join_optimizer /); __PACKAGE__->mk_group_accessors( dbms_capability => map { "_supports_$_" } @capabilities ); @@ -2093,103 +2097,6 @@ sub delete { shift->_execute('delete', @_); } -# We were sent here because the $rs contains a complex search -# which will require a subquery to select the correct rows -# (i.e. joined or limited resultsets, or non-introspectable conditions) -# -# Generating a single PK column subquery is trivial and supported -# by all RDBMS. However if we have a multicolumn PK, things get ugly. -# Look at _multipk_update_delete() -sub _subq_update_delete { - my $self = shift; - my ($rs, $op, $values) = @_; - - my $rsrc = $rs->result_source; - - # quick check if we got a sane rs on our hands - my @pcols = $rsrc->_pri_cols; - - my $sel = $rs->_resolved_attrs->{select}; - $sel = [ $sel ] unless ref $sel eq 'ARRAY'; - - if ( - join ("\x00", map { join '.', $rs->{attrs}{alias}, $_ } sort @pcols) - ne - join ("\x00", sort @$sel ) - ) { - $self->throw_exception ( - '_subq_update_delete can not be called on resultsets selecting columns other than the primary keys' - ); - } - - if (@pcols == 1) { - return $self->$op ( - $rsrc, - $op eq 'update' ? $values : (), - { $pcols[0] => { -in => $rs->as_query } }, - ); - } - - else { - return $self->_multipk_update_delete (@_); - } -} - -# ANSI SQL does not provide a reliable way to perform a multicol-PK -# resultset update/delete involving subqueries. So by default resort -# to simple (and inefficient) delete_all style per-row opearations, -# while allowing specific storages to override this with a faster -# implementation. -# -sub _multipk_update_delete { - return shift->_per_row_update_delete (@_); -} - -# This is the default loop used to delete/update rows for multi PK -# resultsets, and used by mysql exclusively (because it can't do anything -# else). -# -# We do not use $row->$op style queries, because resultset update/delete -# is not expected to cascade (this is what delete_all/update_all is for). -# -# There should be no race conditions as the entire operation is rolled -# in a transaction. -# -sub _per_row_update_delete { - my $self = shift; - my ($rs, $op, $values) = @_; - - my $rsrc = $rs->result_source; - my @pcols = $rsrc->_pri_cols; - - my $guard = $self->txn_scope_guard; - - # emulate the return value of $sth->execute for non-selects - my $row_cnt = '0E0'; - - my $subrs_cur = $rs->cursor; - my @all_pk = $subrs_cur->all; - for my $pks ( @all_pk) { - - my $cond; - for my $i (0.. $#pcols) { - $cond->{$pcols[$i]} = $pks->[$i]; - } - - $self->$op ( - $rsrc, - $op eq 'update' ? $values : (), - $cond, - ); - - $row_cnt++; - } - - $guard->commit; - - return $row_cnt; -} - sub _select { my $self = shift; $self->_execute($self->_select_args(@_)); diff --git a/lib/DBIx/Class/Storage/DBI/MultiColumnIn.pm b/lib/DBIx/Class/Storage/DBI/MultiColumnIn.pm deleted file mode 100644 index a0a5586..0000000 --- a/lib/DBIx/Class/Storage/DBI/MultiColumnIn.pm +++ /dev/null @@ -1,62 +0,0 @@ -package DBIx::Class::Storage::DBI::MultiColumnIn; - -use strict; -use warnings; - -use base 'DBIx::Class::Storage::DBI'; -use mro 'c3'; - -=head1 NAME - -DBIx::Class::Storage::DBI::MultiColumnIn - Storage component for RDBMS supporting multicolumn in clauses - -=head1 DESCRIPTION - -While ANSI SQL does not define a multicolumn in operator, many databases can -in fact understand WHERE (cola, colb) IN ( SELECT subcol_a, subcol_b ... ) -The storage class for any such RDBMS should inherit from this class, in order -to dramatically speed up update/delete operations on joined multipk resultsets. - -At this point the only overridden method is C<_multipk_update_delete()> - -=cut - -sub _multipk_update_delete { - my $self = shift; - my ($rs, $op, $values) = @_; - - my $rsrc = $rs->result_source; - my @pcols = $rsrc->_pri_cols; - my $attrs = $rs->_resolved_attrs; - - # naive check - this is an internal method after all, we should know what we are doing - $self->throw_exception ('Number of columns selected by supplied resultset does not match number of primary keys') - if ( ref $attrs->{select} ne 'ARRAY' or @{$attrs->{select}} != @pcols ); - - # This is hideously ugly, but SQLA does not understand multicol IN expressions - my $sqla = $self->_sql_maker; - my ($sql, @bind) = @${$rs->as_query}; - $sql = sprintf ('(%s) IN %s', # the as_query stuff is already enclosed in ()s - join (', ', map { $sqla->_quote ($_) } @pcols), - $sql, - ); - - return $self->$op ( - $rsrc, - $op eq 'update' ? $values : (), - \[$sql, @bind], - ); - -} - -=head1 AUTHORS - -See L - -=head1 LICENSE - -You may distribute this code under the same terms as Perl itself. - -=cut - -1; diff --git a/lib/DBIx/Class/Storage/DBI/Pg.pm b/lib/DBIx/Class/Storage/DBI/Pg.pm index 371f185..d38f84c 100644 --- a/lib/DBIx/Class/Storage/DBI/Pg.pm +++ b/lib/DBIx/Class/Storage/DBI/Pg.pm @@ -3,10 +3,7 @@ package DBIx::Class::Storage::DBI::Pg; use strict; use warnings; -use base qw/ - DBIx::Class::Storage::DBI::MultiColumnIn -/; -use mro 'c3'; +use base qw/DBIx::Class::Storage::DBI/; use Scope::Guard (); use Context::Preserve 'preserve_context'; @@ -17,6 +14,7 @@ use namespace::clean; __PACKAGE__->sql_limit_dialect ('LimitOffset'); __PACKAGE__->sql_quote_char ('"'); __PACKAGE__->datetime_parser_type ('DateTime::Format::Pg'); +__PACKAGE__->_use_multicolumn_in (1); sub _determine_supports_insert_returning { return shift->_server_info->{normalized_dbms_version} >= 8.002 diff --git a/lib/DBIx/Class/Storage/DBI/Replicated.pm b/lib/DBIx/Class/Storage/DBI/Replicated.pm index 7a9140a..5e02e7d 100644 --- a/lib/DBIx/Class/Storage/DBI/Replicated.pm +++ b/lib/DBIx/Class/Storage/DBI/Replicated.pm @@ -281,7 +281,6 @@ my $method_dispatch = { _prep_for_execute is_datatype_numeric _count_select - _subq_update_delete svp_rollback svp_begin svp_release @@ -301,12 +300,10 @@ my $method_dispatch = { _select_args _dbh_execute_for_fetch _sql_maker - _per_row_update_delete _dbh_execute_inserts_with_no_binds _select_args_to_query _gen_sql_bind _svp_generate_name - _multipk_update_delete _normalize_connect_info _parse_connect_do savepoints diff --git a/lib/DBIx/Class/Storage/DBI/mysql.pm b/lib/DBIx/Class/Storage/DBI/mysql.pm index 35ff42a..a0c0b52 100644 --- a/lib/DBIx/Class/Storage/DBI/mysql.pm +++ b/lib/DBIx/Class/Storage/DBI/mysql.pm @@ -3,16 +3,14 @@ package DBIx::Class::Storage::DBI::mysql; use strict; use warnings; -use base qw/ - DBIx::Class::Storage::DBI::MultiColumnIn - DBIx::Class::Storage::DBI -/; -use mro 'c3'; +use base qw/DBIx::Class::Storage::DBI/; __PACKAGE__->sql_maker_class('DBIx::Class::SQLMaker::MySQL'); __PACKAGE__->sql_limit_dialect ('LimitXY'); __PACKAGE__->sql_quote_char ('`'); +__PACKAGE__->_use_multicolumn_in (1); + sub with_deferred_fk_checks { my ($self, $sub) = @_; @@ -115,12 +113,6 @@ sub lag_behind_master { return shift->_get_dbh->selectrow_hashref('show slave status')->{Seconds_Behind_Master}; } -# MySql can not do subquery update/deletes, only way is slow per-row operations. -# This assumes you have set proper transaction isolation and use innodb. -sub _subq_update_delete { - return shift->_per_row_update_delete (@_); -} - 1; =head1 NAME diff --git a/t/71mysql.t b/t/71mysql.t index 3ed8493..c2318f8 100644 --- a/t/71mysql.t +++ b/t/71mysql.t @@ -299,8 +299,6 @@ NULLINSEARCH: { join => 'books', group_by => [ 'me.id', 'books.id' ] })->count(); }, 'count on grouped columns with the same name does not throw'); - - } ZEROINSEARCH: { diff --git a/t/resultset/update_delete.t b/t/resultset/update_delete.t index 4a9eab4..3de8bdb 100644 --- a/t/resultset/update_delete.t +++ b/t/resultset/update_delete.t @@ -8,7 +8,12 @@ use DBICTest; use DBIC::DebugObj; use DBIC::SqlMakerTest; -my $schema = DBICTest->init_schema(); +my $schema = DBICTest->init_schema; + +my ($sql, @bind); +my $debugobj = DBIC::DebugObj->new (\$sql, \@bind); +my $orig_debugobj = $schema->storage->debugobj; +my $orig_debug = $schema->storage->debug; my $tkfks = $schema->resultset('FourKeys_to_TwoKeys'); @@ -47,15 +52,54 @@ my $fks = $schema->resultset ('FourKeys') ->search ({ map { $_ => [1, 2] } qw/foo bar hello goodbye/}, { join => 'fourkeys_to_twokeys' }); is ($fks->count, 4, 'Joined FourKey count correct (2x2)'); + +$schema->storage->debugobj ($debugobj); +$schema->storage->debug (1); $fks->update ({ read_count => \ 'read_count + 1' }); -$_->discard_changes for ($fa, $fb); +$schema->storage->debugobj ($orig_debugobj); +$schema->storage->debug ($orig_debug); -is ($fa->read_count, 11, 'Update ran only once on joined resultset'); -is ($fb->read_count, 21, 'Update ran only once on joined resultset'); +is_same_sql_bind ( + $sql, + \@bind, + 'UPDATE fourkeys + SET read_count = read_count + 1 + WHERE ( bar = ? AND foo = ? AND goodbye = ? AND hello = ? ) OR ( bar = ? AND foo = ? AND goodbye = ? AND hello = ? )', + [ map { "'$_'" } ( (1) x 4, (2) x 4 ) ], + 'Correct update-SQL without multicolumn in support', +); +is ($fa->discard_changes->read_count, 11, 'Update ran only once on joined resultset'); +is ($fb->discard_changes->read_count, 21, 'Update ran only once on joined resultset'); + +# try the same sql with forced multicolumn in +$schema->storage->_use_multicolumn_in (1); +$schema->storage->debugobj ($debugobj); +$schema->storage->debug (1); +eval { $fks->update ({ read_count => \ 'read_count + 1' }) }; # this can't actually execute, we just need the "as_query" +$schema->storage->_use_multicolumn_in (undef); +$schema->storage->debugobj ($orig_debugobj); +$schema->storage->debug ($orig_debug); + +is_same_sql_bind ( + $sql, + \@bind, + 'UPDATE fourkeys + SET read_count = read_count + 1 + WHERE ( + (foo, bar, hello, goodbye) IN ( + SELECT me.foo, me.bar, me.hello, me.goodbye + FROM fourkeys me + WHERE ( bar = ? OR bar = ? ) AND ( foo = ? OR foo = ? ) AND ( goodbye = ? OR goodbye = ? ) AND ( hello = ? OR hello = ? ) + ) + ) + ', + [ map { "'$_'" } ( (1, 2) x 4 ) ], + 'Correct update-SQL with multicolumn in support', +); # -# Make sure multicolumn in or the equivalen functions correctly +# Make sure multicolumn in or the equivalent functions correctly # my $sub_rs = $tkfks->search ( @@ -81,7 +125,7 @@ throws_ok ( $sub_rs->search ( {}, { - group_by => [ reverse $sub_rs->result_source->primary_columns ], # reverse to make sure the PK-list comaprison works + group_by => [ reverse $sub_rs->result_source->primary_columns ], # reverse to make sure the PK-list comparison works }, )->update ({ pilot_sequence => \ 'pilot_sequence + 1' }); @@ -138,11 +182,7 @@ is ($tkfks->count, $tkfk_cnt -= 1, 'Only one row deleted'); # Make sure prefetch is properly stripped too # check with sql-equality, as sqlite will accept bad sql just fine -my ($sql, @bind); -my $orig_debugobj = $schema->storage->debugobj; -my $orig_debug = $schema->storage->debug; - -$schema->storage->debugobj (DBIC::DebugObj->new (\$sql, \@bind) ); +$schema->storage->debugobj ($debugobj); $schema->storage->debug (1); $schema->resultset('CD')->search( { year => { '!=' => 2010 } }, @@ -155,7 +195,7 @@ $schema->storage->debug ($orig_debug); is_same_sql_bind ( $sql, \@bind, - 'DELETE FROM cd WHERE ( cdid IN ( SELECT me.cdid FROM cd me WHERE ( year != ? ) GROUP BY me.cdid ) )', + 'DELETE FROM cd WHERE ( cdid IN ( SELECT me.cdid FROM cd me WHERE ( year != ? ) ) )', ["'2010'"], 'Update on prefetching resultset strips prefetch correctly' );