From: Peter Rabbitson <ribasushi@cpan.org>
Date: Mon, 31 May 2010 22:26:39 +0000 (+0000)
Subject: Majorly cleanup $rs->update/delete (no $rs-aware code should be in ::Storages)
X-Git-Tag: v0.08197~110
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=be64931c710bcde7abe7334349b4c8a123645332;p=dbsrgits%2FDBIx-Class.git

Majorly cleanup $rs->update/delete (no $rs-aware code should be in ::Storages)
---

diff --git a/Changes b/Changes
index 2b8aac3..85dc399 100644
--- a/Changes
+++ b/Changes
@@ -6,11 +6,17 @@ Revision history for DBIx::Class
           going directly through execute_for_fetch bypassing execute_array
 
     * Fixes
+        - Fix update()/delete() on complex resultsets to no longer fall back
+          to silly row-by-row deletion, construct a massive OR statement
+          instead
         - Fix SkipFirst and FirstSkip limit dialects (Informix and Firebird)
         - A number of corner case fixes of void context populate() with \[]
 
     * Misc
         - Codebase is now trailing-whitespace-free
+        - Cleanup of complex resultset update/delete oprations - storage
+          specific code moved back to ResultSet and replaced by checks
+          of storage capabilities
 
 0.08196 2011-11-29 05:35 (UTC)
     * Fixes
diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm
index 551427d..c8c89c5 100644
--- a/lib/DBIx/Class/ResultSet.pm
+++ b/lib/DBIx/Class/ResultSet.pm
@@ -1737,35 +1737,116 @@ sub first {
 sub _rs_update_delete {
   my ($self, $op, $values) = @_;
 
+  my $cond = $self->{cond};
   my $rsrc = $self->result_source;
+  my $storage = $rsrc->schema->storage;
+
+  my $attrs = { %{$self->_resolved_attrs} };
+
+  # "needs" is a strong word here - if the subquery is part of an IN clause - no point of
+  # even adding the group_by. It will really be used only when composing a poor-man's
+  # multicolumn-IN equivalent OR set
+  my $needs_group_by_subq = defined $attrs->{group_by};
+
+  # simplify the joinmap and maybe decide if a grouping (and thus subquery) is necessary
+  my $relation_classifications;
+  if (ref($attrs->{from}) eq 'ARRAY') {
+    $attrs->{from} = $storage->_prune_unused_joins ($attrs->{from}, $attrs->{select}, $cond, $attrs);
+
+    $relation_classifications = $storage->_resolve_aliastypes_from_select_args (
+      [ @{$attrs->{from}}[1 .. $#{$attrs->{from}}] ],
+      $attrs->{select},
+      $cond,
+      $attrs
+    ) unless $needs_group_by_subq;  # we already know we need a group, no point of resolving them
+  }
+  else {
+    $needs_group_by_subq ||= 1; # if {from} is unparseable assume the worst
+  }
 
-  my $needs_group_by_subq = $self->_has_resolved_attr (qw/collapse group_by -join/);
-  my $needs_subq = $needs_group_by_subq || $self->_has_resolved_attr(qw/rows offset/);
+  $needs_group_by_subq ||= exists $relation_classifications->{multiplying};
 
-  if ($needs_group_by_subq or $needs_subq) {
+  # if no subquery - life is easy-ish
+  unless (
+    $needs_group_by_subq
+      or
+    keys %$relation_classifications # if any joins at all - need to wrap a subq
+      or
+    $self->_has_resolved_attr(qw/rows offset/) # limits call for a subq
+  ) {
+    # Most databases do not allow aliasing of tables in UPDATE/DELETE. Thus
+    # a condition containing 'me' or other table prefixes will not work
+    # at all. What this code tries to do (badly) is to generate a condition
+    # with the qualifiers removed, by exploiting the quote mechanism of sqla
+    #
+    # this is atrocious and should be replaced by normal sqla introspection
+    # one sunny day
+    my ($sql, @bind) = do {
+      my $sqla = $rsrc->storage->sql_maker;
+      local $sqla->{_dequalify_idents} = 1;
+      $sqla->_recurse_where($self->{cond});
+    } if $self->{cond};
+
+    return $rsrc->storage->$op(
+      $rsrc,
+      $op eq 'update' ? $values : (),
+      $self->{cond} ? \[$sql, @bind] : (),
+    );
+  }
 
-    # make a new $rs selecting only the PKs (that's all we really need)
-    my $attrs = $self->_resolved_attrs_copy;
+  # we got this far - means it is time to wrap a subquery
+  my $pcols = [ $rsrc->_pri_cols ];
+  my $existing_group_by = delete $attrs->{group_by};
 
+  # make a new $rs selecting only the PKs (that's all we really need for the subq)
+  delete $attrs->{$_} for qw/collapse _collapse_order_by select _prefetch_selector_range as/;
+  $attrs->{columns} = [ map { "$attrs->{alias}.$_" } @$pcols ];
+  $attrs->{group_by} = \ '';  # FIXME - this is an evil hack, it causes the optimiser to kick in and throw away the LEFT joins
+  my $subrs = (ref $self)->new($rsrc, $attrs);
 
-    delete $attrs->{$_} for qw/collapse _collapse_order_by select _prefetch_selector_range as/;
-    $attrs->{columns} = [ map { "$attrs->{alias}.$_" } ($self->result_source->_pri_cols) ];
+  if (@$pcols == 1) {
+    return $storage->$op (
+      $rsrc,
+      $op eq 'update' ? $values : (),
+      { $pcols->[0] => { -in => $subrs->as_query } },
+    );
+  }
+  elsif ($storage->_use_multicolumn_in) {
+    # This is hideously ugly, but SQLA does not understand multicol IN expressions
+    my $sql_maker = $storage->sql_maker;
+    my ($sql, @bind) = @${$subrs->as_query};
+    $sql = sprintf ('(%s) IN %s', # the as_query already comes with a set of parenthesis
+      join (', ', map { $sql_maker->_quote ($_) } @$pcols),
+      $sql,
+    );
 
+    return $storage->$op (
+      $rsrc,
+      $op eq 'update' ? $values : (),
+      \[$sql, @bind],
+    );
+  }
+  else {
+    # if all else fails - get all primary keys and operate over a ORed set
+    # wrap in a transaction for consistency
+    # this is where the group_by starts to matter
+    my $subq_group_by;
     if ($needs_group_by_subq) {
-      # make sure no group_by was supplied, or if there is one - make sure it matches
-      # the columns compiled above perfectly. Anything else can not be sanely executed
-      # on most databases so croak right then and there
+      $subq_group_by = $attrs->{columns};
 
-      if (my $g = $attrs->{group_by}) {
+      # make sure if there is a supplied group_by it matches the columns compiled above
+      # perfectly. Anything else can not be sanely executed on most databases so croak
+      # right then and there
+      if ($existing_group_by) {
         my @current_group_by = map
           { $_ =~ /\./ ? $_ : "$attrs->{alias}.$_" }
-          @$g
+          @$existing_group_by
         ;
 
         if (
           join ("\x00", sort @current_group_by)
             ne
-          join ("\x00", sort @{$attrs->{columns}} )
+          join ("\x00", sort @$subq_group_by )
         ) {
           $self->throw_exception (
             "You have just attempted a $op operation on a resultset which does group_by"
@@ -1776,33 +1857,27 @@ sub _rs_update_delete {
           );
         }
       }
-      else {
-        $attrs->{group_by} = $attrs->{columns};
-      }
     }
 
-    my $subrs = (ref $self)->new($rsrc, $attrs);
-    return $self->result_source->storage->_subq_update_delete($subrs, $op, $values);
-  }
-  else {
-    # Most databases do not allow aliasing of tables in UPDATE/DELETE. Thus
-    # a condition containing 'me' or other table prefixes will not work
-    # at all. What this code tries to do (badly) is to generate a condition
-    # with the qualifiers removed, by exploiting the quote mechanism of sqla
-    #
-    # this is atrocious and should be replaced by normal sqla introspection
-    # one sunny day
-    my ($sql, @bind) = do {
-      my $sqla = $rsrc->storage->sql_maker;
-      local $sqla->{_dequalify_idents} = 1;
-      $sqla->_recurse_where($self->{cond});
-    } if $self->{cond};
+    my $guard = $storage->txn_scope_guard;
 
-    return $rsrc->storage->$op(
+    my @op_condition;
+    for my $row ($subrs->search({}, { group_by => $subq_group_by })->cursor->all) {
+      push @op_condition, { map
+        { $pcols->[$_] => $row->[$_] }
+        (0 .. $#$pcols)
+      };
+    }
+
+    my $res = $storage->$op (
       $rsrc,
       $op eq 'update' ? $values : (),
-      $self->{cond} ? \[$sql, @bind] : (),
+      \@op_condition,
     );
+
+    $guard->commit;
+
+    return $res;
   }
 }
 
diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm
index e9fab0b..8abcc6e 100644
--- a/lib/DBIx/Class/Storage/DBI.pm
+++ b/lib/DBIx/Class/Storage/DBI.pm
@@ -62,8 +62,12 @@ __PACKAGE__->mk_group_accessors('simple' => @storage_options);
 my @capabilities = (qw/
   insert_returning
   insert_returning_bound
+
+  multicolumn_in
+
   placeholders
   typeless_placeholders
+
   join_optimizer
 /);
 __PACKAGE__->mk_group_accessors( dbms_capability => map { "_supports_$_" } @capabilities );
@@ -2093,103 +2097,6 @@ sub delete {
   shift->_execute('delete', @_);
 }
 
-# We were sent here because the $rs contains a complex search
-# which will require a subquery to select the correct rows
-# (i.e. joined or limited resultsets, or non-introspectable conditions)
-#
-# Generating a single PK column subquery is trivial and supported
-# by all RDBMS. However if we have a multicolumn PK, things get ugly.
-# Look at _multipk_update_delete()
-sub _subq_update_delete {
-  my $self = shift;
-  my ($rs, $op, $values) = @_;
-
-  my $rsrc = $rs->result_source;
-
-  # quick check if we got a sane rs on our hands
-  my @pcols = $rsrc->_pri_cols;
-
-  my $sel = $rs->_resolved_attrs->{select};
-  $sel = [ $sel ] unless ref $sel eq 'ARRAY';
-
-  if (
-      join ("\x00", map { join '.', $rs->{attrs}{alias}, $_ } sort @pcols)
-        ne
-      join ("\x00", sort @$sel )
-  ) {
-    $self->throw_exception (
-      '_subq_update_delete can not be called on resultsets selecting columns other than the primary keys'
-    );
-  }
-
-  if (@pcols == 1) {
-    return $self->$op (
-      $rsrc,
-      $op eq 'update' ? $values : (),
-      { $pcols[0] => { -in => $rs->as_query } },
-    );
-  }
-
-  else {
-    return $self->_multipk_update_delete (@_);
-  }
-}
-
-# ANSI SQL does not provide a reliable way to perform a multicol-PK
-# resultset update/delete involving subqueries. So by default resort
-# to simple (and inefficient) delete_all style per-row opearations,
-# while allowing specific storages to override this with a faster
-# implementation.
-#
-sub _multipk_update_delete {
-  return shift->_per_row_update_delete (@_);
-}
-
-# This is the default loop used to delete/update rows for multi PK
-# resultsets, and used by mysql exclusively (because it can't do anything
-# else).
-#
-# We do not use $row->$op style queries, because resultset update/delete
-# is not expected to cascade (this is what delete_all/update_all is for).
-#
-# There should be no race conditions as the entire operation is rolled
-# in a transaction.
-#
-sub _per_row_update_delete {
-  my $self = shift;
-  my ($rs, $op, $values) = @_;
-
-  my $rsrc = $rs->result_source;
-  my @pcols = $rsrc->_pri_cols;
-
-  my $guard = $self->txn_scope_guard;
-
-  # emulate the return value of $sth->execute for non-selects
-  my $row_cnt = '0E0';
-
-  my $subrs_cur = $rs->cursor;
-  my @all_pk = $subrs_cur->all;
-  for my $pks ( @all_pk) {
-
-    my $cond;
-    for my $i (0.. $#pcols) {
-      $cond->{$pcols[$i]} = $pks->[$i];
-    }
-
-    $self->$op (
-      $rsrc,
-      $op eq 'update' ? $values : (),
-      $cond,
-    );
-
-    $row_cnt++;
-  }
-
-  $guard->commit;
-
-  return $row_cnt;
-}
-
 sub _select {
   my $self = shift;
   $self->_execute($self->_select_args(@_));
diff --git a/lib/DBIx/Class/Storage/DBI/MultiColumnIn.pm b/lib/DBIx/Class/Storage/DBI/MultiColumnIn.pm
deleted file mode 100644
index a0a5586..0000000
--- a/lib/DBIx/Class/Storage/DBI/MultiColumnIn.pm
+++ /dev/null
@@ -1,62 +0,0 @@
-package DBIx::Class::Storage::DBI::MultiColumnIn;
-
-use strict;
-use warnings;
-
-use base 'DBIx::Class::Storage::DBI';
-use mro 'c3';
-
-=head1 NAME
-
-DBIx::Class::Storage::DBI::MultiColumnIn - Storage component for RDBMS supporting multicolumn in clauses
-
-=head1 DESCRIPTION
-
-While ANSI SQL does not define a multicolumn in operator, many databases can
-in fact understand WHERE (cola, colb) IN ( SELECT subcol_a, subcol_b ... )
-The storage class for any such RDBMS should inherit from this class, in order
-to dramatically speed up update/delete operations on joined multipk resultsets.
-
-At this point the only overridden method is C<_multipk_update_delete()>
-
-=cut
-
-sub _multipk_update_delete {
-  my $self = shift;
-  my ($rs, $op, $values) = @_;
-
-  my $rsrc = $rs->result_source;
-  my @pcols = $rsrc->_pri_cols;
-  my $attrs = $rs->_resolved_attrs;
-
-  # naive check - this is an internal method after all, we should know what we are doing
-  $self->throw_exception ('Number of columns selected by supplied resultset does not match number of primary keys')
-    if ( ref $attrs->{select} ne 'ARRAY' or @{$attrs->{select}} != @pcols );
-
-  # This is hideously ugly, but SQLA does not understand multicol IN expressions
-  my $sqla = $self->_sql_maker;
-  my ($sql, @bind) = @${$rs->as_query};
-  $sql = sprintf ('(%s) IN %s',   # the as_query stuff is already enclosed in ()s
-    join (', ', map { $sqla->_quote ($_) } @pcols),
-    $sql,
-  );
-
-  return $self->$op (
-    $rsrc,
-    $op eq 'update' ? $values : (),
-    \[$sql, @bind],
-  );
-
-}
-
-=head1 AUTHORS
-
-See L<DBIx::Class/CONTRIBUTORS>
-
-=head1 LICENSE
-
-You may distribute this code under the same terms as Perl itself.
-
-=cut
-
-1;
diff --git a/lib/DBIx/Class/Storage/DBI/Pg.pm b/lib/DBIx/Class/Storage/DBI/Pg.pm
index 371f185..d38f84c 100644
--- a/lib/DBIx/Class/Storage/DBI/Pg.pm
+++ b/lib/DBIx/Class/Storage/DBI/Pg.pm
@@ -3,10 +3,7 @@ package DBIx::Class::Storage::DBI::Pg;
 use strict;
 use warnings;
 
-use base qw/
-    DBIx::Class::Storage::DBI::MultiColumnIn
-/;
-use mro 'c3';
+use base qw/DBIx::Class::Storage::DBI/;
 
 use Scope::Guard ();
 use Context::Preserve 'preserve_context';
@@ -17,6 +14,7 @@ use namespace::clean;
 __PACKAGE__->sql_limit_dialect ('LimitOffset');
 __PACKAGE__->sql_quote_char ('"');
 __PACKAGE__->datetime_parser_type ('DateTime::Format::Pg');
+__PACKAGE__->_use_multicolumn_in (1);
 
 sub _determine_supports_insert_returning {
   return shift->_server_info->{normalized_dbms_version} >= 8.002
diff --git a/lib/DBIx/Class/Storage/DBI/Replicated.pm b/lib/DBIx/Class/Storage/DBI/Replicated.pm
index 7a9140a..5e02e7d 100644
--- a/lib/DBIx/Class/Storage/DBI/Replicated.pm
+++ b/lib/DBIx/Class/Storage/DBI/Replicated.pm
@@ -281,7 +281,6 @@ my $method_dispatch = {
     _prep_for_execute
     is_datatype_numeric
     _count_select
-    _subq_update_delete
     svp_rollback
     svp_begin
     svp_release
@@ -301,12 +300,10 @@ my $method_dispatch = {
     _select_args
     _dbh_execute_for_fetch
     _sql_maker
-    _per_row_update_delete
     _dbh_execute_inserts_with_no_binds
     _select_args_to_query
     _gen_sql_bind
     _svp_generate_name
-    _multipk_update_delete
     _normalize_connect_info
     _parse_connect_do
     savepoints
diff --git a/lib/DBIx/Class/Storage/DBI/mysql.pm b/lib/DBIx/Class/Storage/DBI/mysql.pm
index 35ff42a..a0c0b52 100644
--- a/lib/DBIx/Class/Storage/DBI/mysql.pm
+++ b/lib/DBIx/Class/Storage/DBI/mysql.pm
@@ -3,16 +3,14 @@ package DBIx::Class::Storage::DBI::mysql;
 use strict;
 use warnings;
 
-use base qw/
-  DBIx::Class::Storage::DBI::MultiColumnIn
-  DBIx::Class::Storage::DBI
-/;
-use mro 'c3';
+use base qw/DBIx::Class::Storage::DBI/;
 
 __PACKAGE__->sql_maker_class('DBIx::Class::SQLMaker::MySQL');
 __PACKAGE__->sql_limit_dialect ('LimitXY');
 __PACKAGE__->sql_quote_char ('`');
 
+__PACKAGE__->_use_multicolumn_in (1);
+
 sub with_deferred_fk_checks {
   my ($self, $sub) = @_;
 
@@ -115,12 +113,6 @@ sub lag_behind_master {
     return shift->_get_dbh->selectrow_hashref('show slave status')->{Seconds_Behind_Master};
 }
 
-# MySql can not do subquery update/deletes, only way is slow per-row operations.
-# This assumes you have set proper transaction isolation and use innodb.
-sub _subq_update_delete {
-  return shift->_per_row_update_delete (@_);
-}
-
 1;
 
 =head1 NAME
diff --git a/t/71mysql.t b/t/71mysql.t
index 3ed8493..c2318f8 100644
--- a/t/71mysql.t
+++ b/t/71mysql.t
@@ -299,8 +299,6 @@ NULLINSEARCH: {
       join => 'books', group_by => [ 'me.id', 'books.id' ]
     })->count();
   }, 'count on grouped columns with the same name does not throw');
-
-
 }
 
 ZEROINSEARCH: {
diff --git a/t/resultset/update_delete.t b/t/resultset/update_delete.t
index 4a9eab4..3de8bdb 100644
--- a/t/resultset/update_delete.t
+++ b/t/resultset/update_delete.t
@@ -8,7 +8,12 @@ use DBICTest;
 use DBIC::DebugObj;
 use DBIC::SqlMakerTest;
 
-my $schema = DBICTest->init_schema();
+my $schema = DBICTest->init_schema;
+
+my ($sql, @bind);
+my $debugobj = DBIC::DebugObj->new (\$sql, \@bind);
+my $orig_debugobj = $schema->storage->debugobj;
+my $orig_debug = $schema->storage->debug;
 
 my $tkfks = $schema->resultset('FourKeys_to_TwoKeys');
 
@@ -47,15 +52,54 @@ my $fks = $schema->resultset ('FourKeys')
                   ->search ({ map { $_ => [1, 2] } qw/foo bar hello goodbye/}, { join => 'fourkeys_to_twokeys' });
 
 is ($fks->count, 4, 'Joined FourKey count correct (2x2)');
+
+$schema->storage->debugobj ($debugobj);
+$schema->storage->debug (1);
 $fks->update ({ read_count => \ 'read_count + 1' });
-$_->discard_changes for ($fa, $fb);
+$schema->storage->debugobj ($orig_debugobj);
+$schema->storage->debug ($orig_debug);
 
-is ($fa->read_count, 11, 'Update ran only once on joined resultset');
-is ($fb->read_count, 21, 'Update ran only once on joined resultset');
+is_same_sql_bind (
+  $sql,
+  \@bind,
+  'UPDATE fourkeys
+   SET read_count = read_count + 1
+   WHERE ( bar = ? AND foo = ? AND goodbye = ? AND hello = ? ) OR ( bar = ? AND foo = ? AND goodbye = ? AND hello = ? )',
+  [ map { "'$_'" } ( (1) x 4, (2) x 4 ) ],
+  'Correct update-SQL without multicolumn in support',
+);
 
+is ($fa->discard_changes->read_count, 11, 'Update ran only once on joined resultset');
+is ($fb->discard_changes->read_count, 21, 'Update ran only once on joined resultset');
+
+# try the same sql with forced multicolumn in
+$schema->storage->_use_multicolumn_in (1);
+$schema->storage->debugobj ($debugobj);
+$schema->storage->debug (1);
+eval { $fks->update ({ read_count => \ 'read_count + 1' }) }; # this can't actually execute, we just need the "as_query"
+$schema->storage->_use_multicolumn_in (undef);
+$schema->storage->debugobj ($orig_debugobj);
+$schema->storage->debug ($orig_debug);
+
+is_same_sql_bind (
+  $sql,
+  \@bind,
+  'UPDATE fourkeys
+    SET read_count = read_count + 1
+    WHERE (
+      (foo, bar, hello, goodbye) IN (
+        SELECT me.foo, me.bar, me.hello, me.goodbye
+          FROM fourkeys me
+        WHERE ( bar = ? OR bar = ? ) AND ( foo = ? OR foo = ? ) AND ( goodbye = ? OR goodbye = ? ) AND ( hello = ? OR hello = ? )
+      )
+    )
+  ',
+  [ map { "'$_'" } ( (1, 2) x 4 ) ],
+  'Correct update-SQL with multicolumn in support',
+);
 
 #
-# Make sure multicolumn in or the equivalen functions correctly
+# Make sure multicolumn in or the equivalent functions correctly
 #
 
 my $sub_rs = $tkfks->search (
@@ -81,7 +125,7 @@ throws_ok (
 $sub_rs->search (
   {},
   {
-    group_by => [ reverse $sub_rs->result_source->primary_columns ],     # reverse to make sure the PK-list comaprison works
+    group_by => [ reverse $sub_rs->result_source->primary_columns ],     # reverse to make sure the PK-list comparison works
   },
 )->update ({ pilot_sequence => \ 'pilot_sequence + 1' });
 
@@ -138,11 +182,7 @@ is ($tkfks->count, $tkfk_cnt -= 1, 'Only one row deleted');
 
 # Make sure prefetch is properly stripped too
 # check with sql-equality, as sqlite will accept bad sql just fine
-my ($sql, @bind);
-my $orig_debugobj = $schema->storage->debugobj;
-my $orig_debug = $schema->storage->debug;
-
-$schema->storage->debugobj (DBIC::DebugObj->new (\$sql, \@bind) );
+$schema->storage->debugobj ($debugobj);
 $schema->storage->debug (1);
 $schema->resultset('CD')->search(
   { year => { '!=' => 2010 } },
@@ -155,7 +195,7 @@ $schema->storage->debug ($orig_debug);
 is_same_sql_bind (
   $sql,
   \@bind,
-  'DELETE FROM cd WHERE ( cdid IN ( SELECT me.cdid FROM cd me WHERE ( year != ? ) GROUP BY me.cdid ) )',
+  'DELETE FROM cd WHERE ( cdid IN ( SELECT me.cdid FROM cd me WHERE ( year != ? ) ) )',
   ["'2010'"],
   'Update on prefetching resultset strips prefetch correctly'
 );