From: Peter Rabbitson <ribasushi@cpan.org>
Date: Sat, 16 May 2009 07:35:47 +0000 (+0000)
Subject: Merge 'trunk' into 'joined_count'
X-Git-Tag: v0.08103~64^2~7
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5597834e4341123c5d6782ec8c0f66b4e7934e1d;hp=d6191de1a556525075eae5803a90f9fe5a05b5ad;p=dbsrgits%2FDBIx-Class.git

Merge 'trunk' into 'joined_count'

r6264@Thesaurus (orig r6263):  ribasushi | 2009-05-14 15:40:19 +0200
 r6121@Thesaurus (orig r6120):  caelum | 2009-05-04 17:13:22 +0200
 Making new branch for storage tweaking
 r6122@Thesaurus (orig r6121):  caelum | 2009-05-04 20:07:47 +0200
 support hashrefs for connect_replicants
 r6123@Thesaurus (orig r6122):  caelum | 2009-05-04 23:07:43 +0200
 ::Replicated - test hashref for connect_replicants and croak on coderef, switch to MX::Types, make test less noisy
 r6143@Thesaurus (orig r6142):  caelum | 2009-05-06 05:13:56 +0200
 fix ::DBI::Replicated::all_storages
 r6144@Thesaurus (orig r6143):  caelum | 2009-05-06 05:25:04 +0200
 Replicated - fixup types and namespace::clean
 r6147@Thesaurus (orig r6146):  caelum | 2009-05-06 15:29:39 +0200
 ::DBI:Replicated - merge connect_info from master to replicants
 r6184@Thesaurus (orig r6183):  caelum | 2009-05-08 18:08:29 +0200
 support ::DBI::Replicated opts in connect_info
 r6190@Thesaurus (orig r6189):  caelum | 2009-05-09 05:31:15 +0200
 ::DBI::Replicated - add master_read_weight to ::Random balancer_type
 r6191@Thesaurus (orig r6190):  caelum | 2009-05-09 12:50:25 +0200
 ::DBI::Replicated - fix fallback to master, test for the warning, other cleanups
 r6193@Thesaurus (orig r6192):  caelum | 2009-05-09 13:52:52 +0200
 updated Changes
 r6194@Thesaurus (orig r6193):  caelum | 2009-05-09 14:21:44 +0200
 ::DBI::Replicated - don't build pool/balancer from connect_info unless necessary

r6268@Thesaurus (orig r6267):  caelum | 2009-05-15 04:04:12 +0200
minor replication changes - use a real hash merge, clarify master_read_weight, really done with this now.
r6272@Thesaurus (orig r6271):  abraxxa | 2009-05-15 13:45:54 +0200
added Static sub-classing DBIx::Class result classes section to the cookbook

r6277@Thesaurus (orig r6276):  ribasushi | 2009-05-16 00:46:00 +0200
Optimize some Ordered.pm code
r6278@Thesaurus (orig r6277):  ribasushi | 2009-05-16 08:02:18 +0200
Cleanup tests
r6280@Thesaurus (orig r6279):  ribasushi | 2009-05-16 09:30:05 +0200
Not sure what this part of the test is for, but it breaks custom resultsets, and the test passes without it. Removing as a possible remnant of an ancient civilization
r6281@Thesaurus (orig r6280):  ribasushi | 2009-05-16 09:33:24 +0200
Add default resultclass/resultsetclass to the entire test schema, as I am tired of typing extra shit for debugging purposes
r6282@Thesaurus (orig r6281):  ribasushi | 2009-05-16 09:34:21 +0200
Now we can do diag $rs->hri_dump, ain't that nice
---

diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm
index e21feeb..079e0e4 100644
--- a/lib/DBIx/Class/ResultSet.pm
+++ b/lib/DBIx/Class/ResultSet.pm
@@ -1150,11 +1150,45 @@ on the resultset and counts the results of that.
 
 =cut
 
+my @count_via_subq_attrs = qw/join seen_join group_by/;
 sub count {
   my $self = shift;
   return $self->search(@_)->count if @_ and defined $_[0];
   return scalar @{ $self->get_cache } if $self->get_cache;
-  my $count = $self->_count;
+
+  my @check_attrs = @count_via_subq_attrs;
+
+  # if we are not paged - we are simply asking for a limit
+  if (not $self->{attrs}{page} and not $self->{attrs}{software_limit}) {
+    push @check_attrs, qw/rows offset/;
+  }
+
+  return $self->_has_attr (@check_attrs)
+    ? $self->_count_subq
+    : $self->_count_simple
+}
+
+sub _count_subq {
+  my $self = shift;
+
+  my $attrs = { %{$self->_resolved_attrs} };
+
+  my $select_cols = $attrs->{group_by} || [ map { "$attrs->{alias}.$_" } ($self->result_source->primary_columns) ];
+  $attrs->{from} = [{
+    count_subq => $self->search ({}, { columns => $select_cols, group_by => $select_cols })
+                         ->as_query
+  }];
+
+  # the subquery above will integrate everything, including 'where' and any pagers
+  delete $attrs->{$_} for (@count_via_subq_attrs, qw/where rows offset pager page/ );
+
+  return $self->__count ($attrs);
+}
+
+sub _count_simple {
+  my $self = shift;
+
+  my $count = $self->__count;
   return 0 unless $count;
 
   # need to take offset from resolved attrs
@@ -1166,26 +1200,20 @@ sub count {
   return $count;
 }
 
-sub _count { # Separated out so pager can get the full count
-  my $self = shift;
-  my $attrs = { %{$self->_resolved_attrs} };
-
-  if (my $group_by = $attrs->{group_by}) {
-    delete $attrs->{order_by};
+sub __count {
+  my ($self, $attrs) = @_;
 
-    $attrs->{select} = $group_by; 
-    $attrs->{from} = [ { 'mesub' => (ref $self)->new($self->result_source, $attrs)->cursor->as_query } ];
-    delete $attrs->{where};
-  }
+  $attrs ||= { %{$self->{attrs}} };
 
   $attrs->{select} = { count => '*' };
   $attrs->{as} = [qw/count/];
 
-  # offset, order by, group by, where and page are not needed to count. record_filter is cdbi
-  delete $attrs->{$_} for qw/rows offset order_by group_by page pager record_filter/;
+  # take off any pagers, record_filter is cdbi, and no point of ordering a count
+  delete $attrs->{$_} for qw/rows offset page pager order_by record_filter/;
 
   my $tmp_rs = (ref $self)->new($self->result_source, $attrs);
   my ($count) = $tmp_rs->cursor->next;
+
   return $count;
 }
 
@@ -1298,6 +1326,18 @@ sub first {
   return $_[0]->reset->next;
 }
 
+
+# _update_delete_via_subq
+#
+# Presence of some rs attributes requires a subquery to reliably 
+# update/deletre
+#
+
+sub _update_delete_via_subq {
+  return $_[0]->_has_attr (qw/join seen_join group_by row offset page/);
+}
+
+
 # _cond_for_update_delete
 #
 # update/delete require the condition to be modified to handle
@@ -1312,18 +1352,6 @@ sub _cond_for_update_delete {
   # No-op. No condition, we're updating/deleting everything
   return $cond unless ref $full_cond;
 
-  # Some attributes when present require a subquery
-  # This might not work on some database (mysql), but...
-  # it won't work without the subquery either so who cares
-  if (grep { defined $self->{attrs}{$_} } qw/join seen_join from rows group_by/) {
-
-    foreach my $pk ($self->result_source->primary_columns) {
-      $cond->{$pk} = { IN => $self->get_column($pk)->as_query };
-    }
-
-    return $cond;
-  }
-
   if (ref $full_cond eq 'ARRAY') {
     $cond = [
       map {
@@ -1363,7 +1391,7 @@ sub _cond_for_update_delete {
   else {
     $self->throw_exception("Can't update/delete on resultset with condition unless hash or array");
   }
- 
+
   return $cond;
 }
 
@@ -1386,11 +1414,16 @@ if no records were updated; exact type of success value is storage-dependent.
 
 sub update {
   my ($self, $values) = @_;
-  $self->throw_exception("Values for update must be a hash")
+  $self->throw_exception('Values for update must be a hash')
     unless ref $values eq 'HASH';
 
+  # rs operations with subqueries are Storage dependent - delegate
+  if ($self->_update_delete_via_subq) {
+    return $self->result_source->storage->subq_update_delete($self, 'update', $values);
+  }
+
   my $cond = $self->_cond_for_update_delete;
-  
+
   return $self->result_source->storage->update(
     $self->result_source, $values, $cond
   );
@@ -1413,7 +1446,7 @@ will run DBIC cascade triggers, while L</update> will not.
 
 sub update_all {
   my ($self, $values) = @_;
-  $self->throw_exception("Values for update must be a hash")
+  $self->throw_exception('Values for update_all must be a hash')
     unless ref $values eq 'HASH';
   foreach my $obj ($self->all) {
     $obj->set_columns($values)->update;
@@ -1444,9 +1477,14 @@ need to respecify your query in a way that can be expressed without a join.
 =cut
 
 sub delete {
-  my ($self) = @_;
-  $self->throw_exception("Delete should not be passed any arguments")
-    if $_[1];
+  my $self = shift;
+  $self->throw_exception('delete does not accept any arguments')
+    if @_;
+
+  # rs operations with subqueries are Storage dependent - delegate
+  if ($self->_update_delete_via_subq) {
+    return $self->result_source->storage->subq_update_delete($self, 'delete');
+  }
 
   my $cond = $self->_cond_for_update_delete;
 
@@ -1470,7 +1508,10 @@ will run DBIC cascade triggers, while L</delete> will not.
 =cut
 
 sub delete_all {
-  my ($self) = @_;
+  my $self = shift;
+  $self->throw_exception('delete_all does not accept any arguments')
+    if @_;
+
   $_->delete for $self->all;
   return 1;
 }
@@ -1672,7 +1713,7 @@ sub pager {
     unless $self->{attrs}{page};
   $attrs->{rows} ||= 10;
   return $self->{pager} ||= Data::Page->new(
-    $self->_count, $attrs->{rows}, $self->{attrs}{page});
+    $self->__count, $attrs->{rows}, $self->{attrs}{page});
 }
 
 =head2 page
@@ -1776,6 +1817,37 @@ sub _is_deterministic_value {
   return 0;
 }
 
+# _has_attr
+#
+# determines if the resultset defines at least one
+# of the attributes supplied
+#
+# used to determine if a subquery is neccessary
+
+sub _has_attr {
+  my ($self, @attr_names) = @_;
+
+  my $attrs = $self->_resolved_attrs;
+
+  my $join_check_req;
+
+  for my $n (@attr_names) {
+    return 1 if defined $attrs->{$n};
+    ++$join_check_req if $n =~ /join/;
+  }
+
+  # a join can be expressed as a multi-level from
+  return 1 if (
+    $join_check_req
+      and
+    ref $attrs->{from} eq 'ARRAY'
+      and
+    @{$attrs->{from}} > 1 
+  );
+
+  return 0;
+}
+
 # _collapse_cond
 #
 # Recursively collapse the condition.
diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm
index 8df1894..8eb67fa 100644
--- a/lib/DBIx/Class/Storage/DBI.pm
+++ b/lib/DBIx/Class/Storage/DBI.pm
@@ -1051,6 +1051,85 @@ sub delete {
   return $self->_execute('delete' => [], $source, $bind_attrs, @_);
 }
 
+# We were sent here because the $rs contains a complex search
+# which will require a subquery to select the correct rows
+# (i.e. joined or limited resultsets)
+#
+# Genarating a single PK column subquery is trivial and supported
+# by all RDBMS. However if we have a multicolumn PK, things get ugly.
+# Look at multipk_update_delete()
+sub subq_update_delete {
+  my $self = shift;
+  my ($rs, $op, $values) = @_;
+
+  if ($rs->result_source->primary_columns == 1) {
+    return $self->_onepk_update_delete (@_);
+  }
+  else {
+    return $self->_multipk_update_delete (@_);
+  }
+}
+
+# Generally a single PK resultset operation is trivially expressed
+# with PK IN (subquery). However some databases (mysql) do not support
+# modification of a table mentioned in the subselect. This method
+# should be overriden in the appropriate storage class to be smarter
+# in such situations
+sub _onepk_update_delete {
+
+  my $self = shift;
+  my ($rs, $op, $values) = @_;
+
+  my $rsrc = $rs->result_source;
+  my @pcols = $rsrc->primary_columns;
+
+  return $self->$op (
+    $rsrc,
+    $op eq 'update' ? $values : (),
+    { $pcols[0] => { -in => $rs->get_column ($pcols[0])->as_query } },
+  );
+}
+
+# ANSI SQL does not provide a reliable way to perform a multicol-PK
+# resultset update/delete involving subqueries. So resort to simple
+# (and inefficient) delete_all style per-row opearations, while allowing
+# specific storages to override this with a faster implementation.
+#
+# We do not use $row->$op style queries, because resultset update/delete
+# is not expected to cascade (this is what delete_all/update_all is for).
+#
+# There should be no race conditions as the entire operation is rolled
+# in a transaction.
+sub _multipk_update_delete {
+  my $self = shift;
+  my ($rs, $op, $values) = @_;
+
+  my $rsrc = $rs->result_source;
+  my @pcols = $rsrc->primary_columns;
+
+  my $guard = $self->txn_scope_guard;
+
+  my $subrs_cur = $rs->search ({}, { columns => \@pcols })->cursor;
+  while (my @pks = $subrs_cur->next) {
+
+    my $cond;
+    for my $i (0.. $#pcols) {
+      $cond->{$pcols[$i]} = $pks[$i];
+    }
+
+    $self->$op (
+      $rsrc,
+      $op eq 'update' ? $values : (),
+      $cond,
+    );
+  }
+
+  $guard->commit;
+
+  return 1;
+}
+
+
 sub _select {
   my $self = shift;
   my $sql_maker = $self->sql_maker;
diff --git a/t/19quotes.t b/t/19quotes.t
index 622eefb..75b4668 100644
--- a/t/19quotes.t
+++ b/t/19quotes.t
@@ -35,9 +35,12 @@ $rs = $schema->resultset('CD')->search(
            { join => 'artist' });
 eval { $rs->count };
 is_same_sql_bind(
-  $sql, \@bind,
-  "SELECT COUNT( * ) FROM `cd` `me`  JOIN `artist` `artist` ON ( `artist`.`artistid` = `me`.`artist` ) WHERE ( `artist`.`name` = ? AND `me`.`year` = ? )", ["'Caterwauler McCrae'", "'2001'"],
-  'got correct SQL for count query with quoting'
+  $sql,
+  \@bind,
+  "SELECT COUNT( * ) FROM (SELECT `me`.`cdid` FROM `cd` `me`  JOIN `artist` `artist` ON `artist`.`artistid` = `me`.`artist` WHERE ( ( `artist`.`name` = ? AND `me`.`year` = ? ) ) GROUP BY `me`.`cdid`) `count_subq`",
+  ["'Caterwauler McCrae'", "'2001'"],
+
+  'got correct SQL for joined count query with quoting'
 );
 
 my $order = 'year DESC';
@@ -59,8 +62,10 @@ $rs = $schema->resultset('CD')->search(
            { join => 'artist' });
 eval { $rs->count };
 is_same_sql_bind(
-  $sql, \@bind,
-  "SELECT COUNT( * ) FROM [cd] [me]  JOIN [artist] [artist] ON ( [artist].[artistid] = [me].[artist] ) WHERE ( [artist].[name] = ? AND [me].[year] = ? )", ["'Caterwauler McCrae'", "'2001'"],
+  $sql,
+  \@bind,
+  "SELECT COUNT( * ) FROM (SELECT [me].[cdid] FROM [cd] [me]  JOIN [artist] [artist] ON [artist].[artistid] = [me].[artist] WHERE ( ( [artist].[name] = ? AND [me].[year] = ? ) ) GROUP BY [me].[cdid]) [count_subq]",
+  ["'Caterwauler McCrae'", "'2001'"],
   'got correct SQL for count query with bracket quoting'
 );
 
diff --git a/t/19quotes_newstyle.t b/t/19quotes_newstyle.t
index 80e6d04..4e49117 100644
--- a/t/19quotes_newstyle.t
+++ b/t/19quotes_newstyle.t
@@ -41,8 +41,10 @@ $rs = $schema->resultset('CD')->search(
            { join => 'artist' });
 eval { $rs->count };
 is_same_sql_bind(
-  $sql, \@bind,
-  "SELECT COUNT( * ) FROM `cd` `me`  JOIN `artist` `artist` ON ( `artist`.`artistid` = `me`.`artist` ) WHERE ( `artist`.`name` = ? AND `me`.`year` = ? )", ["'Caterwauler McCrae'", "'2001'"],
+  $sql,
+  \@bind,
+  "SELECT COUNT( * ) FROM (SELECT `me`.`cdid` FROM `cd` `me`  JOIN `artist` `artist` ON `artist`.`artistid` = `me`.`artist` WHERE ( ( `artist`.`name` = ? AND `me`.`year` = ? ) ) GROUP BY `me`.`cdid`) `count_subq`",
+  ["'Caterwauler McCrae'", "'2001'"],
   'got correct SQL for count query with quoting'
 );
 
@@ -72,8 +74,10 @@ $rs = $schema->resultset('CD')->search(
            { join => 'artist' });
 eval { $rs->count };
 is_same_sql_bind(
-  $sql, \@bind,
-  "SELECT COUNT( * ) FROM [cd] [me]  JOIN [artist] [artist] ON ( [artist].[artistid] = [me].[artist] ) WHERE ( [artist].[name] = ? AND [me].[year] = ? )", ["'Caterwauler McCrae'", "'2001'"],
+  $sql,
+  \@bind,
+  "SELECT COUNT( * ) FROM (SELECT [me].[cdid] FROM [cd] [me]  JOIN [artist] [artist] ON [artist].[artistid] = [me].[artist] WHERE ( ( [artist].[name] = ? AND [me].[year] = ? ) ) GROUP BY [me].[cdid]) [count_subq]",
+  ["'Caterwauler McCrae'", "'2001'"],
   'got correct SQL for count query with bracket quoting'
 );
 
diff --git a/t/60core.t b/t/60core.t
index 187164d..a568c6e 100644
--- a/t/60core.t
+++ b/t/60core.t
@@ -221,8 +221,13 @@ my $search = [ { 'tags.tag' => 'Cheesy' }, { 'tags.tag' => 'Blue' } ];
 
 my( $or_rs ) = $schema->resultset("CD")->search_rs($search, { join => 'tags',
                                                   order_by => 'cdid' });
-
-is($or_rs->count, 5, 'Search with OR ok');
+# At this point in the test there are:
+# 1 artist with the cheesy AND blue tag
+# 1 artist with the cheesy tag
+# 2 artists with the blue tag
+#
+# Formerly this test expected 5 as there was no collapsing of the AND condition
+is($or_rs->count, 4, 'Search with OR ok');
 
 my $distinct_rs = $schema->resultset("CD")->search($search, { join => 'tags', distinct => 1 });
 is($distinct_rs->all, 4, 'DISTINCT search with OR ok');
@@ -260,7 +265,13 @@ my $tag_rs = $schema->resultset('Tag')->search(
 
 my $rel_rs = $tag_rs->search_related('cd');
 
-is($rel_rs->count, 5, 'Related search ok');
+# At this point in the test there are:
+# 1 artist with the cheesy AND blue tag
+# 1 artist with the cheesy tag
+# 2 artists with the blue tag
+#
+# Formerly this test expected 5 as there was no collapsing of the AND condition
+is($rel_rs->count, 4, 'Related search ok');
 
 is($or_rs->next->cdid, $rel_rs->next->cdid, 'Related object ok');
 $or_rs->reset;
diff --git a/t/90join_torture.t b/t/90join_torture.t
index a277475..494237f 100644
--- a/t/90join_torture.t
+++ b/t/90join_torture.t
@@ -45,10 +45,24 @@ my @cds = $artists2[0]->cds;
 cmp_ok(scalar @cds, '==', 1, "condition based on inherited join okay");
 
 my $rs3 = $rs2->search_related('cds');
-cmp_ok(scalar($rs3->all), '==', 45, "All cds for artist returned");
 
+# $rs3 selects * from cds_2, with the following join map
+#
+# artist -> cds_2
+#   |
+#   V
+#  cds -> cd_to_producer -> producer
+#   |
+#   |\--> tags
+#   V
+# tracks
+#
+# For some reason it is expected to return an exploded set of rows instead of the
+# logical 3, even for a rowobject retrieval - why?
+#
+cmp_ok(scalar($rs3->all), '==', 45, "All cds for artist returned");
 
-cmp_ok($rs3->count, '==', 45, "All cds for artist returned via count");
+cmp_ok($rs3->count, '==', 3, "All cds for artist returned via count");
 
 my $rs4 = $schema->resultset("CD")->search({ 'artist.artistid' => '1' }, { join => ['tracks', 'artist'], prefetch => 'artist' });
 my @rs4_results = $rs4->all;
diff --git a/t/count/count_distinct.t b/t/count/distinct.t
similarity index 100%
rename from t/count/count_distinct.t
rename to t/count/distinct.t
diff --git a/t/count/count_joined.t b/t/count/joined.t
similarity index 81%
rename from t/count/count_joined.t
rename to t/count/joined.t
index 992d23b..139f9cd 100644
--- a/t/count/count_joined.t
+++ b/t/count/joined.t
@@ -11,8 +11,7 @@ plan tests => 1;
 
 my $schema = DBICTest->init_schema();
 
-TODO: {
-  local $TODO = 'Needs -paren fixes in SQLA before it can work';
+{
   my $cds = $schema->resultset("CD")->search({ cdid => 1 }, { join => { cd_to_producer => 'producer' } });
   is($cds->count, 1, "extra joins do not explode single entity count");
 }
diff --git a/t/delete/related.t b/t/delete/related.t
index f3fb78b..39aa429 100644
--- a/t/delete/related.t
+++ b/t/delete/related.t
@@ -4,7 +4,7 @@ use warnings;
 use lib qw(t/lib);
 use DBICTest;
 
-plan tests => 3;
+plan tests => 6;
 
 my $schema = DBICTest->init_schema();
 
@@ -43,3 +43,46 @@ is ($cdrs->count, $total_cds -= 2, 'related + condition delete ok');
 # test that related deletion with limit condition works
 $a2_cds->search ({}, { rows => 1})->delete;
 is ($cdrs->count, $total_cds -= 1, 'related + limit delete ok');
+
+my $tkfk = $schema->resultset('FourKeys_to_TwoKeys');
+
+my ($fa, $fb) = $tkfk->related_resultset ('fourkeys')->populate ([
+  [qw/foo bar hello goodbye sensors/],
+  [qw/1   1   1     1       a      /],
+  [qw/2   2   2     2       b      /],
+]);
+
+# This is already provided by DBICTest
+#my ($ta, $tb) = $tkfk->related_resultset ('twokeys')->populate ([
+#  [qw/artist  cd /],
+#  [qw/1       1  /],
+#  [qw/2       2  /],
+#]);
+my ($ta, $tb) = $schema->resultset ('TwoKeys')
+                  ->search ( [ { artist => 1, cd => 1 }, { artist => 2, cd => 2 } ])
+                    ->all;
+
+my $tkfk_cnt = $tkfk->count;
+
+my $non_void_ctx = $tkfk->populate ([
+  { autopilot => 'a', fourkeys =>  $fa, twokeys => $ta },
+  { autopilot => 'b', fourkeys =>  $fb, twokeys => $tb },
+  { autopilot => 'x', fourkeys =>  $fa, twokeys => $tb },
+  { autopilot => 'y', fourkeys =>  $fb, twokeys => $ta },
+]);
+is ($tkfk->count, $tkfk_cnt += 4, 'FourKeys_to_TwoKeys populated succesfully');
+
+my $sub_rs = $tkfk->search (
+  [ 
+    { map { $_ => 1 } qw/artist.artistid cd.cdid fourkeys.foo fourkeys.bar fourkeys.hello fourkeys.goodbye/ },
+    { map { $_ => 2 } qw/artist.artistid cd.cdid fourkeys.foo fourkeys.bar fourkeys.hello fourkeys.goodbye/ },
+  ],
+  {
+    join => [ 'fourkeys', { twokeys => [qw/artist cd/] } ],
+  },
+);
+
+is ($sub_rs->count, 2, 'Only two rows from fourkeys match');
+$sub_rs->delete;
+
+is ($tkfk->count, $tkfk_cnt -= 2, 'Only two rows deleted');