From: Peter Rabbitson Date: Sat, 16 May 2009 07:35:47 +0000 (+0000) Subject: Merge 'trunk' into 'joined_count' X-Git-Tag: v0.08103~64^2~7 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=5597834e4341123c5d6782ec8c0f66b4e7934e1d;hp=d6191de1a556525075eae5803a90f9fe5a05b5ad;p=dbsrgits%2FDBIx-Class.git Merge 'trunk' into 'joined_count' r6264@Thesaurus (orig r6263): ribasushi | 2009-05-14 15:40:19 +0200 r6121@Thesaurus (orig r6120): caelum | 2009-05-04 17:13:22 +0200 Making new branch for storage tweaking r6122@Thesaurus (orig r6121): caelum | 2009-05-04 20:07:47 +0200 support hashrefs for connect_replicants r6123@Thesaurus (orig r6122): caelum | 2009-05-04 23:07:43 +0200 ::Replicated - test hashref for connect_replicants and croak on coderef, switch to MX::Types, make test less noisy r6143@Thesaurus (orig r6142): caelum | 2009-05-06 05:13:56 +0200 fix ::DBI::Replicated::all_storages r6144@Thesaurus (orig r6143): caelum | 2009-05-06 05:25:04 +0200 Replicated - fixup types and namespace::clean r6147@Thesaurus (orig r6146): caelum | 2009-05-06 15:29:39 +0200 ::DBI:Replicated - merge connect_info from master to replicants r6184@Thesaurus (orig r6183): caelum | 2009-05-08 18:08:29 +0200 support ::DBI::Replicated opts in connect_info r6190@Thesaurus (orig r6189): caelum | 2009-05-09 05:31:15 +0200 ::DBI::Replicated - add master_read_weight to ::Random balancer_type r6191@Thesaurus (orig r6190): caelum | 2009-05-09 12:50:25 +0200 ::DBI::Replicated - fix fallback to master, test for the warning, other cleanups r6193@Thesaurus (orig r6192): caelum | 2009-05-09 13:52:52 +0200 updated Changes r6194@Thesaurus (orig r6193): caelum | 2009-05-09 14:21:44 +0200 ::DBI::Replicated - don't build pool/balancer from connect_info unless necessary r6268@Thesaurus (orig r6267): caelum | 2009-05-15 04:04:12 +0200 minor replication changes - use a real hash merge, clarify master_read_weight, really done with this now. r6272@Thesaurus (orig r6271): abraxxa | 2009-05-15 13:45:54 +0200 added Static sub-classing DBIx::Class result classes section to the cookbook r6277@Thesaurus (orig r6276): ribasushi | 2009-05-16 00:46:00 +0200 Optimize some Ordered.pm code r6278@Thesaurus (orig r6277): ribasushi | 2009-05-16 08:02:18 +0200 Cleanup tests r6280@Thesaurus (orig r6279): ribasushi | 2009-05-16 09:30:05 +0200 Not sure what this part of the test is for, but it breaks custom resultsets, and the test passes without it. Removing as a possible remnant of an ancient civilization r6281@Thesaurus (orig r6280): ribasushi | 2009-05-16 09:33:24 +0200 Add default resultclass/resultsetclass to the entire test schema, as I am tired of typing extra shit for debugging purposes r6282@Thesaurus (orig r6281): ribasushi | 2009-05-16 09:34:21 +0200 Now we can do diag $rs->hri_dump, ain't that nice --- diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm index e21feeb..079e0e4 100644 --- a/lib/DBIx/Class/ResultSet.pm +++ b/lib/DBIx/Class/ResultSet.pm @@ -1150,11 +1150,45 @@ on the resultset and counts the results of that. =cut +my @count_via_subq_attrs = qw/join seen_join group_by/; sub count { my $self = shift; return $self->search(@_)->count if @_ and defined $_[0]; return scalar @{ $self->get_cache } if $self->get_cache; - my $count = $self->_count; + + my @check_attrs = @count_via_subq_attrs; + + # if we are not paged - we are simply asking for a limit + if (not $self->{attrs}{page} and not $self->{attrs}{software_limit}) { + push @check_attrs, qw/rows offset/; + } + + return $self->_has_attr (@check_attrs) + ? $self->_count_subq + : $self->_count_simple +} + +sub _count_subq { + my $self = shift; + + my $attrs = { %{$self->_resolved_attrs} }; + + my $select_cols = $attrs->{group_by} || [ map { "$attrs->{alias}.$_" } ($self->result_source->primary_columns) ]; + $attrs->{from} = [{ + count_subq => $self->search ({}, { columns => $select_cols, group_by => $select_cols }) + ->as_query + }]; + + # the subquery above will integrate everything, including 'where' and any pagers + delete $attrs->{$_} for (@count_via_subq_attrs, qw/where rows offset pager page/ ); + + return $self->__count ($attrs); +} + +sub _count_simple { + my $self = shift; + + my $count = $self->__count; return 0 unless $count; # need to take offset from resolved attrs @@ -1166,26 +1200,20 @@ sub count { return $count; } -sub _count { # Separated out so pager can get the full count - my $self = shift; - my $attrs = { %{$self->_resolved_attrs} }; - - if (my $group_by = $attrs->{group_by}) { - delete $attrs->{order_by}; +sub __count { + my ($self, $attrs) = @_; - $attrs->{select} = $group_by; - $attrs->{from} = [ { 'mesub' => (ref $self)->new($self->result_source, $attrs)->cursor->as_query } ]; - delete $attrs->{where}; - } + $attrs ||= { %{$self->{attrs}} }; $attrs->{select} = { count => '*' }; $attrs->{as} = [qw/count/]; - # offset, order by, group by, where and page are not needed to count. record_filter is cdbi - delete $attrs->{$_} for qw/rows offset order_by group_by page pager record_filter/; + # take off any pagers, record_filter is cdbi, and no point of ordering a count + delete $attrs->{$_} for qw/rows offset page pager order_by record_filter/; my $tmp_rs = (ref $self)->new($self->result_source, $attrs); my ($count) = $tmp_rs->cursor->next; + return $count; } @@ -1298,6 +1326,18 @@ sub first { return $_[0]->reset->next; } + +# _update_delete_via_subq +# +# Presence of some rs attributes requires a subquery to reliably +# update/deletre +# + +sub _update_delete_via_subq { + return $_[0]->_has_attr (qw/join seen_join group_by row offset page/); +} + + # _cond_for_update_delete # # update/delete require the condition to be modified to handle @@ -1312,18 +1352,6 @@ sub _cond_for_update_delete { # No-op. No condition, we're updating/deleting everything return $cond unless ref $full_cond; - # Some attributes when present require a subquery - # This might not work on some database (mysql), but... - # it won't work without the subquery either so who cares - if (grep { defined $self->{attrs}{$_} } qw/join seen_join from rows group_by/) { - - foreach my $pk ($self->result_source->primary_columns) { - $cond->{$pk} = { IN => $self->get_column($pk)->as_query }; - } - - return $cond; - } - if (ref $full_cond eq 'ARRAY') { $cond = [ map { @@ -1363,7 +1391,7 @@ sub _cond_for_update_delete { else { $self->throw_exception("Can't update/delete on resultset with condition unless hash or array"); } - + return $cond; } @@ -1386,11 +1414,16 @@ if no records were updated; exact type of success value is storage-dependent. sub update { my ($self, $values) = @_; - $self->throw_exception("Values for update must be a hash") + $self->throw_exception('Values for update must be a hash') unless ref $values eq 'HASH'; + # rs operations with subqueries are Storage dependent - delegate + if ($self->_update_delete_via_subq) { + return $self->result_source->storage->subq_update_delete($self, 'update', $values); + } + my $cond = $self->_cond_for_update_delete; - + return $self->result_source->storage->update( $self->result_source, $values, $cond ); @@ -1413,7 +1446,7 @@ will run DBIC cascade triggers, while L will not. sub update_all { my ($self, $values) = @_; - $self->throw_exception("Values for update must be a hash") + $self->throw_exception('Values for update_all must be a hash') unless ref $values eq 'HASH'; foreach my $obj ($self->all) { $obj->set_columns($values)->update; @@ -1444,9 +1477,14 @@ need to respecify your query in a way that can be expressed without a join. =cut sub delete { - my ($self) = @_; - $self->throw_exception("Delete should not be passed any arguments") - if $_[1]; + my $self = shift; + $self->throw_exception('delete does not accept any arguments') + if @_; + + # rs operations with subqueries are Storage dependent - delegate + if ($self->_update_delete_via_subq) { + return $self->result_source->storage->subq_update_delete($self, 'delete'); + } my $cond = $self->_cond_for_update_delete; @@ -1470,7 +1508,10 @@ will run DBIC cascade triggers, while L will not. =cut sub delete_all { - my ($self) = @_; + my $self = shift; + $self->throw_exception('delete_all does not accept any arguments') + if @_; + $_->delete for $self->all; return 1; } @@ -1672,7 +1713,7 @@ sub pager { unless $self->{attrs}{page}; $attrs->{rows} ||= 10; return $self->{pager} ||= Data::Page->new( - $self->_count, $attrs->{rows}, $self->{attrs}{page}); + $self->__count, $attrs->{rows}, $self->{attrs}{page}); } =head2 page @@ -1776,6 +1817,37 @@ sub _is_deterministic_value { return 0; } +# _has_attr +# +# determines if the resultset defines at least one +# of the attributes supplied +# +# used to determine if a subquery is neccessary + +sub _has_attr { + my ($self, @attr_names) = @_; + + my $attrs = $self->_resolved_attrs; + + my $join_check_req; + + for my $n (@attr_names) { + return 1 if defined $attrs->{$n}; + ++$join_check_req if $n =~ /join/; + } + + # a join can be expressed as a multi-level from + return 1 if ( + $join_check_req + and + ref $attrs->{from} eq 'ARRAY' + and + @{$attrs->{from}} > 1 + ); + + return 0; +} + # _collapse_cond # # Recursively collapse the condition. diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm index 8df1894..8eb67fa 100644 --- a/lib/DBIx/Class/Storage/DBI.pm +++ b/lib/DBIx/Class/Storage/DBI.pm @@ -1051,6 +1051,85 @@ sub delete { return $self->_execute('delete' => [], $source, $bind_attrs, @_); } +# We were sent here because the $rs contains a complex search +# which will require a subquery to select the correct rows +# (i.e. joined or limited resultsets) +# +# Genarating a single PK column subquery is trivial and supported +# by all RDBMS. However if we have a multicolumn PK, things get ugly. +# Look at multipk_update_delete() +sub subq_update_delete { + my $self = shift; + my ($rs, $op, $values) = @_; + + if ($rs->result_source->primary_columns == 1) { + return $self->_onepk_update_delete (@_); + } + else { + return $self->_multipk_update_delete (@_); + } +} + +# Generally a single PK resultset operation is trivially expressed +# with PK IN (subquery). However some databases (mysql) do not support +# modification of a table mentioned in the subselect. This method +# should be overriden in the appropriate storage class to be smarter +# in such situations +sub _onepk_update_delete { + + my $self = shift; + my ($rs, $op, $values) = @_; + + my $rsrc = $rs->result_source; + my @pcols = $rsrc->primary_columns; + + return $self->$op ( + $rsrc, + $op eq 'update' ? $values : (), + { $pcols[0] => { -in => $rs->get_column ($pcols[0])->as_query } }, + ); +} + +# ANSI SQL does not provide a reliable way to perform a multicol-PK +# resultset update/delete involving subqueries. So resort to simple +# (and inefficient) delete_all style per-row opearations, while allowing +# specific storages to override this with a faster implementation. +# +# We do not use $row->$op style queries, because resultset update/delete +# is not expected to cascade (this is what delete_all/update_all is for). +# +# There should be no race conditions as the entire operation is rolled +# in a transaction. +sub _multipk_update_delete { + my $self = shift; + my ($rs, $op, $values) = @_; + + my $rsrc = $rs->result_source; + my @pcols = $rsrc->primary_columns; + + my $guard = $self->txn_scope_guard; + + my $subrs_cur = $rs->search ({}, { columns => \@pcols })->cursor; + while (my @pks = $subrs_cur->next) { + + my $cond; + for my $i (0.. $#pcols) { + $cond->{$pcols[$i]} = $pks[$i]; + } + + $self->$op ( + $rsrc, + $op eq 'update' ? $values : (), + $cond, + ); + } + + $guard->commit; + + return 1; +} + + sub _select { my $self = shift; my $sql_maker = $self->sql_maker; diff --git a/t/19quotes.t b/t/19quotes.t index 622eefb..75b4668 100644 --- a/t/19quotes.t +++ b/t/19quotes.t @@ -35,9 +35,12 @@ $rs = $schema->resultset('CD')->search( { join => 'artist' }); eval { $rs->count }; is_same_sql_bind( - $sql, \@bind, - "SELECT COUNT( * ) FROM `cd` `me` JOIN `artist` `artist` ON ( `artist`.`artistid` = `me`.`artist` ) WHERE ( `artist`.`name` = ? AND `me`.`year` = ? )", ["'Caterwauler McCrae'", "'2001'"], - 'got correct SQL for count query with quoting' + $sql, + \@bind, + "SELECT COUNT( * ) FROM (SELECT `me`.`cdid` FROM `cd` `me` JOIN `artist` `artist` ON `artist`.`artistid` = `me`.`artist` WHERE ( ( `artist`.`name` = ? AND `me`.`year` = ? ) ) GROUP BY `me`.`cdid`) `count_subq`", + ["'Caterwauler McCrae'", "'2001'"], + + 'got correct SQL for joined count query with quoting' ); my $order = 'year DESC'; @@ -59,8 +62,10 @@ $rs = $schema->resultset('CD')->search( { join => 'artist' }); eval { $rs->count }; is_same_sql_bind( - $sql, \@bind, - "SELECT COUNT( * ) FROM [cd] [me] JOIN [artist] [artist] ON ( [artist].[artistid] = [me].[artist] ) WHERE ( [artist].[name] = ? AND [me].[year] = ? )", ["'Caterwauler McCrae'", "'2001'"], + $sql, + \@bind, + "SELECT COUNT( * ) FROM (SELECT [me].[cdid] FROM [cd] [me] JOIN [artist] [artist] ON [artist].[artistid] = [me].[artist] WHERE ( ( [artist].[name] = ? AND [me].[year] = ? ) ) GROUP BY [me].[cdid]) [count_subq]", + ["'Caterwauler McCrae'", "'2001'"], 'got correct SQL for count query with bracket quoting' ); diff --git a/t/19quotes_newstyle.t b/t/19quotes_newstyle.t index 80e6d04..4e49117 100644 --- a/t/19quotes_newstyle.t +++ b/t/19quotes_newstyle.t @@ -41,8 +41,10 @@ $rs = $schema->resultset('CD')->search( { join => 'artist' }); eval { $rs->count }; is_same_sql_bind( - $sql, \@bind, - "SELECT COUNT( * ) FROM `cd` `me` JOIN `artist` `artist` ON ( `artist`.`artistid` = `me`.`artist` ) WHERE ( `artist`.`name` = ? AND `me`.`year` = ? )", ["'Caterwauler McCrae'", "'2001'"], + $sql, + \@bind, + "SELECT COUNT( * ) FROM (SELECT `me`.`cdid` FROM `cd` `me` JOIN `artist` `artist` ON `artist`.`artistid` = `me`.`artist` WHERE ( ( `artist`.`name` = ? AND `me`.`year` = ? ) ) GROUP BY `me`.`cdid`) `count_subq`", + ["'Caterwauler McCrae'", "'2001'"], 'got correct SQL for count query with quoting' ); @@ -72,8 +74,10 @@ $rs = $schema->resultset('CD')->search( { join => 'artist' }); eval { $rs->count }; is_same_sql_bind( - $sql, \@bind, - "SELECT COUNT( * ) FROM [cd] [me] JOIN [artist] [artist] ON ( [artist].[artistid] = [me].[artist] ) WHERE ( [artist].[name] = ? AND [me].[year] = ? )", ["'Caterwauler McCrae'", "'2001'"], + $sql, + \@bind, + "SELECT COUNT( * ) FROM (SELECT [me].[cdid] FROM [cd] [me] JOIN [artist] [artist] ON [artist].[artistid] = [me].[artist] WHERE ( ( [artist].[name] = ? AND [me].[year] = ? ) ) GROUP BY [me].[cdid]) [count_subq]", + ["'Caterwauler McCrae'", "'2001'"], 'got correct SQL for count query with bracket quoting' ); diff --git a/t/60core.t b/t/60core.t index 187164d..a568c6e 100644 --- a/t/60core.t +++ b/t/60core.t @@ -221,8 +221,13 @@ my $search = [ { 'tags.tag' => 'Cheesy' }, { 'tags.tag' => 'Blue' } ]; my( $or_rs ) = $schema->resultset("CD")->search_rs($search, { join => 'tags', order_by => 'cdid' }); - -is($or_rs->count, 5, 'Search with OR ok'); +# At this point in the test there are: +# 1 artist with the cheesy AND blue tag +# 1 artist with the cheesy tag +# 2 artists with the blue tag +# +# Formerly this test expected 5 as there was no collapsing of the AND condition +is($or_rs->count, 4, 'Search with OR ok'); my $distinct_rs = $schema->resultset("CD")->search($search, { join => 'tags', distinct => 1 }); is($distinct_rs->all, 4, 'DISTINCT search with OR ok'); @@ -260,7 +265,13 @@ my $tag_rs = $schema->resultset('Tag')->search( my $rel_rs = $tag_rs->search_related('cd'); -is($rel_rs->count, 5, 'Related search ok'); +# At this point in the test there are: +# 1 artist with the cheesy AND blue tag +# 1 artist with the cheesy tag +# 2 artists with the blue tag +# +# Formerly this test expected 5 as there was no collapsing of the AND condition +is($rel_rs->count, 4, 'Related search ok'); is($or_rs->next->cdid, $rel_rs->next->cdid, 'Related object ok'); $or_rs->reset; diff --git a/t/90join_torture.t b/t/90join_torture.t index a277475..494237f 100644 --- a/t/90join_torture.t +++ b/t/90join_torture.t @@ -45,10 +45,24 @@ my @cds = $artists2[0]->cds; cmp_ok(scalar @cds, '==', 1, "condition based on inherited join okay"); my $rs3 = $rs2->search_related('cds'); -cmp_ok(scalar($rs3->all), '==', 45, "All cds for artist returned"); +# $rs3 selects * from cds_2, with the following join map +# +# artist -> cds_2 +# | +# V +# cds -> cd_to_producer -> producer +# | +# |\--> tags +# V +# tracks +# +# For some reason it is expected to return an exploded set of rows instead of the +# logical 3, even for a rowobject retrieval - why? +# +cmp_ok(scalar($rs3->all), '==', 45, "All cds for artist returned"); -cmp_ok($rs3->count, '==', 45, "All cds for artist returned via count"); +cmp_ok($rs3->count, '==', 3, "All cds for artist returned via count"); my $rs4 = $schema->resultset("CD")->search({ 'artist.artistid' => '1' }, { join => ['tracks', 'artist'], prefetch => 'artist' }); my @rs4_results = $rs4->all; diff --git a/t/count/count_distinct.t b/t/count/distinct.t similarity index 100% rename from t/count/count_distinct.t rename to t/count/distinct.t diff --git a/t/count/count_joined.t b/t/count/joined.t similarity index 81% rename from t/count/count_joined.t rename to t/count/joined.t index 992d23b..139f9cd 100644 --- a/t/count/count_joined.t +++ b/t/count/joined.t @@ -11,8 +11,7 @@ plan tests => 1; my $schema = DBICTest->init_schema(); -TODO: { - local $TODO = 'Needs -paren fixes in SQLA before it can work'; +{ my $cds = $schema->resultset("CD")->search({ cdid => 1 }, { join => { cd_to_producer => 'producer' } }); is($cds->count, 1, "extra joins do not explode single entity count"); } diff --git a/t/delete/related.t b/t/delete/related.t index f3fb78b..39aa429 100644 --- a/t/delete/related.t +++ b/t/delete/related.t @@ -4,7 +4,7 @@ use warnings; use lib qw(t/lib); use DBICTest; -plan tests => 3; +plan tests => 6; my $schema = DBICTest->init_schema(); @@ -43,3 +43,46 @@ is ($cdrs->count, $total_cds -= 2, 'related + condition delete ok'); # test that related deletion with limit condition works $a2_cds->search ({}, { rows => 1})->delete; is ($cdrs->count, $total_cds -= 1, 'related + limit delete ok'); + +my $tkfk = $schema->resultset('FourKeys_to_TwoKeys'); + +my ($fa, $fb) = $tkfk->related_resultset ('fourkeys')->populate ([ + [qw/foo bar hello goodbye sensors/], + [qw/1 1 1 1 a /], + [qw/2 2 2 2 b /], +]); + +# This is already provided by DBICTest +#my ($ta, $tb) = $tkfk->related_resultset ('twokeys')->populate ([ +# [qw/artist cd /], +# [qw/1 1 /], +# [qw/2 2 /], +#]); +my ($ta, $tb) = $schema->resultset ('TwoKeys') + ->search ( [ { artist => 1, cd => 1 }, { artist => 2, cd => 2 } ]) + ->all; + +my $tkfk_cnt = $tkfk->count; + +my $non_void_ctx = $tkfk->populate ([ + { autopilot => 'a', fourkeys => $fa, twokeys => $ta }, + { autopilot => 'b', fourkeys => $fb, twokeys => $tb }, + { autopilot => 'x', fourkeys => $fa, twokeys => $tb }, + { autopilot => 'y', fourkeys => $fb, twokeys => $ta }, +]); +is ($tkfk->count, $tkfk_cnt += 4, 'FourKeys_to_TwoKeys populated succesfully'); + +my $sub_rs = $tkfk->search ( + [ + { map { $_ => 1 } qw/artist.artistid cd.cdid fourkeys.foo fourkeys.bar fourkeys.hello fourkeys.goodbye/ }, + { map { $_ => 2 } qw/artist.artistid cd.cdid fourkeys.foo fourkeys.bar fourkeys.hello fourkeys.goodbye/ }, + ], + { + join => [ 'fourkeys', { twokeys => [qw/artist cd/] } ], + }, +); + +is ($sub_rs->count, 2, 'Only two rows from fourkeys match'); +$sub_rs->delete; + +is ($tkfk->count, $tkfk_cnt -= 2, 'Only two rows deleted');