From: Peter Rabbitson Date: Wed, 27 Mar 2013 11:42:15 +0000 (+0100) Subject: Another blast from the past - fix distinct/order behavior borked by d59eba65f X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=560978e22520434c67eebb2de72f0e571e47ee40;p=dbsrgits%2FDBIx-Class-Historic.git Another blast from the past - fix distinct/order behavior borked by d59eba65f While the SQL we test for is valid syntax - the result it produces makes zero sense for these resultset arguments. Reshuffle the distinct construction and reuse the codepah for order_by rewrites. This is a heavy change, but it is *extremely unlikely* anyone was relying on the previous behavior - any aggregate value would have been wrong... --- diff --git a/Changes b/Changes index 16f2251..cef5fe3 100644 --- a/Changes +++ b/Changes @@ -6,6 +6,8 @@ Revision history for DBIx::Class - Fix update/delete operations on resultsets *joining* the updated table failing on MySQL. Resolves oversights in the fixes for RT#81378 and RT#81897 + - Stop erroneously considering order_by criteria from a join under + distinct => 1 (the distinct should apply to the main source only) - Even more robust behavior of GenericSubQuery limit dialect - Stop Sybase ASE storage from generating invalid SQL in subselects when a limit without offset is encountered diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm index 2e6c783..c2e0e8c 100644 --- a/lib/DBIx/Class/ResultSet.pm +++ b/lib/DBIx/Class/ResultSet.pm @@ -3480,11 +3480,7 @@ sub _resolved_attrs { $attrs->{_grouped_by_distinct} = 1; # distinct affects only the main selection part, not what prefetch may # add below. - $attrs->{group_by} = $source->storage->_group_over_selection ( - $attrs->{from}, - $attrs->{select}, - $attrs->{order_by}, - ); + $attrs->{group_by} = $source->storage->_group_over_selection($attrs); } } diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm index 59f4441..570cf2b 100644 --- a/lib/DBIx/Class/Storage/DBI.pm +++ b/lib/DBIx/Class/Storage/DBI.pm @@ -2328,36 +2328,50 @@ sub _select_args { $attrs->{rows} = $sql_maker->__max_int; } - my ($complex_prefetch, @limit); - # see if we will need to tear the prefetch apart to satisfy group_by == select - # this is *extremely tricky* to get right + # this is *extremely tricky* to get right, I am still not sure I did # - # Follows heavy but necessary analyzis of the group_by - if it refers to any - # sort of non-root column assume the user knows what they are doing and do - # not try to be clever - if ( - $attrs->{_related_results_construction} + my ($prefetch_needs_subquery, @limit_args); + + if ( $attrs->{_grouped_by_distinct} and $attrs->{collapse} ) { + # we already know there is a valid group_by and we know it is intended + # to be based *only* on the main result columns + # short circuit the group_by parsing below + $prefetch_needs_subquery = 1; + } + elsif ( + # The rationale is that even if we do *not* have collapse, we still + # need to wrap the core grouped select/group_by in a subquery + # so that databases that care about group_by/select equivalence + # are happy (this includes MySQL in strict_mode) + # If any of the other joined tables are referenced in the group_by + # however - the user is on their own + ( $prefetch_needs_subquery or $attrs->{_related_results_construction} ) and $attrs->{group_by} and @{$attrs->{group_by}} and - my $grp_aliases = try { + my $grp_aliases = try { # try{} because $attrs->{from} may be unreadable $self->_resolve_aliastypes_from_select_args( $attrs->{from}, undef, undef, { group_by => $attrs->{group_by} } ) } ) { - $complex_prefetch = ! defined first { $_ ne $attrs->{alias} } keys %{ $grp_aliases->{grouping} || {} }; + # no aliases other than our own in group_by + # if there are - do not allow subquery even if limit is present + $prefetch_needs_subquery = ! scalar grep { $_ ne $attrs->{alias} } keys %{ $grp_aliases->{grouping} || {} }; + } + elsif ( $attrs->{rows} && $attrs->{collapse} ) { + # active collapse with a limit - that one is a no-brainer unless + # overruled by a group_by above + $prefetch_needs_subquery = 1; } - $complex_prefetch ||= ( $attrs->{rows} && $attrs->{collapse} ); - - if ($complex_prefetch) { + if ($prefetch_needs_subquery) { ($ident, $select, $where, $attrs) = $self->_adjust_select_args_for_complex_prefetch ($ident, $select, $where, $attrs); } elsif (! $attrs->{software_limit} ) { - push @limit, ( + push @limit_args, ( $attrs->{rows} || (), $attrs->{offset} || (), ); @@ -2365,7 +2379,7 @@ sub _select_args { # try to simplify the joinmap further (prune unreferenced type-single joins) if ( - ! $complex_prefetch + ! $prefetch_needs_subquery # already pruned and ref $ident and @@ -2386,7 +2400,7 @@ sub _select_args { # invoked, and that's just bad... ### - return ('select', $ident, $select, $where, $attrs, @limit); + return ('select', $ident, $select, $where, $attrs, @limit_args); } # Returns a counting SELECT for a simple count diff --git a/lib/DBIx/Class/Storage/DBIHacks.pm b/lib/DBIx/Class/Storage/DBIHacks.pm index b3b0177..5b4b56e 100644 --- a/lib/DBIx/Class/Storage/DBIHacks.pm +++ b/lib/DBIx/Class/Storage/DBIHacks.pm @@ -146,7 +146,7 @@ sub _adjust_select_args_for_complex_prefetch { # We can not just fetch everything because a potential has_many restricting # join collapse *will not work* on heavy data types. my $connecting_aliastypes = $self->_resolve_aliastypes_from_select_args( - [grep { ref($_) eq 'ARRAY' or ref($_) eq 'HASH' } @{$from}[$root_node_offset .. $#$from]], + $from, [], $where, $inner_attrs @@ -186,7 +186,8 @@ sub _adjust_select_args_for_complex_prefetch { if ( $inner_aliastypes->{multiplying} and - !$inner_aliastypes->{grouping} # if there are groups - assume user knows wtf they are up to + # if there are user-supplied groups - assume user knows wtf they are up to + ( ! $inner_aliastypes->{grouping} or $inner_attrs->{_grouped_by_distinct} ) and my @multipliers = grep { $_ ne $root_alias } keys %{$inner_aliastypes->{multiplying}} ) { @@ -200,9 +201,11 @@ sub _adjust_select_args_for_complex_prefetch { ) { my $unprocessed_order_chunks; - ($inner_attrs->{group_by}, $unprocessed_order_chunks) = $self->_group_over_selection ( - $inner_from, $inner_select, $inner_attrs->{order_by} - ); + ($inner_attrs->{group_by}, $unprocessed_order_chunks) = $self->_group_over_selection ({ + %$inner_attrs, + from => $inner_from, + select => $inner_select, + }); $self->throw_exception ( 'A required group_by clause could not be constructed automatically due to a complex ' @@ -221,17 +224,6 @@ sub _adjust_select_args_for_complex_prefetch { # for DESC, and group_by the root columns. The end result should be # exactly what we expect - # FIXME REMOVE LATER - (just a sanity check) - if (defined ( my $impostor = first - { $_ ne $root_alias } - keys %{ $inner_aliastypes->{selecting} } - ) ) { - $self->throw_exception(sprintf - 'Unexpected inner selection during complex prefetch (%s)...', - join ', ', keys %{ $inner_aliastypes->{joining}{$impostor}{-seen_columns} || {} } - ); - } - # supplement the main selection with pks if not already there, # as they will have to be a part of the group_by to colapse # things properly @@ -298,9 +290,11 @@ sub _adjust_select_args_for_complex_prefetch { # do not care about leftovers here - it will be all the functions # we just created - ($inner_attrs->{group_by}) = $self->_group_over_selection ( - $inner_from, $inner_select, $inner_attrs->{order_by} - ); + ($inner_attrs->{group_by}) = $self->_group_over_selection ({ + %$inner_attrs, + from => $inner_from, + select => $inner_select, + }); } } @@ -334,7 +328,7 @@ sub _adjust_select_args_for_complex_prefetch { # we may not be the head if ($root_node_offset) { - # first generate the outer_from, up to the substitution point + # first generate the outer_from, up and including the substitution point @outer_from = splice @$from, 0, $root_node_offset; push @outer_from, [ @@ -354,7 +348,7 @@ sub _adjust_select_args_for_complex_prefetch { }; } - shift @$from; # it's replaced in @outer_from already + shift @$from; # what we just replaced above # scan the *remaining* from spec against different attributes, and see which joins are needed # in what role @@ -386,9 +380,11 @@ sub _adjust_select_args_for_complex_prefetch { if ( $need_outer_group_by and $attrs->{_grouped_by_distinct} ) { my $unprocessed_order_chunks; - ($outer_attrs->{group_by}, $unprocessed_order_chunks) = $self->_group_over_selection ( - \@outer_from, $outer_select, $outer_attrs->{order_by} - ); + ($outer_attrs->{group_by}, $unprocessed_order_chunks) = $self->_group_over_selection ({ + %$outer_attrs, + from => \@outer_from, + select => $outer_select, + }); $self->throw_exception ( 'A required group_by clause could not be constructed automatically due to a complex ' @@ -571,43 +567,49 @@ sub _resolve_aliastypes_from_select_args { # This is the engine behind { distinct => 1 } sub _group_over_selection { - my ($self, $from, $select, $order_by) = @_; + my ($self, $attrs) = @_; - my $rs_column_list = $self->_resolve_column_info ($from); + my $colinfos = $self->_resolve_column_info ($attrs->{from}); my (@group_by, %group_index); # the logic is: if it is a { func => val } we assume an aggregate, # otherwise if \'...' or \[...] we assume the user knows what is # going on thus group over it - for (@$select) { + for (@{$attrs->{select}}) { if (! ref($_) or ref ($_) ne 'HASH' ) { push @group_by, $_; $group_index{$_}++; - if ($rs_column_list->{$_} and $_ !~ /\./ ) { + if ($colinfos->{$_} and $_ !~ /\./ ) { # add a fully qualified version as well - $group_index{"$rs_column_list->{$_}{-source_alias}.$_"}++; + $group_index{"$colinfos->{$_}{-source_alias}.$_"}++; } } } - # add any order_by parts that are not already present in the group_by + # add any order_by parts *from the main source* that are not already + # present in the group_by # we need to be careful not to add any named functions/aggregates # i.e. order_by => [ ... { count => 'foo' } ... ] my @leftovers; - for ($self->_extract_order_criteria($order_by)) { + for ($self->_extract_order_criteria($attrs->{order_by})) { # only consider real columns (for functions the user got to do an explicit group_by) if (@$_ != 1) { push @leftovers, $_; next; } my $chunk = $_->[0]; - my $colinfo = $rs_column_list->{$chunk} or do { + + if ( + !$colinfos->{$chunk} + or + $colinfos->{$chunk}{-source_alias} ne $attrs->{alias} + ) { push @leftovers, $_; next; - }; + } - $chunk = "$colinfo->{-source_alias}.$chunk" if $chunk !~ /\./; + $chunk = $colinfos->{$chunk}{-fq_colname}; push @group_by, $chunk unless $group_index{$chunk}++; } diff --git a/t/prefetch/grouped.t b/t/prefetch/grouped.t index b7ec8eb..e967307 100644 --- a/t/prefetch/grouped.t +++ b/t/prefetch/grouped.t @@ -9,6 +9,7 @@ use DBIC::SqlMakerTest; use DBIx::Class::SQLMaker::LimitDialects; my $ROWS = DBIx::Class::SQLMaker::LimitDialects->__rows_bindtype; +my $OFFSET = DBIx::Class::SQLMaker::LimitDialects->__offset_bindtype; my $schema = DBICTest->init_schema(); my $sdebug = $schema->storage->debug; @@ -398,28 +399,118 @@ for ($cd_rs->all) { ); } +# make sure distinct applies to the CD part only, not to the order_by part { - my $rs = $schema->resultset('CD')->search({}, - { - '+select' => [{ count => 'tags.tag' }], - '+as' => ['test_count'], - prefetch => ['tags'], - distinct => 1, - order_by => {'-asc' => 'tags.tag'}, - rows => 1 - } + my $rs = $schema->resultset('CD')->search({}, { + columns => [qw( cdid title )], + '+select' => [{ count => 'tags.tag' }], + '+as' => ['test_count'], + prefetch => ['tags'], + distinct => 1, + order_by => {'-desc' => 'tags.tag'}, + offset => 1, + rows => 3, + }); + + is_same_sql_bind($rs->as_query, + '( + SELECT me.cdid, me.title, me.test_count, + tags.tagid, tags.cd, tags.tag + FROM ( + SELECT me.cdid, me.title, + COUNT( tags.tag ) AS test_count + FROM cd me + LEFT JOIN tags tags + ON tags.cd = me.cdid + GROUP BY me.cdid, me.title + ORDER BY MAX( tags.tag ) DESC + LIMIT ? + OFFSET ? + ) me + LEFT JOIN tags tags + ON tags.cd = me.cdid + ORDER BY tags.tag DESC + )', + [ [$ROWS => 3], [$OFFSET => 1] ], + 'Expected limited prefetch with distinct SQL', + ); + + my $expected_hri = [ + { cdid => 4, test_count => 2, title => "Generic Manufactured Singles", tags => [ + { cd => 4, tag => "Shiny", tagid => 9 }, + { cd => 4, tag => "Cheesy", tagid => 6 }, + ]}, + { + cdid => 5, test_count => 2, title => "Come Be Depressed With Us", tags => [ + { cd => 5, tag => "Cheesy", tagid => 7 }, + { cd => 5, tag => "Blue", tagid => 4 }, + ]}, + { + cdid => 1, test_count => 1, title => "Spoonful of bees", tags => [ + { cd => 1, tag => "Blue", tagid => 1 }, + ]}, + ]; + + is_deeply ( + $rs->all_hri, + $expected_hri, + 'HRI dump of limited prefetch with distinct as expected' + ); + + # pre-multiplied main source also should work + $rs = $schema->resultset('CD')->search_related('artist')->search_related('cds', {}, { + columns => [qw( cdid title )], + '+select' => [{ count => 'tags.tag' }], + '+as' => ['test_count'], + prefetch => ['tags'], + distinct => 1, + order_by => {'-desc' => 'tags.tag'}, + offset => 1, + rows => 3, + }); + + is_same_sql_bind($rs->as_query, + '( + SELECT cds.cdid, cds.title, cds.test_count, + tags.tagid, tags.cd, tags.tag + FROM cd me + JOIN artist artist + ON artist.artistid = me.artist + JOIN ( + SELECT cds.cdid, cds.title, + COUNT( tags.tag ) AS test_count, + cds.artist + FROM cd me + JOIN artist artist + ON artist.artistid = me.artist + JOIN cd cds + ON cds.artist = artist.artistid + LEFT JOIN tags tags + ON tags.cd = cds.cdid + GROUP BY cds.cdid, cds.title, cds.artist + ORDER BY MAX( tags.tag ) DESC + LIMIT ? + OFFSET ? + ) cds + ON cds.artist = artist.artistid + LEFT JOIN tags tags + ON tags.cd = cds.cdid + ORDER BY tags.tag DESC + )', + [ [$ROWS => 3], [$OFFSET => 1] ], + 'Expected limited prefetch with distinct SQL on premultiplied head', + ); + + # Tag counts are multiplied by the cd->artist->cds multiplication + # I would *almost* call this "expected" without wraping an as_subselect_rs + { + local $TODO = 'Not sure if we can stop the count/group of premultiplication abstraction leak'; + is_deeply ( + $rs->all_hri, + $expected_hri, + 'HRI dump of limited prefetch with distinct as expected on premultiplid head' ); - is_same_sql_bind($rs->as_query, q{ - (SELECT me.cdid, me.artist, me.title, me.year, me.genreid, me.single_track, me.test_count, tags.tagid, tags.cd, tags.tag - FROM (SELECT me.cdid, me.artist, me.title, me.year, me.genreid, me.single_track, COUNT( tags.tag ) AS test_count - FROM cd me LEFT JOIN tags tags ON tags.cd = me.cdid - GROUP BY me.cdid, me.artist, me.title, me.year, me.genreid, me.single_track, tags.tag - ORDER BY tags.tag ASC LIMIT ?) - me - LEFT JOIN tags tags ON tags.cd = me.cdid - ORDER BY tags.tag ASC - ) - }, [[$ROWS => 1]]); + } } done_testing;