From: Peter Rabbitson Date: Sun, 17 Nov 2013 12:31:37 +0000 (+0100) Subject: Stop correlated subqueries from affecting the aliastypes analysis X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=0dadd60d51482230dca8d235d4d1fbd71235904a;p=dbsrgits%2FDBIx-Class-Historic.git Stop correlated subqueries from affecting the aliastypes analysis This should make mind-bending correlations like the ones frew likes to (ab)use work even when combined with Getty-levels of prefetch extravaganza --- diff --git a/Changes b/Changes index 010fdde..6f7daab 100644 --- a/Changes +++ b/Changes @@ -7,6 +7,8 @@ Revision history for DBIx::Class - More robust handling of circular relationship declarations by loading foreign classes less frequently (should resolve issues like http://lists.scsys.co.uk/pipermail/dbix-class/2013-June/011374.html) + - Fix multiple edge cases of complex prefetch combining incorrectly + with correlated subquery selections - Fix multiple edge cases steming from interaction of a non-selecting order_by specification and distinct and/or complex prefetch - Clarify ambiguous behavior of distinct when used with ResultSetColumn diff --git a/lib/DBIx/Class/Storage/DBIHacks.pm b/lib/DBIx/Class/Storage/DBIHacks.pm index 8a3cb02..80283dc 100644 --- a/lib/DBIx/Class/Storage/DBIHacks.pm +++ b/lib/DBIx/Class/Storage/DBIHacks.pm @@ -429,15 +429,46 @@ sub _resolve_aliastypes_from_select_args { ), ], selecting => [ - $sql_maker->_recurse_fields ($attrs->{select}), + map { $sql_maker->_recurse_fields($_) } @{$attrs->{select}}, ], ordering => [ map { $_->[0] } $self->_extract_order_criteria ($attrs->{order_by}, $sql_maker), ], }; - # throw away empty chunks - $_ = [ map { $_ || () } @$_ ] for values %$to_scan; + # throw away empty chunks and all 2-value arrayrefs: the thinking is that these are + # bind value specs left in by the sloppy renderer above. It is ok to do this + # at this point, since we are going to end up rewriting this crap anyway + for my $v (values %$to_scan) { + my @nv; + for (@$v) { + next if ( + ! defined $_ + or + ( + ref $_ eq 'ARRAY' + and + ( @$_ == 0 or @$_ == 2 ) + ) + ); + + if (ref $_) { + require Data::Dumper::Concise; + $self->throw_exception("Unexpected ref in scan-plan: " . Data::Dumper::Concise::Dumper($v) ); + } + + push @nv, $_; + } + + $v = \@nv; + } + + # kill all selectors which look like a proper subquery + # this is a sucky heuristic *BUT* - if we get it wrong the query will simply + # fail to run, so we are relatively safe + $to_scan->{selecting} = [ grep { + $_ !~ / \A \s* \( \s* SELECT \s+ .+? \s+ FROM \s+ .+? \) \s* \z /xsi + } @{ $to_scan->{selecting} || [] } ]; # first see if we have any exact matches (qualified or unqualified) for my $type (keys %$to_scan) { diff --git a/t/prefetch/correlated.t b/t/prefetch/correlated.t index 8d99ff8..98bc031 100644 --- a/t/prefetch/correlated.t +++ b/t/prefetch/correlated.t @@ -138,4 +138,99 @@ is_same_sql_bind( 'Expected SQL on correlated realiased subquery' ); +# test for subselect identifier leakage +# NOTE - the hodge-podge mix of literal and regular identifuers is *deliberate* +for my $quote_names (0,1) { + my $schema = DBICTest->init_schema( quote_names => $quote_names ); + + my ($ql, $qr) = $schema->storage->sql_maker->_quote_chars; + + my $art_rs = $schema->resultset('Artist')->search ({}, { + order_by => 'me.artistid', + prefetch => 'cds', + rows => 2, + }); + + my $inner_lim_bindtype = { sqlt_datatype => 'integer' }; + + for my $inner_relchain (qw( cds_unordered cds ) ) { + + my $stupid_latest_competition_release_query = $schema->resultset('Artist')->search( + { 'competition.artistid' => { '!=', { -ident => 'me.artistid' } } }, + { alias => 'competition' }, + )->search_related( $inner_relchain, {}, { + rows => 1, order_by => 'year', columns => { year => \'year' }, distinct => 1 + })->get_column(\'year')->max_rs; + + my $final_query = $art_rs->search( {}, { + '+columns' => { max_competition_release => \[ + @${ $stupid_latest_competition_release_query->as_query } + ]}, + }); + + is_deeply ( + $final_query->all_hri, + [ + { artistid => 1, charfield => undef, max_competition_release => 1998, name => "Caterwauler McCrae", rank => 13, cds => [ + { artist => 1, cdid => 3, genreid => undef, single_track => undef, title => "Caterwaulin' Blues", year => 1997 }, + { artist => 1, cdid => 2, genreid => undef, single_track => undef, title => "Forkful of bees", year => 2001 }, + { artist => 1, cdid => 1, genreid => 1, single_track => undef, title => "Spoonful of bees", year => 1999 }, + ] }, + { artistid => 2, charfield => undef, max_competition_release => 1997, name => "Random Boy Band", rank => 13, cds => [ + { artist => 2, cdid => 4, genreid => undef, single_track => undef, title => "Generic Manufactured Singles", year => 2001 }, + ] }, + ], + "Expected result from weird query", + ); + + # the decomposition to sql/bind is *deliberate* in both instances + # we want to ensure this keeps working for lietral sql, even when + # as_query switches to return an overloaded dq node + my ($sql, @bind) = @${ $final_query->as_query }; + + my $correlated_sql = qq{ ( + SELECT MAX( year ) + FROM ( + SELECT year + FROM ${ql}artist${qr} ${ql}competition${qr} + JOIN cd ${ql}${inner_relchain}${qr} + ON ${ql}${inner_relchain}${qr}.${ql}artist${qr} = ${ql}competition${qr}.${ql}artistid${qr} + WHERE ${ql}competition${qr}.${ql}artistid${qr} != ${ql}me${qr}.${ql}artistid${qr} + GROUP BY year + ORDER BY MIN( ${ql}year${qr} ) + LIMIT ? + ) ${ql}${inner_relchain}${qr} + )}; + + is_same_sql_bind( + $sql, + \@bind, + qq{ ( + SELECT ${ql}me${qr}.${ql}artistid${qr}, ${ql}me${qr}.${ql}name${qr}, ${ql}me${qr}.${ql}rank${qr}, ${ql}me${qr}.${ql}charfield${qr}, + $correlated_sql, + ${ql}cds${qr}.${ql}cdid${qr}, ${ql}cds${qr}.${ql}artist${qr}, ${ql}cds${qr}.${ql}title${qr}, ${ql}cds${qr}.${ql}year${qr}, ${ql}cds${qr}.${ql}genreid${qr}, ${ql}cds${qr}.${ql}single_track${qr} + FROM ( + SELECT ${ql}me${qr}.${ql}artistid${qr}, ${ql}me${qr}.${ql}name${qr}, ${ql}me${qr}.${ql}rank${qr}, ${ql}me${qr}.${ql}charfield${qr}, + $correlated_sql + FROM ${ql}artist${qr} ${ql}me${qr} + ORDER BY ${ql}me${qr}.${ql}artistid${qr} + LIMIT ? + ) ${ql}me${qr} + LEFT JOIN cd ${ql}cds${qr} + ON ${ql}cds${qr}.${ql}artist${qr} = ${ql}me${qr}.${ql}artistid${qr} + ORDER BY ${ql}me${qr}.${ql}artistid${qr} + ) }, + [ + [ $inner_lim_bindtype + => 1 ], + [ $inner_lim_bindtype + => 1 ], + [ { sqlt_datatype => 'integer' } + => 2 ], + ], + "No leakage of correlated subquery identifiers (quote_names => $quote_names, inner alias '$inner_relchain')" + ); + } +} + done_testing;