From: Peter Rabbitson Date: Sun, 30 May 2010 22:17:29 +0000 (+0000) Subject: Rewrite GenericSubQ from SQLA::L to be actually useful X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=75f025cf247e75869dcdfe46b37e24d4e0b15e8b;p=dbsrgits%2FDBIx-Class-Historic.git Rewrite GenericSubQ from SQLA::L to be actually useful Since it now works it is no longer necessary to turn on softlimit when genericsubq is detected Switch all sprintf()ed limit/offset specs to unsigned integers Lower the default rows-without-offset to 2^32 --- diff --git a/Changes b/Changes index 0f93936..04b33b2 100644 --- a/Changes +++ b/Changes @@ -22,6 +22,9 @@ Revision history for DBIx::Class resultsets - MSSQL limits now don't require nearly as many applications of the unsafe_subselect_ok attribute, due to optimized queries + - Support for Generic Subquery limit "emulation" - awfully slow + and inefficient but works on almost any db, and is preferred + to a soft-limit - Fix as_subselect_rs to not inject resultset class-wide where conditions outside of the resulting subquery - Fix nasty potentially data-eating bug when deleting/updating diff --git a/lib/DBIx/Class/SQLAHacks.pm b/lib/DBIx/Class/SQLAHacks.pm index 40bc7a3..d2dc16e 100644 --- a/lib/DBIx/Class/SQLAHacks.pm +++ b/lib/DBIx/Class/SQLAHacks.pm @@ -48,29 +48,25 @@ sub new { # !!! THIS IS ALSO HORRIFIC !!! /me ashamed # -# generate inner/outer select lists for various limit dialects +# Generates inner/outer select lists for various limit dialects # which result in one or more subqueries (e.g. RNO, Top, RowNum) # Any non-root-table columns need to have their table qualifier # turned into a column alias (otherwise names in subqueries clash # and/or lose their source table) # -# returns inner/outer strings of SQL QUOTED selectors with aliases +# Returns inner/outer strings of SQL QUOTED selectors with aliases # (to be used in whatever select statement), and an alias index hashref # of QUOTED SEL => QUOTED ALIAS pairs (to maybe be used for string-subst -# higher up) -# -# If the $scan_order option is supplied, it signals that the limit dialect -# needs to order the outer side of the query, which in turn means that the -# inner select needs to bring out columns used in implicit (non-selected) -# orders, and the order condition itself needs to be realiased to the proper -# names in the outer query. -# -# In this case ($scan_order os true) we also return a hashref (order doesn't -# matter) of QUOTED EXTRA-SEL => QUOTED ALIAS pairs, which is a list of extra -# selectors that do *not* exist in the original select list +# higher up). +# If an order_by is supplied, the inner select needs to bring out columns +# used in implicit (non-selected) orders, and the order condition itself +# needs to be realiased to the proper names in the outer query. Thus we +# also return a hashref (order doesn't matter) of QUOTED EXTRA-SEL => +# QUOTED ALIAS pairs, which is a list of extra selectors that do *not* +# exist in the original select list sub _subqueried_limit_attrs { - my ($self, $rs_attrs, $scan_order) = @_; + my ($self, $rs_attrs) = @_; croak 'Limit dialect implementation usable only in the context of DBIC (missing $rs_attrs)' unless ref ($rs_attrs) eq 'HASH'; @@ -104,10 +100,12 @@ sub _subqueried_limit_attrs { $in_sel_index->{$sql_sel}++; $in_sel_index->{$self->_quote ($sql_alias)}++ if $sql_alias; -# this *may* turn out to be necessary, not sure yet -# my ($sql_unqualified_sel) = $sql_sel =~ / $re_sep (.+) $/x -# if ! ref $s; -# $in_sel_index->{$sql_unqualified_sel}++; + # record unqualified versions too, so we do not have + # to reselect the same column twice (in qualified and + # unqualified form) + if (! ref $s && $sql_sel =~ / $re_sep (.+) $/x) { + $in_sel_index->{$1}++; + } } @@ -130,20 +128,20 @@ sub _subqueried_limit_attrs { } } + # see if the order gives us anything my %extra_order_sel; - if ($scan_order) { - for my $chunk ($self->_order_by_chunks ($rs_attrs->{order_by})) { - # order with bind - $chunk = $chunk->[0] if (ref $chunk) eq 'ARRAY'; - $chunk =~ s/\s+ (?: ASC|DESC ) \s* $//ix; + for my $chunk ($self->_order_by_chunks ($rs_attrs->{order_by})) { + # order with bind + $chunk = $chunk->[0] if (ref $chunk) eq 'ARRAY'; + $chunk =~ s/\s+ (?: ASC|DESC ) \s* $//ix; - next if $in_sel_index->{$chunk}; + next if $in_sel_index->{$chunk}; - $extra_order_sel{$chunk} ||= $self->_quote ( - 'ORDER__BY__' . scalar keys %extra_order_sel - ); - } + $extra_order_sel{$chunk} ||= $self->_quote ( + 'ORDER__BY__' . scalar keys %extra_order_sel + ); } + return ( (map { join (', ', @$_ ) } ( \@in_sel, @@ -163,9 +161,8 @@ sub _RowNumberOver { or croak "Unrecognizable SELECT: $sql"; # get selectors, and scan the order_by (if any) - my ($in_sel, $out_sel, $alias_map, $extra_order_sel) = $self->_subqueried_limit_attrs ( - $rs_attrs, 'scan_order_by', - ); + my ($in_sel, $out_sel, $alias_map, $extra_order_sel) + = $self->_subqueried_limit_attrs ( $rs_attrs ); # make up an order if none exists my $requested_order = (delete $rs_attrs->{order_by}) || $self->_rno_default_order; @@ -207,7 +204,7 @@ SELECT $out_sel FROM ( SELECT $mid_sel, ROW_NUMBER() OVER( $rno_ord ) AS $idx_name FROM ( SELECT $in_sel ${sql}${group_having} ) $qalias -) $qalias WHERE $idx_name BETWEEN %d AND %d +) $qalias WHERE $idx_name BETWEEN %u AND %u EOS @@ -229,10 +226,10 @@ sub _SkipFirst { return sprintf ('SELECT %s%s%s%s', $offset - ? sprintf ('SKIP %d ', $offset) + ? sprintf ('SKIP %u ', $offset) : '' , - sprintf ('FIRST %d ', $rows), + sprintf ('FIRST %u ', $rows), $sql, $self->_parse_rs_attrs ($rs_attrs), ); @@ -246,9 +243,9 @@ sub _FirstSkip { or croak "Unrecognizable SELECT: $sql"; return sprintf ('SELECT %s%s%s%s', - sprintf ('FIRST %d ', $rows), + sprintf ('FIRST %u ', $rows), $offset - ? sprintf ('SKIP %d ', $offset) + ? sprintf ('SKIP %u ', $offset) : '' , $sql, @@ -276,7 +273,7 @@ SELECT $outsel FROM ( SELECT $outsel, ROWNUM $idx_name FROM ( SELECT $insel ${sql}${order_group_having} ) $qalias -) $qalias WHERE $idx_name BETWEEN %d AND %d +) $qalias WHERE $idx_name BETWEEN %u AND %u EOS @@ -294,7 +291,7 @@ sub _Top { # get selectors my ($in_sel, $out_sel, $alias_map, $extra_order_sel) - = $self->_subqueried_limit_attrs ($rs_attrs, 'outer_order_by'); + = $self->_subqueried_limit_attrs ($rs_attrs); my $requested_order = delete $rs_attrs->{order_by}; @@ -359,7 +356,7 @@ sub _Top { my $quoted_rs_alias = $self->_quote ($rs_attrs->{alias}); - $sql = sprintf ('SELECT TOP %d %s %s %s %s', + $sql = sprintf ('SELECT TOP %u %s %s %s %s', $rows + ($offset||0), $in_sel, $sql, @@ -367,7 +364,7 @@ sub _Top { $order_by_inner, ); - $sql = sprintf ('SELECT TOP %d %s FROM ( %s ) %s %s', + $sql = sprintf ('SELECT TOP %u %s FROM ( %s ) %s %s', $rows, $mid_sel, $sql, @@ -375,7 +372,7 @@ sub _Top { $order_by_reversed, ) if $offset; - $sql = sprintf ('SELECT TOP %d %s FROM ( %s ) %s %s', + $sql = sprintf ('SELECT TOP %u %s FROM ( %s ) %s %s', $rows, $out_sel, $sql, @@ -383,6 +380,112 @@ sub _Top { $order_by_requested, ) if ( ($offset && $order_by_requested) || ($mid_sel ne $out_sel) ); + $sql =~ s/\s*\n\s*/ /g; # easier to read in the debugger + return $sql; +} + +# This is the most evil limit "dialect" (more of a hack) for *really* +# stupid databases. It works by ordering the set by some unique column, +# and calculating amount of rows that have a less-er value (thus +# emulating a RowNum-like index). Of course this implies the set can +# only be ordered by a single unique columns. +sub _GenericSubQ { + my ($self, $sql, $rs_attrs, $rows, $offset) = @_; + + my $root_rsrc = $rs_attrs->{_rsroot_source_handle}->resolve; + my $root_tbl_name = $root_rsrc->name; + + # mangle the input sql as we will be replacing the selector + $sql =~ s/^ \s* SELECT \s+ .+? \s+ (?= \b FROM \b )//ix + or croak "Unrecognizable SELECT: $sql"; + + my ($order_by, @rest) = do { + local $self->{quote_char}; + $self->_order_by_chunks ($rs_attrs->{order_by}) + }; + + unless ( + $order_by + && + ! @rest + && + ( ! ref $order_by + || + ( ref $order_by eq 'ARRAY' and @$order_by == 1 ) + ) + ) { + croak ( + 'Generic Subquery Limit does not work on resultsets without an order, or resultsets ' + . 'with complex order criteria (multicolumn and/or functions). Provide a single, ' + . 'unique-column order criteria.' + ); + } + + ($order_by) = @$order_by if ref $order_by; + + $order_by =~ s/\s+ ( ASC|DESC ) \s* $//ix; + my $direction = lc ($1 || 'asc'); + + my ($unq_sort_col) = $order_by =~ /(?:^|\.)([^\.]+)$/; + + my $inf = $root_rsrc->storage->_resolve_column_info ( + $rs_attrs->{from}, [$order_by, $unq_sort_col] + ); + + my $ord_colinfo = $inf->{$order_by} || croak "Unable to determine source of order-criteria '$order_by'"; + + if ($ord_colinfo->{-result_source}->name ne $root_tbl_name) { + croak "Generic Subquery Limit order criteria can be only based on the root-source '" + . $root_rsrc->source_name . "' (aliased as '$rs_attrs->{alias}')"; + } + + # make sure order column is qualified + $order_by = "$rs_attrs->{alias}.$order_by" + unless $order_by =~ /^$rs_attrs->{alias}\./; + + my $is_u; + my $ucs = { $root_rsrc->unique_constraints }; + for (values %$ucs ) { + if (@$_ == 1 && "$rs_attrs->{alias}.$_->[0]" eq $order_by) { + $is_u++; + last; + } + } + croak "Generic Subquery Limit order criteria column '$order_by' must be unique (no unique constraint found)" + unless $is_u; + + my ($in_sel, $out_sel, $alias_map, $extra_order_sel) + = $self->_subqueried_limit_attrs ($rs_attrs); + + my $cmp_op = $direction eq 'desc' ? '>' : '<'; + my $count_tbl_alias = 'rownum__emulation'; + + my $order_group_having = $self->_parse_rs_attrs($rs_attrs); + + # add the order supplement (if any) as this is what will be used for the outer WHERE + $in_sel .= ", $_" for keys %{$extra_order_sel||{}}; + + $sql = sprintf (<_quote ($_) } ( + $rs_attrs->{alias}, + $root_tbl_name, + $count_tbl_alias, + "$count_tbl_alias.$unq_sort_col", + $order_by, + )), + $offset + ? sprintf ('BETWEEN %u AND %u', $offset, $offset + $rows - 1) + : sprintf ('< %u', $rows ) + , + ); + + $sql =~ s/\s*\n\s*/ /g; # easier to read in the debugger return $sql; } diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm index 0efff67..1320f16 100644 --- a/lib/DBIx/Class/Storage/DBI.pm +++ b/lib/DBIx/Class/Storage/DBI.pm @@ -1922,19 +1922,13 @@ sub _select_args { } # adjust limits - if ( - $attrs->{software_limit} - || - $sql_maker->_default_limit_syntax eq "GenericSubQ" - ) { - $attrs->{software_limit} = 1; - } - else { + if (defined $attrs->{rows}) { $self->throw_exception("rows attribute must be positive if present") - if (defined($attrs->{rows}) && !($attrs->{rows} > 0)); - + unless $attrs->{rows} > 0; + } + elsif (defined $attrs->{offset}) { # MySQL actually recommends this approach. I cringe. - $attrs->{rows} = 2**48 if not defined $attrs->{rows} and defined $attrs->{offset}; + $attrs->{rows} = 2**32; } my @limit; diff --git a/t/lib/DBICTest/Schema/BooksInLibrary.pm b/t/lib/DBICTest/Schema/BooksInLibrary.pm index 8da54e6..24d5657 100644 --- a/t/lib/DBICTest/Schema/BooksInLibrary.pm +++ b/t/lib/DBICTest/Schema/BooksInLibrary.pm @@ -27,6 +27,8 @@ __PACKAGE__->add_columns( ); __PACKAGE__->set_primary_key('id'); +__PACKAGE__->add_unique_constraint (['title']); + __PACKAGE__->resultset_attributes({where => { source => "Library" } }); __PACKAGE__->belongs_to ( owner => 'DBICTest::Schema::Owners', 'owner' ); diff --git a/t/sqlahacks/limit_dialects/generic_subq.t b/t/sqlahacks/limit_dialects/generic_subq.t new file mode 100644 index 0000000..7f924d1 --- /dev/null +++ b/t/sqlahacks/limit_dialects/generic_subq.t @@ -0,0 +1,125 @@ +use strict; +use warnings; + +use Test::More; +use lib qw(t/lib); +use DBICTest; +use DBIC::SqlMakerTest; + +my $schema = DBICTest->init_schema; + +$schema->storage->_sql_maker->limit_dialect ('GenericSubQ'); + +my $rs = $schema->resultset ('BooksInLibrary')->search ({}, { + '+columns' => [{ owner_name => 'owner.name' }], + join => 'owner', + rows => 2, + order_by => 'me.title', +}); + +is_same_sql_bind( + $rs->as_query, + '( + SELECT id, source, owner, title, price, + owner_name + FROM ( + SELECT me.id, me.source, me.owner, me.title, me.price, + owner.name AS owner_name + FROM books me + JOIN owners owner ON owner.id = me.owner + WHERE ( source = ? ) + ORDER BY me.title + ) me + WHERE + ( + SELECT COUNT(*) + FROM books rownum__emulation + WHERE rownum__emulation.title < me.title + ) < 2 + )', + [ [ 'source', 'Library' ] ], +); + +is_deeply ( + [ $rs->get_column ('title')->all ], + ['Best Recipe Cookbook', 'Dynamical Systems'], + 'Correct columns selected with rows', +); + +$schema->storage->_sql_maker->quote_char ('"'); +$schema->storage->_sql_maker->name_sep ('.'); + +$rs = $schema->resultset ('BooksInLibrary')->search ({}, { + order_by => { -desc => 'title' }, + '+select' => ['owner.name'], + '+as' => ['owner.name'], + join => 'owner', + rows => 3, + offset => 1, +}); + +is_same_sql_bind( + $rs->as_query, + '( + SELECT "id", "source", "owner", "title", "price", + "owner__name" + FROM ( + SELECT "me"."id", "me"."source", "me"."owner", "me"."title", "me"."price", + "owner"."name" AS "owner__name" + FROM "books" "me" + JOIN "owners" "owner" ON "owner"."id" = "me"."owner" + WHERE ( "source" = ? ) + ORDER BY "title" DESC + ) "me" + WHERE + ( + SELECT COUNT(*) + FROM "books" "rownum__emulation" + WHERE "rownum__emulation"."title" > "me"."title" + ) BETWEEN 1 AND 3 + )', + [ [ 'source', 'Library' ] ], +); + +is_deeply ( + [ $rs->get_column ('title')->all ], + [ 'Dynamical Systems', 'Best Recipe Cookbook' ], + 'Correct columns selected with rows', +); + +$rs = $schema->resultset ('BooksInLibrary')->search ({}, { + order_by => 'title', + 'select' => ['owner.name'], + 'as' => ['owner_name'], + join => 'owner', + offset => 1, +}); + +is_same_sql_bind( + $rs->as_query, + '( + SELECT "owner_name" + FROM ( + SELECT "owner"."name" AS "owner_name", "title" + FROM "books" "me" + JOIN "owners" "owner" ON "owner"."id" = "me"."owner" + WHERE ( "source" = ? ) + ORDER BY "title" + ) "me" + WHERE + ( + SELECT COUNT(*) + FROM "books" "rownum__emulation" + WHERE "rownum__emulation"."title" < "me"."title" + ) BETWEEN 1 AND 4294967295 + )', + [ [ 'source', 'Library' ] ], +); + +is_deeply ( + [ $rs->get_column ('owner_name')->all ], + [ ('Newton') x 2 ], + 'Correct columns selected with rows', +); + +done_testing; diff --git a/t/sqlahacks/limit_dialects/toplimit.t b/t/sqlahacks/limit_dialects/toplimit.t index b2840c2..16f64dc 100644 --- a/t/sqlahacks/limit_dialects/toplimit.t +++ b/t/sqlahacks/limit_dialects/toplimit.t @@ -153,19 +153,19 @@ is_same_sql_bind ( SELECT TOP 2 id, source, owner, title, price FROM ( SELECT TOP 2 - id, source, owner, title, price, ORDER__BY__1 + id, source, owner, title, price FROM ( SELECT TOP 5 - me.id, me.source, me.owner, me.title, me.price, title AS ORDER__BY__1 + me.id, me.source, me.owner, me.title, me.price FROM books me JOIN owners owner ON owner.id = me.owner WHERE ( source = ? ) GROUP BY title ORDER BY title ) me - ORDER BY ORDER__BY__1 DESC + ORDER BY title DESC ) me - ORDER BY ORDER__BY__1 + ORDER BY title ) me JOIN owners owner ON owner.id = me.owner WHERE ( source = ? )