X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDBIx%2FClass%2FStorage%2FDBIHacks.pm;h=26cc4c88b0770abf9b7ef0d6db698dbe04dd2846;hb=92705f7f05161f7dba36d9b09dc6e893af7b2773;hp=d88975265a9b49a6d925d74fd29a9be403f19f63;hpb=90c9dd1da2eab272f75dc62a504f378a07737218;p=dbsrgits%2FDBIx-Class.git diff --git a/lib/DBIx/Class/Storage/DBIHacks.pm b/lib/DBIx/Class/Storage/DBIHacks.pm index d889752..26cc4c8 100644 --- a/lib/DBIx/Class/Storage/DBIHacks.pm +++ b/lib/DBIx/Class/Storage/DBIHacks.pm @@ -2,9 +2,24 @@ package #hide from PAUSE DBIx::Class::Storage::DBIHacks; # -# This module contains code that should never have seen the light of day, -# does not belong in the Storage, or is otherwise unfit for public -# display. The arrival of SQLA2 should immediately obsolete 90% of this +# This module contains code supporting a battery of special cases and tests for +# many corner cases pushing the envelope of what DBIC can do. When work on +# these utilities began in mid 2009 (51a296b402c) it wasn't immediately obvious +# that these pieces, despite their misleading on-first-sighe-flakiness, will +# become part of the generic query rewriting machinery of DBIC, allowing it to +# both generate and process queries representing incredibly complex sets with +# reasonable efficiency. +# +# Now (end of 2015), more than 6 years later the routines in this class have +# stabilized enough, and are meticulously covered with tests, to a point where +# an effort to formalize them into user-facing APIs might be worthwhile. +# +# An implementor working on publicizing and/or replacing the routines with a +# more modern SQL generation framework should keep in mind that pretty much all +# existing tests are constructed on the basis of real-world code used in +# production somewhere. +# +# Please hack on this responsibly ;) # use strict; @@ -13,9 +28,8 @@ use warnings; use base 'DBIx::Class::Storage'; use mro 'c3'; -use List::Util 'first'; use Scalar::Util 'blessed'; -use DBIx::Class::_Util qw(UNRESOLVABLE_CONDITION serialize); +use DBIx::Class::_Util qw(UNRESOLVABLE_CONDITION serialize dump_value); use SQL::Abstract qw(is_plain_value is_literal_value); use DBIx::Class::Carp; use namespace::clean; @@ -40,7 +54,11 @@ sub _prune_unused_joins { $self->_use_join_optimizer ); - my $orig_aliastypes = $self->_resolve_aliastypes_from_select_args($attrs); + my $orig_aliastypes = + $attrs->{_precalculated_aliastypes} + || + $self->_resolve_aliastypes_from_select_args($attrs) + ; my $new_aliastypes = { %$orig_aliastypes }; @@ -170,18 +188,27 @@ sub _adjust_select_args_for_complex_prefetch { push @{$inner_attrs->{as}}, $attrs->{as}[$i]; } - # We will need to fetch all native columns in the inner subquery, which may + my $inner_aliastypes = $self->_resolve_aliastypes_from_select_args($inner_attrs); + + # In the inner subq we will need to fetch *only* native columns which may # be a part of an *outer* join condition, or an order_by (which needs to be # preserved outside), or wheres. In other words everything but the inner # selector # We can not just fetch everything because a potential has_many restricting # join collapse *will not work* on heavy data types. - my $connecting_aliastypes = $self->_resolve_aliastypes_from_select_args({ - %$inner_attrs, - select => [], - }); - for (sort map { keys %{$_->{-seen_columns}||{}} } map { values %$_ } values %$connecting_aliastypes) { + # essentially a map of all non-selecting seen columns + # the sort is there for a nicer select list + for ( + sort + map + { keys %{$_->{-seen_columns}||{}} } + map + { values %{$inner_aliastypes->{$_}} } + grep + { $_ ne 'selecting' } + keys %$inner_aliastypes + ) { my $ci = $colinfo->{$_} or next; if ( $ci->{-source_alias} eq $root_alias @@ -204,8 +231,11 @@ sub _adjust_select_args_for_complex_prefetch { local $self->{_use_join_optimizer} = 1; # throw away multijoins since we def. do not care about those inside the subquery - ($inner_attrs->{from}, my $inner_aliastypes) = $self->_prune_unused_joins ({ - %$inner_attrs, _force_prune_multiplying_joins => 1 + # $inner_aliastypes *will* be redefined at this point + ($inner_attrs->{from}, $inner_aliastypes ) = $self->_prune_unused_joins ({ + %$inner_attrs, + _force_prune_multiplying_joins => 1, + _precalculated_aliastypes => $inner_aliastypes, }); # uh-oh a multiplier (which is not us) left in, this is a problem for limits @@ -313,7 +343,7 @@ sub _adjust_select_args_for_complex_prefetch { ) { push @outer_from, $j } - elsif (first { $_->{$alias} } @outer_nonselecting_chains ) { + elsif (grep { $_->{$alias} } @outer_nonselecting_chains ) { push @outer_from, $j; $may_need_outer_group_by ||= $outer_aliastypes->{multiplying}{$alias} ? 1 : 0; } @@ -330,27 +360,53 @@ sub _adjust_select_args_for_complex_prefetch { }); } - # This is totally horrific - the {where} ends up in both the inner and outer query - # Unfortunately not much can be done until SQLA2 introspection arrives, and even - # then if where conditions apply to the *right* side of the prefetch, you may have - # to both filter the inner select (e.g. to apply a limit) and then have to re-filter - # the outer select to exclude joins you didn't want in the first place + # FIXME: The {where} ends up in both the inner and outer query, i.e. *twice* + # + # This is rather horrific, and while we currently *do* have enough + # introspection tooling available to attempt a stab at properly deciding + # whether or not to include the where condition on the outside, the + # machinery is still too slow to apply it here. + # Thus for the time being we do not attempt any sanitation of the where + # clause and just pass it through on both sides of the subquery. This *will* + # be addressed at a later stage, most likely after folding the SQL generator + # into SQLMaker proper # # OTOH it can be seen as a plus: (notes that this query would make a DBA cry ;) + # return $outer_attrs; } +# This is probably the ickiest, yet most relied upon part of the codebase: +# this is the place where we take arbitrary SQL input and break it into its +# constituent parts, making sure we know which *sources* are used in what +# *capacity* ( selecting / restricting / grouping / ordering / joining, etc ) +# Although the method is pretty horrific, the worst thing that can happen is +# for a classification failure, which in turn will result in a vocal exception, +# and will lead to a relatively prompt fix. +# The code has been slowly improving and is covered with a formiddable battery +# of tests, so can be considered "reliably stable" at this point (Oct 2015). +# +# A note to implementors attempting to "replace" this - keep in mind that while +# there are multiple optimization avenues, the actual "scan literal elements" +# part *MAY NEVER BE REMOVED*, even if it is limited only ot the (future) AST +# nodes that are deemed opaque (i.e. contain literal expressions). The use of +# blackbox literals is at this point firmly a user-facing API, and is one of +# *the* reasons DBIC remains as flexible as it is. In other words, when working +# on this keep in mind that the following is widespread and *encouraged* way +# of using DBIC in the wild when push comes to shove: +# +# $rs->search( {}, { +# select => \[ $random, @stuff], +# from => \[ $random, @stuff ], +# where => \[ $random, @stuff ], +# group_by => \[ $random, @stuff ], +# order_by => \[ $random, @stuff ], +# } ) # -# I KNOW THIS SUCKS! GET SQLA2 OUT THE DOOR SO THIS CAN DIE! +# Various incarnations of the above are reflected in many of the tests. If one +# gets to fail, you get to fix it. A "this is crazy, nobody does that" is not +# acceptable going forward. # -# Due to a lack of SQLA2 we fall back to crude scans of all the -# select/where/order/group attributes, in order to determine what -# aliases are needed to fulfill the query. This information is used -# throughout the code to prune unnecessary JOINs from the queries -# in an attempt to reduce the execution time. -# Although the method is pretty horrific, the worst thing that can -# happen is for it to fail due to some scalar SQL, which in turn will -# result in a vocal exception. sub _resolve_aliastypes_from_select_args { my ( $self, $attrs ) = @_; @@ -448,27 +504,31 @@ sub _resolve_aliastypes_from_select_args { ], }; + # we will be bulk-scanning anyway - pieces will not matter in that case, + # thus join everything up # throw away empty-string chunks, and make sure no binds snuck in # note that we operate over @{$to_scan->{$type}}, hence the # semi-mindbending ... map ... for values ... - ( $_ = [ map { + ( $_ = join ' ', map { - (not $_) ? () - : (length ref $_) ? (require Data::Dumper::Concise && $self->throw_exception( - "Unexpected ref in scan-plan: " . Data::Dumper::Concise::Dumper($_) - )) - : $_ + ( ! defined $_ ) ? () + : ( length ref $_ ) ? $self->throw_exception( + "Unexpected ref in scan-plan: " . dump_value $_ + ) + : ( $_ =~ /^\s*$/ ) ? () + : $_ - } @$_ ] ) for values %$to_scan; + } @$_ ) for values %$to_scan; # throw away empty to-scan's ( - @{$to_scan->{$_}} + length $to_scan->{$_} or delete $to_scan->{$_} ) for keys %$to_scan; + # these will be used for matching in the loop below my $all_aliases = join ' | ', map { quotemeta $_ } keys %$alias_list; my $fq_col_re = qr/ @@ -477,6 +537,7 @@ sub _resolve_aliastypes_from_select_args { \b ( $all_aliases ) \. ( [^\s\)\($rquote]+ )? /x; + my $all_unq_columns = join ' | ', map { quotemeta $_ } @@ -486,7 +547,11 @@ sub _resolve_aliastypes_from_select_args { keys %$colinfo ; my $unq_col_re = $all_unq_columns - ? qr/ $lquote ( $all_unq_columns ) $rquote /x + ? qr/ + $lquote ( $all_unq_columns ) $rquote + | + (?: \A | \s ) ( $all_unq_columns ) (?: \s | \z ) + /x : undef ; @@ -494,26 +559,13 @@ sub _resolve_aliastypes_from_select_args { # the actual scan, per type for my $type (keys %$to_scan) { - # first see if we have any exact matches (qualified or unqualified) - for my $piece (@{$to_scan->{$type}}) { - if ($colinfo->{$piece} and my $alias = $colinfo->{$piece}{-source_alias}) { - $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] }; - $aliases_by_type->{$type}{$alias}{-seen_columns}{$colinfo->{$piece}{-fq_colname}} = $piece; - } - } - - - # we will be bulk-scanning anyway - pieces will not matter in that case - # (unlike in the direct-equivalence above) - my $scan_string = join ' ', @{$to_scan->{$type}}; - # now loop through all fully qualified columns and get the corresponding # alias (should work even if they are in scalarrefs) # - # The regex matches in multiples of 4, with one of the two pairs being + # The regex captures in multiples of 4, with one of the two pairs being # undef. There may be a *lot* of matches, hence the convoluted loop - my @matches = $scan_string =~ /$fq_col_re/g; + my @matches = $to_scan->{$type} =~ /$fq_col_re/g; my $i = 0; while( $i < $#matches ) { @@ -535,7 +587,10 @@ sub _resolve_aliastypes_from_select_args { # now loop through unqualified column names, and try to locate them within # the chunks, if there are any unqualified columns in the 1st place next unless $unq_col_re; - for ( $scan_string =~ /$unq_col_re/g ) { + + # The regex captures in multiples of 2, one of the two being undef + for ( $to_scan->{$type} =~ /$unq_col_re/g ) { + defined $_ or next; my $alias = $colinfo->{$_}{-source_alias} or next; $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] }; $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = $_ @@ -663,12 +718,9 @@ sub _group_over_selection { # of the external order and convert them to MIN(X) for ASC or MAX(X) # for DESC, and group_by the root columns. The end result should be # exactly what we expect + # - # FIXME - this code is a joke, will need to be completely rewritten in - # the DQ branch. But I need to push a POC here, otherwise the - # pesky tests won't pass - # wrap any part of the order_by that "responds" to an ordering alias - # into a MIN/MAX + # both populated on the first loop over $o_idx $sql_maker ||= $self->sql_maker; $order_chunks ||= [ map { ref $_ eq 'ARRAY' ? $_ : [ $_ ] } $sql_maker->_order_by_chunks($attrs->{order_by}) @@ -676,9 +728,11 @@ sub _group_over_selection { my ($chunk, $is_desc) = $sql_maker->_split_order_chunk($order_chunks->[$o_idx][0]); + # we reached that far - wrap any part of the order_by that "responded" + # to an ordering alias into a MIN/MAX $new_order_by[$o_idx] = \[ sprintf( '%s( %s )%s', - ($is_desc ? 'MAX' : 'MIN'), + $self->_minmax_operator_for_datatype($chunk_ci->{data_type}, $is_desc), $chunk, ($is_desc ? ' DESC' : ''), ), @@ -706,6 +760,12 @@ sub _group_over_selection { ); } +sub _minmax_operator_for_datatype { + #my ($self, $datatype, $want_max) = @_; + + $_[2] ? 'MAX' : 'MIN'; +} + sub _resolve_ident_sources { my ($self, $ident) = @_; @@ -1026,7 +1086,9 @@ sub _extract_colinfo_of_stable_main_source_order_by_portion { # resultset {where} stacks # # FIXME - while relatively robust, this is still imperfect, one of the first -# things to tackle with DQ +# things to tackle when we get access to a formalized AST. Note that this code +# is covered by a *ridiculous* amount of tests, so starting with porting this +# code would be a rather good exercise sub _collapse_cond { my ($self, $where, $where_is_anded_array) = @_; @@ -1047,7 +1109,7 @@ sub _collapse_cond { for (sort keys %$chunk) { # Match SQLA 1.79 behavior - if ($_ eq '') { + unless( length $_ ) { is_literal_value($chunk->{$_}) ? carp 'Hash-pairs consisting of an empty string with a literal are deprecated, use -and => [ $literal ] instead' : $self->throw_exception("Supplying an empty left hand side argument is not supported in hash-pairs") @@ -1065,7 +1127,7 @@ sub _collapse_cond { # Match SQLA 1.79 behavior $self->throw_exception("Supplying an empty left hand side argument is not supported in array-pairs") - if $where_is_anded_array and (! defined $chunk or $chunk eq ''); + if $where_is_anded_array and (! defined $chunk or ! length $chunk); push @pairs, $chunk, shift @pieces; } @@ -1260,7 +1322,7 @@ sub _collapse_cond_unroll_pairs { while (@$pairs) { my ($lhs, $rhs) = splice @$pairs, 0, 2; - if ($lhs eq '') { + if (! length $lhs) { push @conds, $self->_collapse_cond($rhs); } elsif ( $lhs =~ /^\-and$/i ) { @@ -1291,11 +1353,10 @@ sub _collapse_cond_unroll_pairs { # extra sanity check if (keys %$p > 1) { - require Data::Dumper::Concise; local $Data::Dumper::Deepcopy = 1; $self->throw_exception( "Internal error: unexpected collapse unroll:" - . Data::Dumper::Concise::Dumper { in => { $lhs => $rhs }, out => $p } + . dump_value { in => { $lhs => $rhs }, out => $p } ); }