X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDBIx%2FClass%2FStorage%2FDBIHacks.pm;h=14410b700ddb47db9f593c7e811bb60773a12fef;hb=d52c4a75538bd15d1fa531a63c6ec45b8943dfd0;hp=ef890921d6c179e3715a0ea165bfddc459c81126;hpb=b5ce6748f58040ca877fd05e8f004b14d46b2ba9;p=dbsrgits%2FDBIx-Class.git diff --git a/lib/DBIx/Class/Storage/DBIHacks.pm b/lib/DBIx/Class/Storage/DBIHacks.pm index ef89092..14410b7 100644 --- a/lib/DBIx/Class/Storage/DBIHacks.pm +++ b/lib/DBIx/Class/Storage/DBIHacks.pm @@ -2,9 +2,24 @@ package #hide from PAUSE DBIx::Class::Storage::DBIHacks; # -# This module contains code that should never have seen the light of day, -# does not belong in the Storage, or is otherwise unfit for public -# display. The arrival of SQLA2 should immediately obsolete 90% of this +# This module contains code supporting a battery of special cases and tests for +# many corner cases pushing the envelope of what DBIC can do. When work on +# these utilities began in mid 2009 (51a296b402c) it wasn't immediately obvious +# that these pieces, despite their misleading on-first-sight-flakiness, will +# become part of the generic query rewriting machinery of DBIC, allowing it to +# both generate and process queries representing incredibly complex sets with +# reasonable efficiency. +# +# Now (end of 2015), more than 6 years later the routines in this class have +# stabilized enough, and are meticulously covered with tests, to a point where +# an effort to formalize them into user-facing APIs might be worthwhile. +# +# An implementor working on publicizing and/or replacing the routines with a +# more modern SQL generation framework should keep in mind that pretty much all +# existing tests are constructed on the basis of real-world code used in +# production somewhere. 
+# +# Please hack on this responsibly ;) # use strict; @@ -15,9 +30,9 @@ use mro 'c3'; use List::Util 'first'; use Scalar::Util 'blessed'; -use Sub::Name 'subname'; -use DBIx::Class::_Util 'UNRESOLVABLE_CONDITION'; +use DBIx::Class::_Util qw(UNRESOLVABLE_CONDITION serialize); use SQL::Abstract qw(is_plain_value is_literal_value); +use DBIx::Class::Carp; use namespace::clean; # @@ -40,7 +55,11 @@ sub _prune_unused_joins { $self->_use_join_optimizer ); - my $orig_aliastypes = $self->_resolve_aliastypes_from_select_args($attrs); + my $orig_aliastypes = + $attrs->{_precalculated_aliastypes} + || + $self->_resolve_aliastypes_from_select_args($attrs) + ; my $new_aliastypes = { %$orig_aliastypes }; @@ -112,8 +131,8 @@ sub _adjust_select_args_for_complex_prefetch { my $outer_attrs = { %$attrs }; delete @{$outer_attrs}{qw(from bind rows offset group_by _grouped_by_distinct having)}; - my $inner_attrs = { %$attrs }; - delete @{$inner_attrs}{qw(for collapse select as _related_results_construction)}; + my $inner_attrs = { %$attrs, _simple_passthrough_construction => 1 }; + delete @{$inner_attrs}{qw(for collapse select as)}; # there is no point of ordering the insides if there is no limit delete $inner_attrs->{order_by} if ( @@ -170,18 +189,27 @@ sub _adjust_select_args_for_complex_prefetch { push @{$inner_attrs->{as}}, $attrs->{as}[$i]; } - # We will need to fetch all native columns in the inner subquery, which may + my $inner_aliastypes = $self->_resolve_aliastypes_from_select_args($inner_attrs); + + # In the inner subq we will need to fetch *only* native columns which may # be a part of an *outer* join condition, or an order_by (which needs to be # preserved outside), or wheres. In other words everything but the inner # selector # We can not just fetch everything because a potential has_many restricting # join collapse *will not work* on heavy data types. 
- my $connecting_aliastypes = $self->_resolve_aliastypes_from_select_args({ - %$inner_attrs, - select => [], - }); - for (sort map { keys %{$_->{-seen_columns}||{}} } map { values %$_ } values %$connecting_aliastypes) { + # essentially a map of all non-selecting seen columns + # the sort is there for a nicer select list + for ( + sort + map + { keys %{$_->{-seen_columns}||{}} } + map + { values %{$inner_aliastypes->{$_}} } + grep + { $_ ne 'selecting' } + keys %$inner_aliastypes + ) { my $ci = $colinfo->{$_} or next; if ( $ci->{-source_alias} eq $root_alias @@ -204,8 +232,11 @@ sub _adjust_select_args_for_complex_prefetch { local $self->{_use_join_optimizer} = 1; # throw away multijoins since we def. do not care about those inside the subquery - ($inner_attrs->{from}, my $inner_aliastypes) = $self->_prune_unused_joins ({ - %$inner_attrs, _force_prune_multiplying_joins => 1 + # $inner_aliastypes *will* be redefined at this point + ($inner_attrs->{from}, $inner_aliastypes ) = $self->_prune_unused_joins ({ + %$inner_attrs, + _force_prune_multiplying_joins => 1, + _precalculated_aliastypes => $inner_aliastypes, }); # uh-oh a multiplier (which is not us) left in, this is a problem for limits @@ -330,27 +361,53 @@ sub _adjust_select_args_for_complex_prefetch { }); } - # This is totally horrific - the {where} ends up in both the inner and outer query - # Unfortunately not much can be done until SQLA2 introspection arrives, and even - # then if where conditions apply to the *right* side of the prefetch, you may have - # to both filter the inner select (e.g. to apply a limit) and then have to re-filter - # the outer select to exclude joins you didn't want in the first place + # FIXME: The {where} ends up in both the inner and outer query, i.e. 
*twice* + # + # This is rather horrific, and while we currently *do* have enough + # introspection tooling available to attempt a stab at properly deciding + # whether or not to include the where condition on the outside, the + # machinery is still too slow to apply it here. + # Thus for the time being we do not attempt any sanitation of the where + # clause and just pass it through on both sides of the subquery. This *will* + # be addressed at a later stage, most likely after folding the SQL generator + # into SQLMaker proper # # OTOH it can be seen as a plus: (notes that this query would make a DBA cry ;) + # return $outer_attrs; } +# This is probably the ickiest, yet most relied upon part of the codebase: +# this is the place where we take arbitrary SQL input and break it into its +# constituent parts, making sure we know which *sources* are used in what +# *capacity* ( selecting / restricting / grouping / ordering / joining, etc ) +# Although the method is pretty horrific, the worst thing that can happen is +# for a classification failure, which in turn will result in a vocal exception, +# and will lead to a relatively prompt fix. +# The code has been slowly improving and is covered with a formidable battery +# of tests, so can be considered "reliably stable" at this point (Oct 2015). +# +# A note to implementors attempting to "replace" this - keep in mind that while +# there are multiple optimization avenues, the actual "scan literal elements" +# part *MAY NEVER BE REMOVED*, even if it is limited only to the (future) AST +# nodes that are deemed opaque (i.e. contain literal expressions). The use of +# blackbox literals is at this point firmly a user-facing API, and is one of +# *the* reasons DBIC remains as flexible as it is. In other words, when working +# on this keep in mind that the following is widespread and *encouraged* way +# of using DBIC in the wild when push comes to shove: # -# I KNOW THIS SUCKS! GET SQLA2 OUT THE DOOR SO THIS CAN DIE! 
+# $rs->search( {}, { +# select => \[ $random, @stuff], +# from => \[ $random, @stuff ], +# where => \[ $random, @stuff ], +# group_by => \[ $random, @stuff ], +# order_by => \[ $random, @stuff ], +# } ) +# +# Various incarnations of the above are reflected in many of the tests. If one +# gets to fail, you get to fix it. A "this is crazy, nobody does that" is not +# acceptable going forward. # -# Due to a lack of SQLA2 we fall back to crude scans of all the -# select/where/order/group attributes, in order to determine what -# aliases are needed to fulfill the query. This information is used -# throughout the code to prune unnecessary JOINs from the queries -# in an attempt to reduce the execution time. -# Although the method is pretty horrific, the worst thing that can -# happen is for it to fail due to some scalar SQL, which in turn will -# result in a vocal exception. sub _resolve_aliastypes_from_select_args { my ( $self, $attrs ) = @_; @@ -417,7 +474,7 @@ sub _resolve_aliastypes_from_select_args { # generate sql chunks my $to_scan = { restricting => [ - $sql_maker->_recurse_where ($attrs->{where}), + ($sql_maker->_recurse_where ($attrs->{where}))[0], $sql_maker->_parse_rs_attrs ({ having => $attrs->{having} }), ], grouping => [ @@ -430,111 +487,143 @@ sub _resolve_aliastypes_from_select_args { ), ], selecting => [ - map { ($sql_maker->_recurse_fields($_))[0] } @{$attrs->{select}}, + # kill all selectors which look like a proper subquery + # this is a sucky heuristic *BUT* - if we get it wrong the query will simply + # fail to run, so we are relatively safe + grep + { $_ !~ / \A \s* \( \s* SELECT \s+ .+? \s+ FROM \s+ .+? \) \s* \z /xsi } + map + { ($sql_maker->_recurse_fields($_))[0] } + @{$attrs->{select}} ], - ordering => [ - map { $_->[0] } $self->_extract_order_criteria ($attrs->{order_by}, $sql_maker), + ordering => [ map + { + ( my $sql = (ref $_ ? 
$_->[0] : $_) ) =~ s/ \s+ (?: ASC | DESC ) \s* \z //xi; + $sql; + } + $sql_maker->_order_by_chunks( $attrs->{order_by} ), ], }; - # throw away empty chunks and all 2-value arrayrefs: the thinking is that these are - # bind value specs left in by the sloppy renderer above. It is ok to do this - # at this point, since we are going to end up rewriting this crap anyway - for my $v (values %$to_scan) { - my @nv; - for (@$v) { - next if ( - ! defined $_ - or - ( - ref $_ eq 'ARRAY' - and - ( @$_ == 0 or @$_ == 2 ) - ) - ); + # we will be bulk-scanning anyway - pieces will not matter in that case, + # thus join everything up + # throw away empty-string chunks, and make sure no binds snuck in + # note that we operate over @{$to_scan->{$type}}, hence the + # semi-mindbending ... map ... for values ... + ( $_ = join ' ', map { + + ( ! defined $_ ) ? () + : ( length ref $_ ) ? (require Data::Dumper::Concise && $self->throw_exception( + "Unexpected ref in scan-plan: " . Data::Dumper::Concise::Dumper($_) + )) + : ( $_ =~ /^\s*$/ ) ? () + : $_ + + } @$_ ) for values %$to_scan; + + # throw away empty to-scan's + ( + length $to_scan->{$_} + or + delete $to_scan->{$_} + ) for keys %$to_scan; - if (ref $_) { - require Data::Dumper::Concise; - $self->throw_exception("Unexpected ref in scan-plan: " . Data::Dumper::Concise::Dumper($v) ); - } - push @nv, $_; - } - $v = \@nv; - } + # these will be used for matching in the loop below + my $all_aliases = join ' | ', map { quotemeta $_ } keys %$alias_list; + my $fq_col_re = qr/ + $lquote ( $all_aliases ) $rquote $sep (?: $lquote ([^$rquote]+) $rquote )? + | + \b ( $all_aliases ) \. ( [^\s\)\($rquote]+ )? + /x; - # kill all selectors which look like a proper subquery - # this is a sucky heuristic *BUT* - if we get it wrong the query will simply - # fail to run, so we are relatively safe - $to_scan->{selecting} = [ grep { - $_ !~ / \A \s* \( \s* SELECT \s+ .+? \s+ FROM \s+ .+? 
\) \s* \z /xsi - } @{ $to_scan->{selecting} || [] } ]; - # first see if we have any exact matches (qualified or unqualified) + my $all_unq_columns = join ' | ', + map + { quotemeta $_ } + grep + # using a regex here shows up on profiles, boggle + { index( $_, '.') < 0 } + keys %$colinfo + ; + my $unq_col_re = $all_unq_columns + ? qr/ + $lquote ( $all_unq_columns ) $rquote + | + (?: \A | \s ) ( $all_unq_columns ) (?: \s | \z ) + /x + : undef + ; + + + # the actual scan, per type for my $type (keys %$to_scan) { - for my $piece (@{$to_scan->{$type}}) { - if ($colinfo->{$piece} and my $alias = $colinfo->{$piece}{-source_alias}) { - $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] }; - $aliases_by_type->{$type}{$alias}{-seen_columns}{$colinfo->{$piece}{-fq_colname}} = $piece; - } - } - } - # now loop through all fully qualified columns and get the corresponding - # alias (should work even if they are in scalarrefs) - for my $alias (keys %$alias_list) { - my $al_re = qr/ - $lquote $alias $rquote $sep (?: $lquote ([^$rquote]+) $rquote )? - | - \b $alias \. ([^\s\)\($rquote]+)? - /x; - - for my $type (keys %$to_scan) { - for my $piece (@{$to_scan->{$type}}) { - if (my @matches = $piece =~ /$al_re/g) { - $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] }; - $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = "$alias.$_" - for grep { defined $_ } @matches; - } + + # now loop through all fully qualified columns and get the corresponding + # alias (should work even if they are in scalarrefs) + # + # The regex captures in multiples of 4, with one of the two pairs being + # undef. 
There may be a *lot* of matches, hence the convoluted loop + my @matches = $to_scan->{$type} =~ /$fq_col_re/g; + my $i = 0; + while( $i < $#matches ) { + + if ( + defined $matches[$i] + ) { + $aliases_by_type->{$type}{$matches[$i]} ||= { -parents => $alias_list->{$matches[$i]}{-join_path}||[] }; + + $aliases_by_type->{$type}{$matches[$i]}{-seen_columns}{"$matches[$i].$matches[$i+1]"} = "$matches[$i].$matches[$i+1]" + if defined $matches[$i+1]; + + $i += 2; } + + $i += 2; } - } - # now loop through unqualified column names, and try to locate them within - # the chunks - for my $col (keys %$colinfo) { - next if $col =~ / \. /x; # if column is qualified it was caught by the above - my $col_re = qr/ $lquote ($col) $rquote /x; + # now loop through unqualified column names, and try to locate them within + # the chunks, if there are any unqualified columns in the 1st place + next unless $unq_col_re; - for my $type (keys %$to_scan) { - for my $piece (@{$to_scan->{$type}}) { - if ( my @matches = $piece =~ /$col_re/g) { - my $alias = $colinfo->{$col}{-source_alias}; - $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] }; - $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = $_ - for grep { defined $_ } @matches; - } - } + # The regex captures in multiples of 2, one of the two being undef + for ( $to_scan->{$type} =~ /$unq_col_re/g ) { + defined $_ or next; + my $alias = $colinfo->{$_}{-source_alias} or next; + $aliases_by_type->{$type}{$alias} ||= { -parents => $alias_list->{$alias}{-join_path}||[] }; + $aliases_by_type->{$type}{$alias}{-seen_columns}{"$alias.$_"} = $_ } } + # Add any non-left joins to the restriction list (such joins are indeed restrictions) - for my $j (values %$alias_list) { - my $alias = $j->{-alias} or next; - $aliases_by_type->{restricting}{$alias} ||= { -parents => $j->{-join_path}||[] } if ( - (not $j->{-join_type}) + ( + $_->{-alias} + and + ! 
$aliases_by_type->{restricting}{ $_->{-alias} } + and + ( + not $_->{-join_type} or - ($j->{-join_type} !~ /^left (?: \s+ outer)? $/xi) - ); - } + $_->{-join_type} !~ /^left (?: \s+ outer)? $/xi + ) + and + $aliases_by_type->{restricting}{ $_->{-alias} } = { -parents => $_->{-join_path}||[] } + ) for values %$alias_list; - for (keys %$aliases_by_type) { - delete $aliases_by_type->{$_} unless keys %{$aliases_by_type->{$_}}; - } - return $aliases_by_type; + # final cleanup + ( + keys %{$aliases_by_type->{$_}} + or + delete $aliases_by_type->{$_} + ) for keys %$aliases_by_type; + + + $aliases_by_type; } # This is the engine behind { distinct => 1 } and the general @@ -630,12 +719,7 @@ sub _group_over_selection { # of the external order and convert them to MIN(X) for ASC or MAX(X) # for DESC, and group_by the root columns. The end result should be # exactly what we expect - - # FIXME - this code is a joke, will need to be completely rewritten in - # the DQ branch. But I need to push a POC here, otherwise the - # pesky tests won't pass - # wrap any part of the order_by that "responds" to an ordering alias - # into a MIN/MAX + # $sql_maker ||= $self->sql_maker; $order_chunks ||= [ map { ref $_ eq 'ARRAY' ? $_ : [ $_ ] } $sql_maker->_order_by_chunks($attrs->{order_by}) @@ -643,6 +727,8 @@ sub _group_over_selection { my ($chunk, $is_desc) = $sql_maker->_split_order_chunk($order_chunks->[$o_idx][0]); + # we reached that far - wrap any part of the order_by that "responded" + # to an ordering alias into a MIN/MAX $new_order_by[$o_idx] = \[ sprintf( '%s( %s )%s', ($is_desc ? 'MAX' : 'MIN'), @@ -715,53 +801,63 @@ sub _resolve_column_info { return {} if $colnames and ! 
@$colnames; - my $alias2src = $self->_resolve_ident_sources($ident); + my $sources = $self->_resolve_ident_sources($ident); + + $_ = { rsrc => $_, colinfos => $_->columns_info } + for values %$sources; my (%seen_cols, @auto_colnames); # compile a global list of column names, to be able to properly # disambiguate unqualified column names (if at all possible) - for my $alias (keys %$alias2src) { - my $rsrc = $alias2src->{$alias}; - for my $colname ($rsrc->columns) { - push @{$seen_cols{$colname}}, $alias; - push @auto_colnames, "$alias.$colname" unless $colnames; - } + for my $alias (keys %$sources) { + ( + ++$seen_cols{$_}{$alias} + and + ! $colnames + and + push @auto_colnames, "$alias.$_" + ) for keys %{ $sources->{$alias}{colinfos} }; } $colnames ||= [ @auto_colnames, - grep { @{$seen_cols{$_}} == 1 } (keys %seen_cols), + ( grep { keys %{$seen_cols{$_}} == 1 } keys %seen_cols ), ]; - my (%return, $colinfos); - foreach my $col (@$colnames) { - my ($source_alias, $colname) = $col =~ m/^ (?: ([^\.]+) \. )? (.+) $/x; + my %return; + for (@$colnames) { + my ($colname, $source_alias) = reverse split /\./, $_; - # if the column was seen exactly once - we know which rsrc it came from - $source_alias ||= $seen_cols{$colname}[0] - if ($seen_cols{$colname} and @{$seen_cols{$colname}} == 1); + my $assumed_alias = + $source_alias + || + # if the column was seen exactly once - we know which rsrc it came from + ( + $seen_cols{$colname} + and + keys %{$seen_cols{$colname}} == 1 + and + ( %{$seen_cols{$colname}} )[0] + ) + || + next + ; - next unless $source_alias; + $self->throw_exception( + "No such column '$colname' on source " . $sources->{$assumed_alias}{rsrc}->source_name + ) unless $seen_cols{$colname}{$assumed_alias}; - my $rsrc = $alias2src->{$source_alias} - or next; - - $return{$col} = { - %{ - ( $colinfos->{$source_alias} ||= $rsrc->columns_info )->{$colname} - || - $self->throw_exception( - "No such column '$colname' on source " . 
$rsrc->source_name - ); - }, - -result_source => $rsrc, - -source_alias => $source_alias, - -fq_colname => $col eq $colname ? "$source_alias.$col" : $col, + $return{$_} = { + %{ $sources->{$assumed_alias}{colinfos}{$colname} }, + -result_source => $sources->{$assumed_alias}{rsrc}, + -source_alias => $assumed_alias, + -fq_colname => "$assumed_alias.$colname", -colname => $colname, }; - $return{"$source_alias.$colname"} = $return{$col} if $col eq $colname; + $return{"$assumed_alias.$colname"} = $return{$_} + unless $source_alias; } return \%return; @@ -983,10 +1079,14 @@ sub _extract_colinfo_of_stable_main_source_order_by_portion { # resultset {where} stacks # # FIXME - while relatively robust, this is still imperfect, one of the first -# things to tackle with DQ +# things to tackle when we get access to a formalized AST. Note that this code +# is covered by a *ridiculous* amount of tests, so starting with porting this +# code would be a rather good exercise sub _collapse_cond { my ($self, $where, $where_is_anded_array) = @_; + my $fin; + if (! $where) { return; } @@ -999,17 +1099,33 @@ sub _collapse_cond { my $chunk = shift @pieces; if (ref $chunk eq 'HASH') { - push @pairs, map { [ $_ => $chunk->{$_} ] } sort keys %$chunk; + for (sort keys %$chunk) { + + # Match SQLA 1.79 behavior + unless( length $_ ) { + is_literal_value($chunk->{$_}) + ? carp 'Hash-pairs consisting of an empty string with a literal are deprecated, use -and => [ $literal ] instead' + : $self->throw_exception("Supplying an empty left hand side argument is not supported in hash-pairs") + ; + } + + push @pairs, $_ => $chunk->{$_}; + } } elsif (ref $chunk eq 'ARRAY') { - push @pairs, [ -or => $chunk ] + push @pairs, -or => $chunk if @$chunk; } - elsif ( ! ref $chunk) { - push @pairs, [ $chunk, shift @pieces ]; + elsif ( ! 
length ref $chunk) { + + # Match SQLA 1.79 behavior + $self->throw_exception("Supplying an empty left hand side argument is not supported in array-pairs") + if $where_is_anded_array and (! defined $chunk or ! length $chunk); + + push @pairs, $chunk, shift @pieces; } else { - push @pairs, [ '', $chunk ]; + push @pairs, '', $chunk; } } @@ -1019,25 +1135,31 @@ sub _collapse_cond { or return; # Consolidate various @conds back into something more compact - my $fin; - for my $c (@conds) { if (ref $c ne 'HASH') { push @{$fin->{-and}}, $c; } else { for my $col (sort keys %$c) { - if (exists $fin->{$col}) { - my ($l, $r) = ($fin->{$col}, $c->{$col}); - (ref $_ ne 'ARRAY' or !@$_) and $_ = [ -and => $_ ] for ($l, $r); - - if (@$l and @$r and $l->[0] eq $r->[0] and $l->[0] eq '-and') { - $fin->{$col} = [ -and => map { @$_[1..$#$_] } ($l, $r) ]; - } - else { - $fin->{$col} = [ -and => $fin->{$col}, $c->{$col} ]; - } + # consolidate all -and nodes + if ($col =~ /^\-and$/i) { + push @{$fin->{-and}}, + ref $c->{$col} eq 'ARRAY' ? @{$c->{$col}} + : ref $c->{$col} eq 'HASH' ? %{$c->{$col}} + : { $col => $c->{$col} } + ; + } + elsif ($col =~ /^\-/) { + push @{$fin->{-and}}, { $col => $c->{$col} }; + } + elsif (exists $fin->{$col}) { + $fin->{$col} = [ -and => map { + (ref $_ eq 'ARRAY' and ($_->[0]||'') =~ /^\-and$/i ) + ? @{$_}[1..$#$_] + : $_ + ; + } ($fin->{$col}, $c->{$col}) ]; } else { $fin->{$col} = $c->{$col}; @@ -1045,57 +1167,144 @@ sub _collapse_cond { } } } - - if ( ref $fin->{-and} eq 'ARRAY' and @{$fin->{-and}} == 1 ) { - my $piece = (delete $fin->{-and})->[0]; - if (ref $piece eq 'ARRAY') { - $fin->{-or} = $fin->{-or} ? [ $piece, $fin->{-or} ] : $piece; - } - elsif (! exists $fin->{''}) { - $fin->{''} = $piece; - } - } - - return $fin; } elsif (ref $where eq 'ARRAY') { - my @w = @$where; + # we are always at top-level here, it is safe to dump empty *standalone* pieces + my $fin_idx; - while ( @w and ( - (ref $w[0] eq 'ARRAY' and ! 
@{$w[0]} ) - or - (ref $w[0] eq 'HASH' and ! keys %{$w[0]}) - )) { shift @w }; + for (my $i = 0; $i <= $#$where; $i++ ) { - return unless @w; + # Match SQLA 1.79 behavior + $self->throw_exception( + "Supplying an empty left hand side argument is not supported in array-pairs" + ) if (! defined $where->[$i] or ! length $where->[$i]); - if ( @w == 1 ) { - return ( ref $w[0] ) - ? $self->_collapse_cond($w[0]) - : { $w[0] => undef } - ; - } - elsif ( @w == 2 and ! ref $w[0]) { - if ( ( $w[0]||'' ) =~ /^\-and$/i ) { - return (ref $w[1] eq 'HASH' or ref $w[1] eq 'ARRAY') - ? $self->_collapse_cond($w[1], (ref $w[1] eq 'ARRAY') ) - : $self->throw_exception("Unsupported top-level op/arg pair: [ $w[0] => $w[1] ]") - ; + my $logic_mod = lc ( ($where->[$i] =~ /^(\-(?:and|or))$/i)[0] || '' ); + + if ($logic_mod) { + $i++; + $self->throw_exception("Unsupported top-level op/arg pair: [ $logic_mod => $where->[$i] ]") + unless ref $where->[$i] eq 'HASH' or ref $where->[$i] eq 'ARRAY'; + + my $sub_elt = $self->_collapse_cond({ $logic_mod => $where->[$i] }) + or next; + + my @keys = keys %$sub_elt; + if ( @keys == 1 and $keys[0] !~ /^\-/ ) { + $fin_idx->{ "COL_$keys[0]_" . serialize $sub_elt } = $sub_elt; + } + else { + $fin_idx->{ "SER_" . serialize $sub_elt } = $sub_elt; + } + } + elsif (! length ref $where->[$i] ) { + my $sub_elt = $self->_collapse_cond({ @{$where}[$i, $i+1] }) + or next; + + $fin_idx->{ "COL_$where->[$i]_" . serialize $sub_elt } = $sub_elt; + $i++; } else { - return $self->_collapse_cond({ @w }); + $fin_idx->{ "SER_" . serialize $where->[$i] } = $self->_collapse_cond( $where->[$i] ) || next; } } + + if (! 
$fin_idx) { + return; + } + elsif ( keys %$fin_idx == 1 ) { + $fin = (values %$fin_idx)[0]; + } else { - return { -or => \@w }; + my @or; + + # at this point everything is at most one level deep - unroll if needed + for (sort keys %$fin_idx) { + if ( ref $fin_idx->{$_} eq 'HASH' and keys %{$fin_idx->{$_}} == 1 ) { + my ($l, $r) = %{$fin_idx->{$_}}; + + if ( + ref $r eq 'ARRAY' + and + ( + ( @$r == 1 and $l =~ /^\-and$/i ) + or + $l =~ /^\-or$/i + ) + ) { + push @or, @$r + } + + elsif ( + ref $r eq 'HASH' + and + keys %$r == 1 + and + $l =~ /^\-(?:and|or)$/i + ) { + push @or, %$r; + } + + else { + push @or, $l, $r; + } + } + else { + push @or, $fin_idx->{$_}; + } + } + + $fin->{-or} = \@or; } } else { # not a hash not an array - return { '' => $where }; + $fin = { -and => [ $where ] }; } - die 'should not get here'; + # unroll single-element -and's + while ( + $fin->{-and} + and + @{$fin->{-and}} < 2 + ) { + my $and = delete $fin->{-and}; + last if @$and == 0; + + # at this point we have @$and == 1 + if ( + ref $and->[0] eq 'HASH' + and + ! grep { exists $fin->{$_} } keys %{$and->[0]} + ) { + $fin = { + %$fin, %{$and->[0]} + }; + } + else { + $fin->{-and} = $and; + last; + } + } + + # compress same-column conds found in $fin + for my $col ( grep { $_ !~ /^\-/ } keys %$fin ) { + next unless ref $fin->{$col} eq 'ARRAY' and ($fin->{$col}[0]||'') =~ /^\-and$/i; + my $val_bag = { map { + (! defined $_ ) ? ( UNDEF => undef ) + : ( ! length ref $_ or is_plain_value $_ ) ? ( "VAL_$_" => $_ ) + : ( ( 'SER_' . serialize $_ ) => $_ ) + } @{$fin->{$col}}[1 .. $#{$fin->{$col}}] }; + + if (keys %$val_bag == 1 ) { + ($fin->{$col}) = values %$val_bag; + } + else { + $fin->{$col} = [ -and => map { $val_bag->{$_} } sort keys %$val_bag ]; + } + } + + return keys %$fin ? 
$fin : (); } sub _collapse_cond_unroll_pairs { @@ -1104,9 +1313,9 @@ sub _collapse_cond_unroll_pairs { my @conds; while (@$pairs) { - my ($lhs, $rhs) = @{ shift @$pairs }; + my ($lhs, $rhs) = splice @$pairs, 0, 2; - if ($lhs eq '') { + if (! length $lhs) { push @conds, $self->_collapse_cond($rhs); } elsif ( $lhs =~ /^\-and$/i ) { @@ -1121,6 +1330,7 @@ sub _collapse_cond_unroll_pairs { if (ref $rhs eq 'HASH' and ! keys %$rhs) { # FIXME - SQLA seems to be doing... nothing...? } + # normalize top level -ident, for saner extract_fixed_condition_columns code elsif (ref $rhs eq 'HASH' and keys %$rhs == 1 and exists $rhs->{-ident}) { push @conds, { $lhs => { '=', $rhs } }; } @@ -1128,11 +1338,11 @@ sub _collapse_cond_unroll_pairs { push @conds, { $lhs => $rhs->{-value} }; } elsif (ref $rhs eq 'HASH' and keys %$rhs == 1 and exists $rhs->{'='}) { - if( is_literal_value $rhs->{'='}) { + if ( length ref $rhs->{'='} and is_literal_value $rhs->{'='} ) { push @conds, { $lhs => $rhs }; } else { - for my $p ($self->_collapse_cond_unroll_pairs([ [ $lhs => $rhs->{'='} ] ])) { + for my $p ($self->_collapse_cond_unroll_pairs([ $lhs => $rhs->{'='} ])) { # extra sanity check if (keys %$p > 1) { @@ -1146,7 +1356,14 @@ sub _collapse_cond_unroll_pairs { my ($l, $r) = %$p; - push @conds, ( ! length ref $r or is_plain_value($r) ) + push @conds, ( + ! length ref $r + or + # the unroller recursion may return a '=' prepended value already + ref $r eq 'HASH' and keys %$rhs == 1 and exists $rhs->{'='} + or + is_plain_value($r) + ) ? 
{ $l => $r } : { $l => { '=' => $r } } ; @@ -1164,23 +1381,35 @@ sub _collapse_cond_unroll_pairs { if @$rhs == 1; if( $rhs->[0] =~ /^\-and$/i ) { - unshift @$pairs, map { [ $lhs => $_ ] } @{$rhs}[1..$#$rhs]; + unshift @$pairs, map { $lhs => $_ } @{$rhs}[1..$#$rhs]; } # if not an AND then it's an OR elsif(@$rhs == 2) { - unshift @$pairs, [ $lhs => $rhs->[1] ]; + unshift @$pairs, $lhs => $rhs->[1]; } else { - push @conds, { $lhs => $rhs }; + push @conds, { $lhs => [ @{$rhs}[1..$#$rhs] ] }; } } elsif (@$rhs == 1) { - unshift @$pairs, [ $lhs => $rhs->[0] ]; + unshift @$pairs, $lhs => $rhs->[0]; } else { push @conds, { $lhs => $rhs }; } } + # unroll func + { -value => ... } + elsif ( + ref $rhs eq 'HASH' + and + ( my ($subop) = keys %$rhs ) == 1 + and + length ref ((values %$rhs)[0]) + and + my $vref = is_plain_value( (values %$rhs)[0] ) + ) { + push @conds, { $lhs => { $subop => $$vref } } + } else { push @conds, { $lhs => $rhs }; } @@ -1206,7 +1435,6 @@ sub _collapse_cond_unroll_pairs { # is instead used to infer inambiguous values from conditions # (e.g. 
the inheritance of resultset conditions on new_result) # -my $undef_marker = \ do{ my $x = 'undef' }; sub _extract_fixed_condition_columns { my ($self, $where, $consider_nulls) = @_; my $where_hash = $self->_collapse_cond($_[1]); @@ -1217,7 +1445,7 @@ sub _extract_fixed_condition_columns { my $vals; if (!defined ($v = $where_hash->{$c}) ) { - $vals->{$undef_marker} = $v if $consider_nulls + $vals->{UNDEF} = $v if $consider_nulls } elsif ( ref $v eq 'HASH' @@ -1226,15 +1454,23 @@ sub _extract_fixed_condition_columns { ) { if (exists $v->{-value}) { if (defined $v->{-value}) { - $vals->{$v->{-value}} = $v->{-value} + $vals->{"VAL_$v->{-value}"} = $v->{-value} } elsif( $consider_nulls ) { - $vals->{$undef_marker} = $v->{-value}; + $vals->{UNDEF} = $v->{-value}; } } # do not need to check for plain values - _collapse_cond did it for us - elsif(ref $v->{'='} and is_literal_value($v->{'='}) ) { - $vals->{$v->{'='}} = $v->{'='}; + elsif( + length ref $v->{'='} + and + ( + ( ref $v->{'='} eq 'HASH' and keys %{$v->{'='}} == 1 and exists $v->{'='}{-ident} ) + or + is_literal_value($v->{'='}) + ) + ) { + $vals->{ 'SER_' . serialize $v->{'='} } = $v->{'='}; } } elsif ( @@ -1242,19 +1478,23 @@ sub _extract_fixed_condition_columns { or is_plain_value ($v) ) { - $vals->{$v} = $v; + $vals->{"VAL_$v"} = $v; } elsif (ref $v eq 'ARRAY' and ($v->[0]||'') eq '-and') { for ( @{$v}[1..$#$v] ) { my $subval = $self->_extract_fixed_condition_columns({ $c => $_ }, 'consider nulls'); # always fish nulls out on recursion next unless exists $subval->{$c}; # didn't find anything - $vals->{defined $subval->{$c} ? $subval->{$c} : $undef_marker} = $subval->{$c}; + $vals->{ + ! defined $subval->{$c} ? 'UNDEF' + : ( ! length ref $subval->{$c} or is_plain_value $subval->{$c} ) ? "VAL_$subval->{$c}" + : ( 'SER_' . 
serialize $subval->{$c} ) + } = $subval->{$c}; } } if (keys %$vals == 1) { ($res->{$c}) = (values %$vals) - unless !$consider_nulls and exists $vals->{$undef_marker}; + unless !$consider_nulls and exists $vals->{UNDEF}; } elsif (keys %$vals > 1) { $res->{$c} = UNRESOLVABLE_CONDITION;