Major overhaul of select/as resolution handling (fixes RT#61235)

[dbsrgits/DBIx-Class.git] / lib / DBIx / Class / ResultSet.pm
diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm

index 3c879a3..5609572 100644 (file)
--- a/lib/DBIx/Class/ResultSet.pm
+++ b/lib/DBIx/Class/ResultSet.pm
@@ -10,10 +10,17 @@ use Storable;
 use DBIx::Class::ResultSetColumn;
 use DBIx::Class::ResultSourceHandle;
 use List::Util ();
+use Hash::Merge ();
 use Scalar::Util qw/blessed weaken/;
 use Try::Tiny;
+use Storable qw/nfreeze thaw/;
 use namespace::clean;
 
+BEGIN {
+  # De-duplication in _merge_attr() is disabled, but left in for reference
+  *__HM_DEDUP = sub () { 0 };
+}
+
 use overload
         '0+'     => "count",
         'bool'   => "_bool",
@@ -244,12 +251,32 @@ documentation for the first argument, see L<SQL::Abstract>.
 
 For more help on using joins with search, see L<DBIx::Class::Manual::Joining>.
 
+=head3 CAVEAT
+
+Note that L</search> does not process/deflate any of the values passed in the
+L<SQL::Abstract>-compatible search condition structure. This is unlike other
+condition-bound methods L</new>, L</create> and L</find>. The user must ensure
+manually that any value passed to this method will stringify to something the
+RDBMS knows how to deal with. A notable example is the handling of L<DateTime>
+objects, for more info see:
+L<DBIx::Class::Manual::Cookbook/Formatting_DateTime_objects_in_queries>.
+
 =cut
 
 sub search {
   my $self = shift;
   my $rs = $self->search_rs( @_ );
-  return (wantarray ? $rs->all : $rs);
+
+  my $want = wantarray;
+  if ($want) {
+    return $rs->all;
+  }
+  elsif (defined $want) {
+    return $rs;
+  }
+  else {
+    $self->throw_exception ('->search is *not* a mutator, calling it in void context makes no sense');
+  }
 }
 
 =head2 search_rs
@@ -276,7 +303,9 @@ sub search_rs {
   }
 
   my $call_attrs = {};
-  $call_attrs = pop(@_) if @_ > 1 and ref $_[-1] eq 'HASH';
+  $call_attrs = pop(@_) if (
+   @_ > 1 and ( ! defined $_[-1] or ref $_[-1] eq 'HASH' )
+  );
 
   # see if we can keep the cache (no $rs changes)
   my $cache;
@@ -303,7 +332,11 @@ sub search_rs {
   my $new_attrs = { %{$old_attrs}, %{$call_attrs} };
 
   # merge new attrs into inherited
-  foreach my $key (qw/join prefetch +select +as +columns include_columns bind/) {
+  foreach my $key (qw/join prefetch/) {
+    next unless exists $call_attrs->{$key};
+    $new_attrs->{$key} = $self->_merge_joinpref_attr($old_attrs->{$key}, $call_attrs->{$key});
+  }
+  foreach my $key (qw/+select +as +columns include_columns bind/) {
     next unless exists $call_attrs->{$key};
     $new_attrs->{$key} = $self->_merge_attr($old_attrs->{$key}, $call_attrs->{$key});
   }
@@ -326,6 +359,9 @@ sub search_rs {
 
   } if @_;
 
+  carp 'search( %condition ) is deprecated, use search( \%condition ) instead'
+    if (@_ > 1 and ! $self->result_source->result_class->isa('DBIx::Class::CDBICompat') );
+
   for ($old_where, $call_cond) {
     if (defined $_) {
       $new_attrs->{where} = $self->_stack_cond (
@@ -408,25 +444,56 @@ sub search_literal {
 
 =over 4
 
-=item Arguments: @values | \%cols, \%attrs?
+=item Arguments: \%columns_values | @pk_values, \%attrs?
 
 =item Return Value: $row_object | undef
 
 =back
 
-Finds a row based on its primary key or unique constraint. For example, to find
-a row by its primary key:
+Finds and returns a single row based on supplied criteria. Takes either a
+hashref with the same format as L</create> (including inference of foreign
+keys from related objects), or a list of primary key values in the same
+order as the L<primary columns|DBIx::Class::ResultSource/primary_columns>
+declaration on the L</result_source>.
+
+In either case an attempt is made to combine conditions already existing on
+the resultset with the condition passed to this method.
+
+To aid with preparing the correct query for the storage you may supply the
+C<key> attribute, which is the name of a
+L<unique constraint|DBIx::Class::ResultSource/add_unique_constraint> (the
+unique constraint corresponding to the
+L<primary columns|DBIx::Class::ResultSource/primary_columns> is always named
+C<primary>). If the C<key> attribute has been supplied, and DBIC is unable
+to construct a query that satisfies the named unique constraint fully (
+non-NULL values for each column member of the constraint) an exception is
+thrown.
+
+If no C<key> is specified, the search is carried over all unique constraints
+which are fully defined by the available condition.
+
+If no such constraint is found, C<find> currently defaults to a simple
+C<< search->(\%column_values) >> which may or may not do what you expect.
+Note that this fallback behavior may be deprecated in further versions. If
+you need to search with arbitrary conditions - use L</search>. If the query
+resulting from this fallback produces more than one row, a warning to the
+effect is issued, though only the first row is constructed and returned as
+C<$row_object>.
 
-  my $cd = $schema->resultset('CD')->find(5);
+In addition to C<key>, L</find> recognizes and applies standard
+L<resultset attributes|/ATTRIBUTES> in the same way as L</search> does.
 
-You can also find a row by a specific unique constraint using the C<key>
-attribute. For example:
+Note that if you have extra concerns about the correctness of the resulting
+query you need to specify the C<key> attribute and supply the entire condition
+as an argument to find (since it is not always possible to perform the
+combination of the resultset condition with the supplied one, especially if
+the resultset condition contains literal sql).
 
-  my $cd = $schema->resultset('CD')->find('Massive Attack', 'Mezzanine', {
-    key => 'cd_artist_title'
-  });
+For example, to find a row by its primary key:
+
+  my $cd = $schema->resultset('CD')->find(5);
 
-Additionally, you can specify the columns explicitly by name:
+You can also find a row by a specific unique constraint:
 
   my $cd = $schema->resultset('CD')->find(
     {
@@ -436,24 +503,7 @@ Additionally, you can specify the columns explicitly by name:
     { key => 'cd_artist_title' }
   );
 
-If the C<key> is specified as C<primary>, it searches only on the primary key.
-
-If no C<key> is specified, it searches on all unique constraints defined on the
-source for which column data is provided, including the primary key.
-
-If your table does not have a primary key, you B<must> provide a value for the
-C<key> attribute matching one of the unique constraints on the source.
-
-In addition to C<key>, L</find> recognizes and applies standard
-L<resultset attributes|/ATTRIBUTES> in the same way as L</search> does.
-
-Note: If your query does not return only one row, a warning is generated:
-
-  Query returned more than one row
-
-See also L</find_or_create> and L</update_or_create>. For information on how to
-declare unique constraints, see
-L<DBIx::Class::ResultSource/add_unique_constraint>.
+See also L</find_or_create> and L</update_or_create>.
 
 =cut
 
@@ -545,7 +595,7 @@ sub find {
 
     $final_cond = @unique_queries
       ? [ map { $self->_qualify_cond_columns($_, $alias) } @unique_queries ]
-      : $self->_qualify_cond_columns($call_cond, $alias)
+      : $self->_non_unique_find_fallback ($call_cond, $attrs)
     ;
   }
 
@@ -561,6 +611,30 @@ sub find {
   }
 }
 
+# This is a stop-gap method as agreed during the discussion on find() cleanup:
+# http://lists.scsys.co.uk/pipermail/dbix-class/2010-October/009535.html
+#
+# It is invoked when find() is called in legacy-mode with insufficiently-unique
+# condition. It is provided for overrides until a saner way forward is devised
+#
+# *NOTE* This is not a public method, and it's *GUARANTEED* to disappear down
+# the road. Please adjust your tests accordingly to catch this situation early
+# DBIx::Class::ResultSet->can('_non_unique_find_fallback') is reasonable
+#
+# The method will not be removed without an adequately complete replacement
+# for strict-mode enforcement
+sub _non_unique_find_fallback {
+  my ($self, $cond, $attrs) = @_;
+
+  return $self->_qualify_cond_columns(
+    $cond,
+    exists $attrs->{alias}
+      ? $attrs->{alias}
+      : $self->{attrs}{alias}
+  );
+}
+
+
 sub _qualify_cond_columns {
   my ($self, $cond, $alias) = @_;
 
@@ -589,7 +663,7 @@ sub _build_unique_cond {
   $final_cond = { map { $_ => $final_cond->{$_} } @c_cols };
 
   if (my @missing = grep { ! defined $final_cond->{$_} } (@c_cols) ) {
-    $self->throw_exception( sprintf ( "Unable to satisfy constraint '%s', no values for column(s): %s",
+    $self->throw_exception( sprintf ( "Unable to satisfy requested constraint '%s', no values for column(s): %s",
       $constraint_name,
       join (', ', map { "'$_'" } @missing),
     ) );
@@ -1457,6 +1531,15 @@ The return value is a pass through of what the underlying
 storage backend returned, and may vary. See L<DBI/execute> for the most
 common case.
 
+=head3 CAVEAT
+
+Note that L</update> does not process/deflate any of the values passed in.
+This is unlike the corresponding L<DBIx::Class::Row/update>. The user must
+ensure manually that any value passed to this method will stringify to
+something the RDBMS knows how to deal with. A notable example is the
+handling of L<DateTime> objects, for more info see:
+L<DBIx::Class::Manual::Cookbook/Formatting_DateTime_objects_in_queries>.
+
 =cut
 
 sub update {
@@ -1824,7 +1907,8 @@ my $mk_lazy_count_wizard = sub {
 
 # the tie class for 5.8.1
 {
-  package DBIx::Class::__DBIC_LAZY_RS_COUNT__;
+  package # hide from pause
+    DBIx::Class::__DBIC_LAZY_RS_COUNT__;
   use base qw/Tie::Hash/;
 
   sub FIRSTKEY { my $dummy = scalar keys %{$_[0]{data}}; each %{$_[0]{data}} }
@@ -2212,17 +2296,18 @@ sub as_query {
   $cd->cd_to_producer->find_or_new({ producer => $producer },
                                    { key => 'primary });
 
-Find an existing record from this resultset, based on its primary
-key, or a unique constraint. If none exists, instantiate a new result
-object and return it. The object will not be saved into your storage
-until you call L<DBIx::Class::Row/insert> on it.
+Find an existing record from this resultset using L</find>. if none exists,
+instantiate a new result object and return it. The object will not be saved
+into your storage until you call L<DBIx::Class::Row/insert> on it.
+
+You most likely want this method when looking for existing rows using a unique
+constraint that is not the primary key, or looking for related rows.
 
-You most likely want this method when looking for existing rows using
-a unique constraint that is not the primary key, or looking for
-related rows.
+If you want objects to be saved immediately, use L</find_or_create> instead.
 
-If you want objects to be saved immediately, use L</find_or_create>
-instead.
+B<Note>: Make sure to read the documentation of L</find> and understand the
+significance of the C<key> attribute, as its lack may skew your search, and
+subsequently result in spurious new objects.
 
 B<Note>: Take care when using C<find_or_new> with a table having
 columns with default values that you intend to be automatically
@@ -2364,6 +2449,10 @@ constraint. For example:
     { key => 'cd_artist_title' }
   );
 
+B<Note>: Make sure to read the documentation of L</find> and understand the
+significance of the C<key> attribute, as its lack may skew your search, and
+subsequently result in spurious row creation.
+
 B<Note>: Because find_or_create() reads from the database and then
 possibly inserts based on the result, this method is subject to a race
 condition. Another process could create a record in the table after
@@ -2397,16 +2486,15 @@ sub find_or_create {
 
 =item Arguments: \%col_values, { key => $unique_constraint }?
 
-=item Return Value: $rowobject
+=item Return Value: $row_object
 
 =back
 
   $resultset->update_or_create({ col => $val, ... });
 
-First, searches for an existing row matching one of the unique constraints
-(including the primary key) on the source of this resultset. If a row is
-found, updates it with the other given column values. Otherwise, creates a new
-row.
+Like L</find_or_create>, but if a row is found it is immediately updated via
+C<< $found_row->update (\%col_values) >>.
+
 
 Takes an optional C<key> attribute to search on a specific unique constraint.
 For example:
@@ -2428,14 +2516,9 @@ For example:
     key => 'primary',
   });
 
-
-If no C<key> is specified, it searches on all unique constraints defined on the
-source, including the primary key.
-
-If the C<key> is specified as C<primary>, it searches only on the primary key.
-
-See also L</find> and L</find_or_create>. For information on how to declare
-unique constraints, see L<DBIx::Class::ResultSource/add_unique_constraint>.
+B<Note>: Make sure to read the documentation of L</find> and understand the
+significance of the C<key> attribute, as its lack may skew your search, and
+subsequently result in spurious row creation.
 
 B<Note>: Take care when using C<update_or_create> with a table having
 columns with default values that you intend to be automatically
@@ -2443,6 +2526,9 @@ supplied by the database (e.g. an auto_increment primary key column).
 In normal usage, the value of such columns should NOT be included at
 all in the call to C<update_or_create>, even when set to C<undef>.
 
+See also L</find> and L</find_or_create>. For information on how to declare
+unique constraints, see L<DBIx::Class::ResultSource/add_unique_constraint>.
+
 =cut
 
 sub update_or_create {
@@ -2471,13 +2557,9 @@ sub update_or_create {
 
   $resultset->update_or_new({ col => $val, ... });
 
-First, searches for an existing row matching one of the unique constraints
-(including the primary key) on the source of this resultset. If a row is
-found, updates it with the other given column values. Otherwise, instantiate
-a new result object and return it. The object will not be saved into your storage
-until you call L<DBIx::Class::Row/insert> on it.
+Like L</find_or_new> but if a row is found it is immediately updated via
+C<< $found_row->update (\%col_values) >>.
 
-Takes an optional C<key> attribute to search on a specific unique constraint.
 For example:
 
   # In your application
@@ -2498,13 +2580,17 @@ For example:
       $cd->insert;
   }
 
+B<Note>: Make sure to read the documentation of L</find> and understand the
+significance of the C<key> attribute, as its lack may skew your search, and
+subsequently result in spurious new objects.
+
 B<Note>: Take care when using C<update_or_new> with a table having
 columns with default values that you intend to be automatically
 supplied by the database (e.g. an auto_increment primary key column).
 In normal usage, the value of such columns should NOT be included at
 all in the call to C<update_or_new>, even when set to C<undef>.
 
-See also L</find>, L</find_or_create> and L</find_or_new>.
+See also L</find>, L</find_or_create> and L</find_or_new>. 
 
 =cut
 
@@ -2837,7 +2923,7 @@ sub _chain_relationship {
 
   # we need to take the prefetch the attrs into account before we
   # ->_resolve_join as otherwise they get lost - captainL
-  my $join = $self->_merge_attr( $attrs->{join}, $attrs->{prefetch} );
+  my $join = $self->_merge_joinpref_attr( $attrs->{join}, $attrs->{prefetch} );
 
   delete @{$attrs}{qw/join prefetch collapse group_by distinct select as columns +select +as +columns/};
 
@@ -2855,7 +2941,7 @@ sub _chain_relationship {
     # are resolved (prefetch is useless - we are wrapping
     # a subquery anyway).
     my $rs_copy = $self->search;
-    $rs_copy->{attrs}{join} = $self->_merge_attr (
+    $rs_copy->{attrs}{join} = $self->_merge_joinpref_attr (
       $rs_copy->{attrs}{join},
       delete $rs_copy->{attrs}{prefetch},
     );
@@ -2938,97 +3024,171 @@ sub _resolved_attrs {
   my $source = $self->result_source;
   my $alias  = $attrs->{alias};
 
-  $attrs->{columns} ||= delete $attrs->{cols} if exists $attrs->{cols};
-  my @colbits;
+########
+# resolve selectors, this one is quite hairy
 
-  # build columns (as long as select isn't set) into a set of as/select hashes
-  unless ( $attrs->{select} ) {
+  my $selection_pieces;
 
-    my @cols;
-    if ( ref $attrs->{columns} eq 'ARRAY' ) {
-      @cols = @{ delete $attrs->{columns}}
-    } elsif ( defined $attrs->{columns} ) {
-      @cols = delete $attrs->{columns}
-    } else {
-      @cols = $source->columns
-    }
+  $attrs->{columns} ||= delete $attrs->{cols}
+    if exists $attrs->{cols};
 
-    for (@cols) {
-      if ( ref $_ eq 'HASH' ) {
-        push @colbits, $_
-      } else {
-        my $key = /^\Q${alias}.\E(.+)$/
-          ? "$1"
-          : "$_";
-        my $value = /\./
-          ? "$_"
-          : "${alias}.$_";
-        push @colbits, { $key => $value };
-      }
-    }
-  }
+  # disassemble columns / +columns
+  (
+    $selection_pieces->{columns}{select},
+    $selection_pieces->{columns}{as},
+    $selection_pieces->{'+columns'}{select},
+    $selection_pieces->{'+columns'}{as},
+  ) = map
+    {
+      my (@sel, @as);
 
-  # add the additional columns on
-  foreach (qw{include_columns +columns}) {
-    if ( $attrs->{$_} ) {
-      my @list = ( ref($attrs->{$_}) eq 'ARRAY' )
-        ? @{ delete $attrs->{$_} }
-        : delete $attrs->{$_};
-      for (@list) {
-        if ( ref($_) eq 'HASH' ) {
-          push @colbits, $_
-        } else {
-          my $key = ( split /\./, $_ )[-1];
-          my $value = ( /\./ ? $_ : "$alias.$_" );
-          push @colbits, { $key => $value };
+      for my $colbit (@$_) {
+
+        if (ref $colbit eq 'HASH') {
+          for my $as (keys %$colbit) {
+            push @sel, $colbit->{$as};
+            push @as, $as;
+          }
+        }
+        elsif ($colbit) {
+          push @sel, $colbit;
+          push @as, $colbit;
         }
       }
+
+      (\@sel, \@as)
+    }
+    (
+      (ref $attrs->{columns} eq 'ARRAY' ? delete $attrs->{columns} : [ delete $attrs->{columns} ]),
+      # include_columns is a legacy add-on to +columns
+      [ map { ref $_ eq 'ARRAY' ? @$_ : ($_ || () ) } delete @{$attrs}{qw/+columns include_columns/} ] )
+  ;
+
+  # make copies of select/as and +select/+as
+  (
+    $selection_pieces->{'select/as'}{select},
+    $selection_pieces->{'select/as'}{as},
+    $selection_pieces->{'+select/+as'}{select},
+    $selection_pieces->{'+select/+as'}{as},
+  ) = map
+    { $_ ? [ ref $_ eq 'ARRAY' ? @$_ : $_ ] : [] }
+    ( delete @{$attrs}{qw/select as +select +as/} )
+  ;
+
+  # default to * only when neither no non-plus selectors are available
+  if (
+    ! @{$selection_pieces->{'select/as'}{select}}
+      and
+    ! @{$selection_pieces->{'columns'}{select}}
+  ) {
+    for ($source->columns) {
+      push @{$selection_pieces->{'select/as'}{select}}, $_;
+      push @{$selection_pieces->{'select/as'}{as}}, $_;
     }
   }
 
-  # start with initial select items
-  if ( $attrs->{select} ) {
-    $attrs->{select} =
-        ( ref $attrs->{select} eq 'ARRAY' )
-      ? [ @{ $attrs->{select} } ]
-      : [ $attrs->{select} ];
+  # final composition order (important)
+  my @sel_pairs = grep {
+    $selection_pieces->{$_}
+      &&
+    (
+      ( $selection_pieces->{$_}{select} && @{$selection_pieces->{$_}{select}} )
+        ||
+      ( $selection_pieces->{$_}{as} && @{$selection_pieces->{$_}{as}} )
+    )
+  } qw|columns select/as +columns +select/+as|;
+
+  # fill in missing as bits for each pair
+  # if it's the last pair we can let things slide ( bare +select is sadly popular)
+  my $out_of_sync;
 
-    if ( $attrs->{as} ) {
-      $attrs->{as} =
-        (
-          ref $attrs->{as} eq 'ARRAY'
-            ? [ @{ $attrs->{as} } ]
-            : [ $attrs->{as} ]
-        )
-    } else {
-      $attrs->{as} = [ map {
-         m/^\Q${alias}.\E(.+)$/
-           ? $1
-           : $_
-         } @{ $attrs->{select} }
-      ]
+  for my $i (0 .. $#sel_pairs) {
+
+    my $pairname = $sel_pairs[$i];
+
+    my ($sel, $as) = @{$selection_pieces->{$pairname}}{qw/select as/};
+
+    $self->throw_exception(
+      "Unable to assemble final selection list: $pairname specified in addition to unbalanced $sel_pairs[$i-1]"
+    ) if ($out_of_sync);
+
+    if (@$sel == @$as) {
+      next;
+    }
+    elsif (@$sel < @$as) {
+      $self->throw_exception(
+        "More 'as' elements than 'select' elements for $pairname, unable to continue"
+      );
+    }
+    else {
+      # try to deduce the 'as' part, will work only if all the selectors are "plain", or contain an explicit -as
+      # if we can not deduce something - stop right there and leave the rest of the selector un-as'ed
+      # if there is an extra selection pair coming after that - it will die due to out_of_sync being set
+      for my $j ($#$as+1 .. $#$sel) {
+        if (my $ref = ref $sel->[$j]) {
+          if ($ref eq 'HASH' and exists $sel->[$j]{-as}) {
+            push @$as, $sel->[$j]{-as};
+          }
+          else {
+            $out_of_sync++;
+            last;
+          }
+        }
+        else {
+          push @$as, $sel->[$j];
+        }
+      }
     }
   }
-  else {
 
-    # otherwise we intialise select & as to empty
-    $attrs->{select} = [];
-    $attrs->{as}     = [];
+  # assume all unqualified selectors to apply to the current alias (legacy stuff)
+  # disqualify all $alias.col as-bits (collapser mandated)
+  for (values %$selection_pieces) {
+    $_->{select} = [ map { (ref $_ or $_ =~ /\./) ? $_ : "$alias.$_" } @{$_->{select}} ];
+    $_->{as} = [  map { $_ =~ /^\Q$alias.\E(.+)$/ ? $1 : $_ } @{$_->{as}} ];
+  }
+
+  # FIXME !!!
+  # Blatant bugwardness encoded into multiple tests.
+  # While columns behaves sensibly, +columns is expected
+  # to dump *any* foreign columns into the main object
+  # /me vomits
+  $selection_pieces->{'+columns'}{as} = [ map
+    { (split /\./, $_)[-1] }
+    @{$selection_pieces->{'+columns'}{as}}
+  ];
+
+  # merge everything
+  for (@sel_pairs) {
+    $attrs->{select} = $self->_merge_attr ($attrs->{select}, $selection_pieces->{$_}{select});
+    $attrs->{as} = $self->_merge_attr ($attrs->{as}, $selection_pieces->{$_}{as});
+  }
+
+  # de-duplicate the result (remove *identical* select/as pairs)
+  # and also die on duplicate {as} pointing to different {select}s
+  # not using a c-style for as the condition is prone to shrinkage
+  my $seen;
+  my $i = 0;
+  while ($i <= $#{$attrs->{as}} ) {
+    my ($sel, $as) = map { $attrs->{$_}[$i] } (qw/select as/);
+
+    if ($seen->{"$sel \x00\x00 $as"}++) {
+      splice @$_, $i, 1
+        for @{$attrs}{qw/select as/};
+    }
+    elsif ($seen->{$as}++) {
+      $self->throw_exception(
+        "inflate_result() alias '$as' specified twice with different SQL-side {select}-ors"
+      );
+    }
+    else {
+      $i++;
+    }
   }
 
-  # now add colbits to select/as
-  push @{ $attrs->{select} }, map values %{$_}, @colbits;
-  push @{ $attrs->{as}     }, map keys   %{$_}, @colbits;
+## selector resolution done
+########
 
-  if ( my $adds = delete $attrs->{'+select'} ) {
-    $adds = [$adds] unless ref $adds eq 'ARRAY';
-    push @{ $attrs->{select} },
-      map { /\./ || ref $_ ? $_ : "$alias.$_" } @$adds;
-  }
-  if ( my $adds = delete $attrs->{'+as'} ) {
-    $adds = [$adds] unless ref $adds eq 'ARRAY';
-    push @{ $attrs->{as} }, @$adds;
-  }
 
   $attrs->{from} ||= [{
     -source_handle => $source->handle,
@@ -3044,7 +3204,7 @@ sub _resolved_attrs {
     my $join = delete $attrs->{join} || {};
 
     if ( defined $attrs->{prefetch} ) {
-      $join = $self->_merge_attr( $join, $attrs->{prefetch} );
+      $join = $self->_merge_joinpref_attr( $join, $attrs->{prefetch} );
     }
 
     $attrs->{from} =    # have to copy here to avoid corrupting the original
@@ -3081,40 +3241,15 @@ sub _resolved_attrs {
       carp ("Useless use of distinct on a grouped resultset ('distinct' is ignored when a 'group_by' is present)");
     }
     else {
-      my $storage = $self->result_source->schema->storage;
-      my $rs_column_list = $storage->_resolve_column_info ($attrs->{from});
-
-      my $group_spec = $attrs->{group_by} = [];
-      my %group_index;
-
-      for (@{$attrs->{select}}) {
-        if (! ref($_) or ref ($_) ne 'HASH' ) {
-          push @$group_spec, $_;
-          $group_index{$_}++;
-          if ($rs_column_list->{$_} and $_ !~ /\./ ) {
-            # add a fully qualified version as well
-            $group_index{"$rs_column_list->{$_}{-source_alias}.$_"}++;
-          }
-        }
-      }
-      # add any order_by parts that are not already present in the group_by
-      # we need to be careful not to add any named functions/aggregates
-      # i.e. select => [ ... { count => 'foo', -as 'foocount' } ... ]
-      for my $chunk ($storage->_extract_order_columns($attrs->{order_by})) {
-
-        # only consider real columns (for functions the user got to do an explicit group_by)
-        my $colinfo = $rs_column_list->{$chunk}
-          or next;
-
-        $chunk = "$colinfo->{-source_alias}.$chunk" if $chunk !~ /\./;
-        push @$group_spec, $chunk unless $group_index{$chunk}++;
-      }
+      $attrs->{group_by} = $source->storage->_group_over_selection (
+        @{$attrs}{qw/from select order_by/}
+      );
     }
   }
 
   $attrs->{collapse} ||= {};
   if ( my $prefetch = delete $attrs->{prefetch} ) {
-    $prefetch = $self->_merge_attr( {}, $prefetch );
+    $prefetch = $self->_merge_joinpref_attr( {}, $prefetch );
 
     my $prefetch_ordering = [];
 
@@ -3237,7 +3372,7 @@ sub _calculate_score {
   }
 }
 
-sub _merge_attr {
+sub _merge_joinpref_attr {
   my ($self, $orig, $import) = @_;
 
   return $import unless defined($orig);
@@ -3269,7 +3404,7 @@ sub _merge_attr {
         $orig->[$best_candidate->{position}] = $import_element;
       } elsif (ref $import_element eq 'HASH') {
         my ($key) = keys %{$orig_best};
-        $orig->[$best_candidate->{position}] = { $key => $self->_merge_attr($orig_best->{$key}, $import_element->{$key}) };
+        $orig->[$best_candidate->{position}] = { $key => $self->_merge_joinpref_attr($orig_best->{$key}, $import_element->{$key}) };
       }
     }
     $seen_keys->{$import_key} = 1; # don't merge the same key twice
@@ -3278,6 +3413,89 @@ sub _merge_attr {
   return $orig;
 }
 
+{
+  my $hm;
+
+  sub _merge_attr {
+    $hm ||= do {
+      my $hm = Hash::Merge->new;
+
+      $hm->specify_behavior({
+        SCALAR => {
+          SCALAR => sub {
+            my ($defl, $defr) = map { defined $_ } (@_[0,1]);
+
+            if ($defl xor $defr) {
+              return $defl ? $_[0] : $_[1];
+            }
+            elsif (! $defl) {
+              return ();
+            }
+            elsif (__HM_DEDUP and $_[0] eq $_[1]) {
+              return $_[0];
+            }
+            else {
+              return [$_[0], $_[1]];
+            }
+          },
+          ARRAY => sub {
+            return $_[1] if !defined $_[0];
+            return $_[1] if __HM_DEDUP and List::Util::first { $_ eq $_[0] } @{$_[1]};
+            return [$_[0], @{$_[1]}]
+          },
+          HASH  => sub {
+            return $_[1] if !defined $_[0];
+            return $_[0] if !keys %{$_[1]};
+            return [$_[0], $_[1]]
+          },
+        },
+        ARRAY => {
+          SCALAR => sub {
+            return $_[0] if !defined $_[1];
+            return $_[0] if __HM_DEDUP and List::Util::first { $_ eq $_[1] } @{$_[0]};
+            return [@{$_[0]}, $_[1]]
+          },
+          ARRAY => sub {
+            my @ret = @{$_[0]} or return $_[1];
+            return [ @ret, @{$_[1]} ] unless __HM_DEDUP;
+            my %idx = map { $_ => 1 } @ret;
+            push @ret, grep { ! defined $idx{$_} } (@{$_[1]});
+            \@ret;
+          },
+          HASH => sub {
+            return [ $_[1] ] if ! @{$_[0]};
+            return $_[0] if !keys %{$_[1]};
+            return $_[0] if __HM_DEDUP and List::Util::first { $_ eq $_[1] } @{$_[0]};
+            return [ @{$_[0]}, $_[1] ];
+          },
+        },
+        HASH => {
+          SCALAR => sub {
+            return $_[0] if !defined $_[1];
+            return $_[1] if !keys %{$_[0]};
+            return [$_[0], $_[1]]
+          },
+          ARRAY => sub {
+            return $_[0] if !@{$_[1]};
+            return $_[1] if !keys %{$_[0]};
+            return $_[1] if __HM_DEDUP and List::Util::first { $_ eq $_[0] } @{$_[1]};
+            return [ $_[0], @{$_[1]} ];
+          },
+          HASH => sub {
+            return $_[0] if !keys %{$_[1]};
+            return $_[1] if !keys %{$_[0]};
+            return $_[0] if $_[0] eq $_[1];
+            return [ $_[0], $_[1] ];
+          },
+        }
+      } => 'DBIC_RS_ATTR_MERGER');
+      $hm;
+    };
+
+    return $hm->merge ($_[1], $_[2]);
+  }
+}
+
 sub result_source {
     my $self = shift;
 
@@ -3288,6 +3506,27 @@ sub result_source {
     }
 }
 
+
+sub STORABLE_freeze {
+  my ($self, $cloning) = @_;
+  my $to_serialize = { %$self };
+
+  # A cursor in progress can't be serialized (and would make little sense anyway)
+  delete $to_serialize->{cursor};
+
+  return nfreeze($to_serialize);
+}
+
+# need this hook for symmetry
+sub STORABLE_thaw {
+  my ($self, $cloning, $serialized) = @_;
+
+  %$self = %{ thaw($serialized) };
+
+  return $self;
+}
+
+
 =head2 throw_exception
 
 See L<DBIx::Class::Schema/throw_exception> for details.