Do not artificially order the internals of a has_many prefetch subquery

[dbsrgits/DBIx-Class.git] / lib / DBIx / Class / ResultSet.pm
diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm

index a2f95f1..2f76830 100644 (file)
--- a/lib/DBIx/Class/ResultSet.pm
+++ b/lib/DBIx/Class/ResultSet.pm
@@ -1058,8 +1058,9 @@ sub single {
     $attrs->{from}, $attrs->{select},
     $attrs->{where}, $attrs
   )];
-
-  return @$data ? $self->_construct_objects($data)->[0] : undef;
+  return undef unless @$data;
+  $self->{stashed_rows} = [ $data ];
+  $self->_construct_objects->[0];
 }
 
 
@@ -1236,90 +1237,129 @@ sub next {
   return shift @{$self->{stashed_objects}};
 }
 
-# takes a single DBI-row of data and coinstructs as many objects
-# as the resultset attributes call for.
-# This can be a bit of an action at a distance - it takes as an argument
-# the *current* cursor-row (already taken off the $sth), but if
-# collapsing is requested it will keep advancing the cursor either
-# until the current row-object is assembled (the collapser was able to
-# order the result sensibly) OR until the cursor is exhausted (an
-# unordered collapsing resultset effectively triggers ->all)
+# Constructs as many objects as it can in one pass while respecting
+# cursor laziness. Several modes of operation:
+#
+# * Always builds everything present in @{$self->{stashed_rows}}
+# * If called with $fetch_all true - pulls everything off the cursor and
+#   builds all objects in one pass
+# * If $self->_resolved_attrs->{collapse} is true, checks the order_by:
+#   * If the resultset is ordered properly by the left side, fetches
+#     rows off the cursor until the "master object" changes, and saves
+#     the last extra row (if any) in @{$self->{stashed_rows}}
+#   * Otherwise just fetches everything off the cursor, and
+#     collapses/constructs it all as if $fetch_all was requested
+#     (there is no other way to collapse except for an eager
+#     cursor)
+# * If no collapse is requested - just get the next row, construct and
+#   return
 sub _construct_objects {
-  my ($self, $fetched_row, $fetch_all) = @_;
+  my ($self, $fetch_all) = @_;
 
+  my $rsrc = $self->result_source;
   my $attrs = $self->_resolved_attrs;
-  my $unordered = 0;  # will deal with this later
+  my $cursor = $self->cursor;
 
   # this will be used as both initial raw-row collector AND as a RV of
-  # _construct_objects. Not regrowing the   # array twice matters a lot...
+  # _construct_objects. Not regrowing the array twice matters a lot...
   # a suprising amount actually
-  my $rows;
-
-  # $fetch_all implies all() which means all stashes have been cleared
-  # and the cursor reset
+  my $rows = (delete $self->{stashed_rows}) || [];
   if ($fetch_all) {
-    # FIXME - we can do better, cursor->all (well a diff. method) should return a ref
-    $rows = [ $self->cursor->all ];
+    # FIXME SUBOPTIMAL - we can do better, cursor->next/all (well diff. methods) should return a ref
+    $rows = [ @$rows, $cursor->all ];
   }
-  elsif ($unordered) {
-    $rows = [
-      $fetched_row||(),
-      @{ delete $self->{stashed_rows} || []},
-      $self->cursor->all,
-    ];
+  elsif (!$attrs->{collapse}) {
+    # FIXME SUBOPTIMAL - we can do better, cursor->next/all (well diff. methods) should return a ref
+    push @$rows, do { my @r = $cursor->next; @r ? \@r : () }
+      unless @$rows;
   }
-  else {  # simple single object
-    $rows = [ $fetched_row || ( @{$self->{stashed_rows}||[]} ? shift @{$self->{stashed_rows}} : [$self->cursor->next] ) ];
+  else {
+    $attrs->{_ordered_for_collapse} ||= (!$attrs->{order_by}) ? undef : do {
+      my $st = $rsrc->schema->storage;
+      my @ord_cols = map
+        { $_->[0] }
+        ( $st->_extract_order_criteria($attrs->{order_by}) )
+      ;
+
+      my $colinfos = $st->_resolve_column_info($attrs->{from}, \@ord_cols);
+
+      for (0 .. $#ord_cols) {
+        if (
+          ! $colinfos->{$ord_cols[$_]}
+            or
+          $colinfos->{$ord_cols[$_]}{-result_source} != $rsrc
+        ) {
+          splice @ord_cols, $_;
+          last;
+        }
+      }
+
+      # since all we check here are the start of the order_by belonging to the
+      # top level $rsrc, a present identifying set will mean that the resultset
+      # is ordered by its leftmost table in a stable manner
+      (@ord_cols and $rsrc->_identifying_column_set({ map
+        { $colinfos->{$_}{-colname} => $colinfos->{$_} }
+        @ord_cols
+      })) ? 1 : 0;
+    };
+
+    if ($attrs->{_ordered_for_collapse}) {
+      push @$rows, do { my @r = $cursor->next; @r ? \@r : () };
+    }
+    # instead of looping over ->next, use ->all in stealth mode
+    # FIXME - encapsulation breach, got to be a better way
+    elsif (! $cursor->{done}) {
+      push @$rows, $cursor->all;
+      $cursor->{done} = 1;
+      $fetch_all = 1;
+    }
   }
 
-  return undef unless @{$rows->[0]||[]};
+  return undef unless @$rows;
 
-  my $rsrc = $self->result_source;
   my $res_class = $self->result_class;
   my $inflator = $res_class->can ('inflate_result')
     or $self->throw_exception("Inflator $res_class does not provide an inflate_result() method");
 
-  # construct a much simpler array->hash folder for the one-table cases right here
-  if ($attrs->{_single_object_inflation} and ! $attrs->{collapse}) {
-    # FIXME this is a very very very hot spot
+  my $infmap = $attrs->{as};
+
+  if (!$attrs->{collapse} and $attrs->{_single_object_inflation}) {
+    # construct a much simpler array->hash folder for the one-table cases right here
+
+    # FIXME SUBOPTIMAL this is a very very very hot spot
     # while rather optimal we can *still* do much better, by
     # building a smarter [Row|HRI]::inflate_result(), and
-    # switch to feeding it data via some leaner interface
+    # switch to feeding it data via a much leaner interface
     #
-    my $infmap = $attrs->{as};
-    my @as_idx = 0..$#$infmap;
-    for my $r (@$rows) {
-      $r = [{ map { $infmap->[$_] => $r->[$_] } @as_idx }]
+    # crude unscientific benchmarking indicated the shortcut eval is not worth it for
+    # this particular resultset size
+    if (@$rows < 60) {
+      my @as_idx = 0..$#$infmap;
+      for my $r (@$rows) {
+        $r = $inflator->($res_class, $rsrc, { map { $infmap->[$_] => $r->[$_] } @as_idx } );
+      }
+    }
+    else {
+      eval sprintf (
+        '$_ = $inflator->($res_class, $rsrc, { %s }) for @$rows',
+        join (', ', map { "\$infmap->[$_] => \$_->[$_]" } 0..$#$infmap )
+      );
     }
-
-    # FIXME - this seems to be faster than the hashmapper aove, especially
-    # on more rows, but need a better bench-environment to confirm
-    #eval sprintf (
-    #  '$_ = [{ %s }] for @$rows',
-    #  join (', ', map { "\$infmap->[$_] => \$_->[$_]" } 0..$#$infmap )
-    #);
   }
   else {
-    push @$rows, @{$self->{stashed_rows}||[]};
-
-    $rsrc->_mk_row_parser({
-      inflate_map => $attrs->{as},
+    ($self->{_row_parser} ||= eval sprintf 'sub { %s }', $rsrc->_mk_row_parser({
+      inflate_map => $infmap,
       selection => $attrs->{select},
       collapse => $attrs->{collapse},
-      unordered => $unordered,
-    })->(
-      $rows,  # modify in-place, shrinking/extending as necessary
-      ($attrs->{collapse} and ! $fetch_all and ! $unordered)
-        ? (
-            sub { my @r = $self->cursor->next or return undef; \@r },
-            ($self->{stashed_rows} = []), # this is where we empty things and prepare for leftovers
-          )
-        : ()
-      ,
-    );
-  }
+    }) or die $@)->($rows, $fetch_all ? () : (
+      # FIXME SUBOPTIMAL - we can do better, cursor->next/all (well diff. methods) should return a ref
+      sub { my @r = $cursor->next or return; \@r }, # how the collapser gets more rows
+      ($self->{stashed_rows} = []),                 # where does it stuff excess
+    ));  # modify $rows in-place, shrinking/extending as necessary
 
-  $_ = $res_class->$inflator($rsrc, @$_) for @$rows;
+    $_ = $inflator->($res_class, $rsrc, @$_) for @$rows;
+
+  }
 
   # CDBI compat stuff
   if ($attrs->{record_filter}) {
@@ -1403,8 +1443,7 @@ sub count {
 
   # this is a little optimization - it is faster to do the limit
   # adjustments in software, instead of a subquery
-  my $rows = delete $attrs->{rows};
-  my $offset = delete $attrs->{offset};
+  my ($rows, $offset) = delete @{$attrs}{qw/rows offset/};
 
   my $crs;
   if ($self->_has_resolved_attr (qw/collapse group_by/)) {
@@ -1475,7 +1514,6 @@ sub _count_rs {
   # overwrite the selector (supplied by the storage)
   $tmp_attrs->{select} = $rsrc->storage->_count_select ($rsrc, $attrs);
   $tmp_attrs->{as} = 'count';
-  delete @{$tmp_attrs}{qw/columns/};
 
   my $tmp_rs = $rsrc->resultset_class->new($rsrc, $tmp_attrs)->get_column ('count');
 
@@ -1617,8 +1655,7 @@ sub all {
     $self->throw_exception("all() doesn't take any arguments, you probably wanted ->search(...)->all()");
   }
 
-  delete $self->{stashed_rows};
-  delete $self->{stashed_objects};
+  delete @{$self}{qw/stashed_rows stashed_objects/};
 
   if (my $c = $self->get_cache) {
     return @$c;
@@ -1626,7 +1663,7 @@ sub all {
 
   $self->cursor->reset;
 
-  my $objs = $self->_construct_objects(undef, 'fetch_all') || [];
+  my $objs = $self->_construct_objects('fetch_all') || [];
 
   $self->set_cache($objs) if $self->{attrs}{cache};
 
@@ -1651,9 +1688,8 @@ another query.
 
 sub reset {
   my ($self) = @_;
-  delete $self->{_attrs};
-  delete $self->{stashed_rows};
-  delete $self->{stashed_objects};
+
+  delete @{$self}{qw/_attrs stashed_rows stashed_objects/};
 
   $self->{all_cache_position} = 0;
   $self->cursor->reset;
@@ -1703,14 +1739,19 @@ sub _rs_update_delete {
   # simplify the joinmap and maybe decide if a grouping (and thus subquery) is necessary
   my $relation_classifications;
   if (ref($attrs->{from}) eq 'ARRAY') {
-    $attrs->{from} = $storage->_prune_unused_joins ($attrs->{from}, $attrs->{select}, $cond, $attrs);
-
-    $relation_classifications = $storage->_resolve_aliastypes_from_select_args (
-      [ @{$attrs->{from}}[1 .. $#{$attrs->{from}}] ],
-      $attrs->{select},
-      $cond,
-      $attrs
-    ) unless $needs_group_by_subq;  # we already know we need a group, no point of resolving them
+    if (@{$attrs->{from}} == 1) {
+      # a single {from} element means no JOINs at all - nothing to prune or classify
+      $relation_classifications = {};
+    } else {
+      $attrs->{from} = $storage->_prune_unused_joins ($attrs->{from}, $attrs->{select}, $cond, $attrs);
+
+      $relation_classifications = $storage->_resolve_aliastypes_from_select_args (
+        [ @{$attrs->{from}}[1 .. $#{$attrs->{from}}] ],
+        $attrs->{select},
+        $cond,
+        $attrs
+      ) unless $needs_group_by_subq;  # we already know we need a group, no point of resolving them
+    }
   }
   else {
     $needs_group_by_subq ||= 1; # if {from} is unparseable assume the worst
@@ -1728,21 +1769,13 @@ sub _rs_update_delete {
   ) {
     # Most databases do not allow aliasing of tables in UPDATE/DELETE. Thus
     # a condition containing 'me' or other table prefixes will not work
-    # at all. What this code tries to do (badly) is to generate a condition
-    # with the qualifiers removed, by exploiting the quote mechanism of sqla
-    #
-    # this is atrocious and should be replaced by normal sqla introspection
-    # one sunny day
-    my ($sql, @bind) = do {
-      my $sqla = $rsrc->storage->sql_maker;
-      local $sqla->{_dequalify_idents} = 1;
-      $sqla->_recurse_where($self->{cond});
-    } if $self->{cond};
-
+    # at all. Tell SQLMaker to dequalify idents via a gross hack.
+    my $sqla = $rsrc->storage->sql_maker;
+    local $sqla->{_dequalify_idents} = 1;
     return $rsrc->storage->$op(
       $rsrc,
       $op eq 'update' ? $values : (),
-      $self->{cond} ? \[$sql, @bind] : (),
+      $self->{cond},
     );
   }
 
@@ -1757,7 +1790,7 @@ sub _rs_update_delete {
   my $existing_group_by = delete $attrs->{group_by};
 
   # make a new $rs selecting only the PKs (that's all we really need for the subq)
-  delete $attrs->{$_} for qw/collapse select _prefetch_selector_range as/;
+  delete @{$attrs}{qw/collapse select _prefetch_selector_range as/};
   $attrs->{columns} = [ map { "$attrs->{alias}.$_" } @$idcols ];
   $attrs->{group_by} = \ '';  # FIXME - this is an evil hack, it causes the optimiser to kick in and throw away the LEFT joins
   my $subrs = (ref $self)->new($rsrc, $attrs);
@@ -2204,7 +2237,7 @@ sub pager {
   # throw away the paging flags and re-run the count (possibly
   # with a subselect) to get the real total count
   my $count_attrs = { %$attrs };
-  delete $count_attrs->{$_} for qw/rows offset page pager/;
+  delete @{$count_attrs}{qw/rows offset page pager/};
 
   my $total_rs = (ref $self)->new($self->result_source, $count_attrs);
 
@@ -3423,6 +3456,8 @@ sub _resolved_attrs {
     push @{ $attrs->{as} }, (map { $_->[1] } @prefetch);
   }
 
+  $attrs->{_single_object_inflation} = ! List::Util::first { $_ =~ /\./ } @{$attrs->{as}};
+
   # run through the resulting joinstructure (starting from our current slot)
   # and unset collapse if proven unnesessary
   if ($attrs->{collapse} && ref $attrs->{from} eq 'ARRAY') {
@@ -3448,7 +3483,12 @@ sub _resolved_attrs {
     }
   }
 
-  $attrs->{_single_object_inflation} = ! List::Util::first { $_ =~ /\./ } @{$attrs->{as}};
+  if (! $attrs->{order_by} and $attrs->{collapse}) {
+    # default order for collapsing unless the user asked for something
+    $attrs->{order_by} = [ map { "$alias.$_" } $source->primary_columns ];
+    $attrs->{_ordered_for_collapse} = 1;
+    $attrs->{_order_is_artificial} = 1;
+  }
 
   # if both page and offset are specified, produce a combined offset
   # even though it doesn't make much sense, this is what pre 081xx has
@@ -3670,7 +3710,8 @@ sub STORABLE_freeze {
   my $to_serialize = { %$self };
 
   # A cursor in progress can't be serialized (and would make little sense anyway)
-  delete $to_serialize->{cursor};
+  # the parser can be regenerated (and can't be serialized)
+  delete @{$to_serialize}{qw/cursor _row_parser/};
 
   # nor is it sensical to store a not-yet-fired-count pager
   if ($to_serialize->{pager} and ref $to_serialize->{pager}{total_entries} eq 'CODE') {