$attrs->{from}, $attrs->{select},
$attrs->{where}, $attrs
)];
-
- return @$data ? $self->_construct_objects($data)->[0] : undef;
+ return undef unless @$data;
+ $self->{stashed_rows} = [ $data ];
+ $self->_construct_objects->[0];
}
return shift @{$self->{stashed_objects}};
}
-# takes a single DBI-row of data and coinstructs as many objects
-# as the resultset attributes call for.
-# This can be a bit of an action at a distance - it takes as an argument
-# the *current* cursor-row (already taken off the $sth), but if
-# collapsing is requested it will keep advancing the cursor either
-# until the current row-object is assembled (the collapser was able to
-# order the result sensibly) OR until the cursor is exhausted (an
-# unordered collapsing resultset effectively triggers ->all)
+# Constructs as many objects as it can in one pass while respecting
+# cursor laziness. Several modes of operation:
+#
+# * Always builds everything present in @{$self->{stashed_rows}}
+# * If called with $fetch_all true - pulls everything off the cursor and
+# builds all objects in one pass
+# * If $self->_resolved_attrs->{collapse} is true, checks the order_by
+# and if the resultset is ordered properly by the left side:
+# * Fetches stuff off the cursor until the "master object" changes,
+# and saves the last extra row (if any) in @{$self->{stashed_rows}}
+# OR
+# * Just fetches, and collapses/constructs everything as if $fetch_all
+# was requested (there is no other way to collapse except for an
+# eager cursor)
+# * If no collapse is requested - just get the next row, construct and
+# return
sub _construct_objects {
- my ($self, $fetched_row, $fetch_all) = @_;
+ my ($self, $fetch_all) = @_;
+ my $rsrc = $self->result_source;
my $attrs = $self->_resolved_attrs;
- my $unordered = 0; # will deal with this later
+ my $cursor = $self->cursor;
# this will be used as both initial raw-row collector AND as a RV of
- # _construct_objects. Not regrowing the # array twice matters a lot...
+ # _construct_objects. Not regrowing the array twice matters a lot...
# a suprising amount actually
- my $rows;
-
- # $fetch_all implies all() which means all stashes have been cleared
- # and the cursor reset
+ my $rows = (delete $self->{stashed_rows}) || [];
if ($fetch_all) {
- # FIXME - we can do better, cursor->all (well a diff. method) should return a ref
- $rows = [ $self->cursor->all ];
+ # FIXME SUBOPTIMAL - we can do better, cursor->next/all (well diff. methods) should return a ref
+ $rows = [ @$rows, $cursor->all ];
}
- elsif ($unordered) {
- $rows = [
- $fetched_row||(),
- @{ delete $self->{stashed_rows} || []},
- $self->cursor->all,
- ];
+ elsif (!$attrs->{collapse}) {
+ # FIXME SUBOPTIMAL - we can do better, cursor->next/all (well diff. methods) should return a ref
+ push @$rows, do { my @r = $cursor->next; @r ? \@r : () }
+ unless @$rows;
}
- else { # simple single object
- $rows = [ $fetched_row || ( @{$self->{stashed_rows}||[]} ? shift @{$self->{stashed_rows}} : [$self->cursor->next] ) ];
+ else {
+ $attrs->{_ordered_for_collapse} ||= (!$attrs->{order_by}) ? undef : do {
+ my $st = $rsrc->schema->storage;
+ my @ord_cols = map
+ { $_->[0] }
+ ( $st->_extract_order_criteria($attrs->{order_by}) )
+ ;
+
+ my $colinfos = $st->_resolve_column_info($attrs->{from}, \@ord_cols);
+
+ for (0 .. $#ord_cols) {
+ if (
+ ! $colinfos->{$ord_cols[$_]}
+ or
+ $colinfos->{$ord_cols[$_]}{-result_source} != $rsrc
+ ) {
+ splice @ord_cols, $_;
+ last;
+ }
+ }
+
+ # since all we check here are the start of the order_by belonging to the
+ # top level $rsrc, a present identifying set will mean that the resultset
+# is ordered by its leftmost table in a stable manner
+ (@ord_cols and $rsrc->_identifying_column_set({ map
+ { $colinfos->{$_}{-colname} => $colinfos->{$_} }
+ @ord_cols
+ })) ? 1 : 0;
+ };
+
+ if ($attrs->{_ordered_for_collapse}) {
+ push @$rows, do { my @r = $cursor->next; @r ? \@r : () };
+ }
+ # instead of looping over ->next, use ->all in stealth mode
+ # FIXME - encapsulation breach, got to be a better way
+ elsif (! $cursor->{done}) {
+ push @$rows, $cursor->all;
+ $cursor->{done} = 1;
+ $fetch_all = 1;
+ }
}
- return undef unless @{$rows->[0]||[]};
+ return undef unless @$rows;
- my $rsrc = $self->result_source;
my $res_class = $self->result_class;
my $inflator = $res_class->can ('inflate_result')
or $self->throw_exception("Inflator $res_class does not provide an inflate_result() method");
- # construct a much simpler array->hash folder for the one-table cases right here
- if ($attrs->{_single_object_inflation} and ! $attrs->{collapse}) {
- # FIXME this is a very very very hot spot
+ my $infmap = $attrs->{as};
+
+ if (!$attrs->{collapse} and $attrs->{_single_object_inflation}) {
+ # construct a much simpler array->hash folder for the one-table cases right here
+
+ # FIXME SUBOPTIMAL this is a very very very hot spot
# while rather optimal we can *still* do much better, by
# building a smarter [Row|HRI]::inflate_result(), and
- # switch to feeding it data via some leaner interface
+ # switch to feeding it data via a much leaner interface
#
- my $infmap = $attrs->{as};
- my @as_idx = 0..$#$infmap;
- for my $r (@$rows) {
- $r = [{ map { $infmap->[$_] => $r->[$_] } @as_idx }]
+ # crude unscientific benchmarking indicated the shortcut eval is not worth it for
+ # this particular resultset size
+ if (@$rows < 60) {
+ my @as_idx = 0..$#$infmap;
+ for my $r (@$rows) {
+ $r = $inflator->($res_class, $rsrc, { map { $infmap->[$_] => $r->[$_] } @as_idx } );
+ }
+ }
+ else {
+ eval sprintf (
+ '$_ = $inflator->($res_class, $rsrc, { %s }) for @$rows',
+ join (', ', map { "\$infmap->[$_] => \$_->[$_]" } 0..$#$infmap )
+ );
}
-
- # FIXME - this seems to be faster than the hashmapper aove, especially
- # on more rows, but need a better bench-environment to confirm
- #eval sprintf (
- # '$_ = [{ %s }] for @$rows',
- # join (', ', map { "\$infmap->[$_] => \$_->[$_]" } 0..$#$infmap )
- #);
}
else {
- push @$rows, @{$self->{stashed_rows}||[]};
-
- $rsrc->_mk_row_parser({
- inflate_map => $attrs->{as},
+ ($self->{_row_parser} ||= eval sprintf 'sub { %s }', $rsrc->_mk_row_parser({
+ inflate_map => $infmap,
selection => $attrs->{select},
collapse => $attrs->{collapse},
- unordered => $unordered,
- })->(
- $rows, # modify in-place, shrinking/extending as necessary
- ($attrs->{collapse} and ! $fetch_all and ! $unordered)
- ? (
- sub { my @r = $self->cursor->next or return undef; \@r },
- ($self->{stashed_rows} = []), # this is where we empty things and prepare for leftovers
- )
- : ()
- ,
- );
- }
+ }) or die $@)->($rows, $fetch_all ? () : (
+ # FIXME SUBOPTIMAL - we can do better, cursor->next/all (well diff. methods) should return a ref
+ sub { my @r = $cursor->next or return; \@r }, # how the collapser gets more rows
+ ($self->{stashed_rows} = []), # where does it stuff excess
+ )); # modify $rows in-place, shrinking/extending as necessary
+
+ $_ = $inflator->($res_class, $rsrc, @$_) for @$rows;
- $_ = $res_class->$inflator($rsrc, @$_) for @$rows;
+ }
# CDBI compat stuff
if ($attrs->{record_filter}) {
# this is a little optimization - it is faster to do the limit
# adjustments in software, instead of a subquery
- my $rows = delete $attrs->{rows};
- my $offset = delete $attrs->{offset};
+ my ($rows, $offset) = delete @{$attrs}{qw/rows offset/};
my $crs;
if ($self->_has_resolved_attr (qw/collapse group_by/)) {
# overwrite the selector (supplied by the storage)
$tmp_attrs->{select} = $rsrc->storage->_count_select ($rsrc, $attrs);
$tmp_attrs->{as} = 'count';
- delete @{$tmp_attrs}{qw/columns/};
my $tmp_rs = $rsrc->resultset_class->new($rsrc, $tmp_attrs)->get_column ('count');
$self->throw_exception("all() doesn't take any arguments, you probably wanted ->search(...)->all()");
}
- delete $self->{stashed_rows};
- delete $self->{stashed_objects};
+ delete @{$self}{qw/stashed_rows stashed_objects/};
if (my $c = $self->get_cache) {
return @$c;
$self->cursor->reset;
- my $objs = $self->_construct_objects(undef, 'fetch_all') || [];
+ my $objs = $self->_construct_objects('fetch_all') || [];
$self->set_cache($objs) if $self->{attrs}{cache};
sub reset {
my ($self) = @_;
- delete $self->{_attrs};
- delete $self->{stashed_rows};
- delete $self->{stashed_objects};
+
+ delete @{$self}{qw/_attrs stashed_rows stashed_objects/};
$self->{all_cache_position} = 0;
$self->cursor->reset;
my $existing_group_by = delete $attrs->{group_by};
# make a new $rs selecting only the PKs (that's all we really need for the subq)
- delete $attrs->{$_} for qw/collapse select _prefetch_selector_range as/;
+ delete @{$attrs}{qw/collapse select _prefetch_selector_range as/};
$attrs->{columns} = [ map { "$attrs->{alias}.$_" } @$idcols ];
$attrs->{group_by} = \ ''; # FIXME - this is an evil hack, it causes the optimiser to kick in and throw away the LEFT joins
my $subrs = (ref $self)->new($rsrc, $attrs);
# throw away the paging flags and re-run the count (possibly
# with a subselect) to get the real total count
my $count_attrs = { %$attrs };
- delete $count_attrs->{$_} for qw/rows offset page pager/;
+ delete @{$count_attrs}{qw/rows offset page pager/};
my $total_rs = (ref $self)->new($self->result_source, $count_attrs);
push @{ $attrs->{as} }, (map { $_->[1] } @prefetch);
}
+ $attrs->{_single_object_inflation} = ! List::Util::first { $_ =~ /\./ } @{$attrs->{as}};
+
# run through the resulting joinstructure (starting from our current slot)
# and unset collapse if proven unnesessary
if ($attrs->{collapse} && ref $attrs->{from} eq 'ARRAY') {
}
}
- $attrs->{_single_object_inflation} = ! List::Util::first { $_ =~ /\./ } @{$attrs->{as}};
+ if (! $attrs->{order_by} and $attrs->{collapse}) {
+ # default order for collapsing unless the user asked for something
+ $attrs->{order_by} = [ map { "$alias.$_" } $source->primary_columns ];
+ $attrs->{_ordered_for_collapse} = 1;
+ }
# if both page and offset are specified, produce a combined offset
# even though it doesn't make much sense, this is what pre 081xx has
my $to_serialize = { %$self };
# A cursor in progress can't be serialized (and would make little sense anyway)
- delete $to_serialize->{cursor};
+ # the parser can be regenerated (and can't be serialized)
+ delete @{$to_serialize}{qw/cursor _row_parser/};
# nor is it sensical to store a not-yet-fired-count pager
if ($to_serialize->{pager} and ref $to_serialize->{pager}{total_entries} eq 'CODE') {