From: Peter Rabbitson <ribasushi@cpan.org>
Date: Wed, 22 Jan 2014 09:50:23 +0000 (+0100)
Subject: Fix long standing issue with resultset growth on repeated execution (GHPR#29)
X-Git-Tag: v0.08260~29
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?p=dbsrgits%2FDBIx-Class.git;a=commitdiff_plain;h=6ae62c5c162c519053b7354065b8f6c33e990b6e

Fix long standing issue with resultset growth on repeated execution (GHPR#29)

The "save the SQLA argstack" dirty solution introduced in 975b573a actually
resulted in an infinitely growing nested data structure, containing the
history of *every* reinvocation of the resultset. Scale back the problematic
part, though this is still an interim workaround. A DQ-based stack should
make this much cleaner.
---

diff --git a/251_TODO b/251_TODO
index 9bdd0e2..d2db9c6 100644
--- a/251_TODO
+++ b/251_TODO
@@ -2,9 +2,6 @@ List of things riba needs to clear out before next official in order
 of importance:
 (Keep Getty happy)
 
-- Figure out what fundamental leak (or group of leaks) is present in the
-  DBICTest::LeakTrace implementation itself
-- Figure out why none of them show up as failures (ton of false negatives)
 - Clarify/warn on the distinct over multiple columns get_column()
 - Incompatibly move around pieces of BlockRunner (critical - people are
   starting to rely on it)
diff --git a/Changes b/Changes
index 1c55f07..a3db389 100644
--- a/Changes
+++ b/Changes
@@ -16,6 +16,8 @@ Revision history for DBIx::Class
           with correlated subquery selections
         - Fix multiple edge cases stemming from interaction of a non-selecting
           order_by specification and distinct and/or complex prefetch
+        - Fix unbound growth of a resultset during repeated execute/exhaust
+          cycles (GHPR#29)
         - Clarify ambiguous behavior of distinct when used with ResultSetColumn
           i.e. $rs->search({}, { distinct => 1 })->get_column (...)
         - Setting quote_names propagates to SQL::Translator when producing
diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm
index 222e175..ffade21 100644
--- a/lib/DBIx/Class/ResultSet.pm
+++ b/lib/DBIx/Class/ResultSet.pm
@@ -247,7 +247,7 @@ sub new {
     if $source->isa('DBIx::Class::ResultSourceHandle');
 
   $attrs = { %{$attrs||{}} };
-  delete @{$attrs}{qw(_sqlmaker_select_args _related_results_construction)};
+  delete @{$attrs}{qw(_last_sqlmaker_alias_map _related_results_construction)};
 
   if ($attrs->{page}) {
     $attrs->{rows} ||= 10;
@@ -1351,7 +1351,7 @@ sub _construct_results {
   return undef unless @{$rows||[]};
 
   # sanity check - people are too clever for their own good
-  if ($attrs->{collapse} and my $aliastypes = $attrs->{_sqlmaker_select_args}[3]{_aliastypes} ) {
+  if ($attrs->{collapse} and my $aliastypes = $attrs->{_last_sqlmaker_alias_map} ) {
 
     my $multiplied_selectors;
     for my $sel_alias ( grep { $_ ne $attrs->{alias} } keys %{ $aliastypes->{selecting} } ) {
@@ -2662,8 +2662,6 @@ sub as_query {
     $attrs->{from}, $attrs->{select}, $attrs->{where}, $attrs
   );
 
-  $self->{_attrs}{_sqlmaker_select_args} = $attrs->{_sqlmaker_select_args};
-
   $aq;
 }
 
diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm
index 9e340f0..bf239e6 100644
--- a/lib/DBIx/Class/Storage/DBI.pm
+++ b/lib/DBIx/Class/Storage/DBI.pm
@@ -2402,8 +2402,8 @@ sub _select_args {
   # soooooo much better now. But that is also another
   # battle...
   #return (
-  #  'select', @{$orig_attrs->{_sqlmaker_select_args}}
-  #) if $orig_attrs->{_sqlmaker_select_args};
+  #  'select', $orig_attrs->{!args_as_stored_at_the_end_of_this_method!}
+  #) if $orig_attrs->{!args_as_stored_at_the_end_of_this_method!};
 
   my $sql_maker = $self->sql_maker;
   my $alias2source = $self->_resolve_ident_sources ($ident);
@@ -2499,6 +2499,16 @@ sub _select_args {
     ($attrs->{from}, $attrs->{_aliastypes}) = $self->_prune_unused_joins ($attrs);
   }
 
+  # FIXME this is a gross, inefficient, largely incorrect and fragile hack
+  # during the result inflation stage we *need* to know what was the aliastype
+  # map as sqla saw it when the final pieces of SQL were being assembled
+  # Originally we simply carried around the entirety of $attrs, but this
+  # resulted in resultsets that are being reused growing continuously, as
+  # the hash in question grew deeper and deeper.
+  # Instead hand-pick what to take with us here (we actually don't need much
+  # at this point just the map itself)
+  $orig_attrs->{_last_sqlmaker_alias_map} = $attrs->{_aliastypes};
+
 ###
   # This would be the point to deflate anything found in $attrs->{where}
   # (and leave $attrs->{bind} intact). Problem is - inflators historically
@@ -2509,9 +2519,7 @@ sub _select_args {
   # invoked, and that's just bad...
 ###
 
-  return ( 'select', @{ $orig_attrs->{_sqlmaker_select_args} = [
-    @{$attrs}{qw(from select where)}, $attrs, @limit_args
-  ]} );
+  return ( 'select', @{$attrs}{qw(from select where)}, $attrs, @limit_args );
 }
 
 # Returns a counting SELECT for a simple count
diff --git a/t/52leaks.t b/t/52leaks.t
index af8caa2..f3fcab5 100644
--- a/t/52leaks.t
+++ b/t/52leaks.t
@@ -48,8 +48,9 @@ if ($ENV{DBICTEST_IN_PERSISTENT_ENV}) {
 use lib qw(t/lib);
 use DBICTest::RunMode;
 use DBICTest::Util::LeakTracer qw(populate_weakregistry assert_empty_weakregistry visit_refs hrefaddr);
-use Scalar::Util qw(weaken blessed);
+use Scalar::Util qw(weaken blessed reftype);
 use DBIx::Class;
+use DBIx::Class::_Util 'sigwarn_silencer';
 BEGIN {
   plan skip_all => "Your perl version $] appears to leak like a sieve - skipping test"
     if DBIx::Class::_ENV_::PEEPEENESS;
@@ -214,9 +215,6 @@ unless (DBICTest::RunMode->is_plain) {
   my $getcol_rs = $cds_rs->get_column('me.cdid');
   my $pref_getcol_rs = $cds_with_stuff->get_column('me.cdid');
 
-  # fire the column getters
-  my @throwaway = $pref_getcol_rs->all;
-
   my $base_collection = {
     resultset => $rs,
 
@@ -256,16 +254,34 @@ unless (DBICTest::RunMode->is_plain) {
     leaky_resultset_cond => $cond_rowobj,
   };
 
-  # fire all resultsets multiple times
-  # even if some of them can't find anything
-  # (notably leaky_resultset)
-  my @rsets = grep
-    { blessed $_ and $_->isa('DBIx::Class::ResultSet') }
-    values %$base_collection
-  ;
+  # fire all resultsets multiple times, once here, more below
+  # some of these can't find anything (notably leaky_resultset)
+  my @rsets = grep {
+    blessed $_
+      and
+    (
+      $_->isa('DBIx::Class::ResultSet')
+        or
+      $_->isa('DBIx::Class::ResultSetColumn')
+    )
+  } values %$base_collection;
+
+
+  my $fire_resultsets = sub {
+    local $ENV{DBIC_COLUMNS_INCLUDE_FILTER_RELS} = 1;
+    local $SIG{__WARN__} = sigwarn_silencer(
+      qr/Unable to deflate 'filter'-type relationship 'artist'.+related object primary key not retrieved/
+    );
+
+    map
+      { $_, (blessed($_) ? { $_->get_columns } : ()) }
+      map
+        { $_->all }
+        @rsets
+    ;
+  };
 
-  push @{$base_collection->{random_results}}, map { $_->all } @rsets
-    for (1,2);
+  push @{$base_collection->{random_results}}, $fire_resultsets->();
 
   # FIXME - something throws a Storable for a spin if we keep
   # the results in-collection. The same problem is seen above,
@@ -304,6 +320,68 @@ unless (DBICTest::RunMode->is_plain) {
         1;  # true means "keep descending"
       },
     );
+
+    # do a heavy-duty fire-and-compare loop on all resultsets
+    # this is expensive - not running on install
+    my $typecounts = {};
+    unless (DBICTest::RunMode->is_plain) {
+
+      # FIXME - ideally we should be able to just populate an alternative
+      # registry, subtract everything from the main one, and arrive at
+      # an "empty" resulting hash
+      # However due to gross inefficiencies in the ::ResultSet code we
+      # end up recalculating a new set of aliasmaps which could have very
+      # well been cached if it wasn't for... anyhow
+      # What we do here for the time being is similar to the lazy approach
+      # of Devel::LeakTrace - we just make sure we do not end up with more
+      # reftypes than when we started. At least we are not blanket-counting
+      # SVs like D::LT does, but going by reftype... sigh...
+
+      for (values %$weak_registry) {
+        if ( my $r = reftype($_->{weakref}) ) {
+          $typecounts->{$r}--;
+        }
+      }
+
+      # For now we can only reuse the same registry, see FIXME above/below
+      #for my $interim_wr ({}, {}) {
+      for my $interim_wr ( ($weak_registry) x 4 ) {
+
+        visit_refs(
+          refs => [ $fire_resultsets->(), @rsets ],
+          action => sub {
+            populate_weakregistry ($interim_wr, $_[0]);
+            1;  # true means "keep descending"
+          },
+        );
+
+        # FIXME - this is what *should* be here
+        #
+        ## anything we have seen so far is cool
+        #delete @{$interim_wr}{keys %$weak_registry};
+        #
+        ## I still don't get any of this...
+        #delete $interim_wr->{$_} for grep {
+        #  ref ($interim_wr->{$_}{weakref}) eq 'SCALAR'
+        #    and
+        #  ${$interim_wr->{$_}{weakref}} eq 'very closure... much wtf... wow!!!'
+        #} keys %$interim_wr;
+        #
+        ## moment of truth - the rest ought to be gone
+        #assert_empty_weakregistry($interim_wr);
+      }
+
+      for (values %$weak_registry) {
+        if ( my $r = reftype($_->{weakref}) ) {
+          $typecounts->{$r}++;
+        }
+      }
+    }
+
+    for (keys %$typecounts) {
+      fail ("Amount of $_ refs changed by $typecounts->{$_} during resultset mass-execution")
+        if ( abs ($typecounts->{$_}) > 1 ); # there is a pad caught somewhere, the +1/-1 can be ignored
+    }
   }
 
   if ($has_dt) {
diff --git a/t/lib/DBICTest/Util/LeakTracer.pm b/t/lib/DBICTest/Util/LeakTracer.pm
index ef178f9..08b9fa6 100644
--- a/t/lib/DBICTest/Util/LeakTracer.pm
+++ b/t/lib/DBICTest/Util/LeakTracer.pm
@@ -117,11 +117,20 @@ sub visit_refs {
 
     next unless length ref $r;
 
+    # no diving into weakregistries
+    next if $reg_of_regs{hrefaddr $r};
+
     next if $args->{seen_refs}{my $dec_addr = Scalar::Util::refaddr($r)}++;
 
     $visited_cnt++;
     $args->{action}->($r) or next;
 
+    # This may end up being necessarry some day, but do not slow things
+    # down for now
+    #if ( defined( my $t = tied($r) ) ) {
+    #  $visited_cnt += visit_refs({ %$args, refs => [ $t ] });
+    #}
+
     my $type = reftype $r;
     if ($type eq 'HASH') {
       $visited_cnt += visit_refs({ %$args, refs => [ map {
@@ -257,6 +266,19 @@ sub assert_empty_weakregistry {
     }
 
     $tb->diag($diag);
+
+#    if ($leaks_found == 1) {
+#      # using the fh dumper due to intermittent buffering issues
+#      # in case we decide to exit soon after (possibly via _exit)
+#      require Devel::MAT::Dumper;
+#      local $Devel::MAT::Dumper::MAX_STRING = -1;
+#      open( my $fh, '>:raw', "leaked_${addr}_pid$$.pmat" ) or die $!;
+#      Devel::MAT::Dumper::dumpfh( $fh );
+#      close ($fh) or die $!;
+#
+#      use POSIX;
+#      POSIX::_exit(1);
+#    }
   }
 
   if (! $quiet and !$leaks_found and ! $tb->in_todo) {
diff --git a/t/search/preserve_original_rs.t b/t/search/preserve_original_rs.t
index a87fe9a..cb9a306 100644
--- a/t/search/preserve_original_rs.t
+++ b/t/search/preserve_original_rs.t
@@ -97,7 +97,13 @@ for my $s (qw/a2a artw cd artw_back/) {
   is ($fresh->count_rs({ cdid => 1})->next, 1 );
 
   ok (! exists $fresh->{cursor}, 'Still no cursor on fresh rs');
-  ok (! exists $fresh->{_attrs}{_sqlmaker_select_args}, 'select args did not leak through' );
+  ok (! exists $fresh->{_attrs}{_last_sqlmaker_alias_map}, 'aliasmap did not leak through' );
+
+  my $n = $fresh->next;
+
+  # check that we are not testing for deprecated slotnames
+  ok ($fresh->{cursor}, 'Cursor at expected slot after fire');
+  ok (exists $fresh->{_attrs}{_last_sqlmaker_alias_map}, 'aliasmap at expected slot after fire' );
 }
 
 done_testing;