squash, data filter shouldnt run on arrayrefref

[dbsrgits/DBIx-Class.git] / lib / DBIx / Class / Storage / DBI.pm
diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm

index 8ff9868..f7e6222 100644 (file)
--- a/lib/DBIx/Class/Storage/DBI.pm
+++ b/lib/DBIx/Class/Storage/DBI.pm
@@ -792,7 +792,7 @@ sub dbh_do {
 
   # short circuit when we know there is no need for a runner
   #
-  # FIXME - asumption may be wrong
+  # FIXME - assumption may be wrong
   # the rationale for the txn_depth check is that if this block is a part
   # of a larger transaction, everything up to that point is screwed anyway
   return $self->$run_target($self->_get_dbh, @_)
@@ -1386,6 +1386,29 @@ sub _connect {
 
   local $DBI::connect_via = 'connect' if $INC{'Apache/DBI.pm'} && $ENV{MOD_PERL};
 
+  # this odd anonymous coderef dereference is in fact really
+  # necessary to avoid the unwanted effect described in perl5
+  # RT#75792
+  #
+  # in addition the coderef itself can't reside inside the try{} block below
+  # as it somehow triggers a leak under perl -d
+  my $dbh_error_handler_installer = sub {
+    weaken (my $weak_self = $_[0]);
+
+    # the coderef is blessed so we can distinguish it from externally
+    # supplied handles (which must be preserved)
+    $_[1]->{HandleError} = bless sub {
+      if ($weak_self) {
+        $weak_self->throw_exception("DBI Exception: $_[0]");
+      }
+      else {
+        # the handler may be invoked by something totally out of
+        # the scope of DBIC
+        DBIx::Class::Exception->throw("DBI Exception (unhandled by DBIC, ::Schema GCed): $_[0]");
+      }
+    }, '__DBIC__DBH__ERROR__HANDLER__';
+  };
+
   try {
     if(ref $info[0] eq 'CODE') {
       $dbh = $info[0]->();
@@ -1429,26 +1452,7 @@ sub _connect {
         $dbh->{RaiseError} = 1;
       }
 
-      # this odd anonymous coderef dereference is in fact really
-      # necessary to avoid the unwanted effect described in perl5
-      # RT#75792
-      sub {
-        my $weak_self = $_[0];
-        weaken $weak_self;
-
-        # the coderef is blessed so we can distinguish it from externally
-        # supplied handles (which must be preserved)
-        $_[1]->{HandleError} = bless sub {
-          if ($weak_self) {
-            $weak_self->throw_exception("DBI Exception: $_[0]");
-          }
-          else {
-            # the handler may be invoked by something totally out of
-            # the scope of DBIC
-            DBIx::Class::Exception->throw("DBI Exception (unhandled by DBIC, ::Schema GCed): $_[0]");
-          }
-        }, '__DBIC__DBH__ERROR__HANDLER__';
-      }->($self, $dbh);
+      $dbh_error_handler_installer->($self, $dbh);
     }
   }
   catch {
@@ -1960,19 +1964,76 @@ sub insert {
 sub insert_bulk {
   my ($self, $source, $cols, $data) = @_;
 
-  my @col_range = (0..$#$cols);
-
-  # FIXME SUBOPTIMAL - most likely this is not necessary at all
-  # confirm with dbi-dev whether explicit stringification is needed
+  # data can either be:
+  # 1. an array of arrays of data like [[col,col],[],...]
+  # 2. a coderef tuple generator to be passed to _execute_for_fetch
+  # 3. an arrayrefref subquery
+  # 4. an array containing any combination of the above
   #
-  # forcibly stringify whatever is stringifiable
-  # ResultSet::populate() hands us a copy - safe to mangle
-  for my $r (0 .. $#$data) {
-    for my $c (0 .. $#{$data->[$r]}) {
-      $data->[$r][$c] = "$data->[$r][$c]"
-        if ( length ref $data->[$r][$c] and overload::Method($data->[$r][$c], '""') );
+  # in the case of 4, we can flatten 1. and 2. together into a single
+  # tuple/_prep_for exectute call
+  # 3. requires a fresh _prep_for_execute call
+  if (ref $data eq 'ARRAY' &&
+        ((ref $data->[0] eq 'ARRAY' && ref $data->[0][0] eq 'ARRAY') ||
+          ref $data->[0] eq 'CODE' ||
+          ref $data->[0] eq 'REF')) {
+    # group colsets and coderefs, as we can combine them with a new tuple
+    my @chunked;
+
+    for my $datum (@$data) {
+      if ((ref $datum eq 'ARRAY' && ref $datum->[0] eq 'ARRAY') ||
+          ref $datum eq 'CODE') {
+        $chunked[-1] ||= [];
+        push @{$chunked[-1]}, $datum;
+      }
+      elsif (ref $datum eq 'REF') {
+        push @chunked, $datum;
+      }
+      else {
+        $self->throw_exception('Expecting ARRAYREF or ARRAYREF-ref or CODE or not '.ref $datum);
+      }
+    }
+
+    for my $chunk (@chunked) {
+      if (ref $chunk eq 'REF') {
+        $self->_insert_bulk($source, $cols, $chunk);
+      }
+      else {
+        my $current = shift @$chunk;
+
+        my $tuple;
+        $tuple = sub {
+          my $row = do {
+            if (ref $current eq 'ARRAY') {
+              shift @$current;
+            }
+            elsif (ref $current eq 'CODE') {
+              $current->();
+            }
+          };
+
+          if ($row) {
+            return $row;
+          }
+          elsif (!defined $row && @$chunk) {
+            $current = shift @$chunk;
+            return $tuple->();
+          }
+        };
+
+        $self->_insert_bulk($source, $cols, $tuple);
+      }
     }
   }
+  else {
+    $self->_insert_bulk($source, $cols, $data);
+  }
+}
+
+sub _insert_bulk {
+  my ($self, $source, $cols, $data) = @_;
+
+  my @col_range = (0..$#$cols);
 
   my $colinfos = $source->columns_info($cols);
 
@@ -1982,71 +2043,86 @@ sub insert_bulk {
       : 0
   ;
 
-  # get a slice type index based on first row of data
-  # a "column" in this context may refer to more than one bind value
-  # e.g. \[ '?, ?', [...], [...] ]
-  #
-  # construct the value type index - a description of values types for every
-  # per-column slice of $data:
-  #
-  # nonexistent - nonbind literal
-  # 0 - regular value
-  # [] of bindattrs - resolved attribute(s) of bind(s) passed via literal+bind \[] combo
-  #
-  # also construct the column hash to pass to the SQL generator. For plain
-  # (non literal) values - convert the members of the first row into a
-  # literal+bind combo, with extra positional info in the bind attr hashref.
-  # This will allow us to match the order properly, and is so contrived
-  # because a user-supplied literal/bind (or something else specific to a
-  # resultsource and/or storage driver) can inject extra binds along the
-  # way, so one can't rely on "shift positions" ordering at all. Also we
-  # can't just hand SQLA a set of some known "values" (e.g. hashrefs that
-  # can be later matched up by address), because we want to supply a real
-  # value on which perhaps e.g. datatype checks will be performed
   my ($proto_data, $value_type_by_col_idx);
-  for my $i (@col_range) {
-    my $colname = $cols->[$i];
-    if (ref $data->[0][$i] eq 'SCALAR') {
-      # no bind value at all - no type
-
-      $proto_data->{$colname} = $data->[0][$i];
+  my $reference_row = do {
+    if (ref $data eq 'CODE') {
+      $data->();
+    }
+    elsif (ref $data eq 'ARRAY') {
+      shift @$data;
     }
-    elsif (ref $data->[0][$i] eq 'REF' and ref ${$data->[0][$i]} eq 'ARRAY' ) {
-      # repack, so we don't end up mangling the original \[]
-      my ($sql, @bind) = @${$data->[0][$i]};
+  };
 
-      # normalization of user supplied stuff
-      my $resolved_bind = $self->_resolve_bindattrs(
-        $source, \@bind, $colinfos,
-      );
+  if ($reference_row) {
+    # get a slice type index based on first row of data
+    # a "column" in this context may refer to more than one bind value
+    # e.g. \[ '?, ?', [...], [...] ]
+    #
+    # construct the value type index - a description of values types for every
+    # per-column slice of $data:
+    #
+    # nonexistent - nonbind literal
+    # 0 - regular value
+    # [] of bindattrs - resolved attribute(s) of bind(s) passed via literal+bind \[] combo
+    #
+    # also construct the column hash to pass to the SQL generator. For plain
+    # (non literal) values - convert the members of the first row into a
+    # literal+bind combo, with extra positional info in the bind attr hashref.
+    # This will allow us to match the order properly, and is so contrived
+    # because a user-supplied literal/bind (or something else specific to a
+    # resultsource and/or storage driver) can inject extra binds along the
+    # way, so one can't rely on "shift positions" ordering at all. Also we
+    # can't just hand SQLA a set of some known "values" (e.g. hashrefs that
+    # can be later matched up by address), because we want to supply a real
+    # value on which perhaps e.g. datatype checks will be performed
+    for my $i (@col_range) {
+
+      my $colname = $cols->[$i];
+      if (ref $reference_row eq 'REF' && ref $$reference_row eq 'ARRAY') {
+        $proto_data = $reference_row;
+        last;
+      }
+      elsif (ref $reference_row eq 'ARRAY' && ref $reference_row->[$i] eq 'SCALAR') {
+        # no bind value at all - no type
+        $proto_data->{$colname} = $reference_row->[$i];
+      }
+      elsif (ref $reference_row->[$i] eq 'REF' and ref ${$reference_row->[$i]} eq 'ARRAY' ) {
+        # repack, so we don't end up mangling the original \[]
+        my ($sql, @bind) = @${$reference_row->[$i]};
 
-      # store value-less (attrs only) bind info - we will be comparing all
-      # supplied binds against this for sanity
-      $value_type_by_col_idx->{$i} = [ map { $_->[0] } @$resolved_bind ];
+        # normalization of user supplied stuff
+        my $resolved_bind = $self->_resolve_bindattrs(
+          $source, \@bind, $colinfos,
+        );
 
-      $proto_data->{$colname} = \[ $sql, map { [
-        # inject slice order to use for $proto_bind construction
-          { %{$resolved_bind->[$_][0]}, _bind_data_slice_idx => $i, _literal_bind_subindex => $_+1 }
-            =>
-          $resolved_bind->[$_][1]
-        ] } (0 .. $#bind)
-      ];
-    }
-    else {
-      $value_type_by_col_idx->{$i} = undef;
+        # store value-less (attrs only) bind info - we will be comparing all
+        # supplied binds against this for sanity
+        $value_type_by_col_idx->{$i} = [ map { $_->[0] } @$resolved_bind ];
+
+        $proto_data->{$colname} = \[ $sql, map { [
+          # inject slice order to use for $proto_bind construction
+            { %{$resolved_bind->[$_][0]}, _bind_data_slice_idx => $i, _literal_bind_subindex => $_+1 }
+              =>
+            $resolved_bind->[$_][1]
+          ] } (0 .. $#bind)
+        ];
+      }
+      else {
+        $value_type_by_col_idx->{$i} = undef;
 
-      $proto_data->{$colname} = \[ '?', [
-        { dbic_colname => $colname, _bind_data_slice_idx => $i }
-          =>
-        $data->[0][$i]
-      ] ];
+        $proto_data->{$colname} = \[ '?', [
+          { dbic_colname => $colname, _bind_data_slice_idx => $i }
+            =>
+          $reference_row->[$i]
+        ] ];
+      }
     }
   }
 
   my ($sql, $proto_bind) = $self->_prep_for_execute (
     'insert',
     $source,
-    [ $proto_data ],
+    [ $proto_data || \[ $cols => $data ] ],
   );
 
   if (! @$proto_bind and keys %$value_type_by_col_idx) {
@@ -2077,14 +2153,25 @@ sub insert_bulk {
     );
   };
 
-  for my $col_idx (@col_range) {
-    my $reference_val = $data->[0][$col_idx];
+  my $data_filter = sub {
+    my ($row, $row_idx) = @_;
+    # FIXME - perhaps this is not even needed? does DBI stringify?
+    #
+    # forcibly stringify whatever is stringifiable
+    # ResultSet::populate() hands us a copy - safe to mangle
+    for my $c (0 .. $#{$row}) {
+      $row->[$c] = "$row->[$c]"
+        if ( ref $row->[$c] and overload::Method($row->[$c], '""') );
+    }
+
+    for my $col_idx (@col_range) {
+      my $reference_val = $reference_row->[$col_idx];
 
-    for my $row_idx (1..$#$data) {  # we are comparing against what we got from [0] above, hence start from 1
-      my $val = $data->[$row_idx][$col_idx];
+      my $val = $row->[$col_idx];
 
       if (! exists $value_type_by_col_idx->{$col_idx}) { # literal no binds
         if (ref $val ne 'SCALAR') {
+          use DDP; p @_;
           $bad_slice_report_cref->(
             "Incorrect value (expecting SCALAR-ref \\'$$reference_val')",
             $row_idx,
@@ -2146,6 +2233,12 @@ sub insert_bulk {
         }
       }
     }
+  };
+
+  # we have a split codepath here where col validation happens in the
+  # fetch_tuple, but the tuple isnt used in no proto_bind situations, so we run it
+  if (!@$proto_bind && ref $data eq 'ARRAY') {
+    $data_filter->($data->[$_], $_) for (0..$#$data);
   }
 
   # neither _dbh_execute_for_fetch, nor _dbh_execute_inserts_with_no_binds
@@ -2159,11 +2252,25 @@ sub insert_bulk {
     if (@$proto_bind) {
       # proto bind contains the information on which pieces of $data to pull
       # $cols is passed in only for prettier error-reporting
-      $self->_dbh_execute_for_fetch( $source, $sth, $proto_bind, $cols, $data );
+      my $row_idx = 0;
+      my $fetch_tuple = sub {
+        my $row
+          = !$row_idx++         ? $reference_row
+          : ref $data eq 'CODE' ? $data->()
+          :                     shift @$data;
+
+        return undef if !$row;
+
+        $data_filter->($row, $row_idx);
+
+        $row;
+      };
+
+      $self->_dbh_execute_for_fetch( $source, $sth, $proto_bind, $cols, $fetch_tuple );
     }
     else {
       # bind_param_array doesn't work if there are no binds
-      $self->_dbh_execute_inserts_with_no_binds( $sth, scalar @$data );
+      $self->_dbh_execute_inserts_with_no_binds( $sth, ref $data eq 'ARRAY' ? (scalar(@$data)+1) : 1 );
     }
   };
 
@@ -2208,14 +2315,23 @@ sub _dbh_execute_for_fetch {
 
   my $fetch_row_idx = -1; # saner loop this way
   my $fetch_tuple = sub {
-    return undef if ++$fetch_row_idx > $#$data;
+    my $row = do {
+      if (ref $data eq 'CODE') {
+        $data->();
+      }
+      else {
+        return undef if ++$fetch_row_idx > $#$data;
+        $data->[$fetch_row_idx];
+      }
+    };
+    return undef if not defined $row;
 
     return [ map { defined $_->{_literal_bind_subindex}
-      ? ${ $data->[ $fetch_row_idx ]->[ $_->{_bind_data_slice_idx} ]}
+      ? ${ $row->[ $_->{_bind_data_slice_idx} ]}
          ->[ $_->{_literal_bind_subindex} ]
           ->[1]
-      : $data->[ $fetch_row_idx ]->[ $_->{_bind_data_slice_idx} ]
-    } map { $_->[0] } @$proto_bind];
+      : $row->[ $_->{_bind_data_slice_idx} ]
+    } map { $_->[0] } @$proto_bind ];
   };
 
   my $tuple_status = [];
@@ -2254,9 +2370,10 @@ sub _dbh_execute_for_fetch {
       if ($i > $#$tuple_status);
 
     require Data::Dumper::Concise;
-    $self->throw_exception(sprintf "execute_for_fetch() aborted with '%s' at populate slice:\n%s",
+
+    $self->throw_exception(sprintf "execute_for_fetch() aborted with '%s' at populate slice: %s",
       ($tuple_status->[$i][1] || $err),
-      Data::Dumper::Concise::Dumper( { map { $cols->[$_] => $data->[$i][$_] } (0 .. $#$cols) } ),
+      $i,
     );
   }
 
@@ -2380,8 +2497,8 @@ sub _select_args {
   my ($prefetch_needs_subquery, @limit_args);
 
   if ( $attrs->{_grouped_by_distinct} and $attrs->{collapse} ) {
-    # we already know there is a valid group_by and we know it is intended
-    # to be based *only* on the main result columns
+    # we already know there is a valid group_by (we made it) and we know it is
+    # intended to be based *only* on non-multi stuff
     # short circuit the group_by parsing below
     $prefetch_needs_subquery = 1;
   }
@@ -2399,7 +2516,7 @@ sub _select_args {
     @{$attrs->{group_by}}
       and
     my $grp_aliases = try { # try{} because $attrs->{from} may be unreadable
-      $self->_resolve_aliastypes_from_select_args( $attrs->{from}, undef, undef, { group_by => $attrs->{group_by} } )
+      $self->_resolve_aliastypes_from_select_args({ from => $attrs->{from}, group_by => $attrs->{group_by} })
     }
   ) {
     # no aliases other than our own in group_by
@@ -2413,8 +2530,7 @@ sub _select_args {
   }
 
   if ($prefetch_needs_subquery) {
-    ($ident, $select, $where, $attrs) =
-      $self->_adjust_select_args_for_complex_prefetch ($ident, $select, $where, $attrs);
+    $attrs = $self->_adjust_select_args_for_complex_prefetch ($attrs);
   }
   elsif (! $attrs->{software_limit} ) {
     push @limit_args, (
@@ -2427,17 +2543,17 @@ sub _select_args {
   if (
     ! $prefetch_needs_subquery  # already pruned
       and
-    ref $ident
+    ref $attrs->{from}
       and
-    reftype $ident eq 'ARRAY'
+    reftype $attrs->{from} eq 'ARRAY'
       and
-    @$ident != 1
+    @{$attrs->{from}} != 1
   ) {
-    ($ident, $attrs->{_aliastypes}) = $self->_prune_unused_joins ($ident, $select, $where, $attrs);
+    ($attrs->{from}, $attrs->{_aliastypes}) = $self->_prune_unused_joins ($attrs);
   }
 
 ###
-  # This would be the point to deflate anything found in $where
+  # This would be the point to deflate anything found in $attrs->{where}
   # (and leave $attrs->{bind} intact). Problem is - inflators historically
   # expect a result object. And all we have is a resultsource (it is trivial
   # to extract deflator coderefs via $alias2source above).
@@ -2447,7 +2563,7 @@ sub _select_args {
 ###
 
   return ( 'select', @{ $orig_attrs->{_sqlmaker_select_args} = [
-    $ident, $select, $where, $attrs, @limit_args
+    @{$attrs}{qw(from select where)}, $attrs, @limit_args
   ]} );
 }
 
@@ -2938,7 +3054,7 @@ sub deployment_statements {
     $self->throw_exception("Can't deploy without a ddl_dir or " . DBIx::Class::Optional::Dependencies->req_missing_for ('deploy') );
   }
 
-  # sources needs to be a parser arg, but for simplicty allow at top level
+  # sources needs to be a parser arg, but for simplicity allow at top level
   # coming in
   $sqltargs->{parser_args}{sources} = delete $sqltargs->{sources}
       if exists $sqltargs->{sources};