From: Peter Rabbitson <ribasushi@cpan.org>
Date: Sat, 10 Dec 2011 23:51:12 +0000 (+0100)
Subject: Overhaul of variable \[] handling in populate, and a lot of extra tests
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=a9bac98fc664ce08e085b230a3a8d79deee44727;p=dbsrgits%2FDBIx-Class-Historic.git

Overhaul of variable \[] handling in populate, and a lot of extra tests

Now things like multi-value \[ ' ? || ?', [...], [...] ] and populate
datasets where the values of the \[] change per row work correctly
---

diff --git a/Changes b/Changes
index 702927c..2b8aac3 100644
--- a/Changes
+++ b/Changes
@@ -7,6 +7,7 @@ Revision history for DBIx::Class
 
     * Fixes
         - Fix SkipFirst and FirstSkip limit dialects (Informix and Firebird)
+        - A number of corner case fixes of void context populate() with \[]
 
     * Misc
         - Codebase is now trailing-whitespace-free
diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm
index 607b1ef..e9fab0b 100644
--- a/lib/DBIx/Class/Storage/DBI.pm
+++ b/lib/DBIx/Class/Storage/DBI.pm
@@ -14,6 +14,7 @@ use List::Util qw/first/;
 use Sub::Name 'subname';
 use Try::Tiny;
 use overload ();
+use Data::Compare (); # no imports!!! guard against insane architecture
 use namespace::clean;
 
 # default cursor class, overridable in connect_info attributes
@@ -1751,9 +1752,12 @@ sub insert {
 sub insert_bulk {
   my ($self, $source, $cols, $data) = @_;
 
+  my @col_range = (0..$#$cols);
+
   # FIXME - perhaps this is not even needed? does DBI stringify?
   #
   # forcibly stringify whatever is stringifiable
+  # ResultSet::populate() hands us a copy - safe to mangle
   for my $r (0 .. $#$data) {
     for my $c (0 .. $#{$data->[$r]}) {
       $data->[$r][$c] = "$data->[$r][$c]"
@@ -1761,116 +1765,174 @@ sub insert_bulk {
     }
   }
 
-  # check the data for consistency
-  # report a sensible error on bad data
+  my $colinfo_cache = {}; # since we will run _resolve_bindattrs on the same $source a lot
+
+  # get a slice type index based on first row of data
+  # a "column" in this context may refer to more than one bind value
+  # e.g. \[ '?, ?', [...], [...] ]
+  #
+  # construct the value type index - a description of values types for every
+  # per-column slice of $data:
+  #
+  # nonexistent - nonbind literal
+  # 0 - regular value
+  # [] of bindattrs - resolved attribute(s) of bind(s) passed via literal+bind \[] combo
   #
-  # also create a list of dynamic binds (ones that will be changing
-  # for each row)
-  my $dyn_bind_idx;
-  for my $col_idx (0..$#$cols) {
+  # also construct the column hash to pass to the SQL generator. For plain
+  # (non literal) values - convert the members of the first row into a
+  # literal+bind combo, with extra positional info in the bind attr hashref.
+  # This will allow us to match the order properly, and is so contrived
+  # because a user-supplied literal/bind (or something else specific to a
+  # resultsource and/or storage driver) can inject extra binds along the
+  # way, so one can't rely on "shift positions" ordering at all. Also we
+  # can't just hand SQLA a set of some known "values" (e.g. hashrefs that
+  # can be later matched up by address), because we want to supply a real
+  # value on which perhaps e.g. datatype checks will be performed
+  my ($proto_data, $value_type_idx);
+  for my $i (@col_range) {
+    my $colname = $cols->[$i];
+    if (ref $data->[0][$i] eq 'SCALAR') {
+      # no bind value at all - no type
+
+      $proto_data->{$colname} = $data->[0][$i];
+    }
+    elsif (ref $data->[0][$i] eq 'REF' and ref ${$data->[0][$i]} eq 'ARRAY' ) {
+      # repack, so we don't end up mangling the original \[]
+      my ($sql, @bind) = @${$data->[0][$i]};
 
-    # the first "row" is used as a point of reference
-    my $reference_val = $data->[0][$col_idx];
-    my $is_literal = ref $reference_val eq 'SCALAR';
-    my $is_literal_bind = ( !$is_literal and (
-      ref $reference_val eq 'REF'
-        and
-      ref $$reference_val eq 'ARRAY'
-    ) );
-
-    $dyn_bind_idx->{$col_idx} = 1
-      if (!$is_literal and !$is_literal_bind);
-
-    # use a closure for convenience (less to pass)
-    my $bad_slice = sub {
-      my ($msg, $slice_idx) = @_;
-      $self->throw_exception(sprintf "%s for column '%s' in populate slice:\n%s",
-        $msg,
-        $cols->[$col_idx],
-        do {
-          require Data::Dumper::Concise;
-          local $Data::Dumper::Maxdepth = 2;
-          Data::Dumper::Concise::Dumper ({
-            map { $cols->[$_] =>
-              $data->[$slice_idx][$_]
-            } (0 .. $#$cols)
-          }),
-        }
+      # normalization of user supplied stuff
+      my $resolved_bind = $self->_resolve_bindattrs(
+        $source, \@bind, $colinfo_cache,
       );
-    };
+
+      # store value-less (attrs only) bind info - we will be comparing all
+      # supplied binds against this for sanity
+      $value_type_idx->{$i} = [ map { $_->[0] } @$resolved_bind ];
+
+      $proto_data->{$colname} = \[ $sql, map { [
+        # inject slice order to use for $proto_bind construction
+          { %{$resolved_bind->[$_][0]}, _bind_data_slice_idx => $i }
+            =>
+          $resolved_bind->[$_][1]
+        ] } (0 .. $#bind)
+      ];
+    }
+    else {
+      $value_type_idx->{$i} = 0;
+
+      $proto_data->{$colname} = \[ '?', [
+        { dbic_colname => $colname, _bind_data_slice_idx => $i }
+          =>
+        $data->[0][$i]
+      ] ];
+    }
+  }
+
+  my ($sql, $proto_bind) = $self->_prep_for_execute (
+    'insert',
+    $source,
+    [ $proto_data ],
+  );
+
+  if (! @$proto_bind and keys %$value_type_idx) {
+    # if the bindlist is empty and we had some dynamic binds, this means the
+    # storage ate them away (e.g. the NoBindVars component) and interpolated
+    # them directly into the SQL. This obviously can't be good for multi-inserts
+    $self->throw_exception('Cannot insert_bulk without support for placeholders');
+  }
+
+  # sanity checks
+  # FIXME - devise a flag "no babysitting" or somesuch to shut this off
+  #
+  # use an error reporting closure for convenience (less to pass)
+  my $bad_slice_report_cref = sub {
+    my ($msg, $r_idx, $c_idx) = @_;
+    $self->throw_exception(sprintf "%s for column '%s' in populate slice:\n%s",
+      $msg,
+      $cols->[$c_idx],
+      do {
+        require Data::Dumper::Concise;
+        local $Data::Dumper::Maxdepth = 5;
+        Data::Dumper::Concise::Dumper ({
+          map { $cols->[$_] =>
+            $data->[$r_idx][$_]
+          } @col_range
+        }),
+      }
+    );
+  };
+
+  for my $col_idx (@col_range) {
+    my $reference_val = $data->[0][$col_idx];
 
     for my $row_idx (1..$#$data) {  # we are comparing against what we got from [0] above, hence start from 1
       my $val = $data->[$row_idx][$col_idx];
 
-      if ($is_literal) {
+      if (! exists $value_type_idx->{$col_idx}) { # literal no binds
         if (ref $val ne 'SCALAR') {
-          $bad_slice->(
+          $bad_slice_report_cref->(
             "Incorrect value (expecting SCALAR-ref \\'$$reference_val')",
-            $row_idx
+            $row_idx,
+            $col_idx,
           );
         }
         elsif ($$val ne $$reference_val) {
-          $bad_slice->(
+          $bad_slice_report_cref->(
             "Inconsistent literal SQL value (expecting \\'$$reference_val')",
-            $row_idx
+            $row_idx,
+            $col_idx,
           );
         }
       }
-      elsif ($is_literal_bind) {
+      elsif (! $value_type_idx->{$col_idx} ) {  # regular non-literal value
+        if (ref $val eq 'SCALAR' or (ref $val eq 'REF' and ref $$val eq 'ARRAY') ) {
+          $bad_slice_report_cref->("Literal SQL found where a plain bind value is expected", $row_idx, $col_idx);
+        }
+      }
+      else {  # binds from a \[], compare type and attrs
         if (ref $val ne 'REF' or ref $$val ne 'ARRAY') {
-          $bad_slice->(
+          $bad_slice_report_cref->(
             "Incorrect value (expecting ARRAYREF-ref \\['${$reference_val}->[0]', ... ])",
-            $row_idx
+            $row_idx,
+            $col_idx,
           );
         }
-        elsif (${$val}->[0] ne ${$reference_val}->[0]) {
-          $bad_slice->(
-            "Inconsistent literal SQL-bind value (expecting \\['${$reference_val}->[0]', ... ])",
-            $row_idx
-          );
-        }
-      }
-      elsif (ref $val) {
-        if (ref $val eq 'SCALAR' or (ref $val eq 'REF' and ref $$val eq 'ARRAY') ) {
-          $bad_slice->("Literal SQL found where a plain bind value is expected", $row_idx);
-        }
-        else {
-          $bad_slice->("$val reference found where bind expected", $row_idx);
+        # start drilling down and bail out early on identical refs
+        elsif (
+          $reference_val != $val
+            or
+          $$reference_val != $$val
+        ) {
+          if (${$val}->[0] ne ${$reference_val}->[0]) {
+            $bad_slice_report_cref->(
+              "Inconsistent literal/bind SQL (expecting \\['${$reference_val}->[0]', ... ])",
+              $row_idx,
+              $col_idx,
+            );
+          }
+          # need to check the bind attrs - a bind will happen only once for
+          # the entire dataset, so any changes further down will be ignored.
+          elsif (! Data::Compare::Compare(
+            $value_type_idx->{$col_idx},
+            [
+              map
+              { $_->[0] }
+              @{$self->_resolve_bindattrs(
+                $source, [ @{$$val}[1 .. $#$$val] ], $colinfo_cache,
+              )}
+            ],
+          )) {
+            $bad_slice_report_cref->(
+              'Differing bind attributes on literal/bind values not supported',
+              $row_idx,
+              $col_idx,
+            );
+          }
         }
       }
     }
   }
 
-  # Get the sql with bind values interpolated where necessary. For dynamic
-  # binds convert the values of the first row into a literal+bind combo, with
-  # extra positional info in the bind attr hashref. This will allow us to match
-  # the order properly, and is so contrived because a user-supplied literal
-  # bind (or something else specific to a resultsource and/or storage driver)
-  # can inject extra binds along the way, so one can't rely on "shift
-  # positions" ordering at all. Also we can't just hand SQLA a set of some
-  # known "values" (e.g. hashrefs that can be later matched up by address),
-  # because we want to supply a real value on which perhaps e.g. datatype
-  # checks will be performed
-  my ($sql, $proto_bind) = $self->_prep_for_execute (
-    'insert',
-    $source,
-    [ { map { $cols->[$_] => $dyn_bind_idx->{$_}
-      ? \[ '?', [
-          { dbic_colname => $cols->[$_], _bind_data_slice_idx => $_ }
-            =>
-          $data->[0][$_]
-        ] ]
-      : $data->[0][$_]
-    } (0..$#$cols) } ],
-  );
-
-  if (! @$proto_bind and keys %$dyn_bind_idx) {
-    # if the bindlist is empty and we had some dynamic binds, this means the
-    # storage ate them away (e.g. the NoBindVars component) and interpolated
-    # them directly into the SQL. This obviosly can't be good for multi-inserts
-    $self->throw_exception('Cannot insert_bulk without support for placeholders');
-  }
-
   # neither _dbh_execute_for_fetch, nor _dbh_execute_inserts_with_no_binds
   # are atomic (even if execute_for_fetch is a single call). Thus a safety
   # scope guard
@@ -1921,23 +1983,30 @@ sub _dbh_execute_for_fetch {
     ) if defined $bind_attrs->[$i];
   }
 
-  my $data_slice_idx = [ map {
-    (
-      ref $proto_bind->[$_][0] eq 'HASH'
-        and
-      exists $proto_bind->[$_][0]{_bind_data_slice_idx}
-    ) ? $proto_bind->[$_][0]{_bind_data_slice_idx} : undef;
-  } @idx_range ];
+  # At this point $data slots named in the _bind_data_slice_idx of
+  # each piece of $proto_bind are either \[]s or plain values to be
+  # passed in. Construct the dispensing coderef. *NOTE* the order
+  # of $data will differ from this of the ?s in the SQL (due to
+  # alphabetical ordering by colname). We actually do want to
+  # preserve this behavior so that prepare_cached has a better
+  # chance of matching on unrelated calls
+  my %data_reorder = map { $proto_bind->[$_][0]{_bind_data_slice_idx} => $_ } @idx_range;
 
   my $fetch_row_idx = -1; # saner loop this way
   my $fetch_tuple = sub {
     return undef if ++$fetch_row_idx > $#$data;
 
-    return [ map {
-      defined $data_slice_idx->[$_]
-        ? $data->[$fetch_row_idx][$data_slice_idx->[$_]]
-        : $proto_bind->[$_][1]
-    } @idx_range ];
+    return [ map
+      { (ref $_ eq 'REF' and ref $$_ eq 'ARRAY')
+        ? map { $_->[-1] } @{$$_}[1 .. $#$$_]
+        : $_
+      }
+      map
+        { $data->[$fetch_row_idx][$_]}
+        sort
+          { $data_reorder{$a} <=> $data_reorder{$b} }
+          keys %data_reorder
+    ];
   };
 
   my $tuple_status = [];
diff --git a/t/100populate.t b/t/100populate.t
index c5c3868..822ad93 100644
--- a/t/100populate.t
+++ b/t/100populate.t
@@ -154,6 +154,55 @@ is($link7->title, 'gtitle', 'Link 7 title');
   $rs->delete;
 }
 
+# populate with literal+bind
+{
+  my $rs = $schema->resultset('Link');
+  $rs->delete;
+
+  # test insert_bulk with all literal/bind sql
+  $rs->populate([
+    (+{
+        url => \['?', [ {} => 'cpan.org' ] ],
+        title => \['?', [ {} => "The 'best of' cpan" ] ],
+    }) x 5
+  ]);
+
+  is((grep {
+    $_->url eq 'cpan.org' &&
+    $_->title eq "The 'best of' cpan",
+  } $rs->all), 5, 'populate with all literal/bind');
+
+  $rs->delete;
+
+  # test insert_bulk with mix literal and literal/bind
+  $rs->populate([
+    (+{
+        url => \"'cpan.org'",
+        title => \['?', [ {} => "The 'best of' cpan" ] ],
+    }) x 5
+  ]);
+
+  is((grep {
+    $_->url eq 'cpan.org' &&
+    $_->title eq "The 'best of' cpan",
+  } $rs->all), 5, 'populate with all literal/bind SQL');
+
+  $rs->delete;
+
+  # test mixed binds with literal sql/bind
+
+  $rs->populate([ map { +{
+    url => \[ '? || ?', [ {} => 'cpan.org_' ], [ undef, $_ ] ],
+    title => "The 'best of' cpan",
+  } } (1 .. 5) ]);
+
+  for (1 .. 5) {
+    ok($rs->find({ url => "cpan.org_$_" }), "Row $_ correctly created with dynamic literal/bind populate" );
+  }
+
+  $rs->delete;
+}
+
 my $rs = $schema->resultset('Artist');
 $rs->delete;
 throws_ok {
@@ -219,6 +268,45 @@ throws_ok {
   ]);
 } qr/Inconsistent literal SQL value/, 'literal sql must be the same in all slices';
 
+throws_ok {
+  $rs->populate([
+    {
+      artistid => 1,
+      name => \['?', [ {} => 'foo' ] ],
+    },
+    {
+      artistid => 2,
+      name => \"'bar'",
+    }
+  ]);
+} qr/\QIncorrect value (expecting ARRAYREF-ref/, 'literal where literal+bind expected throws';
+
+throws_ok {
+  $rs->populate([
+    {
+      artistid => 1,
+      name => \['?', [ { sqlt_datatype => 'foooo' } => 'foo' ] ],
+    },
+    {
+      artistid => 2,
+      name => \['?', [ {} => 'foo' ] ],
+    }
+  ]);
+} qr/\QDiffering bind attributes on literal\/bind values not supported for column 'name'/, 'literal+bind with differing attrs throws';
+
+lives_ok {
+  $rs->populate([
+    {
+      artistid => 1,
+      name => \['?', [ undef, 'foo' ] ],
+    },
+    {
+      artistid => 2,
+      name => \['?', [ {} => 'bar' ] ],
+    }
+  ]);
+} 'literal+bind with semantically identical attrs works after normalization';
+
 # the stringification has nothing to do with the artist name
 # this is solely for testing consistency
 my $fn = Path::Class::File->new ('somedir/somefilename.tmp');
diff --git a/t/72pg_bytea.t b/t/72pg_bytea.t
index 234da7e..4ff3e36 100644
--- a/t/72pg_bytea.t
+++ b/t/72pg_bytea.t
@@ -105,7 +105,32 @@ my $new;
   ok($new->bytea eq $big_long_string, 'bytea value made it to db');
 }
 
+# test inserting a row via populate() (bindtype propagation through execute_for_fetch)
+# use a new $dbh to ensure no leakage due to prepare_cached
+{
+  my $cnt = 4;
+
+  $schema->storage->_dbh(undef);
+  my $rs = $schema->resultset('BindType');
+  $rs->delete;
+
+  $rs->populate([
+    [qw/id bytea/],
+    map { [
+      \[ '?', [ {} => $_ ] ],
+      "pop_${_}_" . $big_long_string,
+    ]} (1 .. $cnt)
+  ]);
+
+  is($rs->count, $cnt, 'All rows were correctly inserted');
+  for (1..$cnt) {
+    my $r = $rs->find({ bytea => "pop_${_}_" . $big_long_string });
+    is ($r->id, $_, "Row $_ found after find() on the blob");
+
+  }
+}
+
 done_testing;
 
-eval { $dbh->do("DROP TABLE bindtype_test") };
+eval { $schema->storage->dbh_do(sub { $_[1]->do("DROP TABLE bindtype_test") } ) };