Entirely and utterly rewrite populate(), fixing the variable hash issue

diff --git a/Changes b/Changes

index d69a071..6dcba26 100644 (file)
--- a/Changes
+++ b/Changes
@@ -25,6 +25,8 @@ Revision history for DBIx::Class
           resultsets with no rows
         - Fix incorrect handling of custom relationship conditions returning
           SQLA literal expressions
+        - Fix long standing bug with populate() getting confused by hashrefs
+          with different sets of keys: http://is.gd/2011_dbic_populate_gotcha
         - Fix multi-value literal populate not working with simplified bind
           specifications
         - Massively improve the implied resultset condition parsing - now all
diff --git a/lib/DBIx/Class/Relationship/BelongsTo.pm b/lib/DBIx/Class/Relationship/BelongsTo.pm

index b594d3a..4b1577c 100644 (file)
--- a/lib/DBIx/Class/Relationship/BelongsTo.pm
+++ b/lib/DBIx/Class/Relationship/BelongsTo.pm
@@ -89,6 +89,7 @@ sub belongs_to {
   $class->add_relationship($rel, $f_class,
     $cond,
     {
+      is_depends_on => 1,
       accessor => $acc_type,
       $fk_columns ? ( fk_columns => $fk_columns ) : (),
       %{$attrs || {}}
diff --git a/lib/DBIx/Class/ResultSet.pm b/lib/DBIx/Class/ResultSet.pm

index 0262537..c6b725b 100644 (file)
--- a/lib/DBIx/Class/ResultSet.pm
+++ b/lib/DBIx/Class/ResultSet.pm
@@ -2234,127 +2234,266 @@ case there are obviously no benefits to using this method over L</create>.
 sub populate {
   my $self = shift;
 
-  # cruft placed in standalone method
-  my $data = $self->_normalize_populate_args(@_);
+  my ($data, $guard);
 
-  return unless @$data;
+  # this is naive and just a quick check
+  # the types will need to be checked more thoroughly when the
+  # multi-source populate gets added
+  if (ref $_[0] eq 'ARRAY') {
+    return unless @{$_[0]};
 
-  if(defined wantarray) {
-    my @created = map { $self->new_result($_)->insert } @$data;
-    return wantarray ? @created : \@created;
+    $data = $_[0] if (ref $_[0][0] eq 'HASH' or ref $_[0][0] eq 'ARRAY');
   }
-  else {
-    my $first = $data->[0];
 
-    # if a column is a registered relationship, and is a non-blessed hash/array, consider
-    # it relationship data
-    my (@rels, @columns);
-    my $rsrc = $self->result_source;
-    my $rels = { map { $_ => $rsrc->relationship_info($_) } $rsrc->relationships };
-    for (keys %$first) {
-      my $ref = ref $first->{$_};
-      $rels->{$_} && ($ref eq 'ARRAY' or $ref eq 'HASH')
-        ? push @rels, $_
-        : push @columns, $_
+  $self->throw_exception('Populate expects an arrayref of hashrefs or arrayref of arrayrefs')
+    unless $data;
+
+  # FIXME - no cref handling
+  # At this point assume either hashes or arrays
+
+  if(defined wantarray) {
+    my @results;
+
+    $guard = $self->result_source->schema->storage->txn_scope_guard
+      if ( @$data > 2 or ( @$data == 2 and ref $data->[0] eq 'ARRAY' ) );
+
+    if (ref $data->[0] eq 'ARRAY') {
+      @results = map
+        { my $vals = $_; $self->new_result({ map { $data->[0][$_] => $vals->[$_] } 0..$#{$data->[0]} })->insert }
+        @{$data}[1 .. $#$data]
       ;
     }
+    else {
+      @results = map { $self->new_result($_)->insert } @$data;
+    }
+
+    $guard->commit if $guard;
+    return wantarray ? @results : \@results;
+  }
+
+  # we have to deal with *possibly incomplete* related data
+  # this means we have to walk the data structure twice
+  # whether we want this or not
+  # jnap, I hate you ;)
+  my $rsrc = $self->result_source;
+  my $rel_info = { map { $_ => $rsrc->relationship_info($_) } $rsrc->relationships };
+
+  my ($colinfo, $colnames, $slices_with_rels);
+  my $data_start = 0;
+
+  DATA_SLICE:
+  for my $i (0 .. $#$data) {
+
+    my $current_slice_seen_rel_infos;
 
-    my @pks = $rsrc->primary_columns;
+### Determine/Supplement collists
+### BEWARE - This is a hot piece of code, a lot of weird idioms were used
+    if( ref $data->[$i] eq 'ARRAY' ) {
 
-    ## do the belongs_to relationships
-    foreach my $index (0..$#$data) {
+      # positional(!) explicit column list
+      if ($i == 0) {
 
-      # delegate to create() for any dataset without primary keys with specified relationships
-      if (grep { !defined $data->[$index]->{$_} } @pks ) {
-        for my $r (@rels) {
-          if (grep { ref $data->[$index]{$r} eq $_ } qw/HASH ARRAY/) {  # a related set must be a HASH or AoH
-            my @ret = $self->populate($data);
-            return;
+        $colinfo->{$data->[0][$_]} = { pos => $_, name => $data->[0][$_] } and push @$colnames, $data->[0][$_]
+          for 0 .. $#{$data->[0]};
+
+        $data_start = 1;
+
+        next DATA_SLICE;
+      }
+      else {
+        for (values %$colinfo) {
+          if ($_->{is_rel} ||= (
+            $rel_info->{$_->{name}}
+              and
+            (
+              ref $data->[$i][$_->{pos}] eq 'ARRAY'
+                or
+              ref $data->[$i][$_->{pos}] eq 'HASH'
+                or
+              ( defined blessed $data->[$i][$_->{pos}] and $data->[$i][$_->{pos}]->isa('DBIx::Class::Row') )
+            )
+              and
+            1
+          )) {
+
+            # moar sanity check... sigh
+            for ( ref $data->[$i][$_->{pos}] eq 'ARRAY' ? @{$data->[$i][$_->{pos}]} : $data->[$i][$_->{pos}] ) {
+              if ( defined blessed $_ and $_->isa('DBIx::Class::Row' ) ) {
+                carp_unique("Fast-path populate() with supplied related objects is not possible - falling back to regular create()");
+                return my $throwaway = $self->populate(@_);
+              }
+            }
+
+            push @$current_slice_seen_rel_infos, $rel_info->{$_->{name}};
           }
         }
       }
 
-      foreach my $rel (@rels) {
-        next unless ref $data->[$index]->{$rel} eq "HASH";
-        my $result = $self->related_resultset($rel)->new_result($data->[$index]->{$rel})->insert;
-        my (undef, $reverse_relinfo) = %{$rsrc->reverse_relationship_info($rel)};
-        my $related = $result->result_source->_resolve_condition(
-          $reverse_relinfo->{cond},
-          $self,
-          $result,
-          $rel,
-        );
-
-        delete $data->[$index]->{$rel};
-        $data->[$index] = {%{$data->[$index]}, %$related};
-
-        push @columns, keys %$related if $index == 0;
+     if ($current_slice_seen_rel_infos) {
+        push @$slices_with_rels, { map { $colnames->[$_] => $data->[$i][$_] } 0 .. $#$colnames };
+
+        # this is needed further down to decide whether or not to fallback to create()
+        $colinfo->{$colnames->[$_]}{seen_null} ||= ! defined $data->[$i][$_]
+          for 0 .. $#$colnames;
       }
     }
+    elsif( ref $data->[$i] eq 'HASH' ) {
 
-    ## inherit the data locked in the conditions of the resultset
-    my ($rs_data) = $self->_merge_with_rscond({});
-    delete @{$rs_data}{@columns};
-
-    ## do bulk insert on current row
-    $rsrc->storage->insert_bulk(
-      $rsrc,
-      [@columns, keys %$rs_data],
-      [ map { [ @$_{@columns}, values %$rs_data ] } @$data ],
-    );
+      for ( sort keys %{$data->[$i]} ) {
 
-    ## do the has_many relationships
-    foreach my $item (@$data) {
+        $colinfo->{$_} ||= do {
 
-      my $main_row;
+          $self->throw_exception("Column '$_' must be present in supplied explicit column list")
+            if $data_start; # it will be 0 on AoH, 1 on AoA
 
-      foreach my $rel (@rels) {
-        next unless ref $item->{$rel} eq "ARRAY" && @{ $item->{$rel} };
+          push @$colnames, $_;
 
-        $main_row ||= $self->new_result({map { $_ => $item->{$_} } @pks});
+          # RV
+          { pos => $#$colnames, name => $_ }
+        };
 
-        my $child = $main_row->$rel;
+        if ($colinfo->{$_}{is_rel} ||= (
+          $rel_info->{$_}
+            and
+          (
+            ref $data->[$i]{$_} eq 'ARRAY'
+              or
+            ref $data->[$i]{$_} eq 'HASH'
+              or
+            ( defined blessed $data->[$i]{$_} and $data->[$i]{$_}->isa('DBIx::Class::Row') )
+          )
+            and
+          1
+        )) {
+
+          # moar sanity check... sigh
+          for ( ref $data->[$i]{$_} eq 'ARRAY' ? @{$data->[$i]{$_}} : $data->[$i]{$_} ) {
+            if ( defined blessed $_ and $_->isa('DBIx::Class::Row' ) ) {
+              carp_unique("Fast-path populate() with supplied related objects is not possible - falling back to regular create()");
+              return my $throwaway = $self->populate(@_);
+            }
+          }
 
-        my $related = $child->result_source->_resolve_condition(
-          $rels->{$rel}{cond},
-          $child,
-          $main_row,
-          $rel,
-        );
+          push @$current_slice_seen_rel_infos, $rel_info->{$_};
+        }
+      }
 
-        my @rows_to_add = ref $item->{$rel} eq 'ARRAY' ? @{$item->{$rel}} : ($item->{$rel});
-        my @populate = map { {%$_, %$related} } @rows_to_add;
+      if ($current_slice_seen_rel_infos) {
+        push @$slices_with_rels, $data->[$i];
 
-        $child->populate( \@populate );
+        # this is needed further down to decide whether or not to fallback to create()
+        $colinfo->{$_}{seen_null} ||= ! defined $data->[$i]{$_}
+          for keys %{$data->[$i]};
       }
     }
+    else {
+      $self->throw_exception('Unexpected populate() data structure member type: ' . ref $data->[$i] );
+    }
+
+    if ( grep
+      { $_->{attrs}{is_depends_on} }
+      @{ $current_slice_seen_rel_infos || [] }
+    ) {
+      carp_unique("Fast-path populate() of belongs_to relationship data is not possible - falling back to regular create()");
+      return my $throwaway = $self->populate(@_);
+    }
   }
-}
 
+  if( $slices_with_rels ) {
 
-# populate() arguments went over several incarnations
-# What we ultimately support is AoH
-sub _normalize_populate_args {
-  my ($self, $arg) = @_;
+    # need to exclude the rel "columns"
+    $colnames = [ grep { ! $colinfo->{$_}{is_rel} } @$colnames ];
 
-  if (ref $arg eq 'ARRAY') {
-    if (!@$arg) {
-      return [];
-    }
-    elsif (ref $arg->[0] eq 'HASH') {
-      return $arg;
+    # extra sanity check - ensure the main source is in fact identifiable
+    # the localizing of nullability is insane, but oh well... the use-case is legit
+    my $ci = $rsrc->columns_info($colnames);
+
+    $ci->{$_} = { %{$ci->{$_}}, is_nullable => 0 }
+      for grep { ! $colinfo->{$_}{seen_null} } keys %$ci;
+
+    unless( $rsrc->_identifying_column_set($ci) ) {
+      carp_unique("Fast-path populate() of non-uniquely identifiable rows with related data is not possible - falling back to regular create()");
+      return my $throwaway = $self->populate(@_);
     }
-    elsif (ref $arg->[0] eq 'ARRAY') {
-      my @ret;
-      my @colnames = @{$arg->[0]};
-      foreach my $values (@{$arg}[1 .. $#$arg]) {
-        push @ret, { map { $colnames[$_] => $values->[$_] } (0 .. $#colnames) };
+  }
+
+### inherit the data locked in the conditions of the resultset
+  my ($rs_data) = $self->_merge_with_rscond({});
+  delete @{$rs_data}{@$colnames};  # passed-in stuff takes precedence
+
+  # if anything left - decompose rs_data
+  my $rs_data_vals;
+  if (keys %$rs_data) {
+     push @$rs_data_vals, $rs_data->{$_}
+      for sort keys %$rs_data;
+  }
+
+### start work
+  $guard = $rsrc->schema->storage->txn_scope_guard
+    if $slices_with_rels;
+
+### main source data
+  # FIXME - need to switch entirely to a coderef-based thing,
+  # so that large sets aren't copied several times... I think
+  $rsrc->storage->insert_bulk(
+    $rsrc,
+    [ @$colnames, sort keys %$rs_data ],
+    [ map {
+      ref $data->[$_] eq 'ARRAY'
+      ? (
+          $slices_with_rels ? [ @{$data->[$_]}[0..$#$colnames], @{$rs_data_vals||[]} ]  # the collist changed
+        : $rs_data_vals     ? [ @{$data->[$_]}, @$rs_data_vals ]
+        :                     $data->[$_]
+      )
+      : [ @{$data->[$_]}{@$colnames}, @{$rs_data_vals||[]} ]
+    } $data_start .. $#$data ],
+  );
+
+### do the children relationships
+  if ( $slices_with_rels ) {
+    my @rels = grep { $colinfo->{$_}{is_rel} } keys %$colinfo
+      or die 'wtf... please report a bug with DBIC_TRACE=1 output (stacktrace)';
+
+    for my $sl (@$slices_with_rels) {
+
+      my ($main_proto, $main_proto_rs);
+      for my $rel (@rels) {
+        next unless defined $sl->{$rel};
+
+        $main_proto ||= {
+          %$rs_data,
+          (map { $_ => $sl->{$_} } @$colnames),
+        };
+
+        unless (defined $colinfo->{$rel}{rs}) {
+
+          $colinfo->{$rel}{rs} = $rsrc->related_source($rel)->resultset;
+
+          $colinfo->{$rel}{fk_map} = { reverse %{ $rsrc->_resolve_relationship_condition(
+            rel_name => $rel,
+            self_alias => "\xFE", # irrelevant
+            foreign_alias => "\xFF", # irrelevant
+          )->{identity_map} || {} } };
+
+        }
+
+        $colinfo->{$rel}{rs}->search({ map # only so that we inherit them values properly, no actual search
+          {
+            $_ => { '=' =>
+              ( $main_proto_rs ||= $rsrc->resultset->search($main_proto) )
+                ->get_column( $colinfo->{$rel}{fk_map}{$_} )
+                 ->as_query
+            }
+          }
+          keys %{$colinfo->{$rel}{fk_map}}
+        })->populate( ref $sl->{$rel} eq 'ARRAY' ? $sl->{$rel} : [ $sl->{$rel} ] );
+
+        1;
       }
-      return \@ret;
     }
   }
 
-  $self->throw_exception('Populate expects an arrayref of hashrefs or arrayref of arrayrefs');
+  $guard->commit if $guard;
 }
 
 =head2 pager
diff --git a/t/100populate.t b/t/100populate.t

index 4a3f0ac..16c1e6d 100644 (file)
--- a/t/100populate.t
+++ b/t/100populate.t
@@ -88,6 +88,15 @@ is($link4->id, 4, 'Link 4 id');
 is($link4->url, undef, 'Link 4 url');
 is($link4->title, 'dtitle', 'Link 4 title');
 
+## variable size dataset
+@links = $schema->populate('Link', [
+[ qw/id title url/ ],
+[ 41 ],
+[ 42, undef, 'url42' ],
+]);
+is(scalar @links, 2);
+is($links[0]->url, undef);
+is($links[1]->url, 'url42');
 
 ## make sure populate -> insert_bulk honors fields/orders in void context
 ## schema order
@@ -120,6 +129,63 @@ is($link7->id, 7, 'Link 7 id');
 is($link7->url, undef, 'Link 7 url');
 is($link7->title, 'gtitle', 'Link 7 title');
 
+## variable size dataset in void ctx
+$schema->populate('Link', [
+[ qw/id title url/ ],
+[ 71 ],
+[ 72, undef, 'url72' ],
+]);
+@links = $schema->resultset('Link')->search({ id => [71, 72]}, { order_by => 'id' })->all;
+is(scalar @links, 2);
+is($links[0]->url, undef);
+is($links[1]->url, 'url72');
+
+## variable size dataset in void ctx, hash version
+$schema->populate('Link', [
+  { id => 73 },
+  { id => 74, title => 't74' },
+  { id => 75, url => 'u75' },
+]);
+@links = $schema->resultset('Link')->search({ id => [73..75]}, { order_by => 'id' })->all;
+is(scalar @links, 3);
+is($links[0]->url, undef);
+is($links[0]->title, undef);
+is($links[1]->url, undef);
+is($links[1]->title, 't74');
+is($links[2]->url, 'u75');
+is($links[2]->title, undef);
+
+## Make sure the void ctx trace is sane
+{
+  for (
+    [
+      [ qw/id title url/ ],
+      [ 81 ],
+      [ 82, 't82' ],
+      [ 83, undef, 'url83' ],
+    ],
+    [
+      { id => 91 },
+      { id => 92, title => 't92' },
+      { id => 93, url => 'url93' },
+    ]
+  ) {
+    $schema->is_executed_sql_bind(
+      sub {
+        $schema->populate('Link', $_);
+      },
+      [
+        [ 'BEGIN' ],
+        [
+          'INSERT INTO link( id, title, url ) VALUES( ?, ?, ? )',
+          "__BULK_INSERT__"
+        ],
+        [ 'COMMIT' ],
+      ]
+    );
+  }
+}
+
 # populate with literals
 {
   my $rs = $schema->resultset('Link');
@@ -419,7 +485,8 @@ warnings_like {
     : (qr/\QPOSSIBLE *PAST* DATA CORRUPTION detected \E.+\QTrigger condition encountered at @{[ __FILE__ ]} line\E \d/) x 2
 ], 'Data integrity warnings as planned';
 
-lives_ok {
+$schema->is_executed_sql_bind(
+  sub {
    $schema->resultset('TwoKeys')->populate([{
       artist => 1,
       cd     => 5,
@@ -437,7 +504,26 @@ lives_ok {
             autopilot => 'b',
       }]
    }])
-} 'multicol-PK has_many populate works';
+  },
+  [
+    [ 'BEGIN' ],
+    [ 'INSERT INTO twokeys ( artist, cd)
+        VALUES ( ?, ? )',
+      '__BULK_INSERT__'
+    ],
+    [ 'INSERT INTO fourkeys_to_twokeys ( autopilot, f_bar, f_foo, f_goodbye, f_hello, t_artist, t_cd)
+        VALUES (
+          ?, ?, ?, ?, ?,
+          ( SELECT me.artist FROM twokeys me WHERE artist = ? AND cd = ? ),
+          ( SELECT me.cd FROM twokeys me WHERE artist = ? AND cd = ? )
+        )
+      ',
+      '__BULK_INSERT__'
+    ],
+    [ 'COMMIT' ],
+  ],
+  'multicol-PK has_many populate expected trace'
+);
 
 lives_ok ( sub {
   $schema->populate('CD', [
diff --git a/t/101populate_rs.t b/t/101populate_rs.t

index 8a0eea4..a592e56 100644 (file)
--- a/t/101populate_rs.t
+++ b/t/101populate_rs.t
@@ -12,6 +12,7 @@ use strict;
 use warnings;
 
 use Test::More;
+use Test::Warn;
 use lib qw(t/lib);
 use DBICTest;
 
@@ -37,10 +38,10 @@ ok( $cd_rs, 'Got Good CD Resultset');
 
 SCHEMA_POPULATE1: {
 
-  ## Test to make sure that the old $schema->populate is using the new method
-  ## for $resultset->populate when in void context and with sub objects.
+  # throw a monkey wrench
+  my $post_jnap_monkeywrench = $schema->resultset('Artist')->find(1)->update({ name => undef });
 
-  $schema->populate('Artist', [
+  warnings_exist { $schema->populate('Artist', [
 
     [qw/name cds/],
     ["001First Artist", [
@@ -55,13 +56,13 @@ SCHEMA_POPULATE1: {
     [undef, [
       {title=>"004Title1", year=>2010}
     ]],
-  ]);
+  ]) } qr/\QFast-path populate() of non-uniquely identifiable rows with related data is not possible/;
 
   isa_ok $schema, 'DBIx::Class::Schema';
 
-  my ($undef, $artist1, $artist2, $artist3 ) = $schema->resultset('Artist')->search({
+  my ( $preexisting_undef, $artist1, $artist2, $artist3, $undef ) = $schema->resultset('Artist')->search({
     name=>["001First Artist","002Second Artist","003Third Artist", undef]},
-    {order_by=>'name ASC'})->all;
+    {order_by => { -asc => 'artistid' }})->all;
 
   isa_ok  $artist1, 'DBICTest::Artist';
   isa_ok  $artist2, 'DBICTest::Artist';
@@ -78,6 +79,8 @@ SCHEMA_POPULATE1: {
   ok $artist3->cds->count eq 1, "Got Right number of CDs for Artist3";
   ok $undef->cds->count eq 1, "Got Right number of CDs for Artist4";
 
+  $post_jnap_monkeywrench->delete;
+
   ARTIST1CDS: {
 
     my ($cd1, $cd2, $cd3) = $artist1->cds->search(undef, {order_by=>'year ASC'});
@@ -475,7 +478,9 @@ VOID_CONTEXT: {
       },
     ];
 
-    $cd_rs->populate($cds);
+    warnings_exist {
+      $cd_rs->populate($cds)
+    } qr/\QFast-path populate() of belongs_to relationship data is not possible/;
 
     my ($cdA, $cdB) = $cd_rs->search(
       {title=>[sort map {$_->{title}} @$cds]},
@@ -515,7 +520,9 @@ VOID_CONTEXT: {
       },
     ];
 
-    $cd_rs->populate($cds);
+    warnings_exist {
+      $cd_rs->populate($cds);
+    } qr/\QFast-path populate() of belongs_to relationship data is not possible/;
 
     my ($cdA, $cdB, $cdC) = $cd_rs->search(
       {title=>[sort map {$_->{title}} @$cds]},
diff --git a/t/multi_create/standard.t b/t/multi_create/standard.t

index 6c1efd8..54cf04e 100644 (file)
--- a/t/multi_create/standard.t
+++ b/t/multi_create/standard.t
@@ -3,11 +3,10 @@ use warnings;
 
 use Test::More;
 use Test::Exception;
+use Test::Warn;
 use lib qw(t/lib);
 use DBICTest;
 
-plan tests => 91;
-
 my $schema = DBICTest->init_schema();
 
 lives_ok ( sub {
@@ -403,8 +402,11 @@ lives_ok ( sub {
 
   $kurt_cobain->{cds} = [ $in_utero ];
 
+  warnings_exist {
+    $schema->resultset('Artist')->populate([ $kurt_cobain ]);
+  }  qr/\QFast-path populate() with supplied related objects is not possible/;
+
 
-  $schema->resultset('Artist')->populate([ $kurt_cobain ]); # %)
   my $artist = $schema->resultset('Artist')->find({name => 'Kurt Cobain'});
 
   is($artist->name, 'Kurt Cobain', 'Artist insertion ok');
@@ -468,4 +470,4 @@ lives_ok ( sub {
   is ($m2m_cd->first->producers->first->name, 'Cowboy Neal', 'Correct producer row created');
 }, 'Test multi create over many_to_many');
 
-1;
+done_testing;
Changes		patch \| blob \| blame \| history
lib/DBIx/Class/Relationship/BelongsTo.pm		patch \| blob \| blame \| history
lib/DBIx/Class/ResultSet.pm		patch \| blob \| blame \| history
t/100populate.t		patch \| blob \| blame \| history
t/101populate_rs.t		patch \| blob \| blame \| history
t/multi_create/standard.t		patch \| blob \| blame \| history