From: Peter Rabbitson Date: Sat, 10 Dec 2011 23:51:12 +0000 (+0100) Subject: Overhaul of variable \[] handling in populate, and a lot of extra tests X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=a9bac98fc664ce08e085b230a3a8d79deee44727;p=dbsrgits%2FDBIx-Class-Historic.git Overhaul of variable \[] handling in populate, and a lot of extra tests Now things like multi-value \[ ' ? || ?', [...], [...] ] and populate datasets where the values of the \[] change per row work correctly --- diff --git a/Changes b/Changes index 702927c..2b8aac3 100644 --- a/Changes +++ b/Changes @@ -7,6 +7,7 @@ Revision history for DBIx::Class * Fixes - Fix SkipFirst and FirstSkip limit dialects (Informix and Firebird) + - A number of corner case fixes of void context populate() with \[] * Misc - Codebase is now trailing-whitespace-free diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm index 607b1ef..e9fab0b 100644 --- a/lib/DBIx/Class/Storage/DBI.pm +++ b/lib/DBIx/Class/Storage/DBI.pm @@ -14,6 +14,7 @@ use List::Util qw/first/; use Sub::Name 'subname'; use Try::Tiny; use overload (); +use Data::Compare (); # no imports!!! guard against insane architecture use namespace::clean; # default cursor class, overridable in connect_info attributes @@ -1751,9 +1752,12 @@ sub insert { sub insert_bulk { my ($self, $source, $cols, $data) = @_; + my @col_range = (0..$#$cols); + # FIXME - perhaps this is not even needed? does DBI stringify? # # forcibly stringify whatever is stringifiable + # ResultSet::populate() hands us a copy - safe to mangle for my $r (0 .. $#$data) { for my $c (0 .. $#{$data->[$r]}) { $data->[$r][$c] = "$data->[$r][$c]" @@ -1761,116 +1765,174 @@ sub insert_bulk { } } - # check the data for consistency - # report a sensible error on bad data + my $colinfo_cache = {}; # since we will run _resolve_bindattrs on the same $source a lot + + # get a slice type index based on first row of data + # a "column" in this context may refer to more than one bind value + # e.g. \[ '?, ?', [...], [...] ] + # + # construct the value type index - a description of values types for every + # per-column slice of $data: + # + # nonexistent - nonbind literal + # 0 - regular value + # [] of bindattrs - resolved attribute(s) of bind(s) passed via literal+bind \[] combo # - # also create a list of dynamic binds (ones that will be changing - # for each row) - my $dyn_bind_idx; - for my $col_idx (0..$#$cols) { + # also construct the column hash to pass to the SQL generator. For plain + # (non literal) values - convert the members of the first row into a + # literal+bind combo, with extra positional info in the bind attr hashref. + # This will allow us to match the order properly, and is so contrived + # because a user-supplied literal/bind (or something else specific to a + # resultsource and/or storage driver) can inject extra binds along the + # way, so one can't rely on "shift positions" ordering at all. Also we + # can't just hand SQLA a set of some known "values" (e.g. hashrefs that + # can be later matched up by address), because we want to supply a real + # value on which perhaps e.g. datatype checks will be performed + my ($proto_data, $value_type_idx); + for my $i (@col_range) { + my $colname = $cols->[$i]; + if (ref $data->[0][$i] eq 'SCALAR') { + # no bind value at all - no type + + $proto_data->{$colname} = $data->[0][$i]; + } + elsif (ref $data->[0][$i] eq 'REF' and ref ${$data->[0][$i]} eq 'ARRAY' ) { + # repack, so we don't end up mangling the original \[] + my ($sql, @bind) = @${$data->[0][$i]}; - # the first "row" is used as a point of reference - my $reference_val = $data->[0][$col_idx]; - my $is_literal = ref $reference_val eq 'SCALAR'; - my $is_literal_bind = ( !$is_literal and ( - ref $reference_val eq 'REF' - and - ref $$reference_val eq 'ARRAY' - ) ); - - $dyn_bind_idx->{$col_idx} = 1 - if (!$is_literal and !$is_literal_bind); - - # use a closure for convenience (less to pass) - my $bad_slice = sub { - my ($msg, $slice_idx) = @_; - $self->throw_exception(sprintf "%s for column '%s' in populate slice:\n%s", - $msg, - $cols->[$col_idx], - do { - require Data::Dumper::Concise; - local $Data::Dumper::Maxdepth = 2; - Data::Dumper::Concise::Dumper ({ - map { $cols->[$_] => - $data->[$slice_idx][$_] - } (0 .. $#$cols) - }), - } + # normalization of user supplied stuff + my $resolved_bind = $self->_resolve_bindattrs( + $source, \@bind, $colinfo_cache, ); - }; + + # store value-less (attrs only) bind info - we will be comparing all + # supplied binds against this for sanity + $value_type_idx->{$i} = [ map { $_->[0] } @$resolved_bind ]; + + $proto_data->{$colname} = \[ $sql, map { [ + # inject slice order to use for $proto_bind construction + { %{$resolved_bind->[$_][0]}, _bind_data_slice_idx => $i } + => + $resolved_bind->[$_][1] + ] } (0 .. $#bind) + ]; + } + else { + $value_type_idx->{$i} = 0; + + $proto_data->{$colname} = \[ '?', [ + { dbic_colname => $colname, _bind_data_slice_idx => $i } + => + $data->[0][$i] + ] ]; + } + } + + my ($sql, $proto_bind) = $self->_prep_for_execute ( + 'insert', + $source, + [ $proto_data ], + ); + + if (! @$proto_bind and keys %$value_type_idx) { + # if the bindlist is empty and we had some dynamic binds, this means the + # storage ate them away (e.g. the NoBindVars component) and interpolated + # them directly into the SQL. This obviously can't be good for multi-inserts + $self->throw_exception('Cannot insert_bulk without support for placeholders'); + } + + # sanity checks + # FIXME - devise a flag "no babysitting" or somesuch to shut this off + # + # use an error reporting closure for convenience (less to pass) + my $bad_slice_report_cref = sub { + my ($msg, $r_idx, $c_idx) = @_; + $self->throw_exception(sprintf "%s for column '%s' in populate slice:\n%s", + $msg, + $cols->[$c_idx], + do { + require Data::Dumper::Concise; + local $Data::Dumper::Maxdepth = 5; + Data::Dumper::Concise::Dumper ({ + map { $cols->[$_] => + $data->[$r_idx][$_] + } @col_range + }), + } + ); + }; + + for my $col_idx (@col_range) { + my $reference_val = $data->[0][$col_idx]; for my $row_idx (1..$#$data) { # we are comparing against what we got from [0] above, hence start from 1 my $val = $data->[$row_idx][$col_idx]; - if ($is_literal) { + if (! exists $value_type_idx->{$col_idx}) { # literal no binds if (ref $val ne 'SCALAR') { - $bad_slice->( + $bad_slice_report_cref->( "Incorrect value (expecting SCALAR-ref \\'$$reference_val')", - $row_idx + $row_idx, + $col_idx, ); } elsif ($$val ne $$reference_val) { - $bad_slice->( + $bad_slice_report_cref->( "Inconsistent literal SQL value (expecting \\'$$reference_val')", - $row_idx + $row_idx, + $col_idx, ); } } - elsif ($is_literal_bind) { + elsif (! $value_type_idx->{$col_idx} ) { # regular non-literal value + if (ref $val eq 'SCALAR' or (ref $val eq 'REF' and ref $$val eq 'ARRAY') ) { + $bad_slice_report_cref->("Literal SQL found where a plain bind value is expected", $row_idx, $col_idx); + } + } + else { # binds from a \[], compare type and attrs if (ref $val ne 'REF' or ref $$val ne 'ARRAY') { - $bad_slice->( + $bad_slice_report_cref->( "Incorrect value (expecting ARRAYREF-ref \\['${$reference_val}->[0]', ... ])", - $row_idx + $row_idx, + $col_idx, ); } - elsif (${$val}->[0] ne ${$reference_val}->[0]) { - $bad_slice->( - "Inconsistent literal SQL-bind value (expecting \\['${$reference_val}->[0]', ... ])", - $row_idx - ); - } - } - elsif (ref $val) { - if (ref $val eq 'SCALAR' or (ref $val eq 'REF' and ref $$val eq 'ARRAY') ) { - $bad_slice->("Literal SQL found where a plain bind value is expected", $row_idx); - } - else { - $bad_slice->("$val reference found where bind expected", $row_idx); + # start drilling down and bail out early on identical refs + elsif ( + $reference_val != $val + or + $$reference_val != $$val + ) { + if (${$val}->[0] ne ${$reference_val}->[0]) { + $bad_slice_report_cref->( + "Inconsistent literal/bind SQL (expecting \\['${$reference_val}->[0]', ... ])", + $row_idx, + $col_idx, + ); + } + # need to check the bind attrs - a bind will happen only once for + # the entire dataset, so any changes further down will be ignored. + elsif (! Data::Compare::Compare( + $value_type_idx->{$col_idx}, + [ + map + { $_->[0] } + @{$self->_resolve_bindattrs( + $source, [ @{$$val}[1 .. $#$$val] ], $colinfo_cache, + )} + ], + )) { + $bad_slice_report_cref->( + 'Differing bind attributes on literal/bind values not supported', + $row_idx, + $col_idx, + ); + } } } } } - # Get the sql with bind values interpolated where necessary. For dynamic - # binds convert the values of the first row into a literal+bind combo, with - # extra positional info in the bind attr hashref. This will allow us to match - # the order properly, and is so contrived because a user-supplied literal - # bind (or something else specific to a resultsource and/or storage driver) - # can inject extra binds along the way, so one can't rely on "shift - # positions" ordering at all. Also we can't just hand SQLA a set of some - # known "values" (e.g. hashrefs that can be later matched up by address), - # because we want to supply a real value on which perhaps e.g. datatype - # checks will be performed - my ($sql, $proto_bind) = $self->_prep_for_execute ( - 'insert', - $source, - [ { map { $cols->[$_] => $dyn_bind_idx->{$_} - ? \[ '?', [ - { dbic_colname => $cols->[$_], _bind_data_slice_idx => $_ } - => - $data->[0][$_] - ] ] - : $data->[0][$_] - } (0..$#$cols) } ], - ); - - if (! @$proto_bind and keys %$dyn_bind_idx) { - # if the bindlist is empty and we had some dynamic binds, this means the - # storage ate them away (e.g. the NoBindVars component) and interpolated - # them directly into the SQL. This obviosly can't be good for multi-inserts - $self->throw_exception('Cannot insert_bulk without support for placeholders'); - } - # neither _dbh_execute_for_fetch, nor _dbh_execute_inserts_with_no_binds # are atomic (even if execute_for_fetch is a single call). Thus a safety # scope guard @@ -1921,23 +1983,30 @@ sub _dbh_execute_for_fetch { ) if defined $bind_attrs->[$i]; } - my $data_slice_idx = [ map { - ( - ref $proto_bind->[$_][0] eq 'HASH' - and - exists $proto_bind->[$_][0]{_bind_data_slice_idx} - ) ? $proto_bind->[$_][0]{_bind_data_slice_idx} : undef; - } @idx_range ]; + # At this point $data slots named in the _bind_data_slice_idx of + # each piece of $proto_bind are either \[]s or plain values to be + # passed in. Construct the dispensing coderef. *NOTE* the order + # of $data will differ from this of the ?s in the SQL (due to + # alphabetical ordering by colname). We actually do want to + # preserve this behavior so that prepare_cached has a better + # chance of matching on unrelated calls + my %data_reorder = map { $proto_bind->[$_][0]{_bind_data_slice_idx} => $_ } @idx_range; my $fetch_row_idx = -1; # saner loop this way my $fetch_tuple = sub { return undef if ++$fetch_row_idx > $#$data; - return [ map { - defined $data_slice_idx->[$_] - ? $data->[$fetch_row_idx][$data_slice_idx->[$_]] - : $proto_bind->[$_][1] - } @idx_range ]; + return [ map + { (ref $_ eq 'REF' and ref $$_ eq 'ARRAY') + ? map { $_->[-1] } @{$$_}[1 .. $#$$_] + : $_ + } + map + { $data->[$fetch_row_idx][$_]} + sort + { $data_reorder{$a} <=> $data_reorder{$b} } + keys %data_reorder + ]; }; my $tuple_status = []; diff --git a/t/100populate.t b/t/100populate.t index c5c3868..822ad93 100644 --- a/t/100populate.t +++ b/t/100populate.t @@ -154,6 +154,55 @@ is($link7->title, 'gtitle', 'Link 7 title'); $rs->delete; } +# populate with literal+bind +{ + my $rs = $schema->resultset('Link'); + $rs->delete; + + # test insert_bulk with all literal/bind sql + $rs->populate([ + (+{ + url => \['?', [ {} => 'cpan.org' ] ], + title => \['?', [ {} => "The 'best of' cpan" ] ], + }) x 5 + ]); + + is((grep { + $_->url eq 'cpan.org' && + $_->title eq "The 'best of' cpan", + } $rs->all), 5, 'populate with all literal/bind'); + + $rs->delete; + + # test insert_bulk with mix literal and literal/bind + $rs->populate([ + (+{ + url => \"'cpan.org'", + title => \['?', [ {} => "The 'best of' cpan" ] ], + }) x 5 + ]); + + is((grep { + $_->url eq 'cpan.org' && + $_->title eq "The 'best of' cpan", + } $rs->all), 5, 'populate with all literal/bind SQL'); + + $rs->delete; + + # test mixed binds with literal sql/bind + + $rs->populate([ map { +{ + url => \[ '? || ?', [ {} => 'cpan.org_' ], [ undef, $_ ] ], + title => "The 'best of' cpan", + } } (1 .. 5) ]); + + for (1 .. 5) { + ok($rs->find({ url => "cpan.org_$_" }), "Row $_ correctly created with dynamic literal/bind populate" ); + } + + $rs->delete; +} + my $rs = $schema->resultset('Artist'); $rs->delete; throws_ok { @@ -219,6 +268,45 @@ throws_ok { ]); } qr/Inconsistent literal SQL value/, 'literal sql must be the same in all slices'; +throws_ok { + $rs->populate([ + { + artistid => 1, + name => \['?', [ {} => 'foo' ] ], + }, + { + artistid => 2, + name => \"'bar'", + } + ]); +} qr/\QIncorrect value (expecting ARRAYREF-ref/, 'literal where literal+bind expected throws'; + +throws_ok { + $rs->populate([ + { + artistid => 1, + name => \['?', [ { sqlt_datatype => 'foooo' } => 'foo' ] ], + }, + { + artistid => 2, + name => \['?', [ {} => 'foo' ] ], + } + ]); +} qr/\QDiffering bind attributes on literal\/bind values not supported for column 'name'/, 'literal+bind with differing attrs throws'; + +lives_ok { + $rs->populate([ + { + artistid => 1, + name => \['?', [ undef, 'foo' ] ], + }, + { + artistid => 2, + name => \['?', [ {} => 'bar' ] ], + } + ]); +} 'literal+bind with semantically identical attrs works after normalization'; + # the stringification has nothing to do with the artist name # this is solely for testing consistency my $fn = Path::Class::File->new ('somedir/somefilename.tmp'); diff --git a/t/72pg_bytea.t b/t/72pg_bytea.t index 234da7e..4ff3e36 100644 --- a/t/72pg_bytea.t +++ b/t/72pg_bytea.t @@ -105,7 +105,32 @@ my $new; ok($new->bytea eq $big_long_string, 'bytea value made it to db'); } +# test inserting a row via populate() (bindtype propagation through execute_for_fetch) +# use a new $dbh to ensure no leakage due to prepare_cached +{ + my $cnt = 4; + + $schema->storage->_dbh(undef); + my $rs = $schema->resultset('BindType'); + $rs->delete; + + $rs->populate([ + [qw/id bytea/], + map { [ + \[ '?', [ {} => $_ ] ], + "pop_${_}_" . $big_long_string, + ]} (1 .. $cnt) + ]); + + is($rs->count, $cnt, 'All rows were correctly inserted'); + for (1..$cnt) { + my $r = $rs->find({ bytea => "pop_${_}_" . $big_long_string }); + is ($r->id, $_, "Row $_ found after find() on the blob"); + + } +} + done_testing; -eval { $dbh->do("DROP TABLE bindtype_test") }; +eval { $schema->storage->dbh_do(sub { $_[1]->do("DROP TABLE bindtype_test") } ) };