From: Rafael Kitover Date: Mon, 14 Sep 2009 13:09:21 +0000 (+0000) Subject: sybase bulk API support stuff (no blobs yet, coming soon...) X-Git-Tag: v0.08112~14^2~12 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=c080561b566a9d8764ef64b3b7d61c130139d9b9;p=dbsrgits%2FDBIx-Class.git sybase bulk API support stuff (no blobs yet, coming soon...) --- diff --git a/Changes b/Changes index 28d0239..de29178 100644 --- a/Changes +++ b/Changes @@ -24,6 +24,7 @@ Revision history for DBIx::Class when needed - Support for interpolated variables with proper quoting when connecting to an older Sybase and/or via FreeTDS + - Bulk API support for populate() 0.08111 2009-09-06 21:58:00 (UTC) - The hashref to connection_info now accepts a 'dbh_maker' diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm index 4990e78..e4e6728 100644 --- a/lib/DBIx/Class/Storage/DBI.pm +++ b/lib/DBIx/Class/Storage/DBI.pm @@ -1328,7 +1328,7 @@ sub insert { ## scalar refs, or at least, all the same type as the first set, the statement is ## only prepped once. sub insert_bulk { - my ($self, $source, $cols, $data) = @_; + my ($self, $source, $cols, $data, $sth_attr) = @_; # redispatch to insert_bulk method of storage we reblessed into, if necessary if (not $self->_driver_determined) { @@ -1339,10 +1339,11 @@ sub insert_bulk { my %colvalues; my $table = $source->from; @colvalues{@$cols} = (0..$#$cols); +# XXX some bulk APIs require column list in database order my ($sql, @bind) = $self->sql_maker->insert($table, \%colvalues); $self->_query_start( $sql, @bind ); - my $sth = $self->sth($sql); + my $sth = $self->sth($sql, 'insert', $sth_attr); # @bind = map { ref $_ ? ''.$_ : $_ } @bind; # stringify args @@ -1371,7 +1372,7 @@ sub insert_bulk { $placeholder_index++; } my $rv = eval { $sth->execute_array({ArrayTupleStatus => $tuple_status}) }; - if (my $err = $@) { + if (my $err = $@ || $sth->errstr) { my $i = 0; ++$i while $i <= $#$tuple_status && !ref $tuple_status->[$i]; @@ -1394,6 +1395,8 @@ sub insert_bulk { } $self->throw_exception($sth->errstr) if !$rv; + $sth->finish; + $self->_query_end( $sql, @bind ); return (wantarray ? ($rv, $sth, @bind) : $rv); } @@ -2057,12 +2060,15 @@ Returns a L sth (statement handle) for the supplied SQL. =cut sub _dbh_sth { - my ($self, $dbh, $sql) = @_; + my ($self, $dbh, $sql, $op, $sth_attr) = @_; +# $op is ignored right now + + $sth_attr ||= {}; # 3 is the if_active parameter which avoids active sth re-use my $sth = $self->disable_sth_caching - ? $dbh->prepare($sql) - : $dbh->prepare_cached($sql, {}, 3); + ? $dbh->prepare($sql, $sth_attr) + : $dbh->prepare_cached($sql, $sth_attr, 3); # XXX You would think RaiseError would make this impossible, # but apparently that's not true :( @@ -2072,8 +2078,8 @@ sub _dbh_sth { } sub sth { - my ($self, $sql) = @_; - $self->dbh_do('_dbh_sth', $sql); # retry over disconnects + my ($self, $sql, @args) = @_; + $self->dbh_do('_dbh_sth', $sql, @args); # retry over disconnects } sub _dbh_columns_info_for { diff --git a/lib/DBIx/Class/Storage/DBI/Sybase.pm b/lib/DBIx/Class/Storage/DBI/Sybase.pm index 1bb8956..792252f 100644 --- a/lib/DBIx/Class/Storage/DBI/Sybase.pm +++ b/lib/DBIx/Class/Storage/DBI/Sybase.pm @@ -13,11 +13,13 @@ use List::Util (); use Sub::Name (); __PACKAGE__->mk_group_accessors('simple' => - qw/_identity _blob_log_on_update _writer_storage _is_writer_storage + qw/_identity _blob_log_on_update _writer_storage _is_extra_storage + _bulk_storage _is_bulk_storage _began_bulk_work + _bulk_disabled_due_to_coderef_connect_info_warned _identity_method/ ); -my @also_proxy_to_writer_storage = qw/ +my @also_proxy_to_extra_storages = qw/ disconnect _connect_info _sql_maker _sql_maker_opts disable_sth_caching auto_savepoint unsafe cursor_class debug debugobj schema /; @@ -102,7 +104,7 @@ EOF bless $self, $no_bind_vars; $self->_rebless; } elsif (not $self->_typeless_placeholders_supported) { -# this is highly unlikely, but we check just in case + # this is highly unlikely, but we check just in case $self->auto_cast(1); } } @@ -118,28 +120,62 @@ sub _init { # create storage for insert/(update blob) transactions, # unless this is that storage - return if $self->_is_writer_storage; + return if $self->_is_extra_storage; my $writer_storage = (ref $self)->new; - $writer_storage->_is_writer_storage(1); + $writer_storage->_is_extra_storage(1); $writer_storage->connect_info($self->connect_info); $self->_writer_storage($writer_storage); + +# create a bulk storage unless connect_info is a coderef + return + if (Scalar::Util::reftype($self->_dbi_connect_info->[0])||'') eq 'CODE'; + + my $bulk_storage = (ref $self)->new; + + $bulk_storage->_is_extra_storage(1); + $bulk_storage->_is_bulk_storage(1); # for special ->disconnect acrobatics + $bulk_storage->connect_info($self->connect_info); + +# this is why + $bulk_storage->_dbi_connect_info->[0] .= ';bulkLogin=1'; + + $self->_bulk_storage($bulk_storage); } -for my $method (@also_proxy_to_writer_storage) { +for my $method (@also_proxy_to_extra_storages) { no strict 'refs'; + no warnings 'redefine'; my $replaced = __PACKAGE__->can($method); - *{$method} = Sub::Name::subname __PACKAGE__."::$method" => sub { + *{$method} = Sub::Name::subname $method => sub { my $self = shift; $self->_writer_storage->$replaced(@_) if $self->_writer_storage; + $self->_bulk_storage->$replaced(@_) if $self->_bulk_storage; return $self->$replaced(@_); }; } +sub disconnect { + my $self = shift; + +# Even though we call $sth->finish for uses off the bulk API, there's still an +# "active statement" warning on disconnect, which we throw away here. +# This is due to the bug described in insert_bulk. +# Currently a noop because 'prepare' is used instead of 'prepare_cached'. + local $SIG{__WARN__} = sub { + warn $_[0] unless $_[0] =~ /active statement/i; + } if $self->_is_bulk_storage; + +# so that next transaction gets a dbh + $self->_began_bulk_work(0) if $self->_is_bulk_storage; + + $self->next::method; +} + # Make sure we have CHAINED mode turned on if AutoCommit is off in non-FreeTDS # DBD::Sybase (since we don't know how DBD::Sybase was compiled.) If however # we're using FreeTDS, CHAINED mode turns on an implicit transaction which we @@ -148,6 +184,12 @@ sub _populate_dbh { my $self = shift; $self->next::method(@_); + + if ($self->_is_bulk_storage) { +# this should be cleared on every reconnect + $self->_began_bulk_work(0); + return; + } if (not $self->using_freetds) { $self->_dbh->{syb_chained_txn} = 1; @@ -384,7 +426,7 @@ sub update { return $wantarray ? @res : $res[0]; } -### the insert_bulk stuff stolen from DBI/MSSQL.pm +### the insert_bulk partially stolen from DBI/MSSQL.pm sub _set_identity_insert { my ($self, $table) = @_; @@ -416,30 +458,160 @@ sub _unset_identity_insert { $dbh->do ($sql); } -# XXX this should use the DBD::Sybase bulk API, where possible +## XXX add blob support sub insert_bulk { my $self = shift; my ($source, $cols, $data) = @_; my $is_identity_insert = (List::Util::first - { $source->column_info ($_)->{is_auto_increment} } - (@{$cols}) - ) - ? 1 - : 0; - - if ($is_identity_insert) { - $self->_set_identity_insert ($source->name); + { $source->column_info ($_)->{is_auto_increment} } @{$cols} + ) ? 1 : 0; + + my @source_columns = $source->columns; + + my $use_bulk_api = + $self->_bulk_storage && + $self->_get_dbh->{syb_has_blk}; + + if ((not $use_bulk_api) && + (Scalar::Util::reftype($self->_dbi_connect_info->[0])||'') eq 'CODE' && + $self->_bulk_disabled_due_to_coderef_connect_info_warned) { + carp <<'EOF'; +Bulk API support disabled due to use of a CODEREF connect_info. Reverting to +array inserts. +EOF + $self->_bulk_disabled_due_to_coderef_connect_info_warned(1); } - $self->next::method(@_); + if (not $use_bulk_api) { + if ($is_identity_insert) { + $self->_set_identity_insert ($source->name); + } + + $self->next::method(@_); - if ($is_identity_insert) { - $self->_unset_identity_insert ($source->name); + if ($is_identity_insert) { + $self->_unset_identity_insert ($source->name); + } + + return; } -} -### end of stolen insert_bulk section +# otherwise, use the bulk API + +# rearrange @$data so that columns are in database order + my %orig_idx; + @orig_idx{@$cols} = 0..$#$cols; + + my %new_idx; + @new_idx{@source_columns} = 0..$#source_columns; + + my @new_data; + for my $datum (@$data) { + my $new_datum = []; + for my $col (@source_columns) { +# identity data will be 'undef' if not $is_identity_insert +# columns with defaults will also be 'undef' + $new_datum->[ $new_idx{$col} ] = + exists $orig_idx{$col} ? $datum->[ $orig_idx{$col} ] : undef; + } + push @new_data, $new_datum; + } + + my $identity_col = List::Util::first + { $source->column_info($_)->{is_auto_increment} } @source_columns; + +# bcp identity index is 1-based + my $identity_idx = exists $new_idx{$identity_col} ? + $new_idx{$identity_col} + 1 : 0; + +## Set a client-side conversion error handler, straight from DBD::Sybase docs. +# This ignores any data conversion errors detected by the client side libs, as +# they are usually harmless. + my $orig_cslib_cb = DBD::Sybase::set_cslib_cb( + Sub::Name::subname insert_bulk => sub { + my ($layer, $origin, $severity, $errno, $errmsg, $osmsg, $blkmsg) = @_; + + return 1 if $errno == 36; + + carp + "Layer: $layer, Origin: $origin, Severity: $severity, Error: $errno" . + ($errmsg ? "\n$errmsg" : '') . + ($osmsg ? "\n$osmsg" : '') . + ($blkmsg ? "\n$blkmsg" : ''); + + return 0; + }); + + eval { + my $bulk = $self->_bulk_storage; + + my $guard = $bulk->txn_scope_guard; + +## XXX get this to work instead of our own $sth +## will require SQLA or *Hacks changes for ordered columns +# $bulk->next::method($source, \@source_columns, \@new_data, { +# syb_bcp_attribs => { +# identity_flag => $is_identity_insert, +# identity_column => $identity_idx, +# } +# }); + my $sql = 'INSERT INTO ' . + $bulk->sql_maker->_quote($source->name) . ' (' . +# colname list is ignored for BCP, but does no harm + (join ', ', map $bulk->sql_maker->_quote($_), @source_columns) . ') '. + ' VALUES ('. (join ', ', ('?') x @source_columns) . ')'; + +## XXX there's a bug in the DBD::Sybase bulk support that makes $sth->finish for +## a prepare_cached statement ineffective. Replace with ->sth when fixed, or +## better yet the version above. Should be fixed in DBD::Sybase . + my $sth = $bulk->_get_dbh->prepare($sql, +# 'insert', # op + { + syb_bcp_attribs => { + identity_flag => $is_identity_insert, + identity_column => $identity_idx, + } + } + ); + + $bulk->_query_start($sql); + + for my $datum (@new_data) { + $sth->execute(@$datum); + die $sth->errstr if $sth->errstr; # just in case + } + + $guard->commit; + $sth->finish; + + $bulk->_query_end($sql); + }; + my $exception = $@; + if ($exception =~ /-Y option/) { + carp <<"EOF"; + +Sybase bulk API operation failed due to character set incompatibility, reverting +to regular array inserts: + +*** Try unsetting the LANG environment variable. + +$@ +EOF + $self->_bulk_storage(undef); + DBD::Sybase::set_cslib_cb($orig_cslib_cb); + unshift @_, $self; + goto \&insert_bulk; + } + elsif ($exception) { + DBD::Sybase::set_cslib_cb($orig_cslib_cb); +# rollback makes the bulkLogin connection unusable + $self->_bulk_storage->disconnect; + $self->throw_exception($exception) if $exception; + } + + DBD::Sybase::set_cslib_cb($orig_cslib_cb); +} sub _remove_blob_cols { my ($self, $source, $fields) = @_; @@ -597,10 +769,18 @@ sub datetime_parser_type { "DateTime::Format::Sybase" } sub _dbh_begin_work { my $self = shift; + +# bulkLogin=1 connections are always in a transaction, and can only call BEGIN +# TRAN once. However, we need to make sure there's a $dbh. + return if $self->_is_bulk_storage && $self->_dbh && $self->_began_bulk_work; + $self->next::method(@_); + if ($self->using_freetds) { $self->_get_dbh->do('BEGIN TRAN'); } + + $self->_began_bulk_work(1) if $self->_is_bulk_storage; } sub _dbh_commit { @@ -772,6 +952,24 @@ C command on connection. See L for a L setting you need to work with C columns. +=head1 BULK API + +The experimental L Bulk API support is used for +L in B context, in a transaction +on a separate connection. + +To use this feature effectively, use a large number of rows for each +L call, eg.: + + while (my $rows = $data_source->get_100_rows()) { + $rs->populate($rows); + } + +B the L +calls in your C classes B list columns in database order for this +to work. Also, you may have to unset the C environment variable before +loading your app, if it doesn't match the character set of your database. + =head1 AUTHOR See L. diff --git a/t/746sybase.t b/t/746sybase.t index 9e0caae..1b8af4a 100644 --- a/t/746sybase.t +++ b/t/746sybase.t @@ -11,7 +11,7 @@ use DBIx::Class::Storage::DBI::Sybase::NoBindVars; my ($dsn, $user, $pass) = @ENV{map { "DBICTEST_SYBASE_${_}" } qw/DSN USER PASS/}; -my $TESTS = 48 + 2; +my $TESTS = 49 + 2; if (not ($dsn && $user)) { plan skip_all => @@ -188,8 +188,7 @@ SQL } } -# test insert_bulk using populate, this should always pass whether or not it -# does anything Sybase specific or not. Just here to aid debugging. +# test insert_bulk using populate. lives_ok { $schema->resultset('Artist')->populate([ { @@ -224,6 +223,21 @@ SQL $bulk_rs->delete; +# test invalid insert_bulk (missing required column) +# +# There should be a rollback, reconnect and the next valid insert_bulk should +# succeed. + throws_ok { + $schema->resultset('Artist')->populate([ + { + charfield => 'foo', + } + ]); + } qr/no value or default|does not allow null/i, +# The second pattern is the error from fallback to regular array insert on +# incompatible charset. + 'insert_bulk with missing required column throws error'; + # now test insert_bulk with IDENTITY_INSERT lives_ok { $schema->resultset('Artist')->populate([