From: Peter Rabbitson <ribasushi@cpan.org>
Date: Sun, 2 Sep 2012 15:13:00 +0000 (+0200)
Subject: LOBWriter review
X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=refs%2Fheads%2Ftopic%2Flobwriter;p=dbsrgits%2FDBIx-Class.git

LOBWriter review
---

diff --git a/lib/DBIx/Class/Storage/DBI.pm b/lib/DBIx/Class/Storage/DBI.pm
index e2ddd5b..dde5976 100644
--- a/lib/DBIx/Class/Storage/DBI.pm
+++ b/lib/DBIx/Class/Storage/DBI.pm
@@ -23,9 +23,15 @@ use namespace::clean;
 __PACKAGE__->cursor_class('DBIx::Class::Storage::DBI::Cursor');
 
 __PACKAGE__->mk_group_accessors('inherited' => qw/
+                                                # rename to _dbh_default_prepare_attrs
   sql_limit_dialect sql_quote_char sql_name_sep _prepare_attributes
 /);
 
+# NEVER set a class default to an anonymous ref. It will be shared between
+# multiple objects, leading to a shitton of hard-to-track bugs
+# Yes I know we have it in resultsource code, it's complicated to fix once
+# fucked up.
+# if you have to have a {} default - add it at new() time, but no earlier
 __PACKAGE__->_prepare_attributes({}); # see _dbh_sth
 
 __PACKAGE__->mk_group_accessors('component_class' => qw/sql_maker_class datetime_parser_type/);
@@ -2306,8 +2312,8 @@ sub _dbh_sth {
 
   # 3 is the if_active parameter which avoids active sth re-use
   my $sth = $self->disable_sth_caching
-    ? $dbh->prepare($sql, $self->_prepare_attributes)
-    : $dbh->prepare_cached($sql, $self->_prepare_attributes, 3);
+    ? $dbh->prepare($sql, $self->_prepare_attributes || {}) # now can be (and often is) undef
+    : $dbh->prepare_cached($sql, $self->_prepare_attributes || {}, 3);
 
   # XXX You would think RaiseError would make this impossible,
   #  but apparently that's not true :(
diff --git a/lib/DBIx/Class/Storage/DBI/Oracle/Generic.pm b/lib/DBIx/Class/Storage/DBI/Oracle/Generic.pm
index ac2895d..b60622b 100644
--- a/lib/DBIx/Class/Storage/DBI/Oracle/Generic.pm
+++ b/lib/DBIx/Class/Storage/DBI/Oracle/Generic.pm
@@ -434,6 +434,9 @@ sub bind_attribute_by_data_type {
     # things like Class::Unload work (unlikely but possible)
     unless ($DBD::Oracle::__DBIC_DBD_VERSION_CHECK_OK__) {
 
+      # the changelog claims this is solved... why is it still here then...?
+      # was it actually tested?
+
       # no earlier - no later
       if ($DBD::Oracle::VERSION eq '1.23') {
         $self->throw_exception(
@@ -459,6 +462,9 @@ sub bind_attribute_by_data_type {
 sub _empty_lob {
   my ($self, $source, $col) = @_;
 
+  # this is "empty" stuff anyway, are you sure Oracle differentiates between
+  # empty binary and empty character lob? If it doesn't matter - you can save
+  # lots of calls here
   return $self->_is_text_lob_type($source->column_info($col)->{data_type}) ?
     \'EMPTY_CLOB()' : \'EMPTY_BLOB()';
 }
@@ -466,22 +472,39 @@ sub _empty_lob {
 sub _write_lobs {
   my ($self, $source, $lobs, $where) = @_;
 
+  # more descriptive varnames, I suspect @blob_column_names is what this is?
   my @lobs = keys %$lobs;
 
   local $self->_prepare_attributes->{ora_auto_lob} = 0;
 
+  # for => 'update' *without* an enclosing txn (via a guard) will lock
+  # stuff up for potentially the rest of the connection
+  # while you seem to open a txn in the calling WriteLOBs.pm, the risk is
+  # too great - this place needs a ->transaction_depth check with an
+  # exception if not the case. It's a 'simple' (XSA) acessor, so not
+  # a horrible speed hit
   my $cursor = $self->select($source, \@lobs, $where, { for => 'update' });
 
   my $dbh = $self->_get_dbh;
 
   while (my @locators = $cursor->next) {
     my %lobs;
+    # are you kidding me? :)
+    #  VVVVVVV
     @lobs{@lobs} = @locators;
 
     foreach my $lob (@lobs) {
       my $data = \$lobs->{$lob};
 
+      # This is silent data corruption, goes against the philosophy of
+      # dbic. You already asked the user to pre-truncate in the docs,
+      # instead of helpfully thrashing user's data, let Oracle throw a
+      # damned exception
+      # I may be wrong however, as ora_lob_trim does not document what
+      # a size of 0 even means, nor is it present anywhere in the docs.
+      # so if it means something else - mea culpa
       $dbh->ora_lob_trim($lobs{$lob}, 0);
+
       $dbh->ora_lob_write($lobs{$lob}, 1, $$data);
     }
   }
diff --git a/lib/DBIx/Class/Storage/DBI/Sybase/ASE.pm b/lib/DBIx/Class/Storage/DBI/Sybase/ASE.pm
index 17c5ad6..369af39 100644
--- a/lib/DBIx/Class/Storage/DBI/Sybase/ASE.pm
+++ b/lib/DBIx/Class/Storage/DBI/Sybase/ASE.pm
@@ -1,4 +1,4 @@
-package DBIx::Class::Storage::DBI::Sybase::ASE;
+package DBIx::Class::Storage::DBwI::Sybase::ASE;
 
 use strict;
 use warnings;
@@ -404,6 +404,7 @@ sub update {
   my $columns_info = $source->columns_info([keys %$fields]);
 
   local $self->{_autoinc_supplied_for_op} = 1
+    # can't you just iterate over values %$columns_info...?
     if first { $columns_info->{$_}{is_auto_increment} } keys %$columns_info;
 
   return $self->next::method(@_);
@@ -448,6 +449,9 @@ sub insert_bulk {
       :
       ($self, undef);
 
+    # This is not strictly necessary - please remove it. The speedup you claim
+    # can be achieved in another manner. Also remove from documentation,
+    # it will be a nightmare to keep backwards comp for it
     local $self->{_skip_writelobs_impl} = 1;
 
     $self->next::method(@_);
@@ -609,6 +613,8 @@ sub insert_bulk {
 
 # Override from WriteLOBs for NULL uniqueness (in ASE null values in UCs are
 # unique.)
+# It doesn't matter. "NULL in nullable UC is unique" will not be supported
+# for any storage (ASE, MSSQL, probably others). Throw this code away
 sub _identifying_column_set {
   my ($self, $source, $cols) = @_;
 
diff --git a/lib/DBIx/Class/Storage/DBI/WriteLOBs.pm b/lib/DBIx/Class/Storage/DBI/WriteLOBs.pm
index 5d77601..cd61757 100644
--- a/lib/DBIx/Class/Storage/DBI/WriteLOBs.pm
+++ b/lib/DBIx/Class/Storage/DBI/WriteLOBs.pm
@@ -58,6 +58,8 @@ C<insert>, C<update> and C<insert_bulk>.
 #
 # to shortcircuit the inherited ones for a minor speedup.
 
+# Ugh! why is this heavy method necessary at all...? You only use it locally
+# Please throw it away, see notes on usage-sites
 sub _is_lob_column {
   my ($self, $source, $column) = @_;
 
@@ -72,6 +74,8 @@ sub _is_lob_column {
 #
 # Returns true if any of %fields are non-empty LOBs.
 
+# there is a *single* callsite for this method in the entire codebase - stop
+# prematurely generalizing
 sub _have_lob_fields {
   my ($self, $source, $fields) = @_;
 
@@ -93,12 +97,19 @@ sub _have_lob_fields {
 # Replace LOB fields with L</_empty_lob> values, and return any non-empty ones as
 # a hash keyed by field name.
 
+# more descriptive name please, something to the effect of _extract_supplied_lobs
+# see more comments in _remove_lob_fields below
+# this is effectively a semi-exposed API
 sub _replace_lob_fields {
   my ($self, $source, $fields) = @_;
 
   my %lob_cols;
 
   for my $col (keys %$fields) {
+    # this is a retarded interface - what is the point of doing the work via 
+    # $source repeatedly, you have $source->columns_info already. Furthermore
+    # all _is_lob_column does is call _is_lob_type on the 'colinfo'
+    # Please inline.
     if ($self->_is_lob_column($source, $col)) {
       my $lob_val = delete $fields->{$col};
       if (not defined $lob_val) {
@@ -128,12 +139,16 @@ sub _replace_lob_fields {
 # Remove LOB fields from %fields entirely, and return any non-empty ones as a
 # hash keyed by field name.
 
+
+# this method is almost identical to _replace_lob_fields - fold the two together,
+# and simply add an extra flag to tge sig. $replace_lobs_with_empty_placeholders
 sub _remove_lob_fields {
   my ($self, $source, $fields) = @_;
 
   my %lob_cols;
 
   for my $col (keys %$fields) {
+    # identical comment as in _replace_lob_fields above
     if ($self->_is_lob_column($source, $col)) {
       my $lob_val = delete $fields->{$col};
       if (not defined $lob_val) {
@@ -162,6 +177,10 @@ sub _remove_lob_fields {
 # Returns a set of rows of LOB values with the LOBs in the original positions
 # they were in @data.
 
+# TO-DISCUSS:
+# I need to merge something else that will obviate this method. Leave this
+# note intact for next round of review
+
 sub _replace_lob_fields_array {
   my ($self, $source, $cols, $data) = @_;
 
@@ -202,6 +221,9 @@ sub _replace_lob_fields_array {
 #
 # The @lobs array is as prepared by L</_replace_lob_fields_array> above.
 
+# TO-DISCUSS:
+# I need to merge something else that will obviate this method. Leave this
+# note intact for next round of review
 sub _write_lobs_array {
   my ($self, $source, $lobs, $cols, $data) = @_;
 
@@ -229,6 +251,10 @@ sub _write_lobs_array {
 }
 
 # Proxy for ResultSource, for overriding in ASE
+#
+# this will not go into DBIC. Not negotiable. See comments in Sybase::ASE
+# and throw away
+#
 sub _identifying_column_set {
   my ($self, $source, @args) = @_;
   return $source->_identifying_column_set(@args);
@@ -251,6 +277,16 @@ sub _identifying_column_set {
 #
 # Uses _identifying_column_set from DBIx::Class::ResultSource.
 
+# this is not the place
+# put the low-level implementation into ResultSource (something like
+# $rsrc->_identity_condition($hashref_of_data)
+# the use it in both $storage, and in DBIC::PK.pm, which as a bonus will
+# allow you to $row->update/delete as long as *any* UC is met, not just
+# on PK
+# *DO NOT* do this as part of this commit, do it separately to facilitate
+# review, and then rebase your work on top of it
+#
+# as a result this method is ultimately a throwaway
 sub _ident_cond_for_cols {
   my ($self, $source, $row) = @_;
 
@@ -274,6 +310,8 @@ sub _ident_cond_for_cols {
 sub insert {
   my $self = shift;
 
+  # as noted earlier - remove this optimization until we figure
+  # out we really need it. Dedocument as well
   return $self->next::method(@_) if $self->{_skip_writelobs_impl};
 
   my ($source, $to_insert) = @_;
@@ -288,6 +326,7 @@ sub insert {
 
   my $row = { %$to_insert, %$updated_cols };
 
+  # this will get replaced by $source->_identity_condition($row)
   my $where = $self->_ident_cond_for_cols($source, $row)
     or $self->throw_exception(
       'Could not identify row for LOB insert '
@@ -304,6 +343,8 @@ sub insert {
 sub update {
   my $self = shift;
 
+  # as noted earlier - remove this optimization until we figure
+  # out we really need it. Dedocument as well
   return $self->next::method(@_) if $self->{_skip_writelobs_impl};
 
   my ($source, $fields, $where, @rest) = @_;
@@ -312,20 +353,26 @@ sub update {
 
   return $self->next::method(@_) unless $lobs;
 
+  # this will get replaced by $source->_identity_condition($row)
   my @key_cols = @{ $self->_identifying_column_set($source) || [] }
     or $self->throw_exception(
          'must be able to uniquely identify rows for LOB updates'
        );
 
+  # This is bogus. What needs to happen is $writer_storage needs to
+  # override the accessor, and check $main_schema (which it holds a
+  # weakref to) too much action at a distance here
   my $autoinc_supplied_for_op = $self->_autoinc_supplied_for_op;
-
   $self = $self->_writer_storage if $self->can('_writer_storage'); # for ASE
-
   local $self->{_autoinc_supplied_for_op} = $autoinc_supplied_for_op
     if $autoinc_supplied_for_op;
 
   my $guard = $self->txn_scope_guard;
 
+
+  # why two scopes both localising the same thing to the same value?
+  # something doesn't look right here, please rewrite
+  # TO_DISCUSS: pending another round of review once rewritten
   my ($cursor, @rows);
   {
     local $self->{_autoinc_supplied_for_op} = 0;
@@ -358,6 +405,7 @@ sub update {
   return $count;
 }
 
+# TO_DISCUSS: pending another round of review
 sub insert_bulk {
   my $self = shift;