From: sprout Date: Sat, 30 Jan 2010 06:17:30 +0000 (-0800) Subject: Speed up clear() X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=d8f1fa9844cbf2e1beddbe01297de1235d7a7622;p=dbsrgits%2FDBM-Deep.git Speed up clear() Before the clear() bug was fixed, t/03_bighash.t took 45 sec., for me at least. It was using a ‘first_key next_key*’ approach, which ended up skipping keys, since they were being deleted during iteration. When clear() was fixed, a ‘first_key+’ approach was used, which was *much* slower, since it created a new iterator object for each key. That test script ended up taking 2 hours! I found that a compromise, using ‘first_key next_key*’ repeatedly until first_key stopped returning a key, would reduce the time to 65 seconds. Then I found that pushing the functionality further inside the engine and freeing the index or bucket list reduces it to 25 seconds. Transactions make things a little more complicated, so I fall back to the compromise in the presence of transactions. --- diff --git a/lib/DBM/Deep.pm b/lib/DBM/Deep.pm index 5757d59..15cca01 100644 --- a/lib/DBM/Deep.pm +++ b/lib/DBM/Deep.pm @@ -575,14 +575,15 @@ sub CLEAR { my $self = shift->_get_self; warn "CLEAR($self)\n" if DEBUG; - unless ( $self->_engine->storage->is_writable ) { + my $engine = $self->_engine; + unless ( $engine->storage->is_writable ) { $self->_throw_error( 'Cannot write to a readonly filehandle' ); } $self->lock_exclusive; # Dispatch to the specific clearing functionality. 
- $self->_clear; + $engine->clear($self); $self->unlock; diff --git a/lib/DBM/Deep/Engine.pm b/lib/DBM/Deep/Engine.pm index 1193bd8..dc6b14c 100644 --- a/lib/DBM/Deep/Engine.pm +++ b/lib/DBM/Deep/Engine.pm @@ -86,6 +86,8 @@ is the following: =item * get_next_key +=item * clear + =item * setup_fh =item * begin_work @@ -566,6 +568,28 @@ sub get_next_key { return $obj->{iterator}->get_next_key( $obj ); } +=head2 clear( $obj ) + +This takes an object that provides _base_offset() and deletes all its +elements, returning nothing. + +=cut + +sub clear { + my $self = shift; + my $obj = shift; + + my $sector = $self->_load_sector( $obj->_base_offset ) + or return; + + if ( $sector->staleness != $obj->_staleness ) { + return; + } + + $sector->clear; + return; +} + =head2 setup_fh( $obj ) This takes an object that provides _base_offset(). It will do everything needed diff --git a/lib/DBM/Deep/Engine/Sector/Reference.pm b/lib/DBM/Deep/Engine/Sector/Reference.pm index c681cda..fb31d95 100644 --- a/lib/DBM/Deep/Engine/Sector/Reference.pm +++ b/lib/DBM/Deep/Engine/Sector/Reference.pm @@ -211,6 +211,36 @@ sub delete_key { return $data; } +sub clear { + my $self = shift; + + my $blist_loc = $self->get_blist_loc or return; + + my $engine = $self->engine; + + if($engine->get_running_txn_ids) { + # ~~~ Temporary; the code below this block needs to be modified to + # take transactions into account. 
+ $self->data->_clear; + return; + } + + my $sector = $engine->_load_sector( $blist_loc ) + or DBM::Deep->_throw_error( + "Cannot read sector at $blist_loc in clear()" + ); + + # Set blist offset to 0 + $engine->storage->print_at( $self->offset + $self->base_size, + pack( $StP{$engine->byte_size}, 0 ), + ); + + # Free the blist + $sector->free; + + return; +} + sub get_blist_loc { my $self = shift; diff --git a/lib/DBM/Deep/Hash.pm b/lib/DBM/Deep/Hash.pm index 45dc3ff..3188dd1 100644 --- a/lib/DBM/Deep/Hash.pm +++ b/lib/DBM/Deep/Hash.pm @@ -115,7 +115,9 @@ sub _clear { my $self = shift; while ( defined(my $key = $self->first_key) ) { + do { $self->_engine->delete_key( $self, $key, $key ); + } while defined($key = $self->next_key($key)); } return;