X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDBM%2FDeep.pm;h=e08ba84f11615913c092fcc4dd68c916fb65be45;hb=912d50b1c7fab3433aa812baebd75ae7c975ee71;hp=6c08ccad7bdec1e984e5baa34625bde3b8857484;hpb=a20d9a3fb61cb620ce84ce62123fb68a8203eeb9;p=dbsrgits%2FDBM-Deep.git diff --git a/lib/DBM/Deep.pm b/lib/DBM/Deep.pm index 6c08cca..e08ba84 100644 --- a/lib/DBM/Deep.pm +++ b/lib/DBM/Deep.pm @@ -64,7 +64,7 @@ our ($LONG_SIZE, $LONG_PACK, $DATA_LENGTH_SIZE, $DATA_LENGTH_PACK); # Increase this value for slightly greater speed, but larger database files. # DO NOT decrease this value below 16, due to risk of recursive reindex overrun. ## -my $MAX_BUCKETS = 16; +our $MAX_BUCKETS = 16; ## # Better not adjust anything below here, unless you're me :-) @@ -203,496 +203,6 @@ sub TIEARRAY { #sub DESTROY { #} -sub _close { - ## - # Close database fh - ## - my $self = $_[0]->_get_self; - close $self->_root->{fh} if $self->_root->{fh}; - $self->_root->{fh} = undef; -} - -sub _create_tag { - ## - # Given offset, signature and content, create tag and write to disk - ## - my ($self, $offset, $sig, $content) = @_; - my $size = length($content); - - my $fh = $self->_fh; - - seek($fh, $offset + $self->_root->{file_offset}, SEEK_SET); - print( $fh $sig . pack($DATA_LENGTH_PACK, $size) . $content ); - - if ($offset == $self->_root->{end}) { - $self->_root->{end} += SIG_SIZE + $DATA_LENGTH_SIZE + $size; - } - - return { - signature => $sig, - size => $size, - offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE, - content => $content - }; -} - -sub _load_tag { - ## - # Given offset, load single tag and return signature, size and data - ## - my $self = shift; - my $offset = shift; - - my $fh = $self->_fh; - - seek($fh, $offset + $self->_root->{file_offset}, SEEK_SET); - if (eof $fh) { return undef; } - - my $b; - read( $fh, $b, SIG_SIZE + $DATA_LENGTH_SIZE ); - my ($sig, $size) = unpack( "A $DATA_LENGTH_PACK", $b ); - - my $buffer; - read( $fh, $buffer, $size); - - return { - signature => $sig, - size => $size, - offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE, - content => $buffer - }; -} - -sub _index_lookup { - ## - # Given index tag, lookup single entry in index and return . - ## - my $self = shift; - my ($tag, $index) = @_; - - my $location = unpack($LONG_PACK, substr($tag->{content}, $index * $LONG_SIZE, $LONG_SIZE) ); - if (!$location) { return; } - - return $self->_load_tag( $location ); -} - -sub _add_bucket { - ## - # Adds one key/value pair to bucket list, given offset, MD5 digest of key, - # plain (undigested) key and value. - ## - my $self = shift; - my ($tag, $md5, $plain_key, $value) = @_; - my $keys = $tag->{content}; - my $location = 0; - my $result = 2; - - my $root = $self->_root; - - my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) }; - my $internal_ref = $is_dbm_deep && ($value->_root eq $root); - - my $fh = $self->_fh; - - ## - # Iterate through buckets, seeing if this is a new entry or a replace. - ## - for (my $i=0; $i<$MAX_BUCKETS; $i++) { - my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - if (!$subloc) { - ## - # Found empty bucket (end of list). Populate and exit loop. - ## - $result = 2; - - $location = $internal_ref - ? $value->_base_offset - : $root->{end}; - - seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET); - print( $fh $md5 . pack($LONG_PACK, $location) ); - last; - } - - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - if ($md5 eq $key) { - ## - # Found existing bucket with same key. Replace with new value. - ## - $result = 1; - - if ($internal_ref) { - $location = $value->_base_offset; - seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET); - print( $fh $md5 . pack($LONG_PACK, $location) ); - return $result; - } - - seek($fh, $subloc + SIG_SIZE + $root->{file_offset}, SEEK_SET); - my $size; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - - ## - # If value is a hash, array, or raw value with equal or less size, we can - # reuse the same content area of the database. Otherwise, we have to create - # a new content area at the EOF. - ## - my $actual_length; - my $r = Scalar::Util::reftype( $value ) || ''; - if ( $r eq 'HASH' || $r eq 'ARRAY' ) { - $actual_length = $INDEX_SIZE; - - # if autobless is enabled, must also take into consideration - # the class name, as it is stored along with key/value. - if ( $root->{autobless} ) { - my $value_class = Scalar::Util::blessed($value); - if ( defined $value_class && !$value->isa('DBM::Deep') ) { - $actual_length += length($value_class); - } - } - } - else { $actual_length = length($value); } - - if ($actual_length <= $size) { - $location = $subloc; - } - else { - $location = $root->{end}; - seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $HASH_SIZE + $root->{file_offset}, SEEK_SET); - print( $fh pack($LONG_PACK, $location) ); - } - - last; - } - } - - ## - # If this is an internal reference, return now. - # No need to write value or plain key - ## - if ($internal_ref) { - return $result; - } - - ## - # If bucket didn't fit into list, split into a new index level - ## - if (!$location) { - seek($fh, $tag->{ref_loc} + $root->{file_offset}, SEEK_SET); - print( $fh pack($LONG_PACK, $root->{end}) ); - - my $index_tag = $self->_create_tag($root->{end}, SIG_INDEX, chr(0) x $INDEX_SIZE); - my @offsets = (); - - $keys .= $md5 . pack($LONG_PACK, 0); - - for (my $i=0; $i<=$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - if ($key) { - my $old_subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - my $num = ord(substr($key, $tag->{ch} + 1, 1)); - - if ($offsets[$num]) { - my $offset = $offsets[$num] + SIG_SIZE + $DATA_LENGTH_SIZE; - seek($fh, $offset + $root->{file_offset}, SEEK_SET); - my $subkeys; - read( $fh, $subkeys, $BUCKET_LIST_SIZE); - - for (my $k=0; $k<$MAX_BUCKETS; $k++) { - my $subloc = unpack($LONG_PACK, substr($subkeys, ($k * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - if (!$subloc) { - seek($fh, $offset + ($k * $BUCKET_SIZE) + $root->{file_offset}, SEEK_SET); - print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) ); - last; - } - } # k loop - } - else { - $offsets[$num] = $root->{end}; - seek($fh, $index_tag->{offset} + ($num * $LONG_SIZE) + $root->{file_offset}, SEEK_SET); - print( $fh pack($LONG_PACK, $root->{end}) ); - - my $blist_tag = $self->_create_tag($root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE); - - seek($fh, $blist_tag->{offset} + $root->{file_offset}, SEEK_SET); - print( $fh $key . pack($LONG_PACK, $old_subloc || $root->{end}) ); - } - } # key is real - } # i loop - - $location ||= $root->{end}; - } # re-index bucket list - - ## - # Seek to content area and store signature, value and plaintext key - ## - if ($location) { - my $content_length; - seek($fh, $location + $root->{file_offset}, SEEK_SET); - - ## - # Write signature based on content type, set content length and write actual value. - ## - my $r = Scalar::Util::reftype($value) || ''; - if ($r eq 'HASH') { - print( $fh TYPE_HASH ); - print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE ); - $content_length = $INDEX_SIZE; - } - elsif ($r eq 'ARRAY') { - print( $fh TYPE_ARRAY ); - print( $fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE ); - $content_length = $INDEX_SIZE; - } - elsif (!defined($value)) { - print( $fh SIG_NULL ); - print( $fh pack($DATA_LENGTH_PACK, 0) ); - $content_length = 0; - } - else { - print( $fh SIG_DATA ); - print( $fh pack($DATA_LENGTH_PACK, length($value)) . $value ); - $content_length = length($value); - } - - ## - # Plain key is stored AFTER value, as keys are typically fetched less often. - ## - print( $fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key ); - - ## - # If value is blessed, preserve class name - ## - if ( $root->{autobless} ) { - my $value_class = Scalar::Util::blessed($value); - if ( defined $value_class && $value_class ne 'DBM::Deep' ) { - ## - # Blessed ref -- will restore later - ## - print( $fh chr(1) ); - print( $fh pack($DATA_LENGTH_PACK, length($value_class)) . $value_class ); - $content_length += 1; - $content_length += $DATA_LENGTH_SIZE + length($value_class); - } - else { - print( $fh chr(0) ); - $content_length += 1; - } - } - - ## - # If this is a new content area, advance EOF counter - ## - if ($location == $root->{end}) { - $root->{end} += SIG_SIZE; - $root->{end} += $DATA_LENGTH_SIZE + $content_length; - $root->{end} += $DATA_LENGTH_SIZE + length($plain_key); - } - - ## - # If content is a hash or array, create new child DBM::Deep object and - # pass each key or element to it. - ## - if ($r eq 'HASH') { - my $branch = DBM::Deep->new( - type => TYPE_HASH, - base_offset => $location, - root => $root, - ); - foreach my $key (keys %{$value}) { - $branch->STORE( $key, $value->{$key} ); - } - } - elsif ($r eq 'ARRAY') { - my $branch = DBM::Deep->new( - type => TYPE_ARRAY, - base_offset => $location, - root => $root, - ); - my $index = 0; - foreach my $element (@{$value}) { - $branch->STORE( $index, $element ); - $index++; - } - } - - return $result; - } - - return $self->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file"); -} - -sub _get_bucket_value { - ## - # Fetch single value given tag and MD5 digested key. - ## - my $self = shift; - my ($tag, $md5) = @_; - my $keys = $tag->{content}; - - my $fh = $self->_fh; - - ## - # Iterate through buckets, looking for a key match - ## - BUCKET: - for (my $i=0; $i<$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - - if (!$subloc) { - ## - # Hit end of list, no match - ## - return; - } - - if ( $md5 ne $key ) { - next BUCKET; - } - - ## - # Found match -- seek to offset and read signature - ## - my $signature; - seek($fh, $subloc + $self->_root->{file_offset}, SEEK_SET); - read( $fh, $signature, SIG_SIZE); - - ## - # If value is a hash or array, return new DBM::Deep object with correct offset - ## - if (($signature eq TYPE_HASH) || ($signature eq TYPE_ARRAY)) { - my $obj = DBM::Deep->new( - type => $signature, - base_offset => $subloc, - root => $self->_root - ); - - if ($self->_root->{autobless}) { - ## - # Skip over value and plain key to see if object needs - # to be re-blessed - ## - seek($fh, $DATA_LENGTH_SIZE + $INDEX_SIZE, SEEK_CUR); - - my $size; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - if ($size) { seek($fh, $size, SEEK_CUR); } - - my $bless_bit; - read( $fh, $bless_bit, 1); - if (ord($bless_bit)) { - ## - # Yes, object needs to be re-blessed - ## - my $class_name; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - if ($size) { read( $fh, $class_name, $size); } - if ($class_name) { $obj = bless( $obj, $class_name ); } - } - } - - return $obj; - } - - ## - # Otherwise return actual value - ## - elsif ($signature eq SIG_DATA) { - my $size; - my $value = ''; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - if ($size) { read( $fh, $value, $size); } - return $value; - } - - ## - # Key exists, but content is null - ## - else { return; } - } # i loop - - return; -} - -sub _delete_bucket { - ## - # Delete single key/value pair given tag and MD5 digested key. - ## - my $self = shift; - my ($tag, $md5) = @_; - my $keys = $tag->{content}; - - my $fh = $self->_fh; - - ## - # Iterate through buckets, looking for a key match - ## - BUCKET: - for (my $i=0; $i<$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - - if (!$subloc) { - ## - # Hit end of list, no match - ## - return; - } - - if ( $md5 ne $key ) { - next BUCKET; - } - - ## - # Matched key -- delete bucket and return - ## - seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $self->_root->{file_offset}, SEEK_SET); - print( $fh substr($keys, ($i+1) * $BUCKET_SIZE ) ); - print( $fh chr(0) x $BUCKET_SIZE ); - - return 1; - } # i loop - - return; -} - -sub _bucket_exists { - ## - # Check existence of single key given tag and MD5 digested key. - ## - my $self = shift; - my ($tag, $md5) = @_; - my $keys = $tag->{content}; - - ## - # Iterate through buckets, looking for a key match - ## - BUCKET: - for (my $i=0; $i<$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - - if (!$subloc) { - ## - # Hit end of list, no match - ## - return; - } - - if ( $md5 ne $key ) { - next BUCKET; - } - - ## - # Matched key -- return true - ## - return 1; - } # i loop - - return; -} - sub _find_bucket_list { ## # Locate offset for bucket list, given digested key @@ -704,11 +214,11 @@ sub _find_bucket_list { # Locate offset for bucket list using digest index system ## my $ch = 0; - my $tag = $self->_load_tag($self->_base_offset); + my $tag = $self->{engine}->load_tag($self, $self->_base_offset); if (!$tag) { return; } while ($tag->{signature} ne SIG_BLIST) { - $tag = $self->_index_lookup($tag, ord(substr($md5, $ch, 1))); + $tag = $self->{engine}->index_lookup($self, $tag, ord(substr($md5, $ch, 1))); if (!$tag) { return; } $ch++; } @@ -723,7 +233,7 @@ sub _traverse_index { my ($self, $offset, $ch, $force_return_next) = @_; $force_return_next = undef unless $force_return_next; - my $tag = $self->_load_tag( $offset ); + my $tag = $self->{engine}->load_tag($self, $offset ); my $fh = $self->_fh; @@ -843,7 +353,7 @@ sub lock { # double-check file inode, in case another process # has optimize()d our file while we were waiting. if ($stats[1] != $self->_root->{inode}) { - $self->{engine}->open($self); # re-open + $self->{engine}->open( $self ); # re-open flock($self->_fh, $type); # re-lock $self->_root->{end} = (stat($self->_fh))[7]; # re-end } @@ -1025,7 +535,7 @@ sub optimize { # with a soft copy. ## $self->unlock(); - $self->_close(); + $self->{engine}->close( $self ); } if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) { @@ -1035,8 +545,8 @@ sub optimize { } $self->unlock(); - $self->_close(); - $self->{engine}->open($self); + $self->{engine}->close( $self ); + $self->{engine}->open( $self ); return 1; } @@ -1207,9 +717,9 @@ sub STORE { ## # Locate offset for bucket list using digest index system ## - my $tag = $self->_load_tag($self->_base_offset); + my $tag = $self->{engine}->load_tag($self, $self->_base_offset); if (!$tag) { - $tag = $self->_create_tag($self->_base_offset, SIG_INDEX, chr(0) x $INDEX_SIZE); + $tag = $self->{engine}->create_tag($self, $self->_base_offset, SIG_INDEX, chr(0) x $INDEX_SIZE); } my $ch = 0; @@ -1217,13 +727,13 @@ sub STORE { my $num = ord(substr($md5, $ch, 1)); my $ref_loc = $tag->{offset} + ($num * $LONG_SIZE); - my $new_tag = $self->_index_lookup($tag, $num); + my $new_tag = $self->{engine}->index_lookup($self, $tag, $num); if (!$new_tag) { seek($fh, $ref_loc + $self->_root->{file_offset}, SEEK_SET); print( $fh pack($LONG_PACK, $self->_root->{end}) ); - $tag = $self->_create_tag($self->_root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE); + $tag = $self->{engine}->create_tag($self, $self->_root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE); $tag->{ref_loc} = $ref_loc; $tag->{ch} = $ch; @@ -1242,7 +752,7 @@ sub STORE { ## # Add key/value to bucket list ## - my $result = $self->_add_bucket( $tag, $md5, $key, $value ); + my $result = $self->{engine}->add_bucket( $self, $tag, $md5, $key, $value ); $self->unlock(); @@ -1272,7 +782,7 @@ sub FETCH { ## # Get value from bucket list ## - my $result = $self->_get_bucket_value( $tag, $md5 ); + my $result = $self->{engine}->get_bucket_value( $self, $tag, $md5 ); $self->unlock(); @@ -1307,12 +817,12 @@ sub DELETE { ## # Delete bucket ## - my $value = $self->_get_bucket_value( $tag, $md5 ); + my $value = $self->{engine}->get_bucket_value($self, $tag, $md5 ); if ($value && !ref($value) && $self->_root->{filter_fetch_value}) { $value = $self->_root->{filter_fetch_value}->($value); } - my $result = $self->_delete_bucket( $tag, $md5 ); + my $result = $self->{engine}->delete_bucket( $self, $tag, $md5 ); ## # If this object is an array and the key deleted was on the end of the stack, @@ -1351,7 +861,7 @@ sub EXISTS { ## # Check if bucket exists and return 1 or '' ## - my $result = $self->_bucket_exists( $tag, $md5 ) || ''; + my $result = $self->{engine}->bucket_exists( $self, $tag, $md5 ) || ''; $self->unlock(); @@ -1377,7 +887,7 @@ sub CLEAR { return; } - $self->_create_tag($self->_base_offset, $self->_type, chr(0) x $INDEX_SIZE); + $self->{engine}->create_tag($self, $self->_base_offset, $self->_type, chr(0) x $INDEX_SIZE); $self->unlock();