# print "This module " . $db->{my_complex}->[1]->{perl} . "!\n";
#
# Copyright:
-# (c) 2002-2005 Joseph Huckaby. All Rights Reserved.
+# (c) 2002-2006 Joseph Huckaby. All Rights Reserved.
# This program is free software; you can redistribute it and/or
# modify it under the same terms as Perl itself.
##
use Digest::MD5 ();
use Scalar::Util ();
-use vars qw( $VERSION );
-$VERSION = q(0.98);
-
-##
-# Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file.
-# (Perl must be compiled with largefile support for files > 2 GB)
-#
-# Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file.
-# (Perl must be compiled with largefile and 64-bit long support)
-##
-#my $LONG_SIZE = 4;
-#my $LONG_PACK = 'N';
-
-##
-# Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value.
-# Upgrading this is possible (see above) but probably not necessary. If you need
-# more than 4 GB for a single key or value, this module is really not for you :-)
-##
-#my $DATA_LENGTH_SIZE = 4;
-#my $DATA_LENGTH_PACK = 'N';
-our ($LONG_SIZE, $LONG_PACK, $DATA_LENGTH_SIZE, $DATA_LENGTH_PACK);
-
-##
-# Maximum number of buckets per list before another level of indexing is done.
-# Increase this value for slightly greater speed, but larger database files.
-# DO NOT decrease this value below 16, due to risk of recursive reindex overrun.
-##
-my $MAX_BUCKETS = 16;
+use DBM::Deep::Engine;
-##
-# Better not adjust anything below here, unless you're me :-)
-##
-
-##
-# Setup digest function for keys
-##
-our ($DIGEST_FUNC, $HASH_SIZE);
-#my $DIGEST_FUNC = \&Digest::MD5::md5;
-
-##
-# Precalculate index and bucket sizes based on values above.
-##
-#my $HASH_SIZE = 16;
-my ($INDEX_SIZE, $BUCKET_SIZE, $BUCKET_LIST_SIZE);
+use vars qw( $VERSION );
+$VERSION = q(0.99_01);
-set_digest();
-#set_pack();
-#_precalc_sizes();
##
# Setup file and tag signatures. These should never change.
# These are the defaults to be optionally overridden below
my $self = bless {
- type => TYPE_HASH,
+ type => TYPE_HASH,
base_offset => length(SIG_FILE),
+ engine => 'DBM::Deep::Engine',
}, $class;
foreach my $param ( keys %$self ) {
? $args->{root}
: DBM::Deep::_::Root->new( $args );
- if (!defined($self->_fh)) { $self->_open(); }
+ if (!defined($self->_fh)) { $self->{engine}->open( $self ); }
return $self;
}
#sub DESTROY {
#}
-sub _open {
- ##
- # Open a fh to the database, create if nonexistent.
- # Make sure file signature matches DBM::Deep spec.
- ##
- my $self = $_[0]->_get_self;
-
- if (defined($self->_fh)) { $self->_close(); }
-
- eval {
- local $SIG{'__DIE__'};
- # Theoretically, adding O_BINARY should remove the need for the binmode
- # Of course, testing it is going to be ... interesting.
- my $flags = O_RDWR | O_CREAT | O_BINARY;
-
- my $fh;
- sysopen( $fh, $self->_root->{file}, $flags )
- or $fh = undef;
- $self->_root->{fh} = $fh;
- }; if ($@ ) { $self->_throw_error( "Received error: $@\n" ); }
- if (! defined($self->_fh)) {
- return $self->_throw_error("Cannot sysopen file: " . $self->_root->{file} . ": $!");
- }
-
- my $fh = $self->_fh;
-
- #XXX Can we remove this by using the right sysopen() flags?
- # Maybe ... q.v. above
- binmode $fh; # for win32
-
- if ($self->_root->{autoflush}) {
- my $old = select $fh;
- $|=1;
- select $old;
- }
-
- # Set the
- seek($fh, 0, SEEK_SET);
-
- my $signature;
- my $bytes_read = read( $fh, $signature, length(SIG_FILE));
-
- ##
- # File is empty -- write signature and master index
- ##
- if (!$bytes_read) {
- seek($fh, 0, SEEK_SET);
- print($fh SIG_FILE);
- $self->_create_tag($self->_base_offset, $self->_type, chr(0) x $INDEX_SIZE);
-
- my $plain_key = "[base]";
- print($fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key );
-
- # Flush the filehandle
- my $old_fh = select $fh;
- my $old_af = $|;
- $| = 1;
- $| = $old_af;
- select $old_fh;
-
- my @stats = stat($fh);
- $self->_root->{inode} = $stats[1];
- $self->_root->{end} = $stats[7];
-
- return 1;
- }
-
- ##
- # Check signature was valid
- ##
- unless ($signature eq SIG_FILE) {
- $self->_close();
- return $self->_throw_error("Signature not found -- file is not a Deep DB");
- }
-
- my @stats = stat($fh);
- $self->_root->{inode} = $stats[1];
- $self->_root->{end} = $stats[7];
-
- ##
- # Get our type from master index signature
- ##
- my $tag = $self->_load_tag($self->_base_offset);
-
-#XXX We probably also want to store the hash algorithm name and not assume anything
-#XXX The cool thing would be to allow a different hashing algorithm at every level
-
- if (!$tag) {
- return $self->_throw_error("Corrupted file, no master index record");
- }
- if ($self->{type} ne $tag->{signature}) {
- return $self->_throw_error("File type mismatch");
- }
-
- return 1;
-}
-
-sub _close {
- ##
- # Close database fh
- ##
- my $self = $_[0]->_get_self;
- close $self->_root->{fh} if $self->_root->{fh};
- $self->_root->{fh} = undef;
-}
-
-sub _create_tag {
- ##
- # Given offset, signature and content, create tag and write to disk
- ##
- my ($self, $offset, $sig, $content) = @_;
- my $size = length($content);
-
- my $fh = $self->_fh;
-
- seek($fh, $offset, SEEK_SET);
- print($fh $sig . pack($DATA_LENGTH_PACK, $size) . $content );
-
- if ($offset == $self->_root->{end}) {
- $self->_root->{end} += SIG_SIZE + $DATA_LENGTH_SIZE + $size;
- }
-
- return {
- signature => $sig,
- size => $size,
- offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE,
- content => $content
- };
-}
-
-sub _load_tag {
- ##
- # Given offset, load single tag and return signature, size and data
- ##
- my $self = shift;
- my $offset = shift;
-
- my $fh = $self->_fh;
-
- seek($fh, $offset, SEEK_SET);
- if (eof $fh) { return undef; }
-
- my $sig;
- read( $fh, $sig, SIG_SIZE);
-
- my $size;
- read( $fh, $size, $DATA_LENGTH_SIZE);
- $size = unpack($DATA_LENGTH_PACK, $size);
-
- my $buffer;
- read( $fh, $buffer, $size);
-
- return {
- signature => $sig,
- size => $size,
- offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE,
- content => $buffer
- };
-}
-
-sub _index_lookup {
- ##
- # Given index tag, lookup single entry in index and return .
- ##
- my $self = shift;
- my ($tag, $index) = @_;
-
- my $location = unpack($LONG_PACK, substr($tag->{content}, $index * $LONG_SIZE, $LONG_SIZE) );
- if (!$location) { return; }
-
- return $self->_load_tag( $location );
-}
-
-sub _add_bucket {
- ##
- # Adds one key/value pair to bucket list, given offset, MD5 digest of key,
- # plain (undigested) key and value.
- ##
- my $self = shift;
- my ($tag, $md5, $plain_key, $value) = @_;
- my $keys = $tag->{content};
- my $location = 0;
- my $result = 2;
-
- # added ref() check first to avoid eval and runtime exception for every
- # scalar value being stored. performance tweak.
- my $is_dbm_deep = eval { local $SIG{'__DIE__'}; $value->isa( 'DBM::Deep' ) };
-
- my $internal_ref = $is_dbm_deep && ($value->_root eq $self->_root);
-
- my $fh = $self->_fh;
-
- ##
- # Iterate through buckets, seeing if this is a new entry or a replace.
- ##
- for (my $i=0; $i<$MAX_BUCKETS; $i++) {
- my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
- my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
- if (!$subloc) {
- ##
- # Found empty bucket (end of list). Populate and exit loop.
- ##
- $result = 2;
-
- $location = $internal_ref
- ? $value->_base_offset
- : $self->_root->{end};
-
- seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE), SEEK_SET);
- print($fh $md5 . pack($LONG_PACK, $location) );
- last;
- }
- elsif ($md5 eq $key) {
- ##
- # Found existing bucket with same key. Replace with new value.
- ##
- $result = 1;
-
- if ($internal_ref) {
- $location = $value->_base_offset;
- seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE), SEEK_SET);
- print($fh $md5 . pack($LONG_PACK, $location) );
- }
- else {
- seek($fh, $subloc + SIG_SIZE, SEEK_SET);
- my $size;
- read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
-
- ##
- # If value is a hash, array, or raw value with equal or less size, we can
- # reuse the same content area of the database. Otherwise, we have to create
- # a new content area at the EOF.
- ##
- my $actual_length;
- my $r = Scalar::Util::reftype( $value ) || '';
- if ( $r eq 'HASH' || $r eq 'ARRAY' ) {
- $actual_length = $INDEX_SIZE;
-
- # if autobless is enabled, must also take into consideration
- # the class name, as it is stored along with key/value.
- if ( $self->_root->{autobless} ) {
- my $value_class = Scalar::Util::blessed($value);
- if ( defined $value_class && $value_class ne 'DBM::Deep' ) {
- $actual_length += length($value_class);
- }
- } # autobless
- }
- else { $actual_length = length($value); }
-
- if ($actual_length <= $size) {
- $location = $subloc;
- }
- else {
- $location = $self->_root->{end};
- seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $HASH_SIZE, SEEK_SET);
- print($fh pack($LONG_PACK, $location) );
- }
- }
- last;
- }
- } # i loop
-
- ##
- # If this is an internal reference, return now.
- # No need to write value or plain key
- ##
- if ($internal_ref) {
- return $result;
- }
-
- ##
- # If bucket didn't fit into list, split into a new index level
- ##
- if (!$location) {
- seek($fh, $tag->{ref_loc}, SEEK_SET);
- print($fh pack($LONG_PACK, $self->_root->{end}) );
-
- my $index_tag = $self->_create_tag($self->_root->{end}, SIG_INDEX, chr(0) x $INDEX_SIZE);
- my @offsets = ();
-
- $keys .= $md5 . pack($LONG_PACK, 0);
-
- for (my $i=0; $i<=$MAX_BUCKETS; $i++) {
- my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
- if ($key) {
- my $old_subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
- my $num = ord(substr($key, $tag->{ch} + 1, 1));
-
- if ($offsets[$num]) {
- my $offset = $offsets[$num] + SIG_SIZE + $DATA_LENGTH_SIZE;
- seek($fh, $offset, SEEK_SET);
- my $subkeys;
- read( $fh, $subkeys, $BUCKET_LIST_SIZE);
-
- for (my $k=0; $k<$MAX_BUCKETS; $k++) {
- my $subloc = unpack($LONG_PACK, substr($subkeys, ($k * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
- if (!$subloc) {
- seek($fh, $offset + ($k * $BUCKET_SIZE), SEEK_SET);
- print($fh $key . pack($LONG_PACK, $old_subloc || $self->_root->{end}) );
- last;
- }
- } # k loop
- }
- else {
- $offsets[$num] = $self->_root->{end};
- seek($fh, $index_tag->{offset} + ($num * $LONG_SIZE), SEEK_SET);
- print($fh pack($LONG_PACK, $self->_root->{end}) );
-
- my $blist_tag = $self->_create_tag($self->_root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE);
-
- seek($fh, $blist_tag->{offset}, SEEK_SET);
- print($fh $key . pack($LONG_PACK, $old_subloc || $self->_root->{end}) );
- }
- } # key is real
- } # i loop
-
- $location ||= $self->_root->{end};
- } # re-index bucket list
-
- ##
- # Seek to content area and store signature, value and plaintext key
- ##
- if ($location) {
- my $content_length;
- seek($fh, $location, SEEK_SET);
-
- ##
- # Write signature based on content type, set content length and write actual value.
- ##
- my $r = Scalar::Util::reftype($value) || '';
- if ($r eq 'HASH') {
- print($fh TYPE_HASH );
- print($fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE );
- $content_length = $INDEX_SIZE;
- }
- elsif ($r eq 'ARRAY') {
- print($fh TYPE_ARRAY );
- print($fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE );
- $content_length = $INDEX_SIZE;
- }
- elsif (!defined($value)) {
- print($fh SIG_NULL );
- print($fh pack($DATA_LENGTH_PACK, 0) );
- $content_length = 0;
- }
- else {
- print($fh SIG_DATA );
- print($fh pack($DATA_LENGTH_PACK, length($value)) . $value );
- $content_length = length($value);
- }
-
- ##
- # Plain key is stored AFTER value, as keys are typically fetched less often.
- ##
- print($fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key );
-
- ##
- # If value is blessed, preserve class name
- ##
- if ( $self->_root->{autobless} ) {
- my $value_class = Scalar::Util::blessed($value);
- if ( defined $value_class && $value_class ne 'DBM::Deep' ) {
- ##
- # Blessed ref -- will restore later
- ##
- print($fh chr(1) );
- print($fh pack($DATA_LENGTH_PACK, length($value_class)) . $value_class );
- $content_length += 1;
- $content_length += $DATA_LENGTH_SIZE + length($value_class);
- }
- else {
- print($fh chr(0) );
- $content_length += 1;
- }
- }
-
- ##
- # If this is a new content area, advance EOF counter
- ##
- if ($location == $self->_root->{end}) {
- $self->_root->{end} += SIG_SIZE;
- $self->_root->{end} += $DATA_LENGTH_SIZE + $content_length;
- $self->_root->{end} += $DATA_LENGTH_SIZE + length($plain_key);
- }
-
- ##
- # If content is a hash or array, create new child DBM::Deep object and
- # pass each key or element to it.
- ##
- if ($r eq 'HASH') {
- my $branch = DBM::Deep->new(
- type => TYPE_HASH,
- base_offset => $location,
- root => $self->_root,
- );
- foreach my $key (keys %{$value}) {
- $branch->STORE( $key, $value->{$key} );
- }
- }
- elsif ($r eq 'ARRAY') {
- my $branch = DBM::Deep->new(
- type => TYPE_ARRAY,
- base_offset => $location,
- root => $self->_root,
- );
- my $index = 0;
- foreach my $element (@{$value}) {
- $branch->STORE( $index, $element );
- $index++;
- }
- }
-
- return $result;
- }
-
- return $self->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file");
-}
-
-sub _get_bucket_value {
- ##
- # Fetch single value given tag and MD5 digested key.
- ##
- my $self = shift;
- my ($tag, $md5) = @_;
- my $keys = $tag->{content};
-
- my $fh = $self->_fh;
-
- ##
- # Iterate through buckets, looking for a key match
- ##
- BUCKET:
- for (my $i=0; $i<$MAX_BUCKETS; $i++) {
- my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
- my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
-
- if (!$subloc) {
- ##
- # Hit end of list, no match
- ##
- return;
- }
-
- if ( $md5 ne $key ) {
- next BUCKET;
- }
-
- ##
- # Found match -- seek to offset and read signature
- ##
- my $signature;
- seek($fh, $subloc, SEEK_SET);
- read( $fh, $signature, SIG_SIZE);
-
- ##
- # If value is a hash or array, return new DBM::Deep object with correct offset
- ##
- if (($signature eq TYPE_HASH) || ($signature eq TYPE_ARRAY)) {
- my $obj = DBM::Deep->new(
- type => $signature,
- base_offset => $subloc,
- root => $self->_root
- );
-
- if ($self->_root->{autobless}) {
- ##
- # Skip over value and plain key to see if object needs
- # to be re-blessed
- ##
- seek($fh, $DATA_LENGTH_SIZE + $INDEX_SIZE, SEEK_CUR);
-
- my $size;
- read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
- if ($size) { seek($fh, $size, SEEK_CUR); }
-
- my $bless_bit;
- read( $fh, $bless_bit, 1);
- if (ord($bless_bit)) {
- ##
- # Yes, object needs to be re-blessed
- ##
- my $class_name;
- read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
- if ($size) { read( $fh, $class_name, $size); }
- if ($class_name) { $obj = bless( $obj, $class_name ); }
- }
- }
-
- return $obj;
- }
-
- ##
- # Otherwise return actual value
- ##
- elsif ($signature eq SIG_DATA) {
- my $size;
- my $value = '';
- read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
- if ($size) { read( $fh, $value, $size); }
- return $value;
- }
-
- ##
- # Key exists, but content is null
- ##
- else { return; }
- } # i loop
-
- return;
-}
-
-sub _delete_bucket {
- ##
- # Delete single key/value pair given tag and MD5 digested key.
- ##
- my $self = shift;
- my ($tag, $md5) = @_;
- my $keys = $tag->{content};
-
- my $fh = $self->_fh;
-
- ##
- # Iterate through buckets, looking for a key match
- ##
- BUCKET:
- for (my $i=0; $i<$MAX_BUCKETS; $i++) {
- my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
- my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
-
- if (!$subloc) {
- ##
- # Hit end of list, no match
- ##
- return;
- }
-
- if ( $md5 ne $key ) {
- next BUCKET;
- }
-
- ##
- # Matched key -- delete bucket and return
- ##
- seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE), SEEK_SET);
- print($fh substr($keys, ($i+1) * $BUCKET_SIZE ) );
- print($fh chr(0) x $BUCKET_SIZE );
-
- return 1;
- } # i loop
-
- return;
-}
-
-sub _bucket_exists {
- ##
- # Check existence of single key given tag and MD5 digested key.
- ##
- my $self = shift;
- my ($tag, $md5) = @_;
- my $keys = $tag->{content};
-
- ##
- # Iterate through buckets, looking for a key match
- ##
- BUCKET:
- for (my $i=0; $i<$MAX_BUCKETS; $i++) {
- my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
- my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
-
- if (!$subloc) {
- ##
- # Hit end of list, no match
- ##
- return;
- }
-
- if ( $md5 ne $key ) {
- next BUCKET;
- }
-
- ##
- # Matched key -- return true
- ##
- return 1;
- } # i loop
-
- return;
-}
-
-sub _find_bucket_list {
- ##
- # Locate offset for bucket list, given digested key
- ##
- my $self = shift;
- my $md5 = shift;
-
- ##
- # Locate offset for bucket list using digest index system
- ##
- my $ch = 0;
- my $tag = $self->_load_tag($self->_base_offset);
- if (!$tag) { return; }
-
- while ($tag->{signature} ne SIG_BLIST) {
- $tag = $self->_index_lookup($tag, ord(substr($md5, $ch, 1)));
- if (!$tag) { return; }
- $ch++;
- }
-
- return $tag;
-}
-
-sub _traverse_index {
- ##
- # Scan index and recursively step into deeper levels, looking for next key.
- ##
- my ($self, $offset, $ch, $force_return_next) = @_;
- $force_return_next = undef unless $force_return_next;
-
- my $tag = $self->_load_tag( $offset );
-
- my $fh = $self->_fh;
-
- if ($tag->{signature} ne SIG_BLIST) {
- my $content = $tag->{content};
- my $start;
- if ($self->{return_next}) { $start = 0; }
- else { $start = ord(substr($self->{prev_md5}, $ch, 1)); }
-
- for (my $index = $start; $index < 256; $index++) {
- my $subloc = unpack($LONG_PACK, substr($content, $index * $LONG_SIZE, $LONG_SIZE) );
- if ($subloc) {
- my $result = $self->_traverse_index( $subloc, $ch + 1, $force_return_next );
- if (defined($result)) { return $result; }
- }
- } # index loop
-
- $self->{return_next} = 1;
- } # tag is an index
-
- elsif ($tag->{signature} eq SIG_BLIST) {
- my $keys = $tag->{content};
- if ($force_return_next) { $self->{return_next} = 1; }
-
- ##
- # Iterate through buckets, looking for a key match
- ##
- for (my $i=0; $i<$MAX_BUCKETS; $i++) {
- my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE);
- my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE));
-
- if (!$subloc) {
- ##
- # End of bucket list -- return to outer loop
- ##
- $self->{return_next} = 1;
- last;
- }
- elsif ($key eq $self->{prev_md5}) {
- ##
- # Located previous key -- return next one found
- ##
- $self->{return_next} = 1;
- next;
- }
- elsif ($self->{return_next}) {
- ##
- # Seek to bucket location and skip over signature
- ##
- seek($fh, $subloc + SIG_SIZE, SEEK_SET);
-
- ##
- # Skip over value to get to plain key
- ##
- my $size;
- read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
- if ($size) { seek($fh, $size, SEEK_CUR); }
-
- ##
- # Read in plain key and return as scalar
- ##
- my $plain_key;
- read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size);
- if ($size) { read( $fh, $plain_key, $size); }
-
- return $plain_key;
- }
- } # bucket loop
-
- $self->{return_next} = 1;
- } # tag is a bucket list
-
- return;
-}
-
-sub _get_next_key {
- ##
- # Locate next key, given digested previous one
- ##
- my $self = $_[0]->_get_self;
-
- $self->{prev_md5} = $_[1] ? $_[1] : undef;
- $self->{return_next} = 0;
-
- ##
- # If the previous key was not specifed, start at the top and
- # return the first one found.
- ##
- if (!$self->{prev_md5}) {
- $self->{prev_md5} = chr(0) x $HASH_SIZE;
- $self->{return_next} = 1;
- }
-
- return $self->_traverse_index( $self->_base_offset, 0 );
-}
-
sub lock {
##
# If db locking is set, flock() the db file. If called multiple
# double-check file inode, in case another process
# has optimize()d our file while we were waiting.
if ($stats[1] != $self->_root->{inode}) {
- $self->_open(); # re-open
+ $self->{engine}->open( $self ); # re-open
flock($self->_fh, $type); # re-lock
$self->_root->{end} = (stat($self->_fh))[7]; # re-end
}
return;
}
-#XXX These uses of ref() need verified
+sub _copy_value {
+ my $self = shift->_get_self;
+ my ($spot, $value) = @_;
+
+ if ( !ref $value ) {
+ ${$spot} = $value;
+ }
+ elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep' ) } ) {
+ my $type = $value->_type;
+ ${$spot} = $type eq TYPE_HASH ? {} : [];
+ $value->_copy_node( ${$spot} );
+ }
+ else {
+ my $r = Scalar::Util::reftype( $value );
+ my $c = Scalar::Util::blessed( $value );
+ if ( $r eq 'ARRAY' ) {
+ ${$spot} = [ @{$value} ];
+ }
+ else {
+ ${$spot} = { %{$value} };
+ }
+ ${$spot} = bless ${$spot}, $c
+ if defined $c;
+ }
+
+ return 1;
+}
+
sub _copy_node {
##
# Copy single level of keys or elements to new DB handle.
# Recurse for nested structures
##
- my $self = $_[0]->_get_self;
- my $db_temp = $_[1];
+ my $self = shift->_get_self;
+ my ($db_temp) = @_;
if ($self->_type eq TYPE_HASH) {
my $key = $self->first_key();
while ($key) {
my $value = $self->get($key);
-#XXX This doesn't work with autobless
- if (!ref($value)) { $db_temp->{$key} = $value; }
- else {
- my $type = $value->_type;
- if ($type eq TYPE_HASH) { $db_temp->{$key} = {}; }
- else { $db_temp->{$key} = []; }
- $value->_copy_node( $db_temp->{$key} );
- }
+ $self->_copy_value( \$db_temp->{$key}, $value );
$key = $self->next_key($key);
}
}
my $length = $self->length();
for (my $index = 0; $index < $length; $index++) {
my $value = $self->get($index);
- if (!ref($value)) { $db_temp->[$index] = $value; }
- #XXX NO tests for this code
- else {
- my $type = $value->_type;
- if ($type eq TYPE_HASH) { $db_temp->[$index] = {}; }
- else { $db_temp->[$index] = []; }
- $value->_copy_node( $db_temp->[$index] );
- }
+ $self->_copy_value( \$db_temp->[$index], $value );
}
}
+
+ return 1;
}
sub export {
# with a soft copy.
##
$self->unlock();
- $self->_close();
+ $self->{engine}->close( $self );
}
if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) {
}
$self->unlock();
- $self->_close();
- $self->_open();
+ $self->{engine}->close( $self );
+ $self->{engine}->open( $self );
return 1;
}
return $self->{base_offset};
}
-sub error {
- ##
- # Get last error string, or undef if no error
- ##
- return $_[0]
- #? ( _get_self($_[0])->{root}->{error} or undef )
- ? ( $_[0]->_get_self->{root}->{error} or undef )
- : $@;
-}
-
##
# Utility methods
##
sub _throw_error {
- ##
- # Store error string in self
- ##
- my $error_text = $_[1];
-
- if ( Scalar::Util::blessed $_[0] ) {
- my $self = $_[0]->_get_self;
- $self->_root->{error} = $error_text;
-
- unless ($self->_root->{debug}) {
- die "DBM::Deep: $error_text\n";
- }
-
- warn "DBM::Deep: $error_text\n";
- return;
- }
- else {
- die "DBM::Deep: $error_text\n";
- }
-}
-
-sub clear_error {
- ##
- # Clear error state
- ##
- my $self = $_[0]->_get_self;
-
- undef $self->_root->{error};
-}
-
-sub _precalc_sizes {
- ##
- # Precalculate index, bucket and bucket list sizes
- ##
-
- #XXX I don't like this ...
- set_pack() unless defined $LONG_SIZE;
-
- $INDEX_SIZE = 256 * $LONG_SIZE;
- $BUCKET_SIZE = $HASH_SIZE + $LONG_SIZE;
- $BUCKET_LIST_SIZE = $MAX_BUCKETS * $BUCKET_SIZE;
+ die "DBM::Deep: $_[1]\n";
}
-sub set_pack {
- ##
- # Set pack/unpack modes (see file header for more)
- ##
- my ($long_s, $long_p, $data_s, $data_p) = @_;
-
- $LONG_SIZE = $long_s ? $long_s : 4;
- $LONG_PACK = $long_p ? $long_p : 'N';
-
- $DATA_LENGTH_SIZE = $data_s ? $data_s : 4;
- $DATA_LENGTH_PACK = $data_p ? $data_p : 'N';
-
- _precalc_sizes();
+sub _is_writable {
+ my $fh = shift;
+ (O_WRONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0);
}
-sub set_digest {
- ##
- # Set key digest function (default is MD5)
- ##
- my ($digest_func, $hash_size) = @_;
-
- $DIGEST_FUNC = $digest_func ? $digest_func : \&Digest::MD5::md5;
- $HASH_SIZE = $hash_size ? $hash_size : 16;
-
- _precalc_sizes();
-}
+#sub _is_readable {
+# my $fh = shift;
+# (O_RDONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0);
+#}
##
# tie() methods (hashes and arrays)
? $self->_root->{filter_store_value}->($_[2])
: $_[2];
- my $md5 = $DIGEST_FUNC->($key);
+ my $md5 = $DBM::Deep::Engine::DIGEST_FUNC->($key);
- ##
- # Make sure file is open
- ##
- if (!defined($self->_fh) && !$self->_open()) {
- return;
- }
- ##
+ unless ( _is_writable( $self->_fh ) ) {
+ $self->_throw_error( 'Cannot write to a readonly filehandle' );
+ }
##
# Request exclusive lock for writing
##
# Locate offset for bucket list using digest index system
##
- my $tag = $self->_load_tag($self->_base_offset);
+ my $tag = $self->{engine}->load_tag($self, $self->_base_offset);
if (!$tag) {
- $tag = $self->_create_tag($self->_base_offset, SIG_INDEX, chr(0) x $INDEX_SIZE);
+ $tag = $self->{engine}->create_tag($self, $self->_base_offset, SIG_INDEX, chr(0) x $DBM::Deep::Engine::INDEX_SIZE);
}
my $ch = 0;
while ($tag->{signature} ne SIG_BLIST) {
my $num = ord(substr($md5, $ch, 1));
- my $new_tag = $self->_index_lookup($tag, $num);
+
+ my $ref_loc = $tag->{offset} + ($num * $DBM::Deep::Engine::LONG_SIZE);
+ my $new_tag = $self->{engine}->index_lookup($self, $tag, $num);
+
if (!$new_tag) {
- my $ref_loc = $tag->{offset} + ($num * $LONG_SIZE);
- seek($fh, $ref_loc, SEEK_SET);
- print($fh pack($LONG_PACK, $self->_root->{end}) );
+ seek($fh, $ref_loc + $self->_root->{file_offset}, SEEK_SET);
+ print( $fh pack($DBM::Deep::Engine::LONG_PACK, $self->_root->{end}) );
- $tag = $self->_create_tag($self->_root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE);
+ $tag = $self->{engine}->create_tag($self, $self->_root->{end}, SIG_BLIST, chr(0) x $DBM::Deep::Engine::BUCKET_LIST_SIZE);
+
$tag->{ref_loc} = $ref_loc;
$tag->{ch} = $ch;
+
last;
}
else {
- my $ref_loc = $tag->{offset} + ($num * $LONG_SIZE);
$tag = $new_tag;
+
$tag->{ref_loc} = $ref_loc;
$tag->{ch} = $ch;
}
##
# Add key/value to bucket list
##
- my $result = $self->_add_bucket( $tag, $md5, $key, $value );
+ my $result = $self->{engine}->add_bucket( $self, $tag, $md5, $key, $value );
$self->unlock();
my $self = shift->_get_self;
my $key = shift;
- ##
- # Make sure file is open
- ##
- if (!defined($self->_fh)) { $self->_open(); }
-
- my $md5 = $DIGEST_FUNC->($key);
+ my $md5 = $DBM::Deep::Engine::DIGEST_FUNC->($key);
##
# Request shared lock for reading
##
$self->lock( LOCK_SH );
- my $tag = $self->_find_bucket_list( $md5 );
+ my $tag = $self->{engine}->find_bucket_list( $self, $md5 );
if (!$tag) {
$self->unlock();
return;
##
# Get value from bucket list
##
- my $result = $self->_get_bucket_value( $tag, $md5 );
+ my $result = $self->{engine}->get_bucket_value( $self, $tag, $md5 );
$self->unlock();
my $self = $_[0]->_get_self;
my $key = $_[1];
- my $md5 = $DIGEST_FUNC->($key);
+ my $md5 = $DBM::Deep::Engine::DIGEST_FUNC->($key);
##
- # Make sure file is open
- ##
- if (!defined($self->_fh)) { $self->_open(); }
-
- ##
# Request exclusive lock for writing
##
$self->lock( LOCK_EX );
- my $tag = $self->_find_bucket_list( $md5 );
+ my $tag = $self->{engine}->find_bucket_list( $self, $md5 );
if (!$tag) {
$self->unlock();
return;
##
# Delete bucket
##
- my $value = $self->_get_bucket_value( $tag, $md5 );
+ my $value = $self->{engine}->get_bucket_value($self, $tag, $md5 );
if ($value && !ref($value) && $self->_root->{filter_fetch_value}) {
$value = $self->_root->{filter_fetch_value}->($value);
}
- my $result = $self->_delete_bucket( $tag, $md5 );
+ my $result = $self->{engine}->delete_bucket( $self, $tag, $md5 );
##
# If this object is an array and the key deleted was on the end of the stack,
my $self = $_[0]->_get_self;
my $key = $_[1];
- my $md5 = $DIGEST_FUNC->($key);
+ my $md5 = $DBM::Deep::Engine::DIGEST_FUNC->($key);
##
- # Make sure file is open
- ##
- if (!defined($self->_fh)) { $self->_open(); }
-
- ##
# Request shared lock for reading
##
$self->lock( LOCK_SH );
- my $tag = $self->_find_bucket_list( $md5 );
+ my $tag = $self->{engine}->find_bucket_list( $self, $md5 );
##
# For some reason, the built-in exists() function returns '' for false
##
# Check if bucket exists and return 1 or ''
##
- my $result = $self->_bucket_exists( $tag, $md5 ) || '';
+ my $result = $self->{engine}->bucket_exists( $self, $tag, $md5 ) || '';
$self->unlock();
my $self = $_[0]->_get_self;
##
- # Make sure file is open
- ##
- if (!defined($self->_fh)) { $self->_open(); }
-
- ##
# Request exclusive lock for writing
##
$self->lock( LOCK_EX );
my $fh = $self->_fh;
- seek($fh, $self->_base_offset, SEEK_SET);
+ seek($fh, $self->_base_offset + $self->_root->{file_offset}, SEEK_SET);
if (eof $fh) {
$self->unlock();
return;
}
- $self->_create_tag($self->_base_offset, $self->_type, chr(0) x $INDEX_SIZE);
+ $self->{engine}->create_tag($self, $self->_base_offset, $self->_type, chr(0) x $DBM::Deep::Engine::INDEX_SIZE);
$self->unlock();
my $self = bless {
file => undef,
fh => undef,
+ file_offset => 0,
end => 0,
autoflush => undef,
locking => undef,
%$args,
}, $class;
+ if ( $self->{fh} && !$self->{file_offset} ) {
+ $self->{file_offset} = tell( $self->{fh} );
+ }
+
return $self;
}
Filename of the DB file to link the handle to. You can pass a full absolute
filesystem path, partial path, or a plain filename if the file is in the
-current working directory. This is a required parameter.
+current working directory. This is a required parameter (though q.v. fh).
+
+=item * fh
+
+If you want, you can pass in the fh instead of the file. This is most useful for doing
+something like:
+
+ my $db = DBM::Deep->new( { fh => \*DATA } );
+
+You are responsible for making sure that the fh has been opened appropriately for your
+needs. If you open it read-only and attempt to write, an exception will be thrown. If you
+open it write-only or append-only, an exception will be thrown immediately as DBM::Deep
+needs to read from the fh.
+
+=item * file_offset
+
+This is the offset within the file at which the DBM::Deep db starts. Most of the time, you will
+not need to set this. However, it's there if you want it.
+
+If you pass in fh and do not set this, it will be set appropriately.
=item * type
STDERR, and continue on. This is for debugging purposes only, and probably
not what you want. This is an optional parameter, and defaults to 0 (disabled).
-=item * fh
-
-Instead of passing a file path, you can instead pass a handle to an pre-opened
-filehandle. Note: Beware of using the magick *DATA handle, as this actually
-contains your entire Perl script, as well as the data following the __DATA__
-marker. This will not work, because DBM::Deep uses absolute seek()s into the
-file. Instead, consider reading *DATA into an IO::Scalar handle, then passing
-in that. Also please note optimize() will NOT work when passing in only a
-handle. Pass in a real filename in order to use optimize().
+B<NOTE>: This parameter is considered deprecated and should not be used anymore.
=back
q.v. adjusting the interal parameters.
-=item * error() / clear_error()
-
-Error handling methods (may be deprecated).
-.
=back
=head2 HASHES
=head1 ERROR HANDLING
Most DBM::Deep methods return a true value for success, and call die() on
-failure. You can wrap calls in an eval block to catch the die. Also, the
-actual error message is stored in an internal scalar, which can be fetched by
-calling the C<error()> method.
+failure. You can wrap calls in an eval block to catch the die.
my $db = DBM::Deep->new( "foo.db" ); # create hash
eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call
print $@; # prints error message
- print $db->error(); # prints error message
-
-You can then call C<clear_error()> to clear the current error state.
-
- $db->clear_error();
-
-If you set the C<debug> option to true when creating your DBM::Deep object,
-all errors are considered NON-FATAL, and dumped to STDERR. This should only
-be used for debugging purposes and not production work. DBM::Deep expects errors
-to be thrown, not propagated back up the stack.
=head1 LARGEFILE SUPPORT
on DBM::Deep, as every element has to be fetched from disk, then stored again in
a different location. This will be addressed in the forthcoming version 1.00.
+=head2 WRITEONLY FILES
+
+If you pass in a filehandle to new(), you may have opened it in either readonly or
+writeonly mode. STORE will verify that the filehandle is writable. However, there
+doesn't seem to be a good way to determine if a filehandle is readable. And, if the
+filehandle isn't readable, it's not clear what will happen. So, don't do that.
+
=head1 PERFORMANCE
This section discusses DBM::Deep's speed and memory usage.
---------------------------- ------ ------ ------ ------ ------ ------ ------
File stmt bran cond sub pod time total
---------------------------- ------ ------ ------ ------ ------ ------ ------
- blib/lib/DBM/Deep.pm 94.1 82.5 68.7 98.1 100.0 58.0 89.9
- blib/lib/DBM/Deep/Array.pm 98.9 88.9 87.5 100.0 n/a 28.9 96.4
- blib/lib/DBM/Deep/Hash.pm 95.3 80.0 100.0 100.0 n/a 13.2 92.4
- Total 95.1 83.4 72.8 98.8 100.0 100.0 91.3
+ blib/lib/DBM/Deep.pm 95.2 83.8 70.0 98.2 100.0 58.0 91.0
+ blib/lib/DBM/Deep/Array.pm 100.0 91.1 100.0 100.0 n/a 26.7 98.0
+ blib/lib/DBM/Deep/Hash.pm 95.3 80.0 100.0 100.0 n/a 15.3 92.4
+ Total 96.2 84.8 74.4 98.8 100.0 100.0 92.4
---------------------------- ------ ------ ------ ------ ------ ------ ------
=head1 MORE INFORMATION