X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDBM%2FDeep.pm;h=9346bb408934797e86618d91fc2501555a2dd870;hb=1bf65be7994492bbe8373ec4167915f304116a37;hp=448ac8640971f24ee398049a9d67629086289c8f;hpb=e3cb84dc2588f2fcc3af31941e39bdc7fb273a02;p=dbsrgits%2FDBM-Deep.git diff --git a/lib/DBM/Deep.pm b/lib/DBM/Deep.pm index 448ac86..9346bb4 100644 --- a/lib/DBM/Deep.pm +++ b/lib/DBM/Deep.pm @@ -24,7 +24,7 @@ package DBM::Deep; # print "This module " . $db->{my_complex}->[1]->{perl} . "!\n"; # # Copyright: -# (c) 2002-2005 Joseph Huckaby. All Rights Reserved. +# (c) 2002-2006 Joseph Huckaby. All Rights Reserved. # This program is free software; you can redistribute it and/or # modify it under the same terms as Perl itself. ## @@ -35,54 +35,11 @@ use Fcntl qw( :DEFAULT :flock :seek ); use Digest::MD5 (); use Scalar::Util (); -use vars qw( $VERSION ); -$VERSION = q(0.98); - -## -# Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file. -# (Perl must be compiled with largefile support for files > 2 GB) -# -# Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file. -# (Perl must be compiled with largefile and 64-bit long support) -## -#my $LONG_SIZE = 4; -#my $LONG_PACK = 'N'; - -## -# Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value. -# Upgrading this is possible (see above) but probably not necessary. If you need -# more than 4 GB for a single key or value, this module is really not for you :-) -## -#my $DATA_LENGTH_SIZE = 4; -#my $DATA_LENGTH_PACK = 'N'; -our ($LONG_SIZE, $LONG_PACK, $DATA_LENGTH_SIZE, $DATA_LENGTH_PACK); - -## -# Maximum number of buckets per list before another level of indexing is done. -# Increase this value for slightly greater speed, but larger database files. -# DO NOT decrease this value below 16, due to risk of recursive reindex overrun. 
-## -my $MAX_BUCKETS = 16; +use DBM::Deep::Engine; -## -# Better not adjust anything below here, unless you're me :-) -## - -## -# Setup digest function for keys -## -our ($DIGEST_FUNC, $HASH_SIZE); -#my $DIGEST_FUNC = \&Digest::MD5::md5; - -## -# Precalculate index and bucket sizes based on values above. -## -#my $HASH_SIZE = 16; -my ($INDEX_SIZE, $BUCKET_SIZE, $BUCKET_LIST_SIZE); +use vars qw( $VERSION ); +$VERSION = q(0.99_01); -set_digest(); -#set_pack(); -#precalc_sizes(); ## # Setup file and tag signatures. These should never change. @@ -100,9 +57,9 @@ sub SIG_SIZE () { 1 } ## # Setup constants for users to pass to new() ## -sub TYPE_HASH () { return SIG_HASH; } -sub TYPE_ARRAY () { return SIG_ARRAY; } -sub TYPE_SCALAR () { return SIG_SCALAR; } +sub TYPE_HASH () { SIG_HASH } +sub TYPE_ARRAY () { SIG_ARRAY } +sub TYPE_SCALAR () { SIG_SCALAR } sub _get_args { my $proto = shift; @@ -114,8 +71,8 @@ sub _get_args { } $args = {@_}; } - elsif ( my $type = Scalar::Util::reftype($_[0]) ) { - if ( $type ne 'HASH' ) { + elsif ( ref $_[0] ) { + unless ( eval { local $SIG{'__DIE__'}; %{$_[0]} || 1 } ) { $proto->_throw_error( "Not a hashref in args to " . (caller(1))[2] ); } $args = $_[0]; @@ -163,8 +120,9 @@ sub _init { # These are the defaults to be optionally overridden below my $self = bless { - type => TYPE_HASH, + type => TYPE_HASH, base_offset => length(SIG_FILE), + engine => 'DBM::Deep::Engine', }, $class; foreach my $param ( keys %$self ) { @@ -179,7 +137,7 @@ sub _init { ? $args->{root} : DBM::Deep::_::Root->new( $args ); - if (!defined($self->fh)) { $self->_open(); } + if (!defined($self->_fh)) { $self->{engine}->open( $self ); } return $self; } @@ -200,721 +158,6 @@ sub TIEARRAY { #sub DESTROY { #} -sub _open { - ## - # Open a fh to the database, create if nonexistent. - # Make sure file signature matches DBM::Deep spec. 
- ## - my $self = $_[0]->_get_self; - - if (defined($self->fh)) { $self->_close(); } - - eval { - # Theoretically, adding O_BINARY should remove the need for the binmode - # Of course, testing it is going to be ... interesting. - my $flags = O_RDWR | O_CREAT | O_BINARY; - - my $fh; - sysopen( $fh, $self->root->{file}, $flags ) - or $fh = undef; - $self->root->{fh} = $fh; - }; if ($@ ) { $self->_throw_error( "Received error: $@\n" ); } - if (! defined($self->fh)) { - return $self->_throw_error("Cannot sysopen file: " . $self->root->{file} . ": $!"); - } - - my $fh = $self->fh; - - #XXX Can we remove this by using the right sysopen() flags? - # Maybe ... q.v. above - binmode $fh; # for win32 - - if ($self->root->{autoflush}) { - my $old = select $fh; - $|=1; - select $old; - } - - # Set the - seek($fh, 0, SEEK_SET); - - my $signature; - my $bytes_read = read( $fh, $signature, length(SIG_FILE)); - - ## - # File is empty -- write signature and master index - ## - if (!$bytes_read) { - seek($fh, 0, SEEK_SET); - print($fh SIG_FILE); - $self->_create_tag($self->base_offset, $self->type, chr(0) x $INDEX_SIZE); - - my $plain_key = "[base]"; - print($fh pack($DATA_LENGTH_PACK, length($plain_key)) . 
$plain_key ); - - # Flush the filehandle - my $old_fh = select $fh; - my $old_af = $|; - $| = 1; - $| = $old_af; - select $old_fh; - - my @stats = stat($fh); - $self->root->{inode} = $stats[1]; - $self->root->{end} = $stats[7]; - - return 1; - } - - ## - # Check signature was valid - ## - unless ($signature eq SIG_FILE) { - $self->_close(); - return $self->_throw_error("Signature not found -- file is not a Deep DB"); - } - - my @stats = stat($fh); - $self->root->{inode} = $stats[1]; - $self->root->{end} = $stats[7]; - - ## - # Get our type from master index signature - ## - my $tag = $self->_load_tag($self->base_offset); - -#XXX We probably also want to store the hash algorithm name and not assume anything -#XXX The cool thing would be to allow a different hashing algorithm at every level - - if (!$tag) { - return $self->_throw_error("Corrupted file, no master index record"); - } - if ($self->{type} ne $tag->{signature}) { - return $self->_throw_error("File type mismatch"); - } - - return 1; -} - -sub _close { - ## - # Close database fh - ## - my $self = $_[0]->_get_self; - close $self->root->{fh} if $self->root->{fh}; - $self->root->{fh} = undef; -} - -sub _create_tag { - ## - # Given offset, signature and content, create tag and write to disk - ## - my ($self, $offset, $sig, $content) = @_; - my $size = length($content); - - my $fh = $self->fh; - - seek($fh, $offset, SEEK_SET); - print($fh $sig . pack($DATA_LENGTH_PACK, $size) . 
$content ); - - if ($offset == $self->root->{end}) { - $self->root->{end} += SIG_SIZE + $DATA_LENGTH_SIZE + $size; - } - - return { - signature => $sig, - size => $size, - offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE, - content => $content - }; -} - -sub _load_tag { - ## - # Given offset, load single tag and return signature, size and data - ## - my $self = shift; - my $offset = shift; - - my $fh = $self->fh; - - seek($fh, $offset, SEEK_SET); - if (eof $fh) { return undef; } - - my $sig; - read( $fh, $sig, SIG_SIZE); - - my $size; - read( $fh, $size, $DATA_LENGTH_SIZE); - $size = unpack($DATA_LENGTH_PACK, $size); - - my $buffer; - read( $fh, $buffer, $size); - - return { - signature => $sig, - size => $size, - offset => $offset + SIG_SIZE + $DATA_LENGTH_SIZE, - content => $buffer - }; -} - -sub _index_lookup { - ## - # Given index tag, lookup single entry in index and return . - ## - my $self = shift; - my ($tag, $index) = @_; - - my $location = unpack($LONG_PACK, substr($tag->{content}, $index * $LONG_SIZE, $LONG_SIZE) ); - if (!$location) { return; } - - return $self->_load_tag( $location ); -} - -sub _add_bucket { - ## - # Adds one key/value pair to bucket list, given offset, MD5 digest of key, - # plain (undigested) key and value. - ## - my $self = shift; - my ($tag, $md5, $plain_key, $value) = @_; - my $keys = $tag->{content}; - my $location = 0; - my $result = 2; - - # added ref() check first to avoid eval and runtime exception for every - # scalar value being stored. performance tweak. - my $is_dbm_deep = ref($value) && eval { $value->isa( 'DBM::Deep' ) }; - - my $internal_ref = $is_dbm_deep && ($value->root eq $self->root); - - my $fh = $self->fh; - - ## - # Iterate through buckets, seeing if this is a new entry or a replace. 
- ## - for (my $i=0; $i<$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - if (!$subloc) { - ## - # Found empty bucket (end of list). Populate and exit loop. - ## - $result = 2; - - $location = $internal_ref - ? $value->base_offset - : $self->root->{end}; - - seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE), SEEK_SET); - print($fh $md5 . pack($LONG_PACK, $location) ); - last; - } - elsif ($md5 eq $key) { - ## - # Found existing bucket with same key. Replace with new value. - ## - $result = 1; - - if ($internal_ref) { - $location = $value->base_offset; - seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE), SEEK_SET); - print($fh $md5 . pack($LONG_PACK, $location) ); - } - else { - seek($fh, $subloc + SIG_SIZE, SEEK_SET); - my $size; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - - ## - # If value is a hash, array, or raw value with equal or less size, we can - # reuse the same content area of the database. Otherwise, we have to create - # a new content area at the EOF. - ## - my $actual_length; - my $r = Scalar::Util::reftype( $value ) || ''; - if ( $r eq 'HASH' || $r eq 'ARRAY' ) { - $actual_length = $INDEX_SIZE; - - # if autobless is enabled, must also take into consideration - # the class name, as it is stored along with key/value. - if ( $self->root->{autobless} ) { - my $value_class = Scalar::Util::blessed($value); - if ( defined $value_class && $value_class ne 'DBM::Deep' ) { - $actual_length += length($value_class); - } - } # autobless - } - else { $actual_length = length($value); } - - if ($actual_length <= $size) { - $location = $subloc; - } - else { - $location = $self->root->{end}; - seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE) + $HASH_SIZE, SEEK_SET); - print($fh pack($LONG_PACK, $location) ); - } - } - last; - } - } # i loop - - ## - # If this is an internal reference, return now. 
- # No need to write value or plain key - ## - if ($internal_ref) { - return $result; - } - - ## - # If bucket didn't fit into list, split into a new index level - ## - if (!$location) { - seek($fh, $tag->{ref_loc}, SEEK_SET); - print($fh pack($LONG_PACK, $self->root->{end}) ); - - my $index_tag = $self->_create_tag($self->root->{end}, SIG_INDEX, chr(0) x $INDEX_SIZE); - my @offsets = (); - - $keys .= $md5 . pack($LONG_PACK, 0); - - for (my $i=0; $i<=$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - if ($key) { - my $old_subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - my $num = ord(substr($key, $tag->{ch} + 1, 1)); - - if ($offsets[$num]) { - my $offset = $offsets[$num] + SIG_SIZE + $DATA_LENGTH_SIZE; - seek($fh, $offset, SEEK_SET); - my $subkeys; - read( $fh, $subkeys, $BUCKET_LIST_SIZE); - - for (my $k=0; $k<$MAX_BUCKETS; $k++) { - my $subloc = unpack($LONG_PACK, substr($subkeys, ($k * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - if (!$subloc) { - seek($fh, $offset + ($k * $BUCKET_SIZE), SEEK_SET); - print($fh $key . pack($LONG_PACK, $old_subloc || $self->root->{end}) ); - last; - } - } # k loop - } - else { - $offsets[$num] = $self->root->{end}; - seek($fh, $index_tag->{offset} + ($num * $LONG_SIZE), SEEK_SET); - print($fh pack($LONG_PACK, $self->root->{end}) ); - - my $blist_tag = $self->_create_tag($self->root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE); - - seek($fh, $blist_tag->{offset}, SEEK_SET); - print($fh $key . pack($LONG_PACK, $old_subloc || $self->root->{end}) ); - } - } # key is real - } # i loop - - $location ||= $self->root->{end}; - } # re-index bucket list - - ## - # Seek to content area and store signature, value and plaintext key - ## - if ($location) { - my $content_length; - seek($fh, $location, SEEK_SET); - - ## - # Write signature based on content type, set content length and write actual value. 
- ## - my $r = Scalar::Util::reftype($value) || ''; - if ($r eq 'HASH') { - print($fh TYPE_HASH ); - print($fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE ); - $content_length = $INDEX_SIZE; - } - elsif ($r eq 'ARRAY') { - print($fh TYPE_ARRAY ); - print($fh pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE ); - $content_length = $INDEX_SIZE; - } - elsif (!defined($value)) { - print($fh SIG_NULL ); - print($fh pack($DATA_LENGTH_PACK, 0) ); - $content_length = 0; - } - else { - print($fh SIG_DATA ); - print($fh pack($DATA_LENGTH_PACK, length($value)) . $value ); - $content_length = length($value); - } - - ## - # Plain key is stored AFTER value, as keys are typically fetched less often. - ## - print($fh pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key ); - - ## - # If value is blessed, preserve class name - ## - if ( $self->root->{autobless} ) { - my $value_class = Scalar::Util::blessed($value); - if ( defined $value_class && $value_class ne 'DBM::Deep' ) { - ## - # Blessed ref -- will restore later - ## - print($fh chr(1) ); - print($fh pack($DATA_LENGTH_PACK, length($value_class)) . $value_class ); - $content_length += 1; - $content_length += $DATA_LENGTH_SIZE + length($value_class); - } - else { - print($fh chr(0) ); - $content_length += 1; - } - } - - ## - # If this is a new content area, advance EOF counter - ## - if ($location == $self->root->{end}) { - $self->root->{end} += SIG_SIZE; - $self->root->{end} += $DATA_LENGTH_SIZE + $content_length; - $self->root->{end} += $DATA_LENGTH_SIZE + length($plain_key); - } - - ## - # If content is a hash or array, create new child DBM::Deep object and - # pass each key or element to it. 
- ## - if ($r eq 'HASH') { - my $branch = DBM::Deep->new( - type => TYPE_HASH, - base_offset => $location, - root => $self->root, - ); - foreach my $key (keys %{$value}) { - $branch->STORE( $key, $value->{$key} ); - } - } - elsif ($r eq 'ARRAY') { - my $branch = DBM::Deep->new( - type => TYPE_ARRAY, - base_offset => $location, - root => $self->root, - ); - my $index = 0; - foreach my $element (@{$value}) { - $branch->STORE( $index, $element ); - $index++; - } - } - - return $result; - } - - return $self->_throw_error("Fatal error: indexing failed -- possibly due to corruption in file"); -} - -sub _get_bucket_value { - ## - # Fetch single value given tag and MD5 digested key. - ## - my $self = shift; - my ($tag, $md5) = @_; - my $keys = $tag->{content}; - - my $fh = $self->fh; - - ## - # Iterate through buckets, looking for a key match - ## - BUCKET: - for (my $i=0; $i<$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - - if (!$subloc) { - ## - # Hit end of list, no match - ## - return; - } - - if ( $md5 ne $key ) { - next BUCKET; - } - - ## - # Found match -- seek to offset and read signature - ## - my $signature; - seek($fh, $subloc, SEEK_SET); - read( $fh, $signature, SIG_SIZE); - - ## - # If value is a hash or array, return new DBM::Deep object with correct offset - ## - if (($signature eq TYPE_HASH) || ($signature eq TYPE_ARRAY)) { - my $obj = DBM::Deep->new( - type => $signature, - base_offset => $subloc, - root => $self->root - ); - - if ($self->root->{autobless}) { - ## - # Skip over value and plain key to see if object needs - # to be re-blessed - ## - seek($fh, $DATA_LENGTH_SIZE + $INDEX_SIZE, SEEK_CUR); - - my $size; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - if ($size) { seek($fh, $size, SEEK_CUR); } - - my $bless_bit; - read( $fh, $bless_bit, 1); - if (ord($bless_bit)) { - ## - # Yes, 
object needs to be re-blessed - ## - my $class_name; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - if ($size) { read( $fh, $class_name, $size); } - if ($class_name) { $obj = bless( $obj, $class_name ); } - } - } - - return $obj; - } - - ## - # Otherwise return actual value - ## - elsif ($signature eq SIG_DATA) { - my $size; - my $value = ''; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - if ($size) { read( $fh, $value, $size); } - return $value; - } - - ## - # Key exists, but content is null - ## - else { return; } - } # i loop - - return; -} - -sub _delete_bucket { - ## - # Delete single key/value pair given tag and MD5 digested key. - ## - my $self = shift; - my ($tag, $md5) = @_; - my $keys = $tag->{content}; - - my $fh = $self->fh; - - ## - # Iterate through buckets, looking for a key match - ## - BUCKET: - for (my $i=0; $i<$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - - if (!$subloc) { - ## - # Hit end of list, no match - ## - return; - } - - if ( $md5 ne $key ) { - next BUCKET; - } - - ## - # Matched key -- delete bucket and return - ## - seek($fh, $tag->{offset} + ($i * $BUCKET_SIZE), SEEK_SET); - print($fh substr($keys, ($i+1) * $BUCKET_SIZE ) ); - print($fh chr(0) x $BUCKET_SIZE ); - - return 1; - } # i loop - - return; -} - -sub _bucket_exists { - ## - # Check existence of single key given tag and MD5 digested key. 
- ## - my $self = shift; - my ($tag, $md5) = @_; - my $keys = $tag->{content}; - - ## - # Iterate through buckets, looking for a key match - ## - BUCKET: - for (my $i=0; $i<$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - - if (!$subloc) { - ## - # Hit end of list, no match - ## - return; - } - - if ( $md5 ne $key ) { - next BUCKET; - } - - ## - # Matched key -- return true - ## - return 1; - } # i loop - - return; -} - -sub _find_bucket_list { - ## - # Locate offset for bucket list, given digested key - ## - my $self = shift; - my $md5 = shift; - - ## - # Locate offset for bucket list using digest index system - ## - my $ch = 0; - my $tag = $self->_load_tag($self->base_offset); - if (!$tag) { return; } - - while ($tag->{signature} ne SIG_BLIST) { - $tag = $self->_index_lookup($tag, ord(substr($md5, $ch, 1))); - if (!$tag) { return; } - $ch++; - } - - return $tag; -} - -sub _traverse_index { - ## - # Scan index and recursively step into deeper levels, looking for next key. 
- ## - my ($self, $offset, $ch, $force_return_next) = @_; - $force_return_next = undef unless $force_return_next; - - my $tag = $self->_load_tag( $offset ); - - my $fh = $self->fh; - - if ($tag->{signature} ne SIG_BLIST) { - my $content = $tag->{content}; - my $start; - if ($self->{return_next}) { $start = 0; } - else { $start = ord(substr($self->{prev_md5}, $ch, 1)); } - - for (my $index = $start; $index < 256; $index++) { - my $subloc = unpack($LONG_PACK, substr($content, $index * $LONG_SIZE, $LONG_SIZE) ); - if ($subloc) { - my $result = $self->_traverse_index( $subloc, $ch + 1, $force_return_next ); - if (defined($result)) { return $result; } - } - } # index loop - - $self->{return_next} = 1; - } # tag is an index - - elsif ($tag->{signature} eq SIG_BLIST) { - my $keys = $tag->{content}; - if ($force_return_next) { $self->{return_next} = 1; } - - ## - # Iterate through buckets, looking for a key match - ## - for (my $i=0; $i<$MAX_BUCKETS; $i++) { - my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); - my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); - - if (!$subloc) { - ## - # End of bucket list -- return to outer loop - ## - $self->{return_next} = 1; - last; - } - elsif ($key eq $self->{prev_md5}) { - ## - # Located previous key -- return next one found - ## - $self->{return_next} = 1; - next; - } - elsif ($self->{return_next}) { - ## - # Seek to bucket location and skip over signature - ## - seek($fh, $subloc + SIG_SIZE, SEEK_SET); - - ## - # Skip over value to get to plain key - ## - my $size; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - if ($size) { seek($fh, $size, SEEK_CUR); } - - ## - # Read in plain key and return as scalar - ## - my $plain_key; - read( $fh, $size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); - if ($size) { read( $fh, $plain_key, $size); } - - return $plain_key; - } - } # bucket loop - - $self->{return_next} = 1; - } # tag is a 
bucket list - - return; -} - -sub _get_next_key { - ## - # Locate next key, given digested previous one - ## - my $self = $_[0]->_get_self; - - $self->{prev_md5} = $_[1] ? $_[1] : undef; - $self->{return_next} = 0; - - ## - # If the previous key was not specifed, start at the top and - # return the first one found. - ## - if (!$self->{prev_md5}) { - $self->{prev_md5} = chr(0) x $HASH_SIZE; - $self->{return_next} = 1; - } - - return $self->_traverse_index( $self->base_offset, 0 ); -} - sub lock { ## # If db locking is set, flock() the db file. If called multiple @@ -925,25 +168,25 @@ sub lock { my $type = $_[1]; $type = LOCK_EX unless defined $type; - if (!defined($self->fh)) { return; } + if (!defined($self->_fh)) { return; } - if ($self->root->{locking}) { - if (!$self->root->{locked}) { - flock($self->fh, $type); + if ($self->_root->{locking}) { + if (!$self->_root->{locked}) { + flock($self->_fh, $type); # refresh end counter in case file has changed size - my @stats = stat($self->root->{file}); - $self->root->{end} = $stats[7]; + my @stats = stat($self->_root->{file}); + $self->_root->{end} = $stats[7]; # double-check file inode, in case another process # has optimize()d our file while we were waiting. 
- if ($stats[1] != $self->root->{inode}) { - $self->_open(); # re-open - flock($self->fh, $type); # re-lock - $self->root->{end} = (stat($self->fh))[7]; # re-end + if ($stats[1] != $self->_root->{inode}) { + $self->{engine}->open( $self ); # re-open + flock($self->_fh, $type); # re-lock + $self->_root->{end} = (stat($self->_fh))[7]; # re-end } } - $self->root->{locked}++; + $self->_root->{locked}++; return 1; } @@ -958,11 +201,11 @@ sub unlock { ## my $self = $_[0]->_get_self; - if (!defined($self->fh)) { return; } + if (!defined($self->_fh)) { return; } - if ($self->root->{locking} && $self->root->{locked} > 0) { - $self->root->{locked}--; - if (!$self->root->{locked}) { flock($self->fh, LOCK_UN); } + if ($self->_root->{locking} && $self->_root->{locked} > 0) { + $self->_root->{locked}--; + if (!$self->_root->{locked}) { flock($self->_fh, LOCK_UN); } return 1; } @@ -970,27 +213,47 @@ sub unlock { return; } -#XXX These uses of ref() need verified +sub _copy_value { + my $self = shift->_get_self; + my ($spot, $value) = @_; + + if ( !ref $value ) { + ${$spot} = $value; + } + elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep' ) } ) { + my $type = $value->_type; + ${$spot} = $type eq TYPE_HASH ? {} : []; + $value->_copy_node( ${$spot} ); + } + else { + my $r = Scalar::Util::reftype( $value ); + my $c = Scalar::Util::blessed( $value ); + if ( $r eq 'ARRAY' ) { + ${$spot} = [ @{$value} ]; + } + else { + ${$spot} = { %{$value} }; + } + ${$spot} = bless ${$spot}, $c + if defined $c; + } + + return 1; +} + sub _copy_node { ## # Copy single level of keys or elements to new DB handle. 
# Recurse for nested structures ## - my $self = $_[0]->_get_self; - my $db_temp = $_[1]; + my $self = shift->_get_self; + my ($db_temp) = @_; - if ($self->type eq TYPE_HASH) { + if ($self->_type eq TYPE_HASH) { my $key = $self->first_key(); while ($key) { my $value = $self->get($key); -#XXX This doesn't work with autobless - if (!ref($value)) { $db_temp->{$key} = $value; } - else { - my $type = $value->type; - if ($type eq TYPE_HASH) { $db_temp->{$key} = {}; } - else { $db_temp->{$key} = []; } - $value->_copy_node( $db_temp->{$key} ); - } + $self->_copy_value( \$db_temp->{$key}, $value ); $key = $self->next_key($key); } } @@ -998,16 +261,11 @@ sub _copy_node { my $length = $self->length(); for (my $index = 0; $index < $length; $index++) { my $value = $self->get($index); - if (!ref($value)) { $db_temp->[$index] = $value; } - #XXX NO tests for this code - else { - my $type = $value->type; - if ($type eq TYPE_HASH) { $db_temp->[$index] = {}; } - else { $db_temp->[$index] = []; } - $value->_copy_node( $db_temp->[$index] ); - } + $self->_copy_value( \$db_temp->[$index], $value ); } } + + return 1; } sub export { @@ -1017,8 +275,8 @@ sub export { my $self = $_[0]->_get_self; my $temp; - if ($self->type eq TYPE_HASH) { $temp = {}; } - elsif ($self->type eq TYPE_ARRAY) { $temp = []; } + if ($self->_type eq TYPE_HASH) { $temp = {}; } + elsif ($self->_type eq TYPE_ARRAY) { $temp = []; } $self->lock(); $self->_copy_node( $temp ); @@ -1044,15 +302,15 @@ sub import { ## shift @_; - if ($self->type eq TYPE_HASH) { $struct = {@_}; } - elsif ($self->type eq TYPE_ARRAY) { $struct = [@_]; } + if ($self->_type eq TYPE_HASH) { $struct = {@_}; } + elsif ($self->_type eq TYPE_ARRAY) { $struct = [@_]; } } my $r = Scalar::Util::reftype($struct) || ''; - if ($r eq "HASH" && $self->type eq TYPE_HASH) { + if ($r eq "HASH" && $self->_type eq TYPE_HASH) { foreach my $key (keys %$struct) { $self->put($key, $struct->{$key}); } } - elsif ($r eq "ARRAY" && $self->type eq TYPE_ARRAY) { + elsif ($r 
eq "ARRAY" && $self->_type eq TYPE_ARRAY) { $self->push( @$struct ); } else { @@ -1070,13 +328,13 @@ sub optimize { my $self = $_[0]->_get_self; #XXX Need to create a new test for this -# if ($self->root->{links} > 1) { +# if ($self->_root->{links} > 1) { # return $self->_throw_error("Cannot optimize: reference count is greater than 1"); # } my $db_temp = DBM::Deep->new( - file => $self->root->{file} . '.tmp', - type => $self->type + file => $self->_root->{file} . '.tmp', + type => $self->_type ); if (!$db_temp) { return $self->_throw_error("Cannot optimize: failed to open temp file: $!"); @@ -1089,12 +347,12 @@ sub optimize { ## # Attempt to copy user, group and permissions over to new file ## - my @stats = stat($self->fh); + my @stats = stat($self->_fh); my $perms = $stats[2] & 07777; my $uid = $stats[4]; my $gid = $stats[5]; - chown( $uid, $gid, $self->root->{file} . '.tmp' ); - chmod( $perms, $self->root->{file} . '.tmp' ); + chown( $uid, $gid, $self->_root->{file} . '.tmp' ); + chmod( $perms, $self->_root->{file} . '.tmp' ); # q.v. perlport for more information on this variable if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) { @@ -1105,18 +363,18 @@ sub optimize { # with a soft copy. ## $self->unlock(); - $self->_close(); + $self->{engine}->close( $self ); } - if (!rename $self->root->{file} . '.tmp', $self->root->{file}) { - unlink $self->root->{file} . '.tmp'; + if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) { + unlink $self->_root->{file} . 
'.tmp'; $self->unlock(); return $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!"); } $self->unlock(); - $self->_close(); - $self->_open(); + $self->{engine}->close( $self ); + $self->{engine}->open( $self ); return 1; } @@ -1128,9 +386,9 @@ sub clone { my $self = $_[0]->_get_self; return DBM::Deep->new( - type => $self->type, - base_offset => $self->base_offset, - root => $self->root + type => $self->_type, + base_offset => $self->_base_offset, + root => $self->_root ); } @@ -1151,7 +409,7 @@ sub clone { my $func = $_[2] ? $_[2] : undef; if ( $is_legal_filter{$type} ) { - $self->root->{"filter_$type"} = $func; + $self->_root->{"filter_$type"} = $func; return 1; } @@ -1163,7 +421,7 @@ sub clone { # Accessor methods ## -sub root { +sub _root { ## # Get access to the root structure ## @@ -1171,16 +429,16 @@ sub root { return $self->{root}; } -sub fh { +sub _fh { ## # Get access to the raw fh ## #XXX It will be useful, though, when we split out HASH and ARRAY my $self = $_[0]->_get_self; - return $self->root->{fh}; + return $self->_root->{fh}; } -sub type { +sub _type { ## # Get type of current node (TYPE_HASH or TYPE_ARRAY) ## @@ -1188,7 +446,7 @@ sub type { return $self->{type}; } -sub base_offset { +sub _base_offset { ## # Get base_offset of current node (TYPE_HASH or TYPE_ARRAY) ## @@ -1196,90 +454,23 @@ sub base_offset { return $self->{base_offset}; } -sub error { - ## - # Get last error string, or undef if no error - ## - return $_[0] - #? ( _get_self($_[0])->{root}->{error} or undef ) - ? 
( $_[0]->_get_self->{root}->{error} or undef ) - : $@; -} - ## # Utility methods ## sub _throw_error { - ## - # Store error string in self - ## - my $self = $_[0]->_get_self; - my $error_text = $_[1]; - - if ( Scalar::Util::blessed $self ) { - $self->root->{error} = $error_text; - - unless ($self->root->{debug}) { - die "DBM::Deep: $error_text\n"; - } - - warn "DBM::Deep: $error_text\n"; - return; - } - else { - die "DBM::Deep: $error_text\n"; - } -} - -sub clear_error { - ## - # Clear error state - ## - my $self = $_[0]->_get_self; - - undef $self->root->{error}; -} - -sub precalc_sizes { - ## - # Precalculate index, bucket and bucket list sizes - ## - - #XXX I don't like this ... - set_pack() unless defined $LONG_SIZE; - - $INDEX_SIZE = 256 * $LONG_SIZE; - $BUCKET_SIZE = $HASH_SIZE + $LONG_SIZE; - $BUCKET_LIST_SIZE = $MAX_BUCKETS * $BUCKET_SIZE; + die "DBM::Deep: $_[1]\n"; } -sub set_pack { - ## - # Set pack/unpack modes (see file header for more) - ## - my ($long_s, $long_p, $data_s, $data_p) = @_; - - $LONG_SIZE = $long_s ? $long_s : 4; - $LONG_PACK = $long_p ? $long_p : 'N'; - - $DATA_LENGTH_SIZE = $data_s ? $data_s : 4; - $DATA_LENGTH_PACK = $data_p ? $data_p : 'N'; - - precalc_sizes(); +sub _is_writable { + my $fh = shift; + (O_WRONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0); } -sub set_digest { - ## - # Set key digest function (default is MD5) - ## - my ($digest_func, $hash_size) = @_; - - $DIGEST_FUNC = $digest_func ? $digest_func : \&Digest::MD5::md5; - $HASH_SIZE = $hash_size ? $hash_size : 16; - - precalc_sizes(); -} +#sub _is_readable { +# my $fh = shift; +# (O_RDONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0); +#} ## # tie() methods (hashes and arrays) @@ -1294,52 +485,52 @@ sub STORE { # User may be storing a hash, in which case we do not want it run # through the filtering system - my $value = ($self->root->{filter_store_value} && !ref($_[2])) - ? 
$self->root->{filter_store_value}->($_[2]) + my $value = ($self->_root->{filter_store_value} && !ref($_[2])) + ? $self->_root->{filter_store_value}->($_[2]) : $_[2]; - my $md5 = $DIGEST_FUNC->($key); + my $md5 = $DBM::Deep::Engine::DIGEST_FUNC->($key); - ## - # Make sure file is open - ## - if (!defined($self->fh) && !$self->_open()) { - return; - } - ## + unless ( _is_writable( $self->_fh ) ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } ## # Request exclusive lock for writing ## $self->lock( LOCK_EX ); - my $fh = $self->fh; + my $fh = $self->_fh; ## # Locate offset for bucket list using digest index system ## - my $tag = $self->_load_tag($self->base_offset); + my $tag = $self->{engine}->load_tag($self, $self->_base_offset); if (!$tag) { - $tag = $self->_create_tag($self->base_offset, SIG_INDEX, chr(0) x $INDEX_SIZE); + $tag = $self->{engine}->create_tag($self, $self->_base_offset, SIG_INDEX, chr(0) x $DBM::Deep::Engine::INDEX_SIZE); } my $ch = 0; while ($tag->{signature} ne SIG_BLIST) { my $num = ord(substr($md5, $ch, 1)); - my $new_tag = $self->_index_lookup($tag, $num); + + my $ref_loc = $tag->{offset} + ($num * $DBM::Deep::Engine::LONG_SIZE); + my $new_tag = $self->{engine}->index_lookup($self, $tag, $num); + if (!$new_tag) { - my $ref_loc = $tag->{offset} + ($num * $LONG_SIZE); - seek($fh, $ref_loc, SEEK_SET); - print($fh pack($LONG_PACK, $self->root->{end}) ); + seek($fh, $ref_loc + $self->_root->{file_offset}, SEEK_SET); + print( $fh pack($DBM::Deep::Engine::LONG_PACK, $self->_root->{end}) ); - $tag = $self->_create_tag($self->root->{end}, SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE); + $tag = $self->{engine}->create_tag($self, $self->_root->{end}, SIG_BLIST, chr(0) x $DBM::Deep::Engine::BUCKET_LIST_SIZE); + $tag->{ref_loc} = $ref_loc; $tag->{ch} = $ch; + last; } else { - my $ref_loc = $tag->{offset} + ($num * $LONG_SIZE); $tag = $new_tag; + $tag->{ref_loc} = $ref_loc; $tag->{ch} = $ch; } @@ -1349,7 +540,7 @@ sub STORE { ## # Add 
key/value to bucket list ## - my $result = $self->_add_bucket( $tag, $md5, $key, $value ); + my $result = $self->{engine}->add_bucket( $self, $tag, $md5, $key, $value ); $self->unlock(); @@ -1363,19 +554,14 @@ sub FETCH { my $self = shift->_get_self; my $key = shift; - ## - # Make sure file is open - ## - if (!defined($self->fh)) { $self->_open(); } - - my $md5 = $DIGEST_FUNC->($key); + my $md5 = $DBM::Deep::Engine::DIGEST_FUNC->($key); ## # Request shared lock for reading ## $self->lock( LOCK_SH ); - my $tag = $self->_find_bucket_list( $md5 ); + my $tag = $self->{engine}->find_bucket_list( $self, $md5 ); if (!$tag) { $self->unlock(); return; @@ -1384,15 +570,15 @@ sub FETCH { ## # Get value from bucket list ## - my $result = $self->_get_bucket_value( $tag, $md5 ); + my $result = $self->{engine}->get_bucket_value( $self, $tag, $md5 ); $self->unlock(); #XXX What is ref() checking here? #YYY Filters only apply on scalar values, so the ref check is making #YYY sure the fetched bucket is a scalar, not a child hash or array. - return ($result && !ref($result) && $self->root->{filter_fetch_value}) - ? $self->root->{filter_fetch_value}->($result) + return ($result && !ref($result) && $self->_root->{filter_fetch_value}) + ? 
$self->_root->{filter_fetch_value}->($result) : $result; } @@ -1403,19 +589,14 @@ sub DELETE { my $self = $_[0]->_get_self; my $key = $_[1]; - my $md5 = $DIGEST_FUNC->($key); + my $md5 = $DBM::Deep::Engine::DIGEST_FUNC->($key); ## - # Make sure file is open - ## - if (!defined($self->fh)) { $self->_open(); } - - ## # Request exclusive lock for writing ## $self->lock( LOCK_EX ); - my $tag = $self->_find_bucket_list( $md5 ); + my $tag = $self->{engine}->find_bucket_list( $self, $md5 ); if (!$tag) { $self->unlock(); return; @@ -1424,12 +605,12 @@ sub DELETE { ## # Delete bucket ## - my $value = $self->_get_bucket_value( $tag, $md5 ); - if ($value && !ref($value) && $self->root->{filter_fetch_value}) { - $value = $self->root->{filter_fetch_value}->($value); + my $value = $self->{engine}->get_bucket_value($self, $tag, $md5 ); + if ($value && !ref($value) && $self->_root->{filter_fetch_value}) { + $value = $self->_root->{filter_fetch_value}->($value); } - my $result = $self->_delete_bucket( $tag, $md5 ); + my $result = $self->{engine}->delete_bucket( $self, $tag, $md5 ); ## # If this object is an array and the key deleted was on the end of the stack, @@ -1448,19 +629,14 @@ sub EXISTS { my $self = $_[0]->_get_self; my $key = $_[1]; - my $md5 = $DIGEST_FUNC->($key); + my $md5 = $DBM::Deep::Engine::DIGEST_FUNC->($key); ## - # Make sure file is open - ## - if (!defined($self->fh)) { $self->_open(); } - - ## # Request shared lock for reading ## $self->lock( LOCK_SH ); - my $tag = $self->_find_bucket_list( $md5 ); + my $tag = $self->{engine}->find_bucket_list( $self, $md5 ); ## # For some reason, the built-in exists() function returns '' for false @@ -1473,7 +649,7 @@ sub EXISTS { ## # Check if bucket exists and return 1 or '' ## - my $result = $self->_bucket_exists( $tag, $md5 ) || ''; + my $result = $self->{engine}->bucket_exists( $self, $tag, $md5 ) || ''; $self->unlock(); @@ -1487,24 +663,19 @@ sub CLEAR { my $self = $_[0]->_get_self; ## - # Make sure file is open - ## - 
if (!defined($self->fh)) { $self->_open(); } - - ## # Request exclusive lock for writing ## $self->lock( LOCK_EX ); - my $fh = $self->fh; + my $fh = $self->_fh; - seek($fh, $self->base_offset, SEEK_SET); + seek($fh, $self->_base_offset + $self->_root->{file_offset}, SEEK_SET); if (eof $fh) { $self->unlock(); return; } - $self->_create_tag($self->base_offset, $self->type, chr(0) x $INDEX_SIZE); + $self->{engine}->create_tag($self, $self->_base_offset, $self->_type, chr(0) x $DBM::Deep::Engine::INDEX_SIZE); $self->unlock(); @@ -1531,6 +702,7 @@ sub new { my $self = bless { file => undef, fh => undef, + file_offset => 0, end => 0, autoflush => undef, locking => undef, @@ -1544,6 +716,10 @@ sub new { %$args, }, $class; + if ( $self->{fh} && !$self->{file_offset} ) { + $self->{file_offset} = tell( $self->{fh} ); + } + return $self; } @@ -1684,7 +860,26 @@ DBM::Deep objects. These apply to both the OO- and tie- based approaches. Filename of the DB file to link the handle to. You can pass a full absolute filesystem path, partial path, or a plain filename if the file is in the -current working directory. This is a required parameter. +current working directory. This is a required parameter (though q.v. fh). + +=item * fh + +If you want, you can pass in the fh instead of the file. This is most useful for doing +something like: + + my $db = DBM::Deep->new( { fh => \*DATA } ); + +You are responsible for making sure that the fh has been opened appropriately for your +needs. If you open it read-only and attempt to write, an exception will be thrown. If you +open it write-only or append-only, an exception will be thrown immediately as DBM::Deep +needs to read from the fh. + +=item * file_offset + +This is the offset within the file that the DBM::Deep db starts. Most of the time, you will +not need to set this. However, it's there if you want it. + +If you pass in fh and do not set this, it will be set appropriately. 
=item * type @@ -1728,15 +923,7 @@ Setting I mode will make all errors non-fatal, dump them out to STDERR, and continue on. This is for debugging purposes only, and probably not what you want. This is an optional parameter, and defaults to 0 (disabled). -=item * fh - -Instead of passing a file path, you can instead pass a handle to an pre-opened -filehandle. Note: Beware of using the magick *DATA handle, as this actually -contains your entire Perl script, as well as the data following the __DATA__ -marker. This will not work, because DBM::Deep uses absolute seek()s into the -file. Instead, consider reading *DATA into an IO::Scalar handle, then passing -in that. Also please note optimize() will NOT work when passing in only a -handle. Pass in a real filename in order to use optimize(). +B: This parameter is considered deprecated and should not be used anymore. =back @@ -1823,6 +1010,10 @@ C, C, C, C and C. =over +=item * new() / clone() + +These are the constructor and copy-functions. + =item * put() / store() Stores a new hash key/value pair, or sets an array element value. Takes two @@ -1871,6 +1062,22 @@ details and workarounds. $db->clear(); # hashes or arrays +=item * lock() / unlock() + +q.v. Locking. + +=item * optimize() + +Recover lost disk space. + +=item * import() / export() + +Data going in and out. + +=item * set_digest() / set_pack() / set_filter() + +q.v. adjusting the internal parameters. + =back =head2 HASHES @@ -2242,24 +1449,12 @@ actually numerical index numbers, and are not filtered. =head1 ERROR HANDLING Most DBM::Deep methods return a true value for success, and call die() on -failure. You can wrap calls in an eval block to catch the die. Also, the -actual error message is stored in an internal scalar, which can be fetched by -calling the C method. +failure. You can wrap calls in an eval block to catch the die. 
my $db = DBM::Deep->new( "foo.db" ); # create hash eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call print $@; # prints error message - print $db->error(); # prints error message - -You can then call C to clear the current error state. - - $db->clear_error(); - -If you set the C option to true when creating your DBM::Deep object, -all errors are considered NON-FATAL, and dumped to STDERR. This should only -be used for debugging purposes and not production work. DBM::Deep expects errors -to be thrown, not propagated back up the stack. =head1 LARGEFILE SUPPORT @@ -2287,9 +1482,9 @@ indeed work! =head1 LOW-LEVEL ACCESS If you require low-level access to the underlying filehandle that DBM::Deep uses, -you can call the C method, which returns the handle: +you can call the C<_fh()> method, which returns the handle: - my $fh = $db->fh(); + my $fh = $db->_fh(); This method can be called on the root level of the database, or any child hashes or arrays. All levels share a I structure, which contains things @@ -2297,7 +1492,7 @@ like the filehandle, a reference counter, and all the options specified when you created the object. You can get access to this root structure by calling the C method. - my $root = $db->root(); + my $root = $db->_root(); This is useful for changing options after the object has already been created, such as enabling/disabling locking, or debug modes. You can also @@ -2456,6 +1651,13 @@ These functions cause every element in the array to move, which can be murder on DBM::Deep, as every element has to be fetched from disk, then stored again in a different location. This will be addressed in the forthcoming version 1.00. +=head2 WRITEONLY FILES +
+If you pass in a filehandle to new(), you may have opened it in either a readonly or
+writeonly mode. STORE will verify that the filehandle is writable. However, there
+doesn't seem to be a good way to determine if a filehandle is readable. 
And, if the +filehandle isn't readable, it's not clear what will happen. So, don't do that. + =head1 PERFORMANCE This section discusses DBM::Deep's speed and memory usage. @@ -2643,7 +1845,7 @@ plain key are stored. Fetching the plain key occurs when calling the I and I methods. In this process the indexes are walked systematically, and each key fetched in increasing MD5 order (which is why it appears random). Once the -I is found, the value is skipped the plain key returned instead. +I is found, the value is skipped and the plain key returned instead. B Do not count on keys being fetched as if the MD5 hashes were alphabetically sorted. This only happens on an index-level -- as soon as the I are hit, the keys will come out in the order they went in -- @@ -2658,10 +1860,10 @@ B report on this module's test suite. ---------------------------- ------ ------ ------ ------ ------ ------ ------ File stmt bran cond sub pod time total ---------------------------- ------ ------ ------ ------ ------ ------ ------ - blib/lib/DBM/Deep.pm 93.9 82.5 70.0 96.5 33.3 84.3 88.1 - blib/lib/DBM/Deep/Array.pm 98.8 88.9 87.5 100.0 n/a 9.0 96.4 - blib/lib/DBM/Deep/Hash.pm 95.2 80.0 100.0 100.0 n/a 6.7 92.3 - Total 95.0 83.4 73.8 97.6 33.3 100.0 89.9 + blib/lib/DBM/Deep.pm 95.2 83.8 70.0 98.2 100.0 58.0 91.0 + blib/lib/DBM/Deep/Array.pm 100.0 91.1 100.0 100.0 n/a 26.7 98.0 + blib/lib/DBM/Deep/Hash.pm 95.3 80.0 100.0 100.0 n/a 15.3 92.4 + Total 96.2 84.8 74.4 98.8 100.0 100.0 92.4 ---------------------------- ------ ------ ------ ------ ------ ------ ------ =head1 MORE INFORMATION