From: rkinyon Date: Tue, 14 Feb 2006 20:08:51 +0000 (+0000) Subject: Tagged 0.96 X-Git-Tag: 0-96^0 X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=fa77c6b86e92612ce4310924688cee45390a4eb3;p=dbsrgits%2FDBM-Deep.git Tagged 0.96 --- fa77c6b86e92612ce4310924688cee45390a4eb3 diff --git a/Changes b/Changes new file mode 100644 index 0000000..6a7266c --- /dev/null +++ b/Changes @@ -0,0 +1,90 @@ +Revision history for DBM::Deep. + +0.96 Oct 14 09:55:00 2005 Pacific + - Fixed build (OS X hidden files killed it) + - You can now pass in an optional filehandle to the constructor + +0.95 Oct 12 13:58:00 2005 Pacific + - Added optional autobless flag to preserve and restore blessed hashes + - Fixed bug where 0 could not be fetched using get_next_key + - Fixed bug where tie() constructor didn't accept a hash ref for args + - optimize() now preserves user/group/permissions + - Errors are now FATAL (meaning it calls die()), unless you set debug flag + +0.94 Apr 13 19:00:26 2004 Pacific + - Fixed bug reported by John Cardenas (corruption at key level when + replace of less data was done on bucket) + +0.93 Feb 15 19:53:17 2004 Pacific + - Fixed optmize() on Win32 where orig file couldn't be overwritten unless + filehandle was closed first. This change introduces a potential race + condition when using locking and optmize() on Win32, but it can be + fixed in the future using a soft copy instead of Perl's rename(). + +0.92 Feb 12 19:10:22 2004 Pacific + - Fixed bug where passing a reference to a different DBM::Deep object + would still result in an internal reference. + - Added export() method for recursively extracting hashes/arrays into + standard in-memory Perl structures. + - Added import() method for recursively importing existing Perl hash/ + array structures + - Fixed bug where optimize() wouldn't work if base level of DB was + an array instead of a hash. 
+ +0.91 Feb 12 02:30:22 2004 Pacific + - Fixed bug with splice() when length of removed section was 0 + - Updated POD re: circular refs and optimize() + - Had to jump version numbers to 0.91 because in previous releases + I used only a single digit after the decimal which was confusing + the CPAN indexer. + +0.10 Feb 11 08:58:35 2004 Pacific + - Fixed bug where default file mode was CLEARING files (Thanks Rich!) + - Added experimental support for circular references + - Fixed bugs in shift(), unshift() and splice() where nested objects + in array would be recursively re-stored as basic hashes/arrays + - Fixed typos in POD docs + +0.9 Feb 10 03:25:48 2004 Pacific + - Added Filters for storing/fetching keys/values + - Added hook for supplying own hashing algorithm + - FIxed some typos in POD docs, added new sections + +0.8 Feb 8 02:38:22 2004 Pacific + - Renamed to DBM::Deep for CPAN + - Added optimize() method for rekindling unused space + - Now returning hybrid tie()/OO object from new() + - Basic error handling introduced + - Added debug mode for printing errors to STDERR + - Added TYPE_HASH and TYPE_ARRAY constants for "type" param + - Added clone() method for safe copying of objects + - Wrote POD documentation + - Added set_pack() function for manipulating LONG_SIZE / LONG_PACK + - Added aliases for most tied functions for public use + - Now setting binmode() on FileHandle for Win32 + - Added 45 unit tests + +0.7 Jan 4 11:31:50 2003 UTC + - Renamed to DeepDB + - Changed file signature to DPDB (not compatible with older versions) + - Converted array length to packed long instead of sprintf()ed string + +0.6 Dec 31 15:12:03 2002 UTC + - Some misc optimizations for speed + +0.5 Oct 18 08:55:29 2002 UTC + - support for force_return_next parameter in traverse_index() method for + ultra-fast combined key search/removal + +0.4 Oct 15 20:07:47 2002 UTC + - now making sure filehandle is open for all DB calls + +0.3 Oct 3 19:04:13 2002 UTC + - fixed bug that could cause 
corrupted data when using locking + +0.2 Aug 6 16:37:32 2002 UTC + - Removed base index caching, as it can cause problems when two processes + are populating the db at the same time (even with locking) + +0.1 Jun 3 08:06:26 2002 UTC + - initial release diff --git a/Deep.pm b/Deep.pm new file mode 100644 index 0000000..5022e91 --- /dev/null +++ b/Deep.pm @@ -0,0 +1,2814 @@ +package DBM::Deep; + +## +# DBM::Deep +# +# Description: +# Multi-level database module for storing hash trees, arrays and simple +# key/value pairs into FTP-able, cross-platform binary database files. +# +# Type `perldoc DBM::Deep` for complete documentation. +# +# Usage Examples: +# my %db; +# tie %db, 'DBM::Deep', 'my_database.db'; # standard tie() method +# +# my $db = new DBM::Deep( 'my_database.db' ); # preferred OO method +# +# $db->{my_scalar} = 'hello world'; +# $db->{my_hash} = { larry => 'genius', hashes => 'fast' }; +# $db->{my_array} = [ 1, 2, 3, time() ]; +# $db->{my_complex} = [ 'hello', { perl => 'rules' }, 42, 99 ]; +# push @{$db->{my_array}}, 'another value'; +# my @key_list = keys %{$db->{my_hash}}; +# print "This module " . $db->{my_complex}->[1]->{perl} . "!\n"; +# +# Copyright: +# (c) 2002-2005 Joseph Huckaby. All Rights Reserved. +# This program is free software; you can redistribute it and/or +# modify it under the same terms as Perl itself. +## + +use strict; +use Config; +use FileHandle; +use Fcntl qw/:flock/; +use Digest::MD5 qw/md5/; +use UNIVERSAL qw/isa/; +use vars qw/$VERSION/; + +$VERSION = "0.96"; + +## +# Set to 4 and 'N' for 32-bit offset tags (default). Theoretical limit of 4 GB per file. +# (Perl must be compiled with largefile support for files > 2 GB) +# +# Set to 8 and 'Q' for 64-bit offsets. Theoretical limit of 16 XB per file. +# (Perl must be compiled with largefile and 64-bit long support) +## +my $LONG_SIZE = 4; +my $LONG_PACK = 'N'; + +## +# Set to 4 and 'N' for 32-bit data length prefixes. Limit of 4 GB for each key/value. 
+# Upgrading this is possible (see above) but probably not necessary. If you need +# more than 4 GB for a single key or value, this module is really not for you :-) +## +my $DATA_LENGTH_SIZE = 4; +my $DATA_LENGTH_PACK = 'N'; + +## +# Maximum number of buckets per list before another level of indexing is done. +# Increase this value for slightly greater speed, but larger database files. +# DO NOT decrease this value below 16, due to risk of recursive reindex overrun. +## +my $MAX_BUCKETS = 16; + +## +# Better not adjust anything below here, unless you're me :-) +## + +## +# Setup digest function for keys +## +my $DIGEST_FUNC = \&md5; + +## +# Precalculate index and bucket sizes based on values above. +## +my $HASH_SIZE = 16; +my $INDEX_SIZE = 256 * $LONG_SIZE; +my $BUCKET_SIZE = $HASH_SIZE + $LONG_SIZE; +my $BUCKET_LIST_SIZE = $MAX_BUCKETS * $BUCKET_SIZE; + +## +# Setup file and tag signatures. These should never change. +## +my $SIG_FILE = 'DPDB'; +my $SIG_HASH = 'H'; +my $SIG_ARRAY = 'A'; +my $SIG_NULL = 'N'; +my $SIG_DATA = 'D'; +my $SIG_INDEX = 'I'; +my $SIG_BLIST = 'B'; +my $SIG_SIZE = 1; + +## +# Setup constants for users to pass to new() +## +sub TYPE_HASH { return $SIG_HASH; } +sub TYPE_ARRAY { return $SIG_ARRAY; } + +sub new { + ## + # Class constructor method for Perl OO interface. + # Calls tie() and returns blessed reference to tied hash or array, + # providing a hybrid OO/tie interface. + ## + my $class = shift; + my $args; + if (scalar(@_) > 1) { $args = {@_}; } + else { $args = { file => shift }; } + + ## + # Check if we want a tied hash or array. + ## + my $self; + if (defined($args->{type}) && $args->{type} eq $SIG_ARRAY) { + tie @$self, $class, %$args; + } + else { + tie %$self, $class, %$args; + } + + return bless $self, $class; +} + +sub init { + ## + # Setup $self and bless into this class. 
+ ## + my $class = shift; + my $args = shift; + + my $self = { + type => $args->{type} || $SIG_HASH, + base_offset => $args->{base_offset} || length($SIG_FILE), + root => $args->{root} || { + file => $args->{file} || undef, + fh => $args->{fh} || undef, + end => 0, + links => 0, + autoflush => $args->{autoflush} || undef, + locking => $args->{locking} || undef, + volatile => $args->{volatile} || undef, + debug => $args->{debug} || undef, + mode => $args->{mode} || 'r+', + filter_store_key => $args->{filter_store_key} || undef, + filter_store_value => $args->{filter_store_value} || undef, + filter_fetch_key => $args->{filter_fetch_key} || undef, + filter_fetch_value => $args->{filter_fetch_value} || undef, + autobless => $args->{autobless} || undef, + locked => 0 + } + }; + $self->{root}->{links}++; + + bless $self, $class; + + if (!defined($self->{root}->{fh})) { $self->open(); } + + return $self; +} + +sub TIEHASH { + ## + # Tied hash constructor method, called by Perl's tie() function. + ## + my $class = shift; + my $args; + if (scalar(@_) > 1) { $args = {@_}; } + elsif (ref($_[0])) { $args = $_[0]; } + else { $args = { file => shift }; } + + return $class->init($args); +} + +sub TIEARRAY { + ## + # Tied array constructor method, called by Perl's tie() function. + ## + my $class = shift; + my $args; + if (scalar(@_) > 1) { $args = {@_}; } + elsif (ref($_[0])) { $args = $_[0]; } + else { $args = { file => shift }; } + + return $class->init($args); +} + +sub DESTROY { + ## + # Class deconstructor. Close file handle if there are no more refs. + ## + my $self = tied( %{$_[0]} ) || return; + + $self->{root}->{links}--; + + if (!$self->{root}->{links}) { + $self->close(); + } +} + +sub open { + ## + # Open a FileHandle to the database, create if nonexistent. + # Make sure file signature matches DeepDB spec. 
+ ## + my $self = tied( %{$_[0]} ) || $_[0]; + + if (defined($self->{root}->{fh})) { $self->close(); } + + if (!(-e $self->{root}->{file}) && $self->{root}->{mode} eq 'r+') { + my $temp = new FileHandle $self->{root}->{file}, 'w'; + undef $temp; + } + + $self->{root}->{fh} = new FileHandle $self->{root}->{file}, $self->{root}->{mode}; + if (defined($self->{root}->{fh})) { + binmode $self->{root}->{fh}; # for win32 + if ($self->{root}->{autoflush}) { $self->{root}->{fh}->autoflush(); } + + my $signature; + seek($self->{root}->{fh}, 0, 0); + my $bytes_read = $self->{root}->{fh}->read($signature, length($SIG_FILE)); + + ## + # File is empty -- write signature and master index + ## + if (!$bytes_read) { + seek($self->{root}->{fh}, 0, 0); + $self->{root}->{fh}->print($SIG_FILE); + $self->{root}->{end} = length($SIG_FILE); + $self->create_tag($self->{base_offset}, $self->{type}, chr(0) x $INDEX_SIZE); + my $plain_key = "[base]"; + $self->{root}->{fh}->print( pack($DATA_LENGTH_PACK, length($plain_key)) . $plain_key ); + $self->{root}->{end} += $DATA_LENGTH_SIZE + length($plain_key); + $signature = $SIG_FILE; + $self->{root}->{fh}->flush(); + } + + ## + # Check signature was valid + ## + if ($signature eq $SIG_FILE) { + $self->{root}->{end} = (stat($self->{root}->{fh}))[7]; + + ## + # Get our type from master index signature + ## + my $tag = $self->load_tag($self->{base_offset}); + $self->{type} = $tag->{signature}; + + return 1; + } + else { + $self->close(); + $self->throw_error("Signature not found -- file is not a Deep DB"); + } + } + else { + $self->throw_error("Cannot open file: " . $self->{root}->{file} . 
": $!"); + } + + return undef; +} + +sub close { + ## + # Close database FileHandle + ## + my $self = tied( %{$_[0]} ) || $_[0]; + undef $self->{root}->{fh}; +} + +sub create_tag { + ## + # Given offset, signature and content, create tag and write to disk + ## + my ($self, $offset, $sig, $content) = @_; + my $size = length($content); + + seek($self->{root}->{fh}, $offset, 0); + $self->{root}->{fh}->print( $sig . pack($DATA_LENGTH_PACK, $size) . $content ); + + if ($offset == $self->{root}->{end}) { + $self->{root}->{end} += $SIG_SIZE + $DATA_LENGTH_SIZE + $size; + } + + return { + signature => $sig, + size => $size, + offset => $offset + $SIG_SIZE + $DATA_LENGTH_SIZE, + content => $content + }; +} + +sub load_tag { + ## + # Given offset, load single tag and return signature, size and data + ## + my $self = shift; + my $offset = shift; + + seek($self->{root}->{fh}, $offset, 0); + if ($self->{root}->{fh}->eof()) { return undef; } + + my $sig; + $self->{root}->{fh}->read($sig, $SIG_SIZE); + + my $size; + $self->{root}->{fh}->read($size, $DATA_LENGTH_SIZE); + $size = unpack($DATA_LENGTH_PACK, $size); + + my $buffer; + $self->{root}->{fh}->read($buffer, $size); + + return { + signature => $sig, + size => $size, + offset => $offset + $SIG_SIZE + $DATA_LENGTH_SIZE, + content => $buffer + }; +} + +sub index_lookup { + ## + # Given index tag, lookup single entry in index and return . + ## + my $self = shift; + my ($tag, $index) = @_; + + my $location = unpack($LONG_PACK, substr($tag->{content}, $index * $LONG_SIZE, $LONG_SIZE) ); + if (!$location) { return undef; } + + return $self->load_tag( $location ); +} + +sub add_bucket { + ## + # Adds one key/value pair to bucket list, given offset, MD5 digest of key, + # plain (undigested) key and value. 
+ ## + my $self = shift; + my ($tag, $md5, $plain_key, $value) = @_; + my $keys = $tag->{content}; + my $location = 0; + my $result = 2; + my $internal_ref = isa($value, "DBM::Deep") && ($value->root() eq $self->{root}); + + ## + # Iterate through buckets, seeing if this is a new entry or a replace. + ## + for (my $i=0; $i<$MAX_BUCKETS; $i++) { + my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); + my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); + if (!$subloc) { + ## + # Found empty bucket (end of list). Populate and exit loop. + ## + $result = 2; + + if ($internal_ref) { $location = $value->base_offset(); } + else { $location = $self->{root}->{end}; } + + seek($self->{root}->{fh}, $tag->{offset} + ($i * $BUCKET_SIZE), 0); + $self->{root}->{fh}->print( $md5 . pack($LONG_PACK, $location) ); + last; + } + elsif ($md5 eq $key) { + ## + # Found existing bucket with same key. Replace with new value. + ## + $result = 1; + + if ($internal_ref) { + $location = $value->base_offset(); + seek($self->{root}->{fh}, $tag->{offset} + ($i * $BUCKET_SIZE), 0); + $self->{root}->{fh}->print( $md5 . pack($LONG_PACK, $location) ); + } + else { + seek($self->{root}->{fh}, $subloc + $SIG_SIZE, 0); + my $size; + $self->{root}->{fh}->read($size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); + + ## + # If value is a hash, array, or raw value with equal or less size, we can + # reuse the same content area of the database. Otherwise, we have to create + # a new content area at the EOF. 
+ ## + my $actual_length; + if (isa($value, 'HASH') || isa($value, 'ARRAY')) { $actual_length = $INDEX_SIZE; } + else { $actual_length = length($value); } + + if ($actual_length <= $size) { + $location = $subloc; + } + else { + $location = $self->{root}->{end}; + seek($self->{root}->{fh}, $tag->{offset} + ($i * $BUCKET_SIZE) + $HASH_SIZE, 0); + $self->{root}->{fh}->print( pack($LONG_PACK, $location) ); + } + } + last; + } + } # i loop + + ## + # If this is an internal reference, return now. + # No need to write value or plain key + ## + if ($internal_ref) { return $result; } + + ## + # If bucket didn't fit into list, split into a new index level + ## + if (!$location) { + seek($self->{root}->{fh}, $tag->{ref_loc}, 0); + $self->{root}->{fh}->print( pack($LONG_PACK, $self->{root}->{end}) ); + + my $index_tag = $self->create_tag($self->{root}->{end}, $SIG_INDEX, chr(0) x $INDEX_SIZE); + my @offsets = (); + + if ($internal_ref) { + $keys .= $md5 . pack($LONG_PACK, $value->base_offset()); + $location = $value->base_offset(); + } + else { $keys .= $md5 . pack($LONG_PACK, 0); } + + for (my $i=0; $i<=$MAX_BUCKETS; $i++) { + my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); + if ($key) { + my $old_subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); + my $num = ord(substr($key, $tag->{ch} + 1, 1)); + + if ($offsets[$num]) { + my $offset = $offsets[$num] + $SIG_SIZE + $DATA_LENGTH_SIZE; + seek($self->{root}->{fh}, $offset, 0); + my $subkeys; + $self->{root}->{fh}->read($subkeys, $BUCKET_LIST_SIZE); + + for (my $k=0; $k<$MAX_BUCKETS; $k++) { + my $subloc = unpack($LONG_PACK, substr($subkeys, ($k * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); + if (!$subloc) { + seek($self->{root}->{fh}, $offset + ($k * $BUCKET_SIZE), 0); + $self->{root}->{fh}->print( $key . 
pack($LONG_PACK, $old_subloc || $self->{root}->{end}) ); + last; + } + } # k loop + } + else { + $offsets[$num] = $self->{root}->{end}; + seek($self->{root}->{fh}, $index_tag->{offset} + ($num * $LONG_SIZE), 0); + $self->{root}->{fh}->print( pack($LONG_PACK, $self->{root}->{end}) ); + + my $blist_tag = $self->create_tag($self->{root}->{end}, $SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE); + + seek($self->{root}->{fh}, $blist_tag->{offset}, 0); + $self->{root}->{fh}->print( $key . pack($LONG_PACK, $old_subloc || $self->{root}->{end}) ); + } + } # key is real + } # i loop + + $location ||= $self->{root}->{end}; + } # re-index bucket list + + ## + # Seek to content area and store signature, value and plaintext key + ## + if ($location) { + my $content_length; + seek($self->{root}->{fh}, $location, 0); + + ## + # Write signature based on content type, set content length and write actual value. + ## + if (isa($value, 'HASH')) { + $self->{root}->{fh}->print( $SIG_HASH ); + $self->{root}->{fh}->print( pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE ); + $content_length = $INDEX_SIZE; + } + elsif (isa($value, 'ARRAY')) { + $self->{root}->{fh}->print( $SIG_ARRAY ); + $self->{root}->{fh}->print( pack($DATA_LENGTH_PACK, $INDEX_SIZE) . chr(0) x $INDEX_SIZE ); + $content_length = $INDEX_SIZE; + } + elsif (!defined($value)) { + $self->{root}->{fh}->print( $SIG_NULL ); + $self->{root}->{fh}->print( pack($DATA_LENGTH_PACK, 0) ); + $content_length = 0; + } + else { + $self->{root}->{fh}->print( $SIG_DATA ); + $self->{root}->{fh}->print( pack($DATA_LENGTH_PACK, length($value)) . $value ); + $content_length = length($value); + } + + ## + # Plain key is stored AFTER value, as keys are typically fetched less often. + ## + $self->{root}->{fh}->print( pack($DATA_LENGTH_PACK, length($plain_key)) . 
$plain_key ); + + ## + # If value is blessed, preserve class name + ## + my $value_ref = ref($value); + if ($self->{root}->{autobless} && $value_ref) { + if ($value_ref !~ /^(HASH|ARRAY|DBM::Deep)$/) { + ## + # Blessed ref -- will restore later + ## + $self->{root}->{fh}->print( chr(1) ); + $self->{root}->{fh}->print( pack($DATA_LENGTH_PACK, length($value_ref)) . $value_ref ); + $content_length += 1; + $content_length += $DATA_LENGTH_SIZE + length($value_ref); + } + else { + ## + # Simple unblessed ref -- no restore needed + ## + $self->{root}->{fh}->print( chr(0) ); + $content_length += 1; + } + } + + ## + # If this is a new content area, advance EOF counter + ## + if ($location == $self->{root}->{end}) { + $self->{root}->{end} += $SIG_SIZE; + $self->{root}->{end} += $DATA_LENGTH_SIZE + $content_length; + $self->{root}->{end} += $DATA_LENGTH_SIZE + length($plain_key); + } + + ## + # If content is a hash or array, create new child DeepDB object and + # pass each key or element to it. + ## + if (isa($value, 'HASH')) { + my $branch = new DBM::Deep( + type => $SIG_HASH, + base_offset => $location, + root => $self->{root} + ); + foreach my $key (keys %{$value}) { + $branch->{$key} = $value->{$key}; + } + } + elsif (isa($value, 'ARRAY')) { + my $branch = new DBM::Deep( + type => $SIG_ARRAY, + base_offset => $location, + root => $self->{root} + ); + my $index = 0; + foreach my $element (@{$value}) { + $branch->[$index] = $element; + $index++; + } + } + + return $result; + } + + return $self->throw_error("Fatal error: indexing failed -- possibly due to corruption in file"); +} + +sub get_bucket_value { + ## + # Fetch single value given tag and MD5 digested key. 
+ ## + my $self = shift; + my ($tag, $md5) = @_; + my $keys = $tag->{content}; + + ## + # Iterate through buckets, looking for a key match + ## + for (my $i=0; $i<$MAX_BUCKETS; $i++) { + my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); + my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); + + if (!$subloc) { + ## + # Hit end of list, no match + ## + return undef; + } + elsif ($md5 eq $key) { + ## + # Found match -- seek to offset and read signature + ## + my $signature; + seek($self->{root}->{fh}, $subloc, 0); + $self->{root}->{fh}->read($signature, $SIG_SIZE); + + ## + # If value is a hash or array, return new DeepDB object with correct offset + ## + if (($signature eq $SIG_HASH) || ($signature eq $SIG_ARRAY)) { + my $obj = new DBM::Deep( + type => $signature, + base_offset => $subloc, + root => $self->{root} + ); + + if ($self->{root}->{autobless}) { + ## + # Skip over value and plain key to see if object needs + # to be re-blessed + ## + seek($self->{root}->{fh}, $DATA_LENGTH_SIZE + $INDEX_SIZE, 1); + + my $size; + $self->{root}->{fh}->read($size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); + if ($size) { seek($self->{root}->{fh}, $size, 1); } + + my $bless_bit; + $self->{root}->{fh}->read($bless_bit, 1); + if (ord($bless_bit)) { + ## + # Yes, object needs to be re-blessed + ## + my $class_name; + $self->{root}->{fh}->read($size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); + if ($size) { $self->{root}->{fh}->read($class_name, $size); } + if ($class_name) { $obj = bless( $obj, $class_name ); } + } + } + + return $obj; + } + + ## + # Otherwise return actual value + ## + elsif ($signature eq $SIG_DATA) { + my $size; + my $value = ''; + $self->{root}->{fh}->read($size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); + if ($size) { $self->{root}->{fh}->read($value, $size); } + return $value; + } + + ## + # Key exists, but content is null + ## + else { return undef; } + 
} + } # i loop + + return undef; +} + +sub delete_bucket { + ## + # Delete single key/value pair given tag and MD5 digested key. + ## + my $self = shift; + my ($tag, $md5) = @_; + my $keys = $tag->{content}; + + ## + # Iterate through buckets, looking for a key match + ## + for (my $i=0; $i<$MAX_BUCKETS; $i++) { + my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); + my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); + + if (!$subloc) { + ## + # Hit end of list, no match + ## + return undef; + } + elsif ($md5 eq $key) { + ## + # Matched key -- delete bucket and return + ## + seek($self->{root}->{fh}, $tag->{offset} + ($i * $BUCKET_SIZE), 0); + $self->{root}->{fh}->print( substr($keys, ($i+1) * $BUCKET_SIZE ) ); + $self->{root}->{fh}->print( chr(0) x $BUCKET_SIZE ); + + return 1; + } + } # i loop + + return undef; +} + +sub bucket_exists { + ## + # Check existence of single key given tag and MD5 digested key. + ## + my $self = shift; + my ($tag, $md5) = @_; + my $keys = $tag->{content}; + + ## + # Iterate through buckets, looking for a key match + ## + for (my $i=0; $i<$MAX_BUCKETS; $i++) { + my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); + my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); + + if (!$subloc) { + ## + # Hit end of list, no match + ## + return undef; + } + elsif ($md5 eq $key) { + ## + # Matched key -- return true + ## + return 1; + } + } # i loop + + return undef; +} + +sub find_bucket_list { + ## + # Locate offset for bucket list, given digested key + ## + my $self = shift; + my $md5 = shift; + + ## + # Locate offset for bucket list using digest index system + ## + my $ch = 0; + my $tag = $self->load_tag($self->{base_offset}); + if (!$tag) { return undef; } + + while ($tag->{signature} ne $SIG_BLIST) { + $tag = $self->index_lookup($tag, ord(substr($md5, $ch, 1))); + if (!$tag) { return undef; } + $ch++; + } + + return $tag; +} + +sub traverse_index { + 
## + # Scan index and recursively step into deeper levels, looking for next key. + ## + my $self = shift; + my $offset = shift; + my $ch = shift; + my $force_return_next = shift || undef; + + my $tag = $self->load_tag( $offset ); + + if ($tag->{signature} ne $SIG_BLIST) { + my $content = $tag->{content}; + my $start; + if ($self->{return_next}) { $start = 0; } + else { $start = ord(substr($self->{prev_md5}, $ch, 1)); } + + for (my $index = $start; $index < 256; $index++) { + my $subloc = unpack($LONG_PACK, substr($content, $index * $LONG_SIZE, $LONG_SIZE) ); + if ($subloc) { + my $result = $self->traverse_index( $subloc, $ch + 1, $force_return_next ); + if (defined($result)) { return $result; } + } + } # index loop + + $self->{return_next} = 1; + } # tag is an index + + elsif ($tag->{signature} eq $SIG_BLIST) { + my $keys = $tag->{content}; + if ($force_return_next) { $self->{return_next} = 1; } + + ## + # Iterate through buckets, looking for a key match + ## + for (my $i=0; $i<$MAX_BUCKETS; $i++) { + my $key = substr($keys, $i * $BUCKET_SIZE, $HASH_SIZE); + my $subloc = unpack($LONG_PACK, substr($keys, ($i * $BUCKET_SIZE) + $HASH_SIZE, $LONG_SIZE)); + + if (!$subloc) { + ## + # End of bucket list -- return to outer loop + ## + $self->{return_next} = 1; + last; + } + elsif ($key eq $self->{prev_md5}) { + ## + # Located previous key -- return next one found + ## + $self->{return_next} = 1; + next; + } + elsif ($self->{return_next}) { + ## + # Seek to bucket location and skip over signature + ## + seek($self->{root}->{fh}, $subloc + $SIG_SIZE, 0); + + ## + # Skip over value to get to plain key + ## + my $size; + $self->{root}->{fh}->read($size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); + if ($size) { seek($self->{root}->{fh}, $size, 1); } + + ## + # Read in plain key and return as scalar + ## + my $plain_key; + $self->{root}->{fh}->read($size, $DATA_LENGTH_SIZE); $size = unpack($DATA_LENGTH_PACK, $size); + if ($size) { 
$self->{root}->{fh}->read($plain_key, $size); } + + return $plain_key; + } + } # bucket loop + + $self->{return_next} = 1; + } # tag is a bucket list + + return undef; +} + +sub get_next_key { + ## + # Locate next key, given digested previous one + ## + my $self = tied( %{$_[0]} ) || $_[0]; + + $self->{prev_md5} = $_[1] || undef; + $self->{return_next} = 0; + + ## + # If the previous key was not specifed, start at the top and + # return the first one found. + ## + if (!$self->{prev_md5}) { + $self->{prev_md5} = chr(0) x $HASH_SIZE; + $self->{return_next} = 1; + } + + return $self->traverse_index( $self->{base_offset}, 0 ); +} + +sub lock { + ## + # If db locking is set, flock() the db file. If called multiple + # times before unlock(), then the same number of unlocks() must + # be called before the lock is released. + ## + my $self = tied( %{$_[0]} ) || $_[0]; + my $type = $_[1] || LOCK_EX; + + if ($self->{root}->{locking}) { + if (!$self->{root}->{locked}) { flock($self->{root}->{fh}, $type); } + $self->{root}->{locked}++; + } +} + +sub unlock { + ## + # If db locking is set, unlock the db file. See note in lock() + # regarding calling lock() multiple times. + ## + my $self = tied( %{$_[0]} ) || $_[0]; + my $type = $_[1]; + + if ($self->{root}->{locking} && $self->{root}->{locked} > 0) { + $self->{root}->{locked}--; + if (!$self->{root}->{locked}) { flock($self->{root}->{fh}, LOCK_UN); } + } +} + +sub copy_node { + ## + # Copy single level of keys or elements to new DB handle. 
+ # Recurse for nested structures + ## + my $self = tied( %{$_[0]} ) || $_[0]; + my $db_temp = $_[1]; + + if ($self->{type} eq $SIG_HASH) { + my $key = $self->first_key(); + while ($key) { + my $value = $self->get($key); + if (!ref($value)) { $db_temp->{$key} = $value; } + else { + my $type = $value->type(); + if ($type eq $SIG_HASH) { $db_temp->{$key} = {}; } + else { $db_temp->{$key} = []; } + $value->copy_node( $db_temp->{$key} ); + } + $key = $self->next_key($key); + } + } + else { + my $length = $self->length(); + for (my $index = 0; $index < $length; $index++) { + my $value = $self->get($index); + if (!ref($value)) { $db_temp->[$index] = $value; } + else { + my $type = $value->type(); + if ($type eq $SIG_HASH) { $db_temp->[$index] = {}; } + else { $db_temp->[$index] = []; } + $value->copy_node( $db_temp->[$index] ); + } + } + } +} + +sub export { + ## + # Recursively export into standard Perl hashes and arrays. + ## + my $self = tied( %{$_[0]} ) || $_[0]; + + my $temp; + if ($self->{type} eq $SIG_HASH) { $temp = {}; } + elsif ($self->{type} eq $SIG_ARRAY) { $temp = []; } + + $self->lock(); + $self->copy_node( $temp ); + $self->unlock(); + + return $temp; +} + +sub import { + ## + # Recursively import Perl hash/array structure + ## + if (!ref($_[0])) { return; } # Perl calls import() on use -- ignore + + my $self = tied( %{$_[0]} ) || $_[0]; + my $struct = $_[1]; + + if (!ref($struct)) { + ## + # struct is not a reference, so just import based on our type + ## + shift @_; + + if ($self->{type} eq $SIG_HASH) { $struct = {@_}; } + elsif ($self->{type} eq $SIG_ARRAY) { $struct = [@_]; } + } + + if (isa($struct, "HASH") && $self->{type} eq $SIG_HASH) { + foreach my $key (keys %$struct) { $self->put($key, $struct->{$key}); } + } + elsif (isa($struct, "ARRAY") && $self->{type} eq $SIG_ARRAY) { + $self->push( @$struct ); + } + else { + return $self->throw_error("Cannot import: type mismatch"); + } + + return 1; +} + +sub optimize { + ## + # Rebuild entire database 
into new file, then move + # it back on top of original. + ## + my $self = tied( %{$_[0]} ) || $_[0]; + if ($self->{root}->{links} > 1) { + return $self->throw_error("Cannot optimize: reference count is greater than 1"); + } + + my $db_temp = new DBM::Deep( + file => $self->{root}->{file} . '.tmp', + type => $self->{type} + ); + if (!$db_temp) { + return $self->throw_error("Cannot optimize: failed to open temp file: $!"); + } + + $self->lock(); + $self->copy_node( $db_temp ); + undef $db_temp; + + ## + # Attempt to copy user, group and permissions over to new file + ## + my @stats = stat($self->{root}->{fh}); + my $perms = $stats[2] & 07777; + my $uid = $stats[4]; + my $gid = $stats[5]; + chown( $uid, $gid, $self->{root}->{file} . '.tmp' ); + chmod( $perms, $self->{root}->{file} . '.tmp' ); + + if ($Config{'osname'} =~ /win/i) { + ## + # Potential race condition when optmizing on Win32 with locking. + # The Windows filesystem requires that the filehandle be closed + # before it is overwritten with rename(). This could be redone + # with a soft copy. + ## + $self->unlock(); + $self->close(); + } + + if (!rename $self->{root}->{file} . '.tmp', $self->{root}->{file}) { + unlink $self->{root}->{file} . 
'.tmp'; + $self->unlock(); + return $self->throw_error("Optimize failed: Cannot copy temp file over original: $!"); + } + + $self->unlock(); + $self->close(); + $self->open(); + + return 1; +} + +sub clone { + ## + # Make copy of object and return + ## + my $self = tied( %{$_[0]} ) || $_[0]; + + return new DBM::Deep( + type => $self->{type}, + base_offset => $self->{base_offset}, + root => $self->{root} + ); +} + +sub set_filter { + ## + # Setup filter function for storing or fetching the key or value + ## + my $self = tied( %{$_[0]} ) || $_[0]; + my $type = $_[1]; + my $func = $_[2] || undef; + + if ($type =~ /store_key/i) { $self->{root}->{filter_store_key} = $func; return 1; } + elsif ($type =~ /store_value/i) { $self->{root}->{filter_store_value} = $func; return 1; } + elsif ($type =~ /fetch_key/i) { $self->{root}->{filter_fetch_key} = $func; return 1; } + elsif ($type =~ /fetch_value/i) { $self->{root}->{filter_fetch_value} = $func; return 1; } + + return undef; +} + +## +# Accessor methods +## + +sub root { + ## + # Get access to the root structure + ## + my $self = tied( %{$_[0]} ) || $_[0]; + return $self->{root}; +} + +sub fh { + ## + # Get access to the raw FileHandle + ## + my $self = tied( %{$_[0]} ) || $_[0]; + return $self->{root}->{fh}; +} + +sub type { + ## + # Get type of current node ($SIG_HASH or $SIG_ARRAY) + ## + my $self = tied( %{$_[0]} ) || $_[0]; + return $self->{type}; +} + +sub base_offset { + ## + # Get base_offset of current node ($SIG_HASH or $SIG_ARRAY) + ## + my $self = tied( %{$_[0]} ) || $_[0]; + return $self->{base_offset}; +} + +sub error { + ## + # Get last error string, or undef if no error + ## + my $self = tied( %{$_[0]} ) || $_[0]; + return $self->{root}->{error} || undef; +} + +## +# Utility methods +## + +sub throw_error { + ## + # Store error string in self + ## + my $self = tied( %{$_[0]} ) || $_[0]; + my $error_text = $_[1]; + + $self->{root}->{error} = $error_text; + + if ($self->{root}->{debug}) { warn "DBM::Deep: 
$error_text\n"; } + else { die "DBM::Deep: $error_text\n"; } + + return undef; +} + +sub clear_error { + ## + # Clear error state + ## + my $self = tied( %{$_[0]} ) || $_[0]; + + undef $self->{root}->{error}; +} + +sub precalc_sizes { + ## + # Precalculate index, bucket and bucket list sizes + ## + $INDEX_SIZE = 256 * $LONG_SIZE; + $BUCKET_SIZE = $HASH_SIZE + $LONG_SIZE; + $BUCKET_LIST_SIZE = $MAX_BUCKETS * $BUCKET_SIZE; +} + +sub set_pack { + ## + # Set pack/unpack modes (see file header for more) + ## + $LONG_SIZE = shift || 4; + $LONG_PACK = shift || 'N'; + + $DATA_LENGTH_SIZE = shift || 4; + $DATA_LENGTH_PACK = shift || 'N'; + + precalc_sizes(); +} + +sub set_digest { + ## + # Set key digest function (default is MD5) + ## + $DIGEST_FUNC = shift || \&md5; + $HASH_SIZE = shift || $HASH_SIZE; + + precalc_sizes(); +} + +## +# tie() methods (hashes and arrays) +## + +sub STORE { + ## + # Store single hash key/value or array element in database. + ## + my $self = tied( %{$_[0]} ) || $_[0]; + my $key = ($self->{root}->{filter_store_key} && $self->{type} eq $SIG_HASH) ? $self->{root}->{filter_store_key}->($_[1]) : $_[1]; + my $value = ($self->{root}->{filter_store_value} && !ref($_[2])) ? $self->{root}->{filter_store_value}->($_[2]) : $_[2]; + + my $unpacked_key = $key; + if (($self->{type} eq $SIG_ARRAY) && ($key =~ /^\d+$/)) { $key = pack($LONG_PACK, $key); } + my $md5 = $DIGEST_FUNC->($key); + + ## + # Make sure file is open + ## + if (!defined($self->{root}->{fh}) && !$self->open()) { + return undef; + } + + ## + # Request exclusive lock for writing + ## + $self->lock( LOCK_EX ); + + ## + # If locking is enabled, set 'end' parameter again, in case another + # DB instance appended to our file while we were unlocked. 
+ ## + if ($self->{root}->{locking} || $self->{root}->{volatile}) { + $self->{root}->{end} = (stat($self->{root}->{fh}))[7]; + } + + ## + # Locate offset for bucket list using digest index system + ## + my $tag = $self->load_tag($self->{base_offset}); + if (!$tag) { + $tag = $self->create_tag($self->{base_offset}, $SIG_INDEX, chr(0) x $INDEX_SIZE); + } + + my $ch = 0; + while ($tag->{signature} ne $SIG_BLIST) { + my $num = ord(substr($md5, $ch, 1)); + my $new_tag = $self->index_lookup($tag, $num); + if (!$new_tag) { + my $ref_loc = $tag->{offset} + ($num * $LONG_SIZE); + seek($self->{root}->{fh}, $ref_loc, 0); + $self->{root}->{fh}->print( pack($LONG_PACK, $self->{root}->{end}) ); + + $tag = $self->create_tag($self->{root}->{end}, $SIG_BLIST, chr(0) x $BUCKET_LIST_SIZE); + $tag->{ref_loc} = $ref_loc; + $tag->{ch} = $ch; + last; + } + else { + my $ref_loc = $tag->{offset} + ($num * $LONG_SIZE); + $tag = $new_tag; + $tag->{ref_loc} = $ref_loc; + $tag->{ch} = $ch; + } + $ch++; + } + + ## + # Add key/value to bucket list + ## + my $result = $self->add_bucket( $tag, $md5, $key, $value ); + + ## + # If this object is an array, and bucket was not a replace, and key is numerical, + # and index is equal or greater than current length, advance length variable. + ## + if (($result == 2) && ($self->{type} eq $SIG_ARRAY) && ($unpacked_key =~ /^\d+$/) && ($unpacked_key >= $self->FETCHSIZE())) { + $self->STORESIZE( $unpacked_key + 1 ); + } + + $self->unlock(); + + return $result; +} + +sub FETCH { + ## + # Fetch single value or element given plain key or array index + ## + my $self = tied( %{$_[0]} ) || $_[0]; + my $key = ($self->{root}->{filter_store_key} && $self->{type} eq $SIG_HASH) ? 
$self->{root}->{filter_store_key}->($_[1]) : $_[1]; + + if (($self->{type} eq $SIG_ARRAY) && ($key =~ /^\d+$/)) { $key = pack($LONG_PACK, $key); } + my $md5 = $DIGEST_FUNC->($key); + + ## + # Make sure file is open + ## + if (!defined($self->{root}->{fh})) { $self->open(); } + + ## + # Request shared lock for reading + ## + $self->lock( LOCK_SH ); + + my $tag = $self->find_bucket_list( $md5 ); + if (!$tag) { + $self->unlock(); + return undef; + } + + ## + # Get value from bucket list + ## + my $result = $self->get_bucket_value( $tag, $md5 ); + + $self->unlock(); + + return ($result && !ref($result) && $self->{root}->{filter_fetch_value}) ? $self->{root}->{filter_fetch_value}->($result) : $result; +} + +sub DELETE { + ## + # Delete single key/value pair or element given plain key or array index + ## + my $self = tied( %{$_[0]} ) || $_[0]; + my $key = ($self->{root}->{filter_store_key} && $self->{type} eq $SIG_HASH) ? $self->{root}->{filter_store_key}->($_[1]) : $_[1]; + + my $unpacked_key = $key; + if (($self->{type} eq $SIG_ARRAY) && ($key =~ /^\d+$/)) { $key = pack($LONG_PACK, $key); } + my $md5 = $DIGEST_FUNC->($key); + + ## + # Make sure file is open + ## + if (!defined($self->{root}->{fh})) { $self->open(); } + + ## + # Request exclusive lock for writing + ## + $self->lock( LOCK_EX ); + + my $tag = $self->find_bucket_list( $md5 ); + if (!$tag) { + $self->unlock(); + return undef; + } + + ## + # Delete bucket + ## + my $result = $self->delete_bucket( $tag, $md5 ); + + ## + # If this object is an array and the key deleted was on the end of the stack, + # decrement the length variable. 
+ ## + if ($result && ($self->{type} eq $SIG_ARRAY) && ($unpacked_key == $self->FETCHSIZE() - 1)) { + $self->STORESIZE( $unpacked_key ); + } + + $self->unlock(); + + return $result; +} + +sub EXISTS { + ## + # Check if a single key or element exists given plain key or array index + ## + my $self = tied( %{$_[0]} ) || $_[0]; + my $key = ($self->{root}->{filter_store_key} && $self->{type} eq $SIG_HASH) ? $self->{root}->{filter_store_key}->($_[1]) : $_[1]; + + if (($self->{type} eq $SIG_ARRAY) && ($key =~ /^\d+$/)) { $key = pack($LONG_PACK, $key); } + my $md5 = $DIGEST_FUNC->($key); + + ## + # Make sure file is open + ## + if (!defined($self->{root}->{fh})) { $self->open(); } + + ## + # Request shared lock for reading + ## + $self->lock( LOCK_SH ); + + my $tag = $self->find_bucket_list( $md5 ); + + ## + # For some reason, the built-in exists() function returns '' for false + ## + if (!$tag) { + $self->unlock(); + return ''; + } + + ## + # Check if bucket exists and return 1 or '' + ## + my $result = $self->bucket_exists( $tag, $md5 ) || ''; + + $self->unlock(); + + return $result; +} + +sub CLEAR { + ## + # Clear all keys from hash, or all elements from array. 
+ ## + my $self = tied( %{$_[0]} ) || $_[0]; + + ## + # Make sure file is open + ## + if (!defined($self->{root}->{fh})) { $self->open(); } + + ## + # Request exclusive lock for writing + ## + $self->lock( LOCK_EX ); + + seek($self->{root}->{fh}, $self->{base_offset}, 0); + if ($self->{root}->{fh}->eof()) { + $self->unlock(); + return undef; + } + + $self->create_tag($self->{base_offset}, $self->{type}, chr(0) x $INDEX_SIZE); + + $self->unlock(); + + return 1; +} + +sub FIRSTKEY { + ## + # Locate and return first key (in no particular order) + ## + my $self = tied( %{$_[0]} ) || $_[0]; + if ($self->{type} ne $SIG_HASH) { + return $self->throw_error("FIRSTKEY method only supported for hashes"); + } + + ## + # Make sure file is open + ## + if (!defined($self->{root}->{fh})) { $self->open(); } + + ## + # Request shared lock for reading + ## + $self->lock( LOCK_SH ); + + my $result = $self->get_next_key(); + + $self->unlock(); + + return ($result && $self->{root}->{filter_fetch_key}) ? $self->{root}->{filter_fetch_key}->($result) : $result; +} + +sub NEXTKEY { + ## + # Return next key (in no particular order), given previous one + ## + my $self = tied( %{$_[0]} ) || $_[0]; + if ($self->{type} ne $SIG_HASH) { + return $self->throw_error("NEXTKEY method only supported for hashes"); + } + my $prev_key = ($self->{root}->{filter_store_key} && $self->{type} eq $SIG_HASH) ? $self->{root}->{filter_store_key}->($_[1]) : $_[1]; + my $prev_md5 = $DIGEST_FUNC->($prev_key); + + ## + # Make sure file is open + ## + if (!defined($self->{root}->{fh})) { $self->open(); } + + ## + # Request shared lock for reading + ## + $self->lock( LOCK_SH ); + + my $result = $self->get_next_key( $prev_md5 ); + + $self->unlock(); + + return ($result && $self->{root}->{filter_fetch_key}) ? 
$self->{root}->{filter_fetch_key}->($result) : $result; +} + +## +# The following methods are for arrays only +## + +sub FETCHSIZE { + ## + # Return the length of the array + ## + my $self = tied( %{$_[0]} ) || $_[0]; + if ($self->{type} ne $SIG_ARRAY) { + return $self->throw_error("FETCHSIZE method only supported for arrays"); + } + + my $SAVE_FILTER = $self->{root}->{filter_fetch_value}; + $self->{root}->{filter_fetch_value} = undef; + + my $packed_size = $self->FETCH('length'); + + $self->{root}->{filter_fetch_value} = $SAVE_FILTER; + + if ($packed_size) { return int(unpack($LONG_PACK, $packed_size)); } + else { return 0; } +} + +sub STORESIZE { + ## + # Set the length of the array + ## + my $self = tied( %{$_[0]} ) || $_[0]; + if ($self->{type} ne $SIG_ARRAY) { + return $self->throw_error("STORESIZE method only supported for arrays"); + } + my $new_length = $_[1]; + + my $SAVE_FILTER = $self->{root}->{filter_store_value}; + $self->{root}->{filter_store_value} = undef; + + my $result = $self->STORE('length', pack($LONG_PACK, $new_length)); + + $self->{root}->{filter_store_value} = $SAVE_FILTER; + + return $result; +} + +sub POP { + ## + # Remove and return the last element on the array + ## + my $self = tied( %{$_[0]} ) || $_[0]; + if ($self->{type} ne $SIG_ARRAY) { + return $self->throw_error("POP method only supported for arrays"); + } + my $length = $self->FETCHSIZE(); + + if ($length) { + my $content = $self->FETCH( $length - 1 ); + $self->DELETE( $length - 1 ); + return $content; + } + else { + return undef; + } +} + +sub PUSH { + ## + # Add new element(s) to the end of the array + ## + my $self = tied( %{$_[0]} ) || $_[0]; shift @_; + if ($self->{type} ne $SIG_ARRAY) { + return $self->throw_error("PUSH method only supported for arrays"); + } + my $length = $self->FETCHSIZE(); + + while (my $content = shift @_) { + $self->STORE( $length, $content ); + $length++; + } +} + +sub SHIFT { + ## + # Remove and return first element on the array. 
+ # Shift over remaining elements to take up space. + ## + my $self = tied( %{$_[0]} ) || $_[0]; + if ($self->{type} ne $SIG_ARRAY) { + return $self->throw_error("SHIFT method only supported for arrays"); + } + my $length = $self->FETCHSIZE(); + + if ($length) { + my $content = $self->FETCH( 0 ); + + ## + # Shift elements over and remove last one. + ## + for (my $i = 0; $i < $length - 1; $i++) { + $self->STORE( $i, $self->FETCH($i + 1) ); + } + $self->DELETE( $length - 1 ); + + return $content; + } + else { + return undef; + } +} + +sub UNSHIFT { + ## + # Insert new element(s) at beginning of array. + # Shift over other elements to make space. + ## + my $self = tied( %{$_[0]} ) || $_[0]; shift @_; + if ($self->{type} ne $SIG_ARRAY) { + return $self->throw_error("UNSHIFT method only supported for arrays"); + } + my @new_elements = @_; + my $length = $self->FETCHSIZE(); + my $new_size = scalar @new_elements; + + if ($length) { + for (my $i = $length - 1; $i >= 0; $i--) { + $self->STORE( $i + $new_size, $self->FETCH($i) ); + } + } + + for (my $i = 0; $i < $new_size; $i++) { + $self->STORE( $i, $new_elements[$i] ); + } +} + +sub SPLICE { + ## + # Splices section of array with optional new section. + # Returns deleted section, or last element deleted in scalar context. 
+ ## + my $self = tied( %{$_[0]} ) || $_[0]; shift @_; + if ($self->{type} ne $SIG_ARRAY) { + return $self->throw_error("SPLICE method only supported for arrays"); + } + my $length = $self->FETCHSIZE(); + + ## + # Calculate offset and length of splice + ## + my $offset = shift || 0; + if ($offset < 0) { $offset += $length; } + + my $splice_length; + if (scalar @_) { $splice_length = shift; } + else { $splice_length = $length - $offset; } + if ($splice_length < 0) { $splice_length += ($length - $offset); } + + ## + # Setup array with new elements, and copy out old elements for return + ## + my @new_elements = @_; + my $new_size = scalar @new_elements; + + my @old_elements = (); + for (my $i = $offset; $i < $offset + $splice_length; $i++) { + push @old_elements, $self->FETCH( $i ); + } + + ## + # Adjust array length, and shift elements to accomodate new section. + ## + if ($new_size > $splice_length) { + for (my $i = $length - 1; $i >= $offset + $splice_length; $i--) { + $self->STORE( $i + ($new_size - $splice_length), $self->FETCH($i) ); + } + } + elsif ($new_size < $splice_length) { + for (my $i = $offset + $splice_length; $i < $length; $i++) { + $self->STORE( $i + ($new_size - $splice_length), $self->FETCH($i) ); + } + for (my $i = 0; $i < $splice_length - $new_size; $i++) { + $self->DELETE( $length - 1 ); + $length--; + } + } + + ## + # Insert new elements into array + ## + for (my $i = $offset; $i < $offset + $new_size; $i++) { + $self->STORE( $i, shift @new_elements ); + } + + ## + # Return deleted section, or last element in scalar context. + ## + return wantarray ? @old_elements : $old_elements[-1]; +} + +sub EXTEND { + ## + # Perl will call EXTEND() when the array is likely to grow. + # We don't care, but include it for compatibility. 
+ ## +} + +## +# Public method aliases +## +sub store { return STORE(@_); } +sub put { return STORE(@_); } + +sub fetch { return FETCH(@_); } +sub get { return FETCH(@_); } + +sub delete { return DELETE(@_); } +sub exists { return EXISTS(@_); } +sub clear { return CLEAR(@_); } + +sub first_key { return FIRSTKEY(@_); } +sub next_key { return NEXTKEY(@_); } + +sub length { return FETCHSIZE(@_); } +sub pop { return POP(@_); } +sub push { return PUSH(@_); } +sub shift { return SHIFT(@_); } +sub unshift { return UNSHIFT(@_); } +sub splice { return SPLICE(@_); } + +1; + +__END__ + +=head1 NAME + +DBM::Deep - A pure perl multi-level hash/array DBM + +=head1 SYNOPSIS + + use DBM::Deep; + my $db = new DBM::Deep "foo.db"; + + $db->{key} = 'value'; # tie() style + print $db->{key}; + + $db->put('key', 'value'); # OO style + print $db->get('key'); + + # true multi-level support + $db->{my_complex} = [ + 'hello', { perl => 'rules' }, + 42, 99 ]; + +=head1 DESCRIPTION + +A unique flat-file database module, written in pure perl. True +multi-level hash/array support (unlike MLDBM, which is faked), hybrid +OO / tie() interface, cross-platform FTPable files, and quite fast. Can +handle millions of keys and unlimited hash levels without significant +slow-down. Written from the ground-up in pure perl -- this is NOT a +wrapper around a C-based DBM. Out-of-the-box compatibility with Unix, +Mac OS X and Windows. + +=head1 INSTALLATION + +Hopefully you are using CPAN's excellent Perl module, which will download +and install the module for you. If not, get the tarball, and run these +commands: + + tar zxf DBM-Deep-* + cd DBM-Deep-* + perl Makefile.PL + make + make test + make install + +=head1 SETUP + +Construction can be done OO-style (which is the recommended way), or using +Perl's tie() function. Both are examined here. + +=head2 OO CONSTRUCTION + +The recommended way to construct a DBM::Deep object is to use the new() +method, which gets you a blessed, tied hash or array reference. 
+ + my $db = new DBM::Deep "foo.db"; + +This opens a new database handle, mapped to the file "foo.db". If this +file does not exist, it will automatically be created. DB files are +opened in "r+" (read/write) mode, and the type of object returned is a +hash, unless otherwise specified (see L below). + + + +You can pass a number of options to the constructor to specify things like +locking, autoflush, etc. This is done by passing an inline hash: + + my $db = new DBM::Deep( + file => "foo.db", + locking => 1, + autoflush => 1 + ); + +Notice that the filename is now specified I the hash with +the "file" parameter, as opposed to being the sole argument to the +constructor. This is required if any options are specified. +See L below for the complete list. + + + +You can also start with an array instead of a hash. For this, you must +specify the C parameter: + + my $db = new DBM::Deep( + file => "foo.db", + type => DBM::Deep::TYPE_ARRAY + ); + +B Specifying the C parameter only takes effect when beginning +a new DB file. If you create a DBM::Deep object with an existing file, the +C will be loaded from the file header, and ignored if it is passed +to the constructor. + +=head2 TIE CONSTRUCTION + +Alternatively, you can create a DBM::Deep handle by using Perl's built-in +tie() function. This is not ideal, because you get only a basic, tied hash +(or array) which is not blessed, so you can't call any functions on it. + + my %hash; + tie %hash, "DBM::Deep", "foo.db"; + + my @array; + tie @array, "DBM::Deep", "bar.db"; + +As with the OO constructor, you can replace the DB filename parameter with +a hash containing one or more options (see L just below for the +complete list). + + tie %hash, "DBM::Deep", { + file => "foo.db", + locking => 1, + autoflush => 1 + }; + +=head2 OPTIONS + +There are a number of options that can be passed in when constructing your +DBM::Deep objects. These apply to both the OO- and tie- based approaches.
+ +=over + +=item * file + +Filename of the DB file to link the handle to. You can pass a full absolute +filesystem path, partial path, or a plain filename if the file is in the +current working directory. This is a required parameter. + +=item * mode + +File open mode (read-only, read-write, etc.) string passed to Perl's FileHandle +module. This is an optional parameter, and defaults to "r+" (read/write). +B If the default (r+) mode is selected, the file will also be auto- +created if it doesn't exist. + +=item * type + +This parameter specifies what type of object to create, a hash or array. Use +one of these two constants: C or C. +This only takes effect when beginning a new file. This is an optional +parameter, and defaults to hash. + +=item * locking + +Specifies whether locking is to be enabled. DBM::Deep uses Perl's Fcntl flock() +function to lock the database in exclusive mode for writes, and shared mode for +reads. Pass any true value to enable. This affects the base DB handle I that use the same DB file. This is an optional +parameter, and defaults to 0 (disabled). See L below for more. + +=item * autoflush + +Specifies whether autoflush is to be enabled on the underlying FileHandle. +This obviously slows down write operations, but is required if you may have +multiple processes accessing the same DB file (also consider enabling I +or at least I). Pass any true value to enable. This is an optional +parameter, and defaults to 0 (disabled). + +=item * volatile + +If I mode is enabled, DBM::Deep will stat() the DB file before each +STORE() operation. This is required if an outside force may change the size of +the file between transactions. Locking also implicitly enables volatile. This +is useful if you want to use a different locking system or write your own. Pass +any true value to enable. This is an optional parameter, and defaults to 0 +(disabled).
+ +=item * autobless + +If I mode is enabled, DBM::Deep will preserve blessed hashes, and +restore them when fetched. This is an B feature, and does have +side-effects. Basically, when hashes are re-blessed into their original +classes, they are no longer blessed into the DBM::Deep class! So you won't be +able to call any DBM::Deep methods on them. You have been warned. +This is an optional parameter, and defaults to 0 (disabled). + +=item * filter_* + +See L below. + +=item * debug + +Setting I mode will make all errors non-fatal, dump them out to +STDERR, and continue on. This is for debugging purposes only, and probably +not what you want. This is an optional parameter, and defaults to 0 (disabled). + +=item * fh + +Instead of passing a file path, you can instead pass a handle to a pre-opened +filehandle. Note: Beware of using the magic *DATA handle, as this actually +contains your entire Perl script, as well as the data following the __DATA__ +marker. This will not work, because DBM::Deep uses absolute seek()s into the +file. Instead, consider reading *DATA into an IO::Scalar handle, then passing +in that. + +=back + +=head1 TIE INTERFACE + +With DBM::Deep you can access your databases using Perl's standard hash/array +syntax. Because all Deep objects are I to hashes or arrays, you can treat +them as such. Deep will intercept all reads/writes and direct them to the right +place -- the DB file. This has nothing to do with the L +section above. This simply tells you how to use DBM::Deep using regular hashes +and arrays, rather than calling functions like C and C (although those +work too). It is entirely up to you how you want to access your databases. + +=head2 HASHES + +You can treat any DBM::Deep object like a normal Perl hash reference.
Add keys, +or even nested hashes (or arrays) using standard Perl syntax: + + my $db = new DBM::Deep "foo.db"; + + $db->{mykey} = "myvalue"; + $db->{myhash} = {}; + $db->{myhash}->{subkey} = "subvalue"; + + print $db->{myhash}->{subkey} . "\n"; + +You can even step through hash keys using the normal Perl C function: + + foreach my $key (keys %$db) { + print "$key: " . $db->{$key} . "\n"; + } + +Remember that Perl's C function extracts I key from the hash and +pushes them onto an array, all before the loop even begins. If you have an +extra large hash, this may exhaust Perl's memory. Instead, consider using +Perl's C function, which pulls keys/values one at a time, using very +little memory: + + while (my ($key, $value) = each %$db) { + print "$key: $value\n"; + } + +Please note that when using C, you should always pass a direct +hash reference, not a lookup. Meaning, you should B do this: + + # NEVER DO THIS + while (my ($key, $value) = each %{$db->{foo}}) { # BAD + +This causes an infinite loop, because for each iteration, Perl is calling +FETCH() on the $db handle, resulting in a "new" hash for foo every time, so +it effectively keeps returning the first key over and over again. Instead, +assign a temporary variable to C<$db->{foo}>, then pass that to each(). + +=head2 ARRAYS + +As with hashes, you can treat any DBM::Deep object like a normal Perl array +reference. This includes inserting, removing and manipulating elements, +and the C, C, C, C and C functions. +The object must have first been created using type C, +or simply be a nested array reference inside a hash. 
Example: + + my $db = new DBM::Deep( + file => "foo-array.db", + type => DBM::Deep::TYPE_ARRAY + ); + + $db->[0] = "foo"; + push @$db, "bar", "baz"; + unshift @$db, "bah"; + + my $last_elem = pop @$db; # baz + my $first_elem = shift @$db; # bah + my $second_elem = $db->[1]; # bar + + my $num_elements = scalar @$db; + +=head1 OO INTERFACE + +In addition to the I interface, you can also use a standard OO interface +to manipulate all aspects of DBM::Deep databases. Each type of object (hash or +array) has its own methods, but both types share the following common methods: +C, C, C, C and C. + +=over + +=item * put() + +Stores a new hash key/value pair, or sets an array element value. Takes two +arguments, the hash key or array index, and the new value. The value can be +a scalar, hash ref or array ref. Returns true on success, false on failure. + + $db->put("foo", "bar"); # for hashes + $db->put(1, "bar"); # for arrays + +=item * get() + +Fetches the value of a hash key or array element. Takes one argument: the hash +key or array index. Returns a scalar, hash ref or array ref, depending on the +data type stored. + + my $value = $db->get("foo"); # for hashes + my $value = $db->get(1); # for arrays + +=item * exists() + +Checks if a hash key or array index exists. Takes one argument: the hash key +or array index. Returns true if it exists, false if not. + + if ($db->exists("foo")) { print "yay!\n"; } # for hashes + if ($db->exists(1)) { print "yay!\n"; } # for arrays + +=item * delete() + +Deletes one hash key/value pair or array element. Takes one argument: the hash +key or array index. Returns true on success, false if not found. For arrays, +the remaining elements located after the deleted element are NOT moved over. +The deleted element is essentially just undefined, which is exactly how Perl's +internal arrays work. Please note that the space occupied by the deleted +key/value or element is B reused again -- see L +below for details and workarounds. 
+ + $db->delete("foo"); # for hashes + $db->delete(1); # for arrays + +=item * clear() + +Deletes B hash keys or array elements. Takes no arguments. No return +value. Please note that the space occupied by the deleted keys/values or +elements is B reused again -- see L below for +details and workarounds. + + $db->clear(); # hashes or arrays + +=back + +=head2 HASHES + +For hashes, DBM::Deep supports all the common methods described above, and the +following additional methods: C and C. + +=over + +=item * first_key() + +Returns the "first" key in the hash. As with built-in Perl hashes, keys are +fetched in an undefined order (which appears random). Takes no arguments, +returns the key as a scalar value. + + my $key = $db->first_key(); + +=item * next_key() + +Returns the "next" key in the hash, given the previous one as the sole argument. +Returns undef if there are no more keys to be fetched. + + $key = $db->next_key($key); + +=back + +Here are some examples of using hashes: + + my $db = new DBM::Deep "foo.db"; + + $db->put("foo", "bar"); + print "foo: " . $db->get("foo") . "\n"; + + $db->put("baz", {}); # new child hash ref + $db->get("baz")->put("buz", "biz"); + print "buz: " . $db->get("baz")->get("buz") . "\n"; + + my $key = $db->first_key(); + while ($key) { + print "$key: " . $db->get($key) . "\n"; + $key = $db->next_key($key); + } + + if ($db->exists("foo")) { $db->delete("foo"); } + +=head2 ARRAYS + +For arrays, DBM::Deep supports all the common methods described above, and the +following additional methods: C, C, C, C, +C and C. + +=over + +=item * length() + +Returns the number of elements in the array. Takes no arguments. + + my $len = $db->length(); + +=item * push() + +Adds one or more elements onto the end of the array. Accepts scalars, hash +refs or array refs. No return value. + + $db->push("foo", "bar", {}); + +=item * pop() + +Fetches the last element in the array, and deletes it. Takes no arguments. +Returns undef if array is empty. 
Returns the element value. + + my $elem = $db->pop(); + +=item * shift() + +Fetches the first element in the array, deletes it, then shifts all the +remaining elements over to take up the space. Returns the element value. This +method is not recommended with large arrays -- see L below for +details. + + my $elem = $db->shift(); + +=item * unshift() + +Inserts one or more elements onto the beginning of the array, shifting all +existing elements over to make room. Accepts scalars, hash refs or array refs. +No return value. This method is not recommended with large arrays -- see + below for details. + + $db->unshift("foo", "bar", {}); + +=item * splice() + +Performs exactly like Perl's built-in function of the same name. See L for usage -- it is too complicated to document here. This method is +not recommended with large arrays -- see L below for details. + +=back + +Here are some examples of using arrays: + + my $db = new DBM::Deep( + file => "foo.db", + type => DBM::Deep::TYPE_ARRAY + ); + + $db->push("bar", "baz"); + $db->unshift("foo"); + $db->put(3, "buz"); + + my $len = $db->length(); + print "length: $len\n"; # 4 + + for (my $k=0; $k<$len; $k++) { + print "$k: " . $db->get($k) . "\n"; + } + + $db->splice(1, 2, "biz", "baf"); + + while (my $elem = shift @$db) { + print "shifted: $elem\n"; + } + +=head1 LOCKING + +Enable automatic file locking by passing a true value to the C +parameter when constructing your DBM::Deep object (see L above). + + my $db = new DBM::Deep( + file => "foo.db", + locking => 1 + ); + +This causes Deep to C the underlying FileHandle object with exclusive +mode for writes, and shared mode for reads. This is required if you have +multiple processes accessing the same database file, to avoid file corruption. +Please note that C does NOT work for files over NFS. See L below for more. + +=head2 EXPLICIT LOCKING + +You can explicitly lock a database, so it remains locked for multiple +transactions. 
This is done by calling the C method, and passing an +optional lock mode argument (defaults to exclusive mode). This is particularly +useful for things like counters, where the current value needs to be fetched, +then incremented, then stored again. + + $db->lock(); + my $counter = $db->get("counter"); + $counter++; + $db->put("counter", $counter); + $db->unlock(); + + # or... + + $db->lock(); + $db->{counter}++; + $db->unlock(); + +You can pass C an optional argument, which specifies which mode to use +(exclusive or shared). Use one of these two constants: C +or C. These are passed directly to C, and are the +same as the constants defined in Perl's C module. + + $db->lock( DBM::Deep::LOCK_SH ); + # something here + $db->unlock(); + +If you want to implement your own file locking scheme, be sure to create your +DBM::Deep objects setting the C option to true. This hints to Deep +that the DB file may change between transactions. See L +below for more. + +=head1 IMPORTING/EXPORTING + +You can import existing complex structures by calling the C method, +and export an entire database into an in-memory structure using the C +method. Both are examined here. + +=head2 IMPORTING + +Say you have an existing hash with nested hashes/arrays inside it. Instead of +walking the structure and adding keys/elements to the database as you go, +simply pass a reference to the C method. This recursively adds +everything to an existing DBM::Deep object for you. Here is an example: + + my $struct = { + key1 => "value1", + key2 => "value2", + array1 => [ "elem0", "elem1", "elem2" ], + hash1 => { + subkey1 => "subvalue1", + subkey2 => "subvalue2" + } + }; + + my $db = new DBM::Deep "foo.db"; + $db->import( $struct ); + + print $db->{key1} . "\n"; # prints "value1" + +This recursively imports the entire C<$struct> object into C<$db>, including +all nested hashes and arrays. 
If the DBM::Deep object contains existing data, +keys are merged with the existing ones, replacing if they already exist. +The C method can be called on any database level (not just the base +level), and works with both hash and array DB types. + + + +B Make sure your existing structure has no circular references in it. +These will cause an infinite loop when importing. + +=head2 EXPORTING + +Calling the C method on an existing DBM::Deep object will return +a reference to a new in-memory copy of the database. The export is done +recursively, so all nested hashes/arrays are all exported to standard Perl +objects. Here is an example: + + my $db = new DBM::Deep "foo.db"; + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + $db->{hash1} = {}; + $db->{hash1}->{subkey1} = "subvalue1"; + $db->{hash1}->{subkey2} = "subvalue2"; + + my $struct = $db->export(); + + print $struct->{key1} . "\n"; # prints "value1" + +This makes a complete copy of the database in memory, and returns a reference +to it. The C method can be called on any database level (not just +the base level), and works with both hash and array DB types. Be careful of +large databases -- you can store a lot more data in a DBM::Deep object than an +in-memory Perl structure. + + + +B Make sure your database has no circular references in it. +These will cause an infinite loop when exporting. + +=head1 FILTERS + +DBM::Deep has a number of hooks where you can specify your own Perl function +to perform filtering on incoming or outgoing data. This is a perfect +way to extend the engine, and implement things like real-time compression or +encryption. Filtering applies to the base DB level, and all child hashes / +arrays. Filter hooks can be specified when your DBM::Deep object is first +constructed, or by calling the C method at any time. There are +four available filter hooks, described below: + +=over + +=item * filter_store_key + +This filter is called whenever a hash key is stored.
It +is passed the incoming key, and expected to return a transformed key. + +=item * filter_store_value + +This filter is called whenever a hash key or array element is stored. It +is passed the incoming value, and expected to return a transformed value. + +=item * filter_fetch_key + +This filter is called whenever a hash key is fetched (i.e. via +C or C). It is passed the transformed key, +and expected to return the plain key. + +=item * filter_fetch_value + +This filter is called whenever a hash key or array element is fetched. +It is passed the transformed value, and expected to return the plain value. + +=back + +Here are the two ways to setup a filter hook: + + my $db = new DBM::Deep( + file => "foo.db", + filter_store_value => \&my_filter_store, + filter_fetch_value => \&my_filter_fetch + ); + + # or... + + $db->set_filter( "filter_store_value", \&my_filter_store ); + $db->set_filter( "filter_fetch_value", \&my_filter_fetch ); + +Your filter function will be called only when dealing with SCALAR keys or +values. When nested hashes and arrays are being stored/fetched, filtering +is bypassed. Filters are called as static functions, passed a single SCALAR +argument, and expected to return a single SCALAR value. If you want to +remove a filter, set the function reference to C: + + $db->set_filter( "filter_store_value", undef ); + +=head2 REAL-TIME ENCRYPTION EXAMPLE + +Here is a working example that uses the I module to +do real-time encryption / decryption of keys & values with DBM::Deep Filters. +Please visit L for more +on I. You'll also need the I module. 
+ + use DBM::Deep; + use Crypt::Blowfish; + use Crypt::CBC; + + my $cipher = new Crypt::CBC({ + 'key' => 'my secret key', + 'cipher' => 'Blowfish', + 'iv' => '$KJh#(}q', + 'regenerate_key' => 0, + 'padding' => 'space', + 'prepend_iv' => 0 + }); + + my $db = new DBM::Deep( + file => "foo-encrypt.db", + filter_store_key => \&my_encrypt, + filter_store_value => \&my_encrypt, + filter_fetch_key => \&my_decrypt, + filter_fetch_value => \&my_decrypt, + ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; + + undef $db; + exit; + + sub my_encrypt { + return $cipher->encrypt( $_[0] ); + } + sub my_decrypt { + return $cipher->decrypt( $_[0] ); + } + +=head2 REAL-TIME COMPRESSION EXAMPLE + +Here is a working example that uses the I module to do real-time +compression / decompression of keys & values with DBM::Deep Filters. +Please visit L for +more on I. + + use DBM::Deep; + use Compress::Zlib; + + my $db = new DBM::Deep( + file => "foo-compress.db", + filter_store_key => \&my_compress, + filter_store_value => \&my_compress, + filter_fetch_key => \&my_decompress, + filter_fetch_value => \&my_decompress, + ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; + + undef $db; + exit; + + sub my_compress { + return Compress::Zlib::memGzip( $_[0] ) ; + } + sub my_decompress { + return Compress::Zlib::memGunzip( $_[0] ) ; + } + +B Filtering of keys only applies to hashes. Array "keys" are +actually numerical index numbers, and are not filtered. + +=head1 ERROR HANDLING + +Most DBM::Deep methods return a true value for success, and call die() on +failure. You can wrap calls in an eval block to catch the die. Also, the +actual error message is stored in an internal scalar, which can be fetched by +calling the C method. 
+ + my $db = new DBM::Deep "foo.db"; # create hash + eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call + + print $db->error(); # prints error message + +You can then call C to clear the current error state. + + $db->clear_error(); + +If you set the C option to true when creating your DBM::Deep object, +all errors are considered NON-FATAL, and dumped to STDERR. This is only +for debugging purposes. + +=head1 LARGEFILE SUPPORT + +If you have a 64-bit system, and your Perl is compiled with both LARGEFILE +and 64-bit support, you I be able to create databases larger than 2 GB. +DBM::Deep by default uses 32-bit file offset tags, but these can be changed +by calling the static C method before you do anything else. + + DBM::Deep::set_pack(8, 'Q'); + +This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words +instead of 32-bit longs. After setting these values your DB files have a +theoretical maximum size of 16 XB (exabytes). + + + +B Changing these values will B work for existing database files. +Only change this for new files, and make sure it stays set consistently +throughout the file's life. If you do set these values, you can no longer +access 32-bit DB files. You can, however, call C to change +back to 32-bit mode. + + + +B I have not personally tested files > 2 GB -- all my systems have +only a 32-bit Perl. However, I have received user reports that this does +indeed work! + +=head1 LOW-LEVEL ACCESS + +If you require low-level access to the underlying FileHandle that Deep uses, +you can call the C method, which returns the handle: + + my $fh = $db->fh(); + +This method can be called on the root level of the database, or any child +hashes or arrays. All levels share a I structure, which contains things +like the FileHandle, a reference counter, and all the options you specified +when you created the object. You can get access to this root structure by +calling the C method. 
+ + my $root = $db->root(); + +This is useful for changing options after the object has already been created, +such as enabling/disabling locking, volatile or debug modes. You can also +store your own temporary user data in this structure (be wary of name +collision), which is then accessible from any child hash or array. + +=head1 CUSTOM DIGEST ALGORITHM + +DBM::Deep by default uses the I (MD5) algorithm for hashing +keys. However you can override this, and use another algorithm (such as SHA-256) +or even write your own. But please note that Deep currently expects zero +collisions, so your algorithm has to be I, so to speak. +Collision detection may be introduced in a later version. + + + +You can specify a custom digest algorithm by calling the static C +function, passing a reference to a subroutine, and the length of the algorithm's +hashes (in bytes). This is a global static function, which affects ALL Deep +objects. Here is a working example that uses a 256-bit hash from the +I module. Please see +L for more. + + use DBM::Deep; + use Digest::SHA256; + + my $context = Digest::SHA256::new(256); + + DBM::Deep::set_digest( \&my_digest, 32 ); + + my $db = new DBM::Deep "foo-sha.db"; + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; + + undef $db; + exit; + + sub my_digest { + return substr( $context->hash($_[0]), 0, 32 ); + } + +B Your returned digest strings must be B the number +of bytes you specify in the C function (in this case 32). + +=head1 CIRCULAR REFERENCES + +DBM::Deep has B support for circular references. Meaning you +can have a nested hash key or array element that points to a parent object. +This relationship is stored in the DB file, and is preserved between sessions. +Here is an example: + + my $db = new DBM::Deep "foo.db"; + + $db->{foo} = "bar"; + $db->{circle} = $db; # ref to self + + print $db->{foo} . "\n"; # prints "bar" + print $db->{circle}->{foo} . 
"\n"; # prints "bar" again + +One catch is, passing the object to a function that recursively walks the +object tree (such as I or even the built-in C or +C methods) will result in an infinite loop. The other catch is, +if you fetch the I of a circular reference (i.e. using the C +or C methods), you will get the I, not the +ref's key. This gets even more interesting with the above example, where +the I key points to the base DB object, which technically doesn't +have a key. So I made DBM::Deep return "[base]" as the key name in that +special case. + +=head1 CAVEATS / ISSUES / BUGS + +This section describes all the known issues with DBM::Deep. If you have found +something that is not listed here, please send e-mail to L. + +=head2 UNUSED SPACE RECOVERY + +One major caveat with Deep is that space occupied by existing keys and +values is not recovered when they are deleted. Meaning if you keep deleting +and adding new keys, your file will continuously grow. I am working on this, +but in the meantime you can call the built-in C method from time to +time (perhaps in a crontab or something) to recover all your unused space. + + $db->optimize(); # returns true on success + +This rebuilds the ENTIRE database into a new file, then moves it on top of +the original. The new file will have no unused space, thus it will take up as +little disk space as possible. Please note that this operation can take +a long time for large files, and you need enough disk space to temporarily hold +2 copies of your DB file. The temporary file is created in the same directory +as the original, named with a ".tmp" extension, and is deleted when the +operation completes. Oh, and if locking is enabled, the DB is automatically +locked for the entire duration of the copy. + + + +B Only call optimize() on the top-level node of the database, and +make sure there are no child references lying around. Deep keeps a reference +counter, and if it is greater than 1, optimize() will abort and return undef. 
+ +=head2 AUTOVIVIFICATION + +Unfortunately, autovivification doesn't work with tied hashes. This appears to +be a bug in Perl's tie() system, as I encountered the very same +issue with his I module (see L), +and it is also mentioned in the BUGS section for the I module ). Basically, on a new db file, +this does not work: + + $db->{foo}->{bar} = "hello"; + +Since "foo" doesn't exist, you cannot add "bar" to it. You end up with "foo" +being an empty hash. Try this instead, which works fine: + + $db->{foo} = { bar => "hello" }; + +As of Perl 5.8.7, this bug still exists. I have walked very carefully through +the execution path, and Perl indeed passes an empty hash to the STORE() method. +Probably a bug in Perl. + +=head2 FILE CORRUPTION + +The current level of error handling in Deep is minimal. Files I checked +for a 32-bit signature on open(), but other corruption in files can cause +segmentation faults. Deep may try to seek() past the end of a file, or get +stuck in an infinite loop depending on the level of corruption. File write +operations are not checked for failure (for speed), so if you happen to run +out of disk space, Deep will probably fail in a bad way. These things will +be addressed in a later version of DBM::Deep. + +=head2 DB OVER NFS + +Beware of using DB files over NFS. Deep uses flock(), which works well on local +filesystems, but will NOT protect you from file corruption over NFS. I've heard +about setting up your NFS server with a locking daemon, then using lockf() to +lock your files, but your milage may vary there as well. From what I +understand, there is no real way to do it. However, if you need access to the +underlying FileHandle in Deep for using some other kind of locking scheme like +lockf(), see the L section above. + +=head2 COPYING OBJECTS + +Beware of copying tied objects in Perl. Very strange things can happen. 
+Instead, use Deep's C method which safely copies the object and +returns a new, blessed, tied hash or array to the same level in the DB. + + my $copy = $db->clone(); + +=head2 LARGE ARRAYS + +Beware of using C, C or C with large arrays. +These functions cause every element in the array to move, which can be murder +on DBM::Deep, as every element has to be fetched from disk, then stored again in +a different location. This may be addressed in a later version. + +=head1 PERFORMANCE + +This section discusses DBM::Deep's speed and memory usage. + +=head2 SPEED + +Obviously, DBM::Deep isn't going to be as fast as some C-based DBMs, such as +the almighty I. But it makes up for it in features like true +multi-level hash/array support, and cross-platform FTPable files. Even so, +DBM::Deep is still pretty fast, and the speed stays fairly consistent, even +with huge databases. Here is some test data: + + Adding 1,000,000 keys to new DB file... + + At 100 keys, avg. speed is 2,703 keys/sec + At 200 keys, avg. speed is 2,642 keys/sec + At 300 keys, avg. speed is 2,598 keys/sec + At 400 keys, avg. speed is 2,578 keys/sec + At 500 keys, avg. speed is 2,722 keys/sec + At 600 keys, avg. speed is 2,628 keys/sec + At 700 keys, avg. speed is 2,700 keys/sec + At 800 keys, avg. speed is 2,607 keys/sec + At 900 keys, avg. speed is 2,190 keys/sec + At 1,000 keys, avg. speed is 2,570 keys/sec + At 2,000 keys, avg. speed is 2,417 keys/sec + At 3,000 keys, avg. speed is 1,982 keys/sec + At 4,000 keys, avg. speed is 1,568 keys/sec + At 5,000 keys, avg. speed is 1,533 keys/sec + At 6,000 keys, avg. speed is 1,787 keys/sec + At 7,000 keys, avg. speed is 1,977 keys/sec + At 8,000 keys, avg. speed is 2,028 keys/sec + At 9,000 keys, avg. speed is 2,077 keys/sec + At 10,000 keys, avg. speed is 2,031 keys/sec + At 20,000 keys, avg. speed is 1,970 keys/sec + At 30,000 keys, avg. speed is 2,050 keys/sec + At 40,000 keys, avg. speed is 2,073 keys/sec + At 50,000 keys, avg. 
speed is 1,973 keys/sec + At 60,000 keys, avg. speed is 1,914 keys/sec + At 70,000 keys, avg. speed is 2,091 keys/sec + At 80,000 keys, avg. speed is 2,103 keys/sec + At 90,000 keys, avg. speed is 1,886 keys/sec + At 100,000 keys, avg. speed is 1,970 keys/sec + At 200,000 keys, avg. speed is 2,053 keys/sec + At 300,000 keys, avg. speed is 1,697 keys/sec + At 400,000 keys, avg. speed is 1,838 keys/sec + At 500,000 keys, avg. speed is 1,941 keys/sec + At 600,000 keys, avg. speed is 1,930 keys/sec + At 700,000 keys, avg. speed is 1,735 keys/sec + At 800,000 keys, avg. speed is 1,795 keys/sec + At 900,000 keys, avg. speed is 1,221 keys/sec + At 1,000,000 keys, avg. speed is 1,077 keys/sec + +This test was performed on a PowerMac G4 1gHz running Mac OS X 10.3.2 & Perl +5.8.1, with an 80GB Ultra ATA/100 HD spinning at 7200RPM. The hash keys and +values were between 6 - 12 chars in length. The DB file ended up at 210MB. +Run time was 12 min 3 sec. + +=head2 MEMORY USAGE + +One of the great things about DBM::Deep is that it uses very little memory. +Even with huge databases (1,000,000+ keys) you will not see much increased +memory on your process. Deep relies solely on the filesystem for storing +and fetching data. Here is output from I before even opening a +database handle: + + PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND + 22831 root 11 0 2716 2716 1296 R 0.0 0.2 0:07 perl + +Basically the process is taking 2,716K of memory. And here is the same +process after storing and fetching 1,000,000 keys: + + PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND + 22831 root 14 0 2772 2772 1328 R 0.0 0.2 13:32 perl + +Notice the memory usage increased by only 56K. Test was performed on a 700mHz +x86 box running Linux RedHat 7.2 & Perl 5.6.1. + +=head1 DB FILE FORMAT + +In case you were interested in the underlying DB file format, it is documented +here in this section. You don't need to know this to use the module, it's just +included for reference. 
+ +=head2 SIGNATURE + +DBM::Deep files always start with a 32-bit signature to identify the file type. +This is at offset 0. The signature is "DPDB" in network byte order. This is +checked upon each file open(). + +=head2 TAG + +The DBM::Deep file is in a I, meaning each section of the file +has a standard header containing the type of data, the length of data, and then +the data itself. The type is a single character (1 byte), the length is a +32-bit unsigned long in network byte order, and the data is, well, the data. +Here is how it unfolds: + +=head2 MASTER INDEX + +Immediately after the 32-bit file signature is the I record. +This is a standard tag header followed by 1024 bytes (in 32-bit mode) or 2048 +bytes (in 64-bit mode) of data. The type is I for hash or I for array, +depending on how the DBM::Deep object was constructed. + + + +The index works by looking at a I of the hash key (or array index +number). The first 8-bit char of the MD5 signature is the offset into the +index, multiplied by 4 in 32-bit mode, or 8 in 64-bit mode. The value of the +index element is a file offset of the next tag for the key/element in question, +which is usually a I tag (see below). + + + +The next tag I be another index, depending on how many keys/elements +exist. See L below for details. + +=head2 BUCKET LIST + +A I is a collection of 16 MD5 hashes for keys/elements, plus +file offsets to where the actual data is stored. It starts with a standard +tag header, with type I, and a data size of 320 bytes in 32-bit mode, or +384 bytes in 64-bit mode. Each MD5 hash is stored in full (16 bytes), plus +the 32-bit or 64-bit file offset for the I containing the actual data. +When the list fills up, a I operation is performed (See +L below). + +=head2 BUCKET + +A I is a tag containing a key/value pair (in hash mode), or an +index/value pair (in array mode). 
It starts with a standard tag header with +type I for scalar data (string, binary, etc.), or it could be a nested +hash (type I) or array (type I). The value comes just after the tag +header. The size reported in the tag header is only for the value, but then, +just after the value is another size (32-bit unsigned long) and then the plain +key itself. Since the value is likely to be fetched more often than the plain +key, I figured it would be I faster to store the value first. + + + +If the type is I (hash) or I (array), the value is another I +record for the nested structure, where the process begins all over again. + +=head2 RE-INDEXING + +After a I grows to 16 records, its allocated space in the file is +exhausted. Then, when another key/element comes in, the list is converted to a +new index record. However, this index will look at the next char in the MD5 +hash, and arrange new Bucket List pointers accordingly. This process is called +I. Basically, a new index tag is created at the file EOF, and all +17 (16 + new one) keys/elements are removed from the old Bucket List and +inserted into the new index. Several new Bucket Lists are created in the +process, as a new MD5 char from the key is being examined (it is unlikely that +the keys will all share the same next char of their MD5s). + + + +Because of the way the I algorithm works, it is impossible to tell exactly +when the Bucket Lists will turn into indexes, but the first round tends to +happen right around 4,000 keys. You will see a I decrease in +performance here, but it picks back up pretty quick (see L above). Then +it takes B more keys to exhaust the next level of Bucket Lists. It's +right around 900,000 keys. This process can continue nearly indefinitely -- +right up until the point the I signatures start colliding with each other, +and this is B rare -- like winning the lottery 5 times in a row AND +getting struck by lightning while you are walking to cash in your tickets. 
+ +Theoretically, since I hashes are 128-bit values, you I have up to +340,282,366,921,000,000,000,000,000,000,000,000,000 keys/elements (I believe +this is 340 unodecillion, but don't quote me). + +=head2 STORING + +When a new key/element is stored, the key (or index number) is first run through +I to get a 128-bit signature (example, in hex: +b05783b0773d894396d475ced9d2f4f6). Then, the I record is checked +for the first char of the signature (in this case I). If it does not exist, +a new I is created for our key (and the next 15 future keys that +happen to also have I as their first MD5 char). The entire MD5 is written +to the I along with the offset of the new I record (EOF at +this point, unless we are replacing an existing I), where the actual +data will be stored. + +=head2 FETCHING + +Fetching an existing key/element involves getting a I of the key +(or index number), then walking along the indexes. If there are enough +keys/elements in this DB level, there might be nested indexes, each linked to +a particular char of the MD5. Finally, a I is pointed to, which +contains up to 16 full MD5 hashes. Each is checked for equality to the key in +question. If we found a match, the I tag is loaded, where the value and +plain key are stored. + + + +Fetching the plain key occurs when calling the I and I +methods. In this process the indexes are walked systematically, and each key +fetched in increasing MD5 order (which is why it appears random). Once the +I is found, the value is skipped and the plain key returned instead. +B Do not count on keys being fetched as if the MD5 hashes were +alphabetically sorted. This only happens on an index-level -- as soon as the +I are hit, the keys will come out in the order they went in -- +so it's pretty much undefined how the keys will come out -- just like Perl's +built-in hashes. + +=head1 AUTHOR + +Joseph Huckaby, L + +Special thanks to Adam Sah and Rich Gaushell! 
You know why :-) + +=head1 SEE ALSO + +perltie(1), Tie::Hash(3), Digest::MD5(3), Fcntl(3), flock(2), lockf(3), nfs(5), +Digest::SHA256(3), Crypt::Blowfish(3), Compress::Zlib(3) + +=head1 LICENSE + +Copyright (c) 2002-2005 Joseph Huckaby. All Rights Reserved. +This is free software, you may use it and distribute it under the +same terms as Perl itself. + +=cut diff --git a/MANIFEST b/MANIFEST new file mode 100644 index 0000000..71786e0 --- /dev/null +++ b/MANIFEST @@ -0,0 +1,22 @@ +Changes +README +Makefile.PL +MANIFEST +Deep.pm +t/01basic.t +t/02hash.t +t/03bighash.t +t/04array.t +t/05bigarray.t +t/06reindex.t +t/07error.t +t/08locking.t +t/09deephash.t +t/10deeparray.t +t/11largekeys.t +t/12optimize.t +t/13clone.t +t/14setpack.t +t/15filter.t +t/16digest.t +t/17circular.t diff --git a/Makefile.PL b/Makefile.PL new file mode 100644 index 0000000..04fd018 --- /dev/null +++ b/Makefile.PL @@ -0,0 +1,13 @@ +## +# Makefile.PL for DBM::Deep +## + +use ExtUtils::MakeMaker; + +WriteMakefile( + 'NAME' => 'DBM::Deep', + 'VERSION_FROM' => 'Deep.pm', + 'PREREQ_PM' => { + Digest::MD5 => 1.0 + } +); diff --git a/README b/README new file mode 100644 index 0000000..d0b8b42 --- /dev/null +++ b/README @@ -0,0 +1,1052 @@ +NAME + DBM::Deep - A pure perl multi-level hash/array DBM + +SYNOPSIS + use DBM::Deep; + my $db = new DBM::Deep "foo.db"; + + $db->{key} = 'value'; # tie() style + print $db->{key}; + + $db->put('key', 'value'); # OO style + print $db->get('key'); + + # true multi-level support + $db->{my_complex} = [ + 'hello', { perl => 'rules' }, + 42, 99 ]; + +DESCRIPTION + A unique flat-file database module, written in pure perl. True + multi-level hash/array support (unlike MLDBM, which is faked), hybrid OO + / tie() interface, cross-platform FTPable files, and quite fast. Can + handle millions of keys and unlimited hash levels without significant + slow-down. Written from the ground-up in pure perl -- this is NOT a + wrapper around a C-based DBM. 
Out-of-the-box compatibility with Unix, + Mac OS X and Windows. + +INSTALLATION + Hopefully you are using CPAN's excellent Perl module, which will + download and install the module for you. If not, get the tarball, and + run these commands: + + tar zxf DBM-Deep-* + cd DBM-Deep-* + perl Makefile.PL + make + make test + make install + +SETUP + Construction can be done OO-style (which is the recommended way), or + using Perl's tie() function. Both are examined here. + + OO CONSTRUCTION + The recommended way to construct a DBM::Deep object is to use the new() + method, which gets you a blessed, tied hash or array reference. + + my $db = new DBM::Deep "foo.db"; + + This opens a new database handle, mapped to the file "foo.db". If this + file does not exist, it will automatically be created. DB files are + opened in "r+" (read/write) mode, and the type of object returned is a + hash, unless otherwise specified (see OPTIONS below). + + You can pass a number of options to the constructor to specify things + like locking, autoflush, etc. This is done by passing an inline hash: + + my $db = new DBM::Deep( + file => "foo.db", + locking => 1, + autoflush => 1 + ); + + Notice that the filename is now specified *inside* the hash with the + "file" parameter, as opposed to being the sole argument to the + constructor. This is required if any options are specified. See OPTIONS + below for the complete list. + + You can also start with an array instead of a hash. For this, you must + specify the "type" parameter: + + my $db = new DBM::Deep( + file => "foo.db", + type => DBM::Deep::TYPE_ARRAY + ); + + Note: Specifing the "type" parameter only takes effect when beginning a + new DB file. If you create a DBM::Deep object with an existing file, the + "type" will be loaded from the file header, and ignored if it is passed + to the constructor. + + TIE CONSTRUCTION + Alternatively, you can create a DBM::Deep handle by using Perl's + built-in tie() function. 
This is not ideal, because you get only a + basic, tied hash (or array) which is not blessed, so you can't call any + functions on it. + + my %hash; + tie %hash, "DBM::Deep", "foo.db"; + + my @array; + tie @array, "DBM::Deep", "bar.db"; + + As with the OO constructor, you can replace the DB filename parameter + with a hash containing one or more options (see OPTIONS just below for + the complete list). + + tie %hash, "DBM::Deep", { + file => "foo.db", + locking => 1, + autoflush => 1 + }; + + OPTIONS + There are a number of options that can be passed in when constructing + your DBM::Deep objects. These apply to both the OO- and tie- based + approaches. + + * file + Filename of the DB file to link the handle to. You can pass a full + absolute filesystem path, partial path, or a plain filename if the + file is in the current working directory. This is a required + parameter. + + * mode + File open mode (read-only, read-write, etc.) string passed to Perl's + FileHandle module. This is an optional parameter, and defaults to + "r+" (read/write). Note: If the default (r+) mode is selected, the + file will also be auto- created if it doesn't exist. + + * type + This parameter specifies what type of object to create, a hash or + array. Use one of these two constants: "DBM::Deep::TYPE_HASH" or + "DBM::Deep::TYPE_ARRAY". This only takes effect when beginning a new + file. This is an optional parameter, and defaults to hash. + + * locking + Specifies whether locking is to be enabled. DBM::Deep uses Perl's + Fnctl flock() function to lock the database in exclusive mode for + writes, and shared mode for reads. Pass any true value to enable. + This affects the base DB handle *and any child hashes or arrays* + that use the same DB file. This is an optional parameter, and + defaults to 0 (disabled). See LOCKING below for more. + + * autoflush + Specifies whether autoflush is to be enabled on the underlying + FileHandle. 
This obviously slows down write operations, but is + required if you may have multiple processes accessing the same DB + file (also consider enable *locking* or at least *volatile*). Pass + any true value to enable. This is an optional parameter, and + defaults to 0 (disabled). + + * volatile + If *volatile* mode is enabled, DBM::Deep will stat() the DB file + before each STORE() operation. This is required if an outside force + may change the size of the file between transactions. Locking also + implicitly enables volatile. This is useful if you want to use a + different locking system or write your own. Pass any true value to + enable. This is an optional parameter, and defaults to 0 (disabled). + + * autobless + If *autobless* mode is enabled, DBM::Deep will preserve blessed + hashes, and restore them when fetched. This is an experimental + feature, and does have side-effects. Basically, when hashes are + re-blessed into their original classes, they are no longer blessed + into the DBM::Deep class! So you won't be able to call any DBM::Deep + methods on them. You have been warned. This is an optional + parameter, and defaults to 0 (disabled). + + * filter_* + See FILTERS below. + + * debug + Setting *debug* mode will make all errors non-fatal, dump them out + to STDERR, and continue on. This is for debugging purposes only, and + probably not what you want. This is an optional parameter, and + defaults to 0 (disabled). + +TIE INTERFACE + With DBM::Deep you can access your databases using Perl's standard + hash/array syntax. Because all Deep objects are *tied* to hashes or + arrays, you can treat them as such. Deep will intercept all reads/writes + and direct them to the right place -- the DB file. This has nothing to + do with the "TIE CONSTRUCTION" section above. This simply tells you how + to use DBM::Deep using regular hashes and arrays, rather than calling + functions like "get()" and "put()" (although those work too). 
It is + entirely up to you how to want to access your databases. + + HASHES + You can treat any DBM::Deep object like a normal Perl hash reference. + Add keys, or even nested hashes (or arrays) using standard Perl syntax: + + my $db = new DBM::Deep "foo.db"; + + $db->{mykey} = "myvalue"; + $db->{myhash} = {}; + $db->{myhash}->{subkey} = "subvalue"; + + print $db->{myhash}->{subkey} . "\n"; + + You can even step through hash keys using the normal Perl "keys()" + function: + + foreach my $key (keys %$db) { + print "$key: " . $db->{$key} . "\n"; + } + + Remember that Perl's "keys()" function extracts *every* key from the + hash and pushes them onto an array, all before the loop even begins. If + you have an extra large hash, this may exhaust Perl's memory. Instead, + consider using Perl's "each()" function, which pulls keys/values one at + a time, using very little memory: + + while (my ($key, $value) = each %$db) { + print "$key: $value\n"; + } + + Please note that when using "each()", you should always pass a direct + hash reference, not a lookup. Meaning, you should never do this: + + # NEVER DO THIS + while (my ($key, $value) = each %{$db->{foo}}) { # BAD + + This causes an infinite loop, because for each iteration, Perl is + calling FETCH() on the $db handle, resulting in a "new" hash for foo + every time, so it effectively keeps returning the first key over and + over again. Instead, assign a temporary variable to "$db-"{foo}>, then + pass that to each(). + + ARRAYS + As with hashes, you can treat any DBM::Deep object like a normal Perl + array reference. This includes inserting, removing and manipulating + elements, and the "push()", "pop()", "shift()", "unshift()" and + "splice()" functions. The object must have first been created using type + "DBM::Deep::TYPE_ARRAY", or simply be a nested array reference inside a + hash. 
Example: + + my $db = new DBM::Deep( + file => "foo-array.db", + type => DBM::Deep::TYPE_ARRAY + ); + + $db->[0] = "foo"; + push @$db, "bar", "baz"; + unshift @$db, "bah"; + + my $last_elem = pop @$db; # baz + my $first_elem = shift @$db; # bah + my $second_elem = $db->[1]; # bar + + my $num_elements = scalar @$db; + +OO INTERFACE + In addition to the *tie()* interface, you can also use a standard OO + interface to manipulate all aspects of DBM::Deep databases. Each type of + object (hash or array) has its own methods, but both types share the + following common methods: "put()", "get()", "exists()", "delete()" and + "clear()". + + * put() + Stores a new hash key/value pair, or sets an array element value. + Takes two arguments, the hash key or array index, and the new value. + The value can be a scalar, hash ref or array ref. Returns true on + success, false on failure. + + $db->put("foo", "bar"); # for hashes + $db->put(1, "bar"); # for arrays + + * get() + Fetches the value of a hash key or array element. Takes one + argument: the hash key or array index. Returns a scalar, hash ref or + array ref, depending on the data type stored. + + my $value = $db->get("foo"); # for hashes + my $value = $db->get(1); # for arrays + + * exists() + Checks if a hash key or array index exists. Takes one argument: the + hash key or array index. Returns true if it exists, false if not. + + if ($db->exists("foo")) { print "yay!\n"; } # for hashes + if ($db->exists(1)) { print "yay!\n"; } # for arrays + + * delete() + Deletes one hash key/value pair or array element. Takes one + argument: the hash key or array index. Returns true on success, + false if not found. For arrays, the remaining elements located after + the deleted element are NOT moved over. The deleted element is + essentially just undefined, which is exactly how Perl's internal + arrays work. 
Please note that the space occupied by the deleted + key/value or element is not reused again -- see "UNUSED SPACE + RECOVERY" below for details and workarounds. + + $db->delete("foo"); # for hashes + $db->delete(1); # for arrays + + * clear() + Deletes all hash keys or array elements. Takes no arguments. No + return value. Please note that the space occupied by the deleted + keys/values or elements is not reused again -- see "UNUSED SPACE + RECOVERY" below for details and workarounds. + + $db->clear(); # hashes or arrays + + HASHES + For hashes, DBM::Deep supports all the common methods described above, + and the following additional methods: "first_key()" and "next_key()". + + * first_key() + Returns the "first" key in the hash. As with built-in Perl hashes, + keys are fetched in an undefined order (which appears random). Takes + no arguments, returns the key as a scalar value. + + my $key = $db->first_key(); + + * next_key() + Returns the "next" key in the hash, given the previous one as the + sole argument. Returns undef if there are no more keys to be + fetched. + + $key = $db->next_key($key); + + Here are some examples of using hashes: + + my $db = new DBM::Deep "foo.db"; + + $db->put("foo", "bar"); + print "foo: " . $db->get("foo") . "\n"; + + $db->put("baz", {}); # new child hash ref + $db->get("baz")->put("buz", "biz"); + print "buz: " . $db->get("baz")->get("buz") . "\n"; + + my $key = $db->first_key(); + while ($key) { + print "$key: " . $db->get($key) . "\n"; + $key = $db->next_key($key); + } + + if ($db->exists("foo")) { $db->delete("foo"); } + + ARRAYS + For arrays, DBM::Deep supports all the common methods described above, + and the following additional methods: "length()", "push()", "pop()", + "shift()", "unshift()" and "splice()". + + * length() + Returns the number of elements in the array. Takes no arguments. + + my $len = $db->length(); + + * push() + Adds one or more elements onto the end of the array. 
Accepts + scalars, hash refs or array refs. No return value. + + $db->push("foo", "bar", {}); + + * pop() + Fetches the last element in the array, and deletes it. Takes no + arguments. Returns undef if array is empty. Returns the element + value. + + my $elem = $db->pop(); + + * shift() + Fetches the first element in the array, deletes it, then shifts all + the remaining elements over to take up the space. Returns the + element value. This method is not recommended with large arrays -- + see "LARGE ARRAYS" below for details. + + my $elem = $db->shift(); + + * unshift() + Inserts one or more elements onto the beginning of the array, + shifting all existing elements over to make room. Accepts scalars, + hash refs or array refs. No return value. This method is not + recommended with large arrays -- see below for + details. + + $db->unshift("foo", "bar", {}); + + * splice() + Performs exactly like Perl's built-in function of the same name. See + "perldoc -f splice" for usage -- it is too complicated to document + here. This method is not recommended with large arrays -- see "LARGE + ARRAYS" below for details. + + Here are some examples of using arrays: + + my $db = new DBM::Deep( + file => "foo.db", + type => DBM::Deep::TYPE_ARRAY + ); + + $db->push("bar", "baz"); + $db->unshift("foo"); + $db->put(3, "buz"); + + my $len = $db->length(); + print "length: $len\n"; # 4 + + for (my $k=0; $k<$len; $k++) { + print "$k: " . $db->get($k) . "\n"; + } + + $db->splice(1, 2, "biz", "baf"); + + while (my $elem = shift @$db) { + print "shifted: $elem\n"; + } + +LOCKING + Enable automatic file locking by passing a true value to the "locking" + parameter when constructing your DBM::Deep object (see SETUP above). + + my $db = new DBM::Deep( + file => "foo.db", + locking => 1 + ); + + This causes Deep to "flock()" the underlying FileHandle object with + exclusive mode for writes, and shared mode for reads. 
This is required + if you have multiple processes accessing the same database file, to + avoid file corruption. Please note that "flock()" does NOT work for + files over NFS. See "DB OVER NFS" below for more. + + EXPLICIT LOCKING + You can explicitly lock a database, so it remains locked for multiple + transactions. This is done by calling the "lock()" method, and passing + an optional lock mode argument (defaults to exclusive mode). This is + particularly useful for things like counters, where the current value + needs to be fetched, then incremented, then stored again. + + $db->lock(); + my $counter = $db->get("counter"); + $counter++; + $db->put("counter", $counter); + $db->unlock(); + + # or... + + $db->lock(); + $db->{counter}++; + $db->unlock(); + + You can pass "lock()" an optional argument, which specifies which mode + to use (exclusive or shared). Use one of these two constants: + "DBM::Deep::LOCK_EX" or "DBM::Deep::LOCK_SH". These are passed directly + to "flock()", and are the same as the constants defined in Perl's + "Fcntl" module. + + $db->lock( DBM::Deep::LOCK_SH ); + # something here + $db->unlock(); + + If you want to implement your own file locking scheme, be sure to create + your DBM::Deep objects setting the "volatile" option to true. This hints + to Deep that the DB file may change between transactions. See "LOW-LEVEL + ACCESS" below for more. + +IMPORTING/EXPORTING + You can import existing complex structures by calling the "import()" + method, and export an entire database into an in-memory structure using + the "export()" method. Both are examined here. + + IMPORTING + Say you have an existing hash with nested hashes/arrays inside it. + Instead of walking the structure and adding keys/elements to the + database as you go, simply pass a reference to the "import()" method. + This recursively adds everything to an existing DBM::Deep object for + you. 
Here is an example: + + my $struct = { + key1 => "value1", + key2 => "value2", + array1 => [ "elem0", "elem1", "elem2" ], + hash1 => { + subkey1 => "subvalue1", + subkey2 => "subvalue2" + } + }; + + my $db = new DBM::Deep "foo.db"; + $db->import( $struct ); + + print $db->{key1} . "\n"; # prints "value1" + + This recursively imports the entire $struct object into $db, including + all nested hashes and arrays. If the DBM::Deep object contains existing + data, keys are merged with the existing ones, replacing if they already + exist. The "import()" method can be called on any database level (not + just the base level), and works with both hash and array DB types. + + Note: Make sure your existing structure has no circular references in + it. These will cause an infinite loop when importing. + + EXPORTING + Calling the "export()" method on an existing DBM::Deep object will + return a reference to a new in-memory copy of the database. The export + is done recursively, so all nested hashes/arrays are all exported to + standard Perl objects. Here is an example: + + my $db = new DBM::Deep "foo.db"; + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + $db->{hash1} = {}; + $db->{hash1}->{subkey1} = "subvalue1"; + $db->{hash1}->{subkey2} = "subvalue2"; + + my $struct = $db->export(); + + print $struct->{key1} . "\n"; # prints "value1" + + This makes a complete copy of the database in memory, and returns a + reference to it. The "export()" method can be called on any database + level (not just the base level), and works with both hash and array DB + types. Be careful of large databases -- you can store a lot more data in + a DBM::Deep object than an in-memory Perl structure. + + Note: Make sure your database has no circular references in it. These + will cause an infinite loop when exporting. + +FILTERS + DBM::Deep has a number of hooks where you can specify your own Perl + function to perform filtering on incoming or outgoing data. 
This is a + perfect way to extend the engine, and implement things like real-time + compression or encryption. Filtering applies to the base DB level, and + all child hashes / arrays. Filter hooks can be specified when your + DBM::Deep object is first constructed, or by calling the "set_filter()" + method at any time. There are four available filter hooks, described + below: + + * filter_store_key + This filter is called whenever a hash key is stored. It is passed + the incoming key, and expected to return a transformed key. + + * filter_store_value + This filter is called whenever a hash key or array element is + stored. It is passed the incoming value, and expected to return a + transformed value. + + * filter_fetch_key + This filter is called whenever a hash key is fetched (i.e. via + "first_key()" or "next_key()"). It is passed the transformed key, + and expected to return the plain key. + + * filter_fetch_value + This filter is called whenever a hash key or array element is + fetched. It is passed the transformed value, and expected to return + the plain value. + + Here are the two ways to setup a filter hook: + + my $db = new DBM::Deep( + file => "foo.db", + filter_store_value => \&my_filter_store, + filter_fetch_value => \&my_filter_fetch + ); + + # or... + + $db->set_filter( "filter_store_value", \&my_filter_store ); + $db->set_filter( "filter_fetch_value", \&my_filter_fetch ); + + Your filter function will be called only when dealing with SCALAR keys + or values. When nested hashes and arrays are being stored/fetched, + filtering is bypassed. Filters are called as static functions, passed a + single SCALAR argument, and expected to return a single SCALAR value. 
If + you want to remove a filter, set the function reference to "undef": + + $db->set_filter( "filter_store_value", undef ); + + REAL-TIME ENCRYPTION EXAMPLE + Here is a working example that uses the *Crypt::Blowfish* module to do + real-time encryption / decryption of keys & values with DBM::Deep + Filters. Please visit + for more on + *Crypt::Blowfish*. You'll also need the *Crypt::CBC* module. + + use DBM::Deep; + use Crypt::Blowfish; + use Crypt::CBC; + + my $cipher = new Crypt::CBC({ + 'key' => 'my secret key', + 'cipher' => 'Blowfish', + 'iv' => '$KJh#(}q', + 'regenerate_key' => 0, + 'padding' => 'space', + 'prepend_iv' => 0 + }); + + my $db = new DBM::Deep( + file => "foo-encrypt.db", + filter_store_key => \&my_encrypt, + filter_store_value => \&my_encrypt, + filter_fetch_key => \&my_decrypt, + filter_fetch_value => \&my_decrypt, + ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; + + undef $db; + exit; + + sub my_encrypt { + return $cipher->encrypt( $_[0] ); + } + sub my_decrypt { + return $cipher->decrypt( $_[0] ); + } + + REAL-TIME COMPRESSION EXAMPLE + Here is a working example that uses the *Compress::Zlib* module to do + real-time compression / decompression of keys & values with DBM::Deep + Filters. Please visit + for more on + *Compress::Zlib*. + + use DBM::Deep; + use Compress::Zlib; + + my $db = new DBM::Deep( + file => "foo-compress.db", + filter_store_key => \&my_compress, + filter_store_value => \&my_compress, + filter_fetch_key => \&my_decompress, + filter_fetch_value => \&my_decompress, + ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; + + undef $db; + exit; + + sub my_compress { + return Compress::Zlib::memGzip( $_[0] ) ; + } + sub my_decompress { + return Compress::Zlib::memGunzip( $_[0] ) ; + } + + Note: Filtering of keys only applies to hashes. 
Array "keys" are + actually numerical index numbers, and are not filtered. + +ERROR HANDLING + Most DBM::Deep methods return a true value for success, and call die() + on failure. You can wrap calls in an eval block to catch the die. Also, + the actual error message is stored in an internal scalar, which can be + fetched by calling the "error()" method. + + my $db = new DBM::Deep "foo.db"; # create hash + eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call + + print $db->error(); # prints error message + + You can then call "clear_error()" to clear the current error state. + + $db->clear_error(); + + If you set the "debug" option to true when creating your DBM::Deep + object, all errors are considered NON-FATAL, and dumped to STDERR. This + is only for debugging purposes. + +LARGEFILE SUPPORT + If you have a 64-bit system, and your Perl is compiled with both + LARGEFILE and 64-bit support, you *may* be able to create databases + larger than 2 GB. DBM::Deep by default uses 32-bit file offset tags, but + these can be changed by calling the static "set_pack()" method before + you do anything else. + + DBM::Deep::set_pack(8, 'Q'); + + This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad + words instead of 32-bit longs. After setting these values your DB files + have a theoretical maximum size of 16 XB (exabytes). + + Note: Changing these values will NOT work for existing database files. + Only change this for new files, and make sure it stays set consistently + throughout the file's life. If you do set these values, you can no + longer access 32-bit DB files. You can, however, call "set_pack(4, 'N')" + to change back to 32-bit mode. + + Note: I have not personally tested files > 2 GB -- all my systems have + only a 32-bit Perl. However, I have received user reports that this does + indeed work! 
+ +LOW-LEVEL ACCESS + If you require low-level access to the underlying FileHandle that Deep + uses, you can call the "fh()" method, which returns the handle: + + my $fh = $db->fh(); + + This method can be called on the root level of the database, or any child + hashes or arrays. All levels share a *root* structure, which contains + things like the FileHandle, a reference counter, and all your options + you specified when you created the object. You can get access to this + root structure by calling the "root()" method. + + my $root = $db->root(); + + This is useful for changing options after the object has already been + created, such as enabling/disabling locking, volatile or debug modes. + You can also store your own temporary user data in this structure (be + wary of name collision), which is then accessible from any child hash or + array. + +CUSTOM DIGEST ALGORITHM + DBM::Deep by default uses the *Message Digest 5* (MD5) algorithm for + hashing keys. However you can override this, and use another algorithm + (such as SHA-256) or even write your own. But please note that Deep + currently expects zero collisions, so your algorithm has to be + *perfect*, so to speak. Collision detection may be introduced in a later + version. + + You can specify a custom digest algorithm by calling the static + "set_digest()" function, passing a reference to a subroutine, and the + length of the algorithm's hashes (in bytes). This is a global static + function, which affects ALL Deep objects. Here is a working example that + uses a 256-bit hash from the *Digest::SHA256* module. Please see + for more. + + use DBM::Deep; + use Digest::SHA256; + + my $context = Digest::SHA256::new(256); + + DBM::Deep::set_digest( \&my_digest, 32 ); + + my $db = new DBM::Deep "foo-sha.db"; + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . 
"\n"; + + undef $db; + exit; + + sub my_digest { + return substr( $context->hash($_[0]), 0, 32 ); + } + + Note: Your returned digest strings must be EXACTLY the number of bytes + you specify in the "set_digest()" function (in this case 32). + +CIRCULAR REFERENCES + DBM::Deep has experimental support for circular references. Meaning you + can have a nested hash key or array element that points to a parent + object. This relationship is stored in the DB file, and is preserved + between sessions. Here is an example: + + my $db = new DBM::Deep "foo.db"; + + $db->{foo} = "bar"; + $db->{circle} = $db; # ref to self + + print $db->{foo} . "\n"; # prints "bar" + print $db->{circle}->{foo} . "\n"; # prints "bar" again + + One catch is, passing the object to a function that recursively walks + the object tree (such as *Data::Dumper* or even the built-in + "optimize()" or "export()" methods) will result in an infinite loop. The + other catch is, if you fetch the *key* of a circular reference (i.e. + using the "first_key()" or "next_key()" methods), you will get the + *target object's key*, not the ref's key. This gets even more + interesting with the above example, where the *circle* key points to the + base DB object, which technically doesn't have a key. So I made + DBM::Deep return "[base]" as the key name in that special case. + +CAVEATS / ISSUES / BUGS + This section describes all the known issues with DBM::Deep. If you have + found something that is not listed here, please send e-mail to + jhuckaby@cpan.org. + + UNUSED SPACE RECOVERY + One major caveat with Deep is that space occupied by existing keys and + values is not recovered when they are deleted. Meaning if you keep + deleting and adding new keys, your file will continuously grow. I am + working on this, but in the meantime you can call the built-in + "optimize()" method from time to time (perhaps in a crontab or + something) to recover all your unused space. 
+ + $db->optimize(); # returns true on success + + This rebuilds the ENTIRE database into a new file, then moves it on top + of the original. The new file will have no unused space, thus it will + take up as little disk space as possible. Please note that this + operation can take a long time for large files, and you need enough disk + space to temporarily hold 2 copies of your DB file. The temporary file + is created in the same directory as the original, named with a ".tmp" + extension, and is deleted when the operation completes. Oh, and if + locking is enabled, the DB is automatically locked for the entire + duration of the copy. + + WARNING: Only call optimize() on the top-level node of the database, and + make sure there are no child references lying around. Deep keeps a + reference counter, and if it is greater than 1, optimize() will abort + and return undef. + + AUTOVIVIFICATION + Unfortunately, autovivification doesn't work with tied hashes. This + appears to be a bug in Perl's tie() system, as *Jakob Schmidt* + encountered the very same issue with his *DWH_File* module (see + ), and it is also + mentioned in the BUGS section for the *MLDBM* module ). Basically, on a new db + file, this does not work: + + $db->{foo}->{bar} = "hello"; + + Since "foo" doesn't exist, you cannot add "bar" to it. You end up with + "foo" being an empty hash. Try this instead, which works fine: + + $db->{foo} = { bar => "hello" }; + + As of Perl 5.8.7, this bug still exists. I have walked very carefully + through the execution path, and Perl indeed passes an empty hash to the + STORE() method. Probably a bug in Perl. + + FILE CORRUPTION + The current level of error handling in Deep is minimal. Files *are* + checked for a 32-bit signature on open(), but other corruption in files + can cause segmentation faults. Deep may try to seek() past the end of a + file, or get stuck in an infinite loop depending on the level of + corruption. 
File write operations are not checked for failure (for + speed), so if you happen to run out of disk space, Deep will probably + fail in a bad way. These things will be addressed in a later version of + DBM::Deep. + + DB OVER NFS + Beware of using DB files over NFS. Deep uses flock(), which works well + on local filesystems, but will NOT protect you from file corruption over + NFS. I've heard about setting up your NFS server with a locking daemon, + then using lockf() to lock your files, but your mileage may vary there as + well. From what I understand, there is no real way to do it. However, if + you need access to the underlying FileHandle in Deep for using some + other kind of locking scheme like lockf(), see the "LOW-LEVEL ACCESS" + section above. + + COPYING OBJECTS + Beware of copying tied objects in Perl. Very strange things can happen. + Instead, use Deep's "clone()" method which safely copies the object and + returns a new, blessed, tied hash or array to the same level in the DB. + + my $copy = $db->clone(); + + LARGE ARRAYS + Beware of using "shift()", "unshift()" or "splice()" with large arrays. + These functions cause every element in the array to move, which can be + murder on DBM::Deep, as every element has to be fetched from disk, then + stored again in a different location. This may be addressed in a later + version. + +PERFORMANCE + This section discusses DBM::Deep's speed and memory usage. + + SPEED + Obviously, DBM::Deep isn't going to be as fast as some C-based DBMs, + such as the almighty *BerkeleyDB*. But it makes up for it in features + like true multi-level hash/array support, and cross-platform FTPable + files. Even so, DBM::Deep is still pretty fast, and the speed stays + fairly consistent, even with huge databases. Here is some test data: + + Adding 1,000,000 keys to new DB file... + + At 100 keys, avg. speed is 2,703 keys/sec + At 200 keys, avg. speed is 2,642 keys/sec + At 300 keys, avg. speed is 2,598 keys/sec + At 400 keys, avg. 
speed is 2,578 keys/sec + At 500 keys, avg. speed is 2,722 keys/sec + At 600 keys, avg. speed is 2,628 keys/sec + At 700 keys, avg. speed is 2,700 keys/sec + At 800 keys, avg. speed is 2,607 keys/sec + At 900 keys, avg. speed is 2,190 keys/sec + At 1,000 keys, avg. speed is 2,570 keys/sec + At 2,000 keys, avg. speed is 2,417 keys/sec + At 3,000 keys, avg. speed is 1,982 keys/sec + At 4,000 keys, avg. speed is 1,568 keys/sec + At 5,000 keys, avg. speed is 1,533 keys/sec + At 6,000 keys, avg. speed is 1,787 keys/sec + At 7,000 keys, avg. speed is 1,977 keys/sec + At 8,000 keys, avg. speed is 2,028 keys/sec + At 9,000 keys, avg. speed is 2,077 keys/sec + At 10,000 keys, avg. speed is 2,031 keys/sec + At 20,000 keys, avg. speed is 1,970 keys/sec + At 30,000 keys, avg. speed is 2,050 keys/sec + At 40,000 keys, avg. speed is 2,073 keys/sec + At 50,000 keys, avg. speed is 1,973 keys/sec + At 60,000 keys, avg. speed is 1,914 keys/sec + At 70,000 keys, avg. speed is 2,091 keys/sec + At 80,000 keys, avg. speed is 2,103 keys/sec + At 90,000 keys, avg. speed is 1,886 keys/sec + At 100,000 keys, avg. speed is 1,970 keys/sec + At 200,000 keys, avg. speed is 2,053 keys/sec + At 300,000 keys, avg. speed is 1,697 keys/sec + At 400,000 keys, avg. speed is 1,838 keys/sec + At 500,000 keys, avg. speed is 1,941 keys/sec + At 600,000 keys, avg. speed is 1,930 keys/sec + At 700,000 keys, avg. speed is 1,735 keys/sec + At 800,000 keys, avg. speed is 1,795 keys/sec + At 900,000 keys, avg. speed is 1,221 keys/sec + At 1,000,000 keys, avg. speed is 1,077 keys/sec + + This test was performed on a PowerMac G4 1gHz running Mac OS X 10.3.2 & + Perl 5.8.1, with an 80GB Ultra ATA/100 HD spinning at 7200RPM. The hash + keys and values were between 6 - 12 chars in length. The DB file ended + up at 210MB. Run time was 12 min 3 sec. + + MEMORY USAGE + One of the great things about DBM::Deep is that it uses very little + memory. 
Even with huge databases (1,000,000+ keys) you will not see much + increased memory on your process. Deep relies solely on the filesystem + for storing and fetching data. Here is output from */usr/bin/top* before + even opening a database handle: + + PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND + 22831 root 11 0 2716 2716 1296 R 0.0 0.2 0:07 perl + + Basically the process is taking 2,716K of memory. And here is the same + process after storing and fetching 1,000,000 keys: + + PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND + 22831 root 14 0 2772 2772 1328 R 0.0 0.2 13:32 perl + + Notice the memory usage increased by only 56K. Test was performed on a + 700mHz x86 box running Linux RedHat 7.2 & Perl 5.6.1. + +DB FILE FORMAT + In case you were interested in the underlying DB file format, it is + documented here in this section. You don't need to know this to use the + module, it's just included for reference. + + SIGNATURE + DBM::Deep files always start with a 32-bit signature to identify the + file type. This is at offset 0. The signature is "DPDB" in network byte + order. This is checked upon each file open(). + + TAG + The DBM::Deep file is in a *tagged format*, meaning each section of the + file has a standard header containing the type of data, the length of + data, and then the data itself. The type is a single character (1 byte), + the length is a 32-bit unsigned long in network byte order, and the data + is, well, the data. Here is how it unfolds: + + MASTER INDEX + Immediately after the 32-bit file signature is the *Master Index* + record. This is a standard tag header followed by 1024 bytes (in 32-bit + mode) or 2048 bytes (in 64-bit mode) of data. The type is *H* for hash + or *A* for array, depending on how the DBM::Deep object was constructed. + + The index works by looking at a *MD5 Hash* of the hash key (or array + index number). 
The first 8-bit char of the MD5 signature is the offset + into the index, multiplied by 4 in 32-bit mode, or 8 in 64-bit mode. The + value of the index element is a file offset of the next tag for the + key/element in question, which is usually a *Bucket List* tag (see + below). + + The next tag *could* be another index, depending on how many + keys/elements exist. See RE-INDEXING below for details. + + BUCKET LIST + A *Bucket List* is a collection of 16 MD5 hashes for keys/elements, plus + file offsets to where the actual data is stored. It starts with a + standard tag header, with type *B*, and a data size of 320 bytes in + 32-bit mode, or 384 bytes in 64-bit mode. Each MD5 hash is stored in + full (16 bytes), plus the 32-bit or 64-bit file offset for the *Bucket* + containing the actual data. When the list fills up, a *Re-Index* + operation is performed (See RE-INDEXING below). + + BUCKET + A *Bucket* is a tag containing a key/value pair (in hash mode), or an + index/value pair (in array mode). It starts with a standard tag header + with type *D* for scalar data (string, binary, etc.), or it could be a + nested hash (type *H*) or array (type *A*). The value comes just after + the tag header. The size reported in the tag header is only for the + value, but then, just after the value is another size (32-bit unsigned + long) and then the plain key itself. Since the value is likely to be + fetched more often than the plain key, I figured it would be *slightly* + faster to store the value first. + + If the type is *H* (hash) or *A* (array), the value is another *Master + Index* record for the nested structure, where the process begins all + over again. + + RE-INDEXING + After a *Bucket List* grows to 16 records, its allocated space in the + file is exhausted. Then, when another key/element comes in, the list is + converted to a new index record. However, this index will look at the + next char in the MD5 hash, and arrange new Bucket List pointers + accordingly. 
This process is called *Re-Indexing*. Basically, a new + index tag is created at the file EOF, and all 17 (16 + new one) + keys/elements are removed from the old Bucket List and inserted into the + new index. Several new Bucket Lists are created in the process, as a new + MD5 char from the key is being examined (it is unlikely that the keys + will all share the same next char of their MD5s). + + Because of the way the *MD5* algorithm works, it is impossible to tell + exactly when the Bucket Lists will turn into indexes, but the first + round tends to happen right around 4,000 keys. You will see a *slight* + decrease in performance here, but it picks back up pretty quick (see + SPEED above). Then it takes a lot more keys to exhaust the next level of + Bucket Lists. It's right around 900,000 keys. This process can continue + nearly indefinitely -- right up until the point the *MD5* signatures + start colliding with each other, and this is EXTREMELY rare -- like + winning the lottery 5 times in a row AND getting struck by lightning + while you are walking to cash in your tickets. Theoretically, since + *MD5* hashes are 128-bit values, you *could* have up to + 340,282,366,921,000,000,000,000,000,000,000,000,000 keys/elements (I + believe this is 340 unodecillion, but don't quote me). + + STORING + When a new key/element is stored, the key (or index number) is first run + through *Digest::MD5* to get a 128-bit signature (example, in hex: + b05783b0773d894396d475ced9d2f4f6). Then, the *Master Index* record is + checked for the first char of the signature (in this case *b*). If it + does not exist, a new *Bucket List* is created for our key (and the next + 15 future keys that happen to also have *b* as their first MD5 char). + The entire MD5 is written to the *Bucket List* along with the offset of + the new *Bucket* record (EOF at this point, unless we are replacing an + existing *Bucket*), where the actual data will be stored. 
+ + FETCHING + Fetching an existing key/element involves getting a *Digest::MD5* of the + key (or index number), then walking along the indexes. If there are + enough keys/elements in this DB level, there might be nested indexes, + each linked to a particular char of the MD5. Finally, a *Bucket List* is + pointed to, which contains up to 16 full MD5 hashes. Each is checked for + equality to the key in question. If we found a match, the *Bucket* tag + is loaded, where the value and plain key are stored. + + Fetching the plain key occurs when calling the *first_key()* and + *next_key()* methods. In this process the indexes are walked + systematically, and each key fetched in increasing MD5 order (which is + why it appears random). Once the *Bucket* is found, the value is skipped and + the plain key returned instead. Note: Do not count on keys being fetched + as if the MD5 hashes were alphabetically sorted. This only happens on an + index-level -- as soon as the *Bucket Lists* are hit, the keys will come + out in the order they went in -- so it's pretty much undefined how the + keys will come out -- just like Perl's built-in hashes. + +AUTHOR + Joseph Huckaby, jhuckaby@cpan.org + + Special thanks to Adam Sah and Rich Gaushell! You know why :-) + +SEE ALSO + perltie(1), Tie::Hash(3), Digest::MD5(3), Fcntl(3), flock(2), lockf(3), + nfs(5), Digest::SHA256(3), Crypt::Blowfish(3), Compress::Zlib(3) + +LICENSE + Copyright (c) 2002-2005 Joseph Huckaby. All Rights Reserved. This is + free software, you may use it and distribute it under the same terms as + Perl itself. + diff --git a/t/01basic.t b/t/01basic.t new file mode 100644 index 0000000..b48d8cd --- /dev/null +++ b/t/01basic.t @@ -0,0 +1,20 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 1 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep "test.db"; +if ($db->error()) { + print "ERROR: " . 
$db->error(); + ok(0); + exit(0); +} +else { ok(1); } diff --git a/t/02hash.t b/t/02hash.t new file mode 100644 index 0000000..f9d0c08 --- /dev/null +++ b/t/02hash.t @@ -0,0 +1,121 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 15 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep "test.db"; +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# put/get key +## +$db->{key1} = "value1"; +ok( $db->{key1} eq "value1" ); + +$db->put("key2", "value2"); +ok( $db->get("key2") eq "value2" ); + +## +# key exists +## +ok( $db->exists("key1") ); +ok( exists $db->{key2} ); + +## +# count keys +## +ok( scalar keys %$db == 2 ); + +## +# step through keys +## +my $temphash = {}; +while ( my ($key, $value) = each %$db ) { + $temphash->{$key} = $value; +} + +ok( ($temphash->{key1} eq "value1") && ($temphash->{key2} eq "value2") ); + +$temphash = {}; +my $key = $db->first_key(); +while ($key) { + $temphash->{$key} = $db->get($key); + $key = $db->next_key($key); +} + +ok( ($temphash->{key1} eq "value1") && ($temphash->{key2} eq "value2") ); + +## +# delete keys +## +ok( delete $db->{key1} ); +ok( $db->delete("key2") ); + +ok( scalar keys %$db == 0 ); + +## +# delete all keys +## +$db->put("another", "value"); +$db->clear(); + +ok( scalar keys %$db == 0 ); + +## +# replace key +## +$db->put("key1", "value1"); +$db->put("key1", "value2"); + +ok( $db->get("key1") eq "value2" ); + +$db->put("key1", "value222222222222222222222222"); + +ok( $db->get("key1") eq "value222222222222222222222222" ); + +## +# Make sure DB still works after closing / opening +## +undef $db; +$db = new DBM::Deep "test.db"; +if ($db->error()) { + die "ERROR: " . 
$db->error(); +} +ok( $db->get("key1") eq "value222222222222222222222222" ); + +## +# Make sure keys are still fetchable after replacing values +# with smaller ones (bug found by John Cardenas, DBM::Deep 0.93) +## +$db->clear(); +$db->put("key1", "long value here"); +$db->put("key2", "longer value here"); + +$db->put("key1", "short value"); +$db->put("key2", "shorter v"); + +my $first_key = $db->first_key(); +my $next_key = $db->next_key($first_key); + +ok( + (($first_key eq "key1") || ($first_key eq "key2")) && + (($next_key eq "key1") || ($next_key eq "key2")) && + ($first_key ne $next_key) +); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/03bighash.t b/t/03bighash.t new file mode 100644 index 0000000..6fd7615 --- /dev/null +++ b/t/03bighash.t @@ -0,0 +1,41 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 1 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep "test.db"; +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# put/get many keys +## +my $max_keys = 1000; + +for (my $k=0; $k<$max_keys; $k++) { + $db->put( "hello" . $k, "there" . ($k * 2) ); +} + +my $count = 0; + +for (my $k=0; $k<$max_keys; $k++) { + if ($db->get("hello" . $k) eq "there" . ($k * 2)) { $count++; } +} + +ok( $count == $max_keys ); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/04array.t b/t/04array.t new file mode 100644 index 0000000..8cf95f1 --- /dev/null +++ b/t/04array.t @@ -0,0 +1,131 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 11 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db", + type => DBM::Deep::TYPE_ARRAY +); +if ($db->error()) { + die "ERROR: " . 
$db->error(); +} + +## +# basic put/get/push +## +$db->[0] = "elem0"; +push @$db, "elem1"; +$db->put(2, "elem2"); + +ok( + ($db->[0] eq "elem0") && + ($db->[1] eq "elem1") && + ($db->[2] eq "elem2") +); + +ok( + ($db->get(0) eq "elem0") && + ($db->get(1) eq "elem1") && + ($db->get(2) eq "elem2") +); + +## +# pop, shift +## +my $popped = pop @$db; +ok( + ($db->length() == 2) && + ($db->[0] eq "elem0") && + ($db->[1] eq "elem1") && + ($popped eq "elem2") +); + +my $shifted = shift @$db; +ok( + ($db->length() == 1) && + ($db->[0] eq "elem1") && + ($shifted eq "elem0") +); + +## +# unshift +## +$db->unshift( "new elem" ); +ok( + ($db->length() == 2) && + ($db->[0] eq "new elem") && + ($db->[1] eq "elem1") +); + +## +# delete +## +$db->delete(0); +ok( + ($db->length() == 2) && + (!$db->[0]) && + ($db->[1] eq "elem1") +); + +## +# exists +## +ok( $db->exists(1) ); + +## +# clear +## +$db->clear(); +ok( $db->length() == 0 ); + +## +# multi-push +## +$db->push( "elem first", "elem middle", "elem last" ); +ok( + ($db->length() == 3) && + ($db->[0] eq "elem first") && + ($db->[1] eq "elem middle") && + ($db->[2] eq "elem last") +); + +## +# splice +## +$db->splice( 1, 1, "middle A", "middle B" ); +ok( + ($db->length() == 4) && + ($db->[0] eq "elem first") && + ($db->[1] eq "middle A") && + ($db->[2] eq "middle B") && + ($db->[3] eq "elem last") +); + +## +# splice with length of 0 +## +$db->splice( 3, 0, "middle C" ); +ok( + ($db->length() == 5) && + ($db->[0] eq "elem first") && + ($db->[1] eq "middle A") && + ($db->[2] eq "middle B") && + ($db->[3] eq "middle C") && + ($db->[4] eq "elem last") +); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/05bigarray.t b/t/05bigarray.t new file mode 100644 index 0000000..775f64e --- /dev/null +++ b/t/05bigarray.t @@ -0,0 +1,44 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 1 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new 
DBM::Deep( + file => "test.db", + type => DBM::Deep::TYPE_ARRAY +); +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# put/get many keys +## +my $max_keys = 1000; + +for (my $k=0; $k<$max_keys; $k++) { + $db->[$k] = "there" . ($k * 2); +} + +my $count = 0; + +for (my $k=0; $k<$max_keys; $k++) { + if ($db->[$k] eq "there" . ($k * 2)) { $count++; } +} + +ok( $count == $max_keys ); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/06reindex.t b/t/06reindex.t new file mode 100644 index 0000000..84bb64c --- /dev/null +++ b/t/06reindex.t @@ -0,0 +1,41 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 1 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep "test.db"; +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# put/get many keys +## +my $max_keys = 3000; + +for (my $k=0; $k<$max_keys; $k++) { + $db->put( "hello" . $k, "there" . ($k * 2) ); +} + +my $count = 0; + +for (my $k=0; $k<$max_keys; $k++) { + if ($db->get("hello" . $k) eq "there" . 
($k * 2)) { $count++; } +} + +ok( $count == $max_keys ); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/07error.t b/t/07error.t new file mode 100644 index 0000000..1b79f28 --- /dev/null +++ b/t/07error.t @@ -0,0 +1,32 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 2 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep "test.db"; + +## +# cause an error +## +eval { $db->push("foo"); }; # ERROR -- array-only method + +ok( $db->error() ); + +$db->clear_error(); + +ok( !$db->error() ); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/08locking.t b/t/08locking.t new file mode 100644 index 0000000..7ec1f38 --- /dev/null +++ b/t/08locking.t @@ -0,0 +1,41 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 2 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db", + locking => 1 +); +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# basic put/get +## +$db->{key1} = "value1"; +ok( $db->{key1} eq "value1" ); + +## +# explicit lock +## +$db->lock( DBM::Deep::LOCK_EX ); +$db->{key1} = "value2"; +$db->unlock(); +ok( $db->{key1} eq "value2" ); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/09deephash.t b/t/09deephash.t new file mode 100644 index 0000000..be9bf46 --- /dev/null +++ b/t/09deephash.t @@ -0,0 +1,63 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 3 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db" +); +if ($db->error()) { + die "ERROR: " . 
$db->error(); +} + +## +# basic deep hash +## +$db->{company} = {}; +$db->{company}->{name} = "My Co."; +$db->{company}->{employees} = {}; +$db->{company}->{employees}->{"Henry Higgins"} = {}; +$db->{company}->{employees}->{"Henry Higgins"}->{salary} = 90000; + +ok( $db->{company}->{name} eq "My Co." ); +ok( $db->{company}->{employees}->{"Henry Higgins"}->{salary} == 90000 ); + +## +# super deep hash +## +my $max_levels = 1000; +$db->{base_level} = {}; +my $temp_db = $db->{base_level}; + +for (my $k=0; $k<$max_levels; $k++) { + $temp_db->{"level".$k} = {}; + $temp_db = $temp_db->{"level".$k}; +} +$temp_db->{deepkey} = "deepvalue"; +undef $temp_db; + +## +# start over, now validate all levels +## +$temp_db = $db->{base_level}; +for (my $k=0; $k<$max_levels; $k++) { + if ($temp_db) { $temp_db = $temp_db->{"level".$k}; } +} +ok( $temp_db && ($temp_db->{deepkey} eq "deepvalue") ); + +undef $temp_db; + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/10deeparray.t b/t/10deeparray.t new file mode 100644 index 0000000..4e8ae8d --- /dev/null +++ b/t/10deeparray.t @@ -0,0 +1,52 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 1 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db", + type => DBM::Deep::TYPE_ARRAY +); +if ($db->error()) { + die "ERROR: " . 
$db->error(); +} + +## +# super deep array +## +my $max_levels = 500; +$db->[0] = []; +my $temp_db = $db->[0]; + +for (my $k=0; $k<$max_levels; $k++) { + $temp_db->[$k] = []; + $temp_db = $temp_db->[$k]; +} +$temp_db->[0] = "deepvalue"; +undef $temp_db; + +## +# start over, now validate all levels +## +$temp_db = $db->[0]; +for (my $k=0; $k<$max_levels; $k++) { + if ($temp_db) { $temp_db = $temp_db->[$k]; } +} +ok( $temp_db && ($temp_db->[0] eq "deepvalue") ); + +undef $temp_db; + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/11largekeys.t b/t/11largekeys.t new file mode 100644 index 0000000..83c8161 --- /dev/null +++ b/t/11largekeys.t @@ -0,0 +1,55 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 4 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db" +); +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# large keys +## +my $key1 = "Now is the time for all good men to come to the aid of their country." x 100; +my $key2 = "The quick brown fox jumped over the lazy, sleeping dog." 
x 1000; + +$db->put($key1, "value1"); +$db->put($key2, "value2"); + +ok( + ($db->get($key1) eq "value1") && + ($db->get($key2) eq "value2") +); + +my $test_key = $db->first_key(); +ok( + ($test_key eq $key1) || + ($test_key eq $key2) +); + +$test_key = $db->next_key($test_key); +ok( + ($test_key eq $key1) || + ($test_key eq $key2) +); + +$test_key = $db->next_key($test_key); +ok( !$test_key ); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/12optimize.t b/t/12optimize.t new file mode 100644 index 0000000..6b52c8b --- /dev/null +++ b/t/12optimize.t @@ -0,0 +1,71 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 3 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db", + autoflush => 1 +); +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# create some unused space +## +$db->{key1} = "value1"; +$db->{key2} = "value2"; + +$db->{a} = {}; +$db->{a}->{b} = []; + +my $b = $db->{a}->{b}; +$b->[0] = 1; +$b->[1] = 2; +$b->[2] = {}; +$b->[2]->{c} = []; + +my $c = $b->[2]->{c}; +$c->[0] = 'd'; +$c->[1] = {}; +$c->[1]->{e} = 'f'; + +undef $c; +undef $b; + +delete $db->{a}; + +## +# take byte count readings before, and after optimize +## +my $before = (stat($db->fh()))[7]; +my $result = $db->optimize(); +my $after = (stat($db->fh()))[7]; + +if ($db->error()) { + die "ERROR: " . 
$db->error(); +} + +ok( $result ); +ok( $after < $before ); # make sure file shrunk + +ok( + ($db->{key1} eq "value1") && + ($db->{key2} eq "value2") +); # make sure content is still there + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/13clone.t b/t/13clone.t new file mode 100644 index 0000000..efc30b6 --- /dev/null +++ b/t/13clone.t @@ -0,0 +1,46 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 2 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db" +); +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +$db->{key1} = "value1"; + +## +# clone db handle, make sure both are usable +## +my $clone = $db->clone(); +$clone->{key2} = "value2"; + +ok( + ($db->{key1} eq "value1") && + ($db->{key2} eq "value2") +); + +ok( + ($clone->{key1} eq "value1") && + ($clone->{key2} eq "value2") +); + +undef $clone; + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/14setpack.t b/t/14setpack.t new file mode 100644 index 0000000..8f96f74 --- /dev/null +++ b/t/14setpack.t @@ -0,0 +1,51 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 1 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db", + autoflush => 1 +); +if ($db->error()) { + die "ERROR: " . $db->error(); +} +$db->{key1} = "value1"; +$db->{key2} = "value2"; +my $before = (stat($db->fh()))[7]; +undef $db; + +## +# set pack to 2-byte (16-bit) words +## +DBM::Deep::set_pack(2, 'S'); + +unlink "test.db"; +$db = new DBM::Deep( + file => "test.db", + autoflush => 1 +); +if ($db->error()) { + die "ERROR: " . 
$db->error(); +} +$db->{key1} = "value1"; +$db->{key2} = "value2"; +my $after = (stat($db->fh()))[7]; +undef $db; + +ok( $after < $before ); + +## +# close, delete file, exit +## +# undef $db; +unlink "test.db"; +exit(0); diff --git a/t/15filter.t b/t/15filter.t new file mode 100644 index 0000000..e5cb588 --- /dev/null +++ b/t/15filter.t @@ -0,0 +1,81 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 4 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db" +); +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# First try store filters only (values will be unfiltered) +## +$db->set_filter( 'store_key', \&my_filter_store_key ); +$db->set_filter( 'store_value', \&my_filter_store_value ); + +$db->{key1} = "value1"; +$db->{key2} = "value2"; + +ok( + ($db->{key1} eq "MYFILTERvalue1") && + ($db->{key2} eq "MYFILTERvalue2") +); + +## +# Now try fetch filters as well +## +$db->set_filter( 'fetch_key', \&my_filter_fetch_key ); +$db->set_filter( 'fetch_value', \&my_filter_fetch_value ); + +ok( + ($db->{key1} eq "value1") && + ($db->{key2} eq "value2") +); + +## +# Try fetching keys as well as values +## +my $first_key = $db->first_key(); +my $next_key = $db->next_key($first_key); + +ok( + (($first_key eq "key1") || ($first_key eq "key2")) && + (($next_key eq "key1") || ($next_key eq "key2")) +); + +## +# Now clear all filters, and make sure all is unfiltered +## +$db->set_filter( 'store_key', undef ); +$db->set_filter( 'store_value', undef ); +$db->set_filter( 'fetch_key', undef ); +$db->set_filter( 'fetch_value', undef ); + +ok( + ($db->{MYFILTERkey1} eq "MYFILTERvalue1") && + ($db->{MYFILTERkey2} eq "MYFILTERvalue2") +); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); + +sub my_filter_store_key { return 'MYFILTER' . $_[0]; } +sub my_filter_store_value { return 'MYFILTER' . 
$_[0]; } + +sub my_filter_fetch_key { $_[0] =~ s/^MYFILTER//; return $_[0]; } +sub my_filter_fetch_value { $_[0] =~ s/^MYFILTER//; return $_[0]; } diff --git a/t/16digest.t b/t/16digest.t new file mode 100644 index 0000000..464b007 --- /dev/null +++ b/t/16digest.t @@ -0,0 +1,115 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 13 } + +use DBM::Deep; + +my $salt = 38473827; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep( + file => "test.db" +); +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# Set digest handler +## +DBM::Deep::set_digest( \&my_digest, 8 ); + +## +# put/get key +## +$db->{key1} = "value1"; +ok( $db->{key1} eq "value1" ); + +$db->put("key2", "value2"); +ok( $db->get("key2") eq "value2" ); + +## +# key exists +## +ok( $db->exists("key1") ); +ok( exists $db->{key2} ); + +## +# count keys +## +ok( scalar keys %$db == 2 ); + +## +# step through keys +## +my $temphash = {}; +while ( my ($key, $value) = each %$db ) { + $temphash->{$key} = $value; +} + +ok( ($temphash->{key1} eq "value1") && ($temphash->{key2} eq "value2") ); + +$temphash = {}; +my $key = $db->first_key(); +while ($key) { + $temphash->{$key} = $db->get($key); + $key = $db->next_key($key); +} + +ok( ($temphash->{key1} eq "value1") && ($temphash->{key2} eq "value2") ); + +## +# delete keys +## +ok( delete $db->{key1} ); +ok( $db->delete("key2") ); + +ok( scalar keys %$db == 0 ); + +## +# delete all keys +## +$db->put("another", "value"); +$db->clear(); + +ok( scalar keys %$db == 0 ); + +## +# replace key +## +$db->put("key1", "value1"); +$db->put("key1", "value2"); + +ok( $db->get("key1") eq "value2" ); + +$db->put("key1", "value222222222222222222222222"); + +ok( $db->get("key1") eq "value222222222222222222222222" ); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); + +sub my_digest { + ## + # Warning: This digest function is for testing ONLY + # It is NOT intended for actual use + ## + my $key = 
shift; + my $num = $salt; + + for (my $k=0; $k 2 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep "test.db"; +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +## +# put/get simple keys +## +$db->{key1} = "value1"; +$db->{key2} = "value2"; + +## +# Insert circular reference +## +$db->{circle} = $db; + +## +# Make sure keys exist in both places +## +ok( + ($db->{key1} eq "value1") && + ($db->{circle}->{key1} eq "value1") +); + +## +# Make sure changes are reflected in both places +## +$db->{key1} = "another value"; + +ok( + ($db->{key1} eq "another value") && + ($db->{circle}->{key1} eq "another value") +); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/18import.t b/t/18import.t new file mode 100644 index 0000000..3f44e19 --- /dev/null +++ b/t/18import.t @@ -0,0 +1,60 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 1 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep "test.db"; +if ($db->error()) { + die "ERROR: " . 
$db->error(); +} + +## +# Create structure in memory +## +my $struct = { + key1 => "value1", + key2 => "value2", + array1 => [ "elem0", "elem1", "elem2" ], + hash1 => { + subkey1 => "subvalue1", + subkey2 => "subvalue2" + } +}; + +## +# Import entire thing +## +$db->import( $struct ); +undef $struct; + +## +# Make sure everything is there +## +ok( + ($db->{key1} eq "value1") && + ($db->{key2} eq "value2") && + ($db->{array1} && + ($db->{array1}->[0] eq "elem0") && + ($db->{array1}->[1] eq "elem1") && + ($db->{array1}->[2] eq "elem2") + ) && + ($db->{hash1} && + ($db->{hash1}->{subkey1} eq "subvalue1") && + ($db->{hash1}->{subkey2} eq "subvalue2") + ) +); + +## +# close, delete file, exit +## +undef $db; +unlink "test.db"; +exit(0); diff --git a/t/19export.t b/t/19export.t new file mode 100644 index 0000000..9ee2ad6 --- /dev/null +++ b/t/19export.t @@ -0,0 +1,60 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 1 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep "test.db"; +if ($db->error()) { + die "ERROR: " . 
$db->error(); +} + +## +# Create structure in DB +## +$db->import( + key1 => "value1", + key2 => "value2", + array1 => [ "elem0", "elem1", "elem2" ], + hash1 => { + subkey1 => "subvalue1", + subkey2 => "subvalue2" + } +); + +## +# Export entire thing +## +my $struct = $db->export(); + +## +# close, delete file +## +undef $db; +unlink "test.db"; + +## +# Make sure everything is here, outside DB +## +ok( + ($struct->{key1} eq "value1") && + ($struct->{key2} eq "value2") && + ($struct->{array1} && + ($struct->{array1}->[0] eq "elem0") && + ($struct->{array1}->[1] eq "elem1") && + ($struct->{array1}->[2] eq "elem2") + ) && + ($struct->{hash1} && + ($struct->{hash1}->{subkey1} eq "subvalue1") && + ($struct->{hash1}->{subkey2} eq "subvalue2") + ) +); + +exit(0); diff --git a/t/20crossref.t b/t/20crossref.t new file mode 100644 index 0000000..d972b9f --- /dev/null +++ b/t/20crossref.t @@ -0,0 +1,62 @@ +## +# DBM::Deep Test +## +use strict; +use Test; +BEGIN { plan tests => 1 } + +use DBM::Deep; + +## +# basic file open +## +unlink "test.db"; +my $db = new DBM::Deep "test.db"; +if ($db->error()) { + die "ERROR: " . $db->error(); +} + +unlink "test2.db"; +my $db2 = new DBM::Deep "test2.db"; +if ($db2->error()) { + die "ERROR: " . $db2->error(); +} + +## +# Create structure in $db +## +$db->import( + hash1 => { + subkey1 => "subvalue1", + subkey2 => "subvalue2" + } +); + +## +# Cross-ref nested hash accross DB objects +## +$db2->{hash1} = $db->{hash1}; + +## +# close, delete $db +## +undef $db; +unlink "test.db"; + +## +# Make sure $db2 has copy of $db's hash structure +## +ok( + ($db2->{hash1} && + ($db2->{hash1}->{subkey1} eq "subvalue1") && + ($db2->{hash1}->{subkey2} eq "subvalue2") + ) +); + +## +# close, delete $db2, exit +## +undef $db2; +unlink "test2.db"; + +exit(0);