X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDBM%2FDeep.pm;h=aa231793e928a6ea26c822958fd3482813ecfbdb;hb=fb451ba69d35e7acbd996e3de8c073f6ce76d7ea;hp=b70f6962d7b068f805082147bd47f8ed19f2b2f9;hpb=8dbf89d641f23eb445cb54e16ff38e6b01b53e14;p=dbsrgits%2FDBM-Deep.git diff --git a/lib/DBM/Deep.pm b/lib/DBM/Deep.pm index b70f696..aa23179 100644 --- a/lib/DBM/Deep.pm +++ b/lib/DBM/Deep.pm @@ -29,37 +29,28 @@ package DBM::Deep; # modify it under the same terms as Perl itself. ## +use 5.6.0; + use strict; +use warnings; + +our $VERSION = q(0.99_03); use Fcntl qw( :DEFAULT :flock :seek ); + +use Clone::Any '_clone_data'; use Digest::MD5 (); +use FileHandle::Fmode (); use Scalar::Util (); use DBM::Deep::Engine; - -use vars qw( $VERSION ); -$VERSION = q(0.99_01); - - -## -# Setup file and tag signatures. These should never change. -## -sub SIG_FILE () { 'DPDB' } -sub SIG_HASH () { 'H' } -sub SIG_ARRAY () { 'A' } -sub SIG_SCALAR () { 'S' } -sub SIG_NULL () { 'N' } -sub SIG_DATA () { 'D' } -sub SIG_INDEX () { 'I' } -sub SIG_BLIST () { 'B' } -sub SIG_SIZE () { 1 } +use DBM::Deep::File; ## # Setup constants for users to pass to new() ## -sub TYPE_HASH () { SIG_HASH } -sub TYPE_ARRAY () { SIG_ARRAY } -sub TYPE_SCALAR () { SIG_SCALAR } +sub TYPE_HASH () { DBM::Deep::Engine->SIG_HASH } +sub TYPE_ARRAY () { DBM::Deep::Engine->SIG_ARRAY } sub _get_args { my $proto = shift; @@ -111,33 +102,39 @@ sub new { return bless $self, $class; } +# This initializer is called from the various TIE* methods. new() calls tie(), +# which allows for a single point of entry. sub _init { - ## - # Setup $self and bless into this class. 
- ## my $class = shift; - my $args = shift; + my ($args) = @_; + + $args->{storage} = DBM::Deep::File->new( $args ) + unless exists $args->{storage}; + + # locking implicitly enables autoflush + if ($args->{locking}) { $args->{autoflush} = 1; } # These are the defaults to be optionally overridden below my $self = bless { type => TYPE_HASH, - base_offset => length(SIG_FILE), - engine => DBM::Deep::Engine->new, + base_offset => undef, + + parent => undef, + parent_key => undef, + + storage => undef, }, $class; + $self->{engine} = DBM::Deep::Engine->new( { %{$args}, obj => $self } ); + # Grab the parameters we want to use foreach my $param ( keys %$self ) { next unless exists $args->{$param}; - $self->{$param} = delete $args->{$param} + $self->{$param} = $args->{$param}; } - # locking implicitly enables autoflush - if ($args->{locking}) { $args->{autoflush} = 1; } - - $self->{root} = exists $args->{root} - ? $args->{root} - : DBM::Deep::_::Root->new( $args ); + $self->_engine->setup_fh( $self ); - $self->{engine}->setup_fh( $self ); + $self->_storage->set_db( $self ); return $self; } @@ -154,66 +151,14 @@ sub TIEARRAY { return DBM::Deep::Array->TIEARRAY( @_ ); } -#XXX Unneeded now ... -#sub DESTROY { -#} - sub lock { - ## - # If db locking is set, flock() the db file. If called multiple - # times before unlock(), then the same number of unlocks() must - # be called before the lock is released. - ## - my $self = $_[0]->_get_self; - my $type = $_[1]; - $type = LOCK_EX unless defined $type; - - if (!defined($self->_fh)) { return; } - - if ($self->_root->{locking}) { - if (!$self->_root->{locked}) { - flock($self->_fh, $type); - - # refresh end counter in case file has changed size - my @stats = stat($self->_root->{file}); - $self->_root->{end} = $stats[7]; - - # double-check file inode, in case another process - # has optimize()d our file while we were waiting. 
- if ($stats[1] != $self->_root->{inode}) { - $self->{engine}->close_fh( $self ); - $self->{engine}->setup_fh( $self ); - flock($self->_fh, $type); # re-lock - - # This may not be necessary after re-opening - $self->_root->{end} = (stat($self->_fh))[7]; # re-end - } - } - $self->_root->{locked}++; - - return 1; - } - - return; + my $self = shift->_get_self; + return $self->_storage->lock( $self, @_ ); } sub unlock { - ## - # If db locking is set, unlock the db file. See note in lock() - # regarding calling lock() multiple times. - ## - my $self = $_[0]->_get_self; - - if (!defined($self->_fh)) { return; } - - if ($self->_root->{locking} && $self->_root->{locked} > 0) { - $self->_root->{locked}--; - if (!$self->_root->{locked}) { flock($self->_fh, LOCK_UN); } - - return 1; - } - - return; + my $self = shift->_get_self; + return $self->_storage->unlock( $self, @_ ); } sub _copy_value { @@ -224,8 +169,7 @@ sub _copy_value { ${$spot} = $value; } elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep' ) } ) { - my $type = $value->_type; - ${$spot} = $type eq TYPE_HASH ? {} : []; + ${$spot} = $value->_repr; $value->_copy_node( ${$spot} ); } else { @@ -245,46 +189,35 @@ sub _copy_value { } sub _copy_node { - ## - # Copy single level of keys or elements to new DB handle. - # Recurse for nested structures - ## - my $self = shift->_get_self; - my ($db_temp) = @_; - - if ($self->_type eq TYPE_HASH) { - my $key = $self->first_key(); - while ($key) { - my $value = $self->get($key); - $self->_copy_value( \$db_temp->{$key}, $value ); - $key = $self->next_key($key); - } - } - else { - my $length = $self->length(); - for (my $index = 0; $index < $length; $index++) { - my $value = $self->get($index); - $self->_copy_value( \$db_temp->[$index], $value ); - } - } + die "Must be implemented in a child class\n"; +} - return 1; +sub _repr { + die "Must be implemented in a child class\n"; } sub export { ## # Recursively export into standard Perl hashes and arrays. 
## - my $self = $_[0]->_get_self; + my $self = shift->_get_self; - my $temp; - if ($self->_type eq TYPE_HASH) { $temp = {}; } - elsif ($self->_type eq TYPE_ARRAY) { $temp = []; } + my $temp = $self->_repr; $self->lock(); $self->_copy_node( $temp ); $self->unlock(); + # This will always work because $self, after _get_self() is a HASH + if ( $self->{parent} ) { + my $c = Scalar::Util::blessed( + $self->{parent}->get($self->{parent_key}) + ); + if ( $c && !$c->isa( 'DBM::Deep' ) ) { + bless $temp, $c; + } + } + return $temp; } @@ -292,56 +225,50 @@ sub import { ## # Recursively import Perl hash/array structure ## - #XXX This use of ref() seems to be ok if (!ref($_[0])) { return; } # Perl calls import() on use -- ignore - my $self = $_[0]->_get_self; - my $struct = $_[1]; + my $self = shift->_get_self; + my ($struct) = @_; - #XXX This use of ref() seems to be ok + # struct is not a reference, so just import based on our type if (!ref($struct)) { - ## - # struct is not a reference, so just import based on our type - ## - shift @_; - - if ($self->_type eq TYPE_HASH) { $struct = {@_}; } - elsif ($self->_type eq TYPE_ARRAY) { $struct = [@_]; } + $struct = $self->_repr( @_ ); } - my $r = Scalar::Util::reftype($struct) || ''; - if ($r eq "HASH" && $self->_type eq TYPE_HASH) { - foreach my $key (keys %$struct) { $self->put($key, $struct->{$key}); } - } - elsif ($r eq "ARRAY" && $self->_type eq TYPE_ARRAY) { - $self->push( @$struct ); - } - else { - return $self->_throw_error("Cannot import: type mismatch"); + #XXX This isn't the best solution. Better would be to use Data::Walker, + #XXX but that's a lot more thinking than I want to do right now. 
+ eval { + $self->begin_work; + $self->_import( _clone_data( $struct ) ); + $self->commit; + }; if ( $@ ) { + $self->rollback; + die $@; } return 1; } +#XXX Need to keep track of who has a fh to this file in order to +#XXX close them all prior to optimize on Win32/cygwin sub optimize { ## # Rebuild entire database into new file, then move # it back on top of original. ## - my $self = $_[0]->_get_self; + my $self = shift->_get_self; #XXX Need to create a new test for this -# if ($self->_root->{links} > 1) { -# return $self->_throw_error("Cannot optimize: reference count is greater than 1"); +# if ($self->_storage->{links} > 1) { +# $self->_throw_error("Cannot optimize: reference count is greater than 1"); # } + #XXX Do we have to lock the tempfile? + my $db_temp = DBM::Deep->new( - file => $self->_root->{file} . '.tmp', + file => $self->_storage->{file} . '.tmp', type => $self->_type ); - if (!$db_temp) { - return $self->_throw_error("Cannot optimize: failed to open temp file: $!"); - } $self->lock(); $self->_copy_node( $db_temp ); @@ -354,8 +281,8 @@ sub optimize { my $perms = $stats[2] & 07777; my $uid = $stats[4]; my $gid = $stats[5]; - chown( $uid, $gid, $self->_root->{file} . '.tmp' ); - chmod( $perms, $self->_root->{file} . '.tmp' ); + chown( $uid, $gid, $self->_storage->{file} . '.tmp' ); + chmod( $perms, $self->_storage->{file} . '.tmp' ); # q.v. perlport for more information on this variable if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) { @@ -366,18 +293,19 @@ sub optimize { # with a soft copy. ## $self->unlock(); - $self->{engine}->close_fh( $self ); + $self->_storage->close; } - if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) { - unlink $self->_root->{file} . '.tmp'; + if (!rename $self->_storage->{file} . '.tmp', $self->_storage->{file}) { + unlink $self->_storage->{file} . 
'.tmp'; $self->unlock(); - return $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!"); + $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!"); } $self->unlock(); - $self->{engine}->close_fh( $self ); - $self->{engine}->setup_fh( $self ); + $self->_storage->close; + $self->_storage->open; + $self->_engine->setup_fh( $self ); return 1; } @@ -386,12 +314,14 @@ sub clone { ## # Make copy of object and return ## - my $self = $_[0]->_get_self; + my $self = shift->_get_self; return DBM::Deep->new( - type => $self->_type, + type => $self->_type, base_offset => $self->_base_offset, - root => $self->_root + storage => $self->_storage, + parent => $self->{parent}, + parent_key => $self->{parent_key}, ); } @@ -407,12 +337,12 @@ sub clone { ## # Setup filter function for storing or fetching the key or value ## - my $self = $_[0]->_get_self; - my $type = lc $_[1]; - my $func = $_[2] ? $_[2] : undef; + my $self = shift->_get_self; + my $type = lc shift; + my $func = shift; if ( $is_legal_filter{$type} ) { - $self->_root->{"filter_$type"} = $func; + $self->_storage->{"filter_$type"} = $func; return 1; } @@ -420,42 +350,50 @@ sub clone { } } +sub begin_work { + my $self = shift->_get_self; + return $self->_storage->begin_transaction; +} + +sub rollback { + my $self = shift->_get_self; + return $self->_storage->end_transaction; +} + +sub commit { + my $self = shift->_get_self; + return $self->_storage->commit_transaction; +} + ## # Accessor methods ## -sub _root { - ## - # Get access to the root structure - ## +sub _engine { my $self = $_[0]->_get_self; - return $self->{root}; + return $self->{engine}; } -sub _fh { - ## - # Get access to the raw fh - ## +sub _storage { my $self = $_[0]->_get_self; - return $self->_root->{fh}; + return $self->{storage}; } sub _type { - ## - # Get type of current node (TYPE_HASH or TYPE_ARRAY) - ## my $self = $_[0]->_get_self; return $self->{type}; } sub _base_offset { - ## - # Get base_offset of 
current node (TYPE_HASH or TYPE_ARRAY) - ## my $self = $_[0]->_get_self; return $self->{base_offset}; } +sub _fh { + my $self = $_[0]->_get_self; + return $self->_storage->{fh}; +} + ## # Utility methods ## @@ -464,50 +402,101 @@ sub _throw_error { die "DBM::Deep: $_[1]\n"; } -sub _is_writable { - my $fh = shift; - (O_WRONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0); -} +sub _find_parent { + my $self = shift; -#sub _is_readable { -# my $fh = shift; -# (O_RDONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0); -#} + my $base = ''; + #XXX This if() is redundant + if ( my $parent = $self->{parent} ) { + my $child = $self; + while ( $parent->{parent} ) { + $base = ( + $parent->_type eq TYPE_HASH + ? "\{q{$child->{parent_key}}\}" + : "\[$child->{parent_key}\]" + ) . $base; + + $child = $parent; + $parent = $parent->{parent}; + } + if ( $base ) { + $base = "\$db->get( q{$child->{parent_key}} )->" . $base; + } + else { + $base = "\$db->get( q{$child->{parent_key}} )"; + } + } + return $base; +} sub STORE { ## # Store single hash key/value or array element in database. 
## my $self = shift->_get_self; - my ($key, $value) = @_; + my ($key, $value, $orig_key) = @_; + $orig_key = $key unless defined $orig_key; - unless ( _is_writable( $self->_fh ) ) { + if ( !FileHandle::Fmode::is_W( $self->_fh ) ) { $self->_throw_error( 'Cannot write to a readonly filehandle' ); } - ## - # Request exclusive lock for writing - ## - $self->lock( LOCK_EX ); + #XXX The second condition needs to disappear + if ( !( $self->_type eq TYPE_ARRAY && $orig_key eq 'length') ) { + my $rhs; + + my $r = Scalar::Util::reftype( $value ) || ''; + if ( $r eq 'HASH' ) { + $rhs = '{}'; + } + elsif ( $r eq 'ARRAY' ) { + $rhs = '[]'; + } + elsif ( defined $value ) { + $rhs = "'$value'"; + } + else { + $rhs = "undef"; + } + + if ( my $c = Scalar::Util::blessed( $value ) ) { + $rhs = "bless $rhs, '$c'"; + } - my $md5 = $self->{engine}{digest}->($key); + my $lhs = $self->_find_parent; + if ( $lhs ) { + if ( $self->_type eq TYPE_HASH ) { + $lhs .= "->\{q{$orig_key}\}"; + } + else { + $lhs .= "->\[$orig_key\]"; + } - my $tag = $self->{engine}->find_bucket_list( $self, $md5, { create => 1 } ); + $lhs .= "=$rhs;"; + } + else { + $lhs = "\$db->put(q{$orig_key},$rhs);"; + } - # User may be storing a hash, in which case we do not want it run - # through the filtering system - if ( !ref($value) && $self->_root->{filter_store_value} ) { - $value = $self->_root->{filter_store_value}->( $value ); + $self->_storage->audit($lhs); } ## - # Add key/value to bucket list + # Request exclusive lock for writing ## - my $result = $self->{engine}->add_bucket( $self, $tag, $md5, $key, $value ); + $self->lock( LOCK_EX ); + + # User may be storing a complex value, in which case we do not want it run + # through the filtering system. 
+ if ( !ref($value) && $self->_storage->{filter_store_value} ) { + $value = $self->_storage->{filter_store_value}->( $value ); + } + + $self->_engine->write_value( $self->_base_offset, $key, $value, $orig_key ); $self->unlock(); - return $result; + return 1; } sub FETCH { @@ -515,32 +504,22 @@ sub FETCH { # Fetch single value or element given plain key or array index ## my $self = shift->_get_self; - my $key = shift; - - my $md5 = $self->{engine}{digest}->($key); + my ($key, $orig_key) = @_; + $orig_key = $key unless defined $orig_key; ## # Request shared lock for reading ## $self->lock( LOCK_SH ); - my $tag = $self->{engine}->find_bucket_list( $self, $md5 ); - if (!$tag) { - $self->unlock(); - return; - } - - ## - # Get value from bucket list - ## - my $result = $self->{engine}->get_bucket_value( $self, $tag, $md5 ); + my $result = $self->_engine->read_value( $self->_base_offset, $key, $orig_key ); $self->unlock(); # Filters only apply to scalar values, so the ref check is making # sure the fetched bucket is a scalar, not a child hash or array. - return ($result && !ref($result) && $self->_root->{filter_fetch_value}) - ? $self->_root->{filter_fetch_value}->($result) + return ($result && !ref($result) && $self->_storage->{filter_fetch_value}) + ? 
$self->_storage->{filter_fetch_value}->($result) : $result; } @@ -548,42 +527,38 @@ sub DELETE { ## # Delete single key/value pair or element given plain key or array index ## - my $self = $_[0]->_get_self; - my $key = $_[1]; + my $self = shift->_get_self; + my ($key, $orig_key) = @_; + $orig_key = $key unless defined $orig_key; - unless ( _is_writable( $self->_fh ) ) { + if ( !FileHandle::Fmode::is_W( $self->_fh ) ) { $self->_throw_error( 'Cannot write to a readonly filehandle' ); } + if ( defined $orig_key ) { + my $lhs = $self->_find_parent; + if ( $lhs ) { + $self->_storage->audit( "delete $lhs;" ); + } + else { + $self->_storage->audit( "\$db->delete('$orig_key');" ); + } + } + ## # Request exclusive lock for writing ## $self->lock( LOCK_EX ); - my $md5 = $self->{engine}{digest}->($key); - - my $tag = $self->{engine}->find_bucket_list( $self, $md5 ); - if (!$tag) { - $self->unlock(); - return; - } - ## # Delete bucket ## - my $value = $self->{engine}->get_bucket_value($self, $tag, $md5 ); + my $value = $self->_engine->delete_key( $self->_base_offset, $key, $orig_key ); - if (defined $value && !ref($value) && $self->_root->{filter_fetch_value}) { - $value = $self->_root->{filter_fetch_value}->($value); + if (defined $value && !ref($value) && $self->_storage->{filter_fetch_value}) { + $value = $self->_storage->{filter_fetch_value}->($value); } - my $result = $self->{engine}->delete_bucket( $self, $tag, $md5 ); - - ## - # If this object is an array and the key deleted was on the end of the stack, - # decrement the length variable. 
- ## - $self->unlock(); return $value; @@ -593,30 +568,15 @@ sub EXISTS { ## # Check if a single key or element exists given plain key or array index ## - my $self = $_[0]->_get_self; - my $key = $_[1]; - - my $md5 = $self->{engine}{digest}->($key); + my $self = shift->_get_self; + my ($key) = @_; ## # Request shared lock for reading ## $self->lock( LOCK_SH ); - my $tag = $self->{engine}->find_bucket_list( $self, $md5 ); - if (!$tag) { - $self->unlock(); - - ## - # For some reason, the built-in exists() function returns '' for false - ## - return ''; - } - - ## - # Check if bucket exists and return 1 or '' - ## - my $result = $self->{engine}->bucket_exists( $self, $tag, $md5 ) || ''; + my $result = $self->_engine->key_exists( $self->_base_offset, $key ); $self->unlock(); @@ -627,26 +587,51 @@ sub CLEAR { ## # Clear all keys from hash, or all elements from array. ## - my $self = $_[0]->_get_self; + my $self = shift->_get_self; - unless ( _is_writable( $self->_fh ) ) { + if ( !FileHandle::Fmode::is_W( $self->_fh ) ) { $self->_throw_error( 'Cannot write to a readonly filehandle' ); } + { + my $lhs = $self->_find_parent; + + if ( $self->_type eq TYPE_HASH ) { + $lhs = '%{' . $lhs . '}'; + } + else { + $lhs = '@{' . $lhs . '}'; + } + + $self->_storage->audit( "$lhs = ();" ); + } + ## # Request exclusive lock for writing ## $self->lock( LOCK_EX ); - my $fh = $self->_fh; - - seek($fh, $self->_base_offset + $self->_root->{file_offset}, SEEK_SET); - if (eof $fh) { - $self->unlock(); - return; + if ( $self->_type eq TYPE_HASH ) { + my $key = $self->first_key; + while ( $key ) { + # Retrieve the key before deleting because we depend on next_key + my $next_key = $self->next_key( $key ); + $self->_engine->delete_key( $self->_base_offset, $key, $key ); + $key = $next_key; + } } - - $self->{engine}->create_tag($self, $self->_base_offset, $self->_type, chr(0) x $self->{engine}{index_size}); + else { + my $size = $self->FETCHSIZE; + for my $key ( 0 .. 
$size - 1 ) { + $self->_engine->delete_key( $self->_base_offset, $key, $key ); + } + $self->STORESIZE( 0 ); + } +#XXX This needs updating to use _release_space +# $self->_engine->write_tag( +# $self->_base_offset, $self->_type, +# chr(0)x$self->_engine->{index_size}, +# ); $self->unlock(); @@ -664,46 +649,7 @@ sub delete { (shift)->DELETE( @_ ) } sub exists { (shift)->EXISTS( @_ ) } sub clear { (shift)->CLEAR( @_ ) } -package DBM::Deep::_::Root; - -sub new { - my $class = shift; - my ($args) = @_; - - my $self = bless { - autobless => undef, - autoflush => undef, - end => 0, - fh => undef, - file => undef, - file_offset => 0, - locking => undef, - locked => 0, - filter_store_key => undef, - filter_store_value => undef, - filter_fetch_key => undef, - filter_fetch_value => undef, - %$args, - }, $class; - - if ( $self->{fh} && !$self->{file_offset} ) { - $self->{file_offset} = tell( $self->{fh} ); - } - - return $self; -} - -sub DESTROY { - my $self = shift; - return unless $self; - - close $self->{fh} if $self->{fh}; - - return; -} - 1; - __END__ =head1 NAME @@ -715,10 +661,10 @@ DBM::Deep - A pure perl multi-level hash/array DBM use DBM::Deep; my $db = DBM::Deep->new( "foo.db" ); - $db->{key} = 'value'; # tie() style + $db->{key} = 'value'; print $db->{key}; - $db->put('key' => 'value'); # OO style + $db->put('key' => 'value'); print $db->get('key'); # true multi-level support @@ -727,28 +673,29 @@ DBM::Deep - A pure perl multi-level hash/array DBM 42, 99, ]; -=head1 DESCRIPTION + tie my %db, 'DBM::Deep', 'foo.db'; + $db{key} = 'value'; + print $db{key}; -A unique flat-file database module, written in pure perl. True -multi-level hash/array support (unlike MLDBM, which is faked), hybrid -OO / tie() interface, cross-platform FTPable files, and quite fast. Can -handle millions of keys and unlimited hash levels without significant -slow-down. Written from the ground-up in pure perl -- this is NOT a -wrapper around a C-based DBM. 
Out-of-the-box compatibility with Unix, -Mac OS X and Windows. + tied(%db)->put('key' => 'value'); + print tied(%db)->get('key'); + +=head1 DESCRIPTION -=head1 INSTALLATION +A unique flat-file database module, written in pure perl. True multi-level +hash/array support (unlike MLDBM, which is faked), hybrid OO / tie() +interface, cross-platform FTPable files, ACID transactions, and is quite fast. +Can handle millions of keys and unlimited levels without significant +slow-down. Written from the ground-up in pure perl -- this is NOT a wrapper +around a C-based DBM. Out-of-the-box compatibility with Unix, Mac OS X and +Windows. -Hopefully you are using Perl's excellent CPAN module, which will download -and install the module for you. If not, get the tarball, and run these -commands: +=head1 VERSION DIFFERENCES - tar zxf DBM-Deep-* - cd DBM-Deep-* - perl Makefile.PL - make - make test - make install +B: 0.99_01 and above have significant file format differences from 0.983 and +before. There will be a backwards-compatibility layer in 1.00, but that is +slated for a later 0.99_x release. This version is B backwards compatible +with 0.983 and before. =head1 SETUP @@ -758,9 +705,9 @@ Perl's tie() function. Both are examined here. =head2 OO CONSTRUCTION The recommended way to construct a DBM::Deep object is to use the new() -method, which gets you a blessed, tied hash or array reference. +method, which gets you a blessed I tied hash (or array) reference. - my $db = DBM::Deep->new( "foo.db" ); + my $db = DBM::Deep->new( "foo.db" ); This opens a new database handle, mapped to the file "foo.db". If this file does not exist, it will automatically be created. DB files are @@ -768,28 +715,26 @@ opened in "r+" (read/write) mode, and the type of object returned is a hash, unless otherwise specified (see L below). You can pass a number of options to the constructor to specify things like -locking, autoflush, etc. This is done by passing an inline hash: +locking, autoflush, etc. 
This is done by passing an inline hash (or hashref): - my $db = DBM::Deep->new( - file => "foo.db", - locking => 1, - autoflush => 1 - ); + my $db = DBM::Deep->new( + file => "foo.db", + locking => 1, + autoflush => 1 + ); Notice that the filename is now specified I the hash with the "file" parameter, as opposed to being the sole argument to the constructor. This is required if any options are specified. See L below for the complete list. - - You can also start with an array instead of a hash. For this, you must specify the C parameter: - my $db = DBM::Deep->new( - file => "foo.db", - type => DBM::Deep->TYPE_ARRAY - ); + my $db = DBM::Deep->new( + file => "foo.db", + type => DBM::Deep->TYPE_ARRAY + ); B Specifing the C parameter only takes effect when beginning a new DB file. If you create a DBM::Deep object with an existing file, the @@ -800,24 +745,24 @@ the wrong type is passed in. Alternately, you can create a DBM::Deep handle by using Perl's built-in tie() function. The object returned from tie() can be used to call methods, -such as lock() and unlock(), but cannot be used to assign to the DBM::Deep -file (as expected with most tie'd objects). +such as lock() and unlock(). (That object can be retrieved from the tied +variable at any time using tied() - please see L for more info. - my %hash; - my $db = tie %hash, "DBM::Deep", "foo.db"; + my %hash; + my $db = tie %hash, "DBM::Deep", "foo.db"; - my @array; - my $db = tie @array, "DBM::Deep", "bar.db"; + my @array; + my $db = tie @array, "DBM::Deep", "bar.db"; As with the OO constructor, you can replace the DB filename parameter with a hash containing one or more options (see L just below for the complete list). - tie %hash, "DBM::Deep", { - file => "foo.db", - locking => 1, - autoflush => 1 - }; + tie %hash, "DBM::Deep", { + file => "foo.db", + locking => 1, + autoflush => 1 + }; =head2 OPTIONS @@ -844,6 +789,11 @@ needs. 
If you open it read-only and attempt to write, an exception will be throw open it write-only or append-only, an exception will be thrown immediately as DBM::Deep needs to read from the fh. +=item * audit_file / audit_fh + +These are just like file/fh, except for auditing. Please see L for +more information. + =item * file_offset This is the offset within the file that the DBM::Deep db starts. Most of the time, you will @@ -854,17 +804,27 @@ If you pass in fh and do not set this, it will be set appropriately. =item * type This parameter specifies what type of object to create, a hash or array. Use -one of these two constants: CTYPE_HASH> or CTYPE_ARRAY>. +one of these two constants: + +=over 4 + +=item * CTYPE_HASH> + +=item * CTYPE_ARRAY>. + +=back + This only takes effect when beginning a new file. This is an optional parameter, and defaults to CTYPE_HASH>. =item * locking -Specifies whether locking is to be enabled. DBM::Deep uses Perl's Fnctl flock() -function to lock the database in exclusive mode for writes, and shared mode for -reads. Pass any true value to enable. This affects the base DB handle I that use the same DB file. This is an optional -parameter, and defaults to 0 (disabled). See L below for more. +Specifies whether locking is to be enabled. DBM::Deep uses Perl's flock() +function to lock the database in exclusive mode for writes, and shared mode +for reads. Pass any true value to enable. This affects the base DB handle +I that use the same DB file. This is an +optional parameter, and defaults to 0 (disabled). See L below for +more. =item * autoflush @@ -876,16 +836,15 @@ Pass any true value to enable. This is an optional parameter, and defaults to 0 =item * autobless -If I mode is enabled, DBM::Deep will preserve blessed hashes, and -restore them when fetched. This is an B feature, and does have -side-effects. Basically, when hashes are re-blessed into their original -classes, they are no longer blessed into the DBM::Deep class! 
So you won't be -able to call any DBM::Deep methods on them. You have been warned. -This is an optional parameter, and defaults to 0 (disabled). +If I mode is enabled, DBM::Deep will preserve the class something +is blessed into, and restores it when fetched. This is an optional parameter, and defaults to 1 (enabled). + +B If you use the OO-interface, you will not be able to call any methods +of DBM::Deep on the blessed item. This is considered to be a feature. =item * filter_* -See L below. +See L below. =back @@ -905,35 +864,35 @@ to access your databases. You can treat any DBM::Deep object like a normal Perl hash reference. Add keys, or even nested hashes (or arrays) using standard Perl syntax: - my $db = DBM::Deep->new( "foo.db" ); + my $db = DBM::Deep->new( "foo.db" ); - $db->{mykey} = "myvalue"; - $db->{myhash} = {}; - $db->{myhash}->{subkey} = "subvalue"; + $db->{mykey} = "myvalue"; + $db->{myhash} = {}; + $db->{myhash}->{subkey} = "subvalue"; - print $db->{myhash}->{subkey} . "\n"; + print $db->{myhash}->{subkey} . "\n"; You can even step through hash keys using the normal Perl C function: - foreach my $key (keys %$db) { - print "$key: " . $db->{$key} . "\n"; - } + foreach my $key (keys %$db) { + print "$key: " . $db->{$key} . "\n"; + } Remember that Perl's C function extracts I key from the hash and pushes them onto an array, all before the loop even begins. If you have an -extra large hash, this may exhaust Perl's memory. Instead, consider using +extremely large hash, this may exhaust Perl's memory. Instead, consider using Perl's C function, which pulls keys/values one at a time, using very little memory: - while (my ($key, $value) = each %$db) { - print "$key: $value\n"; - } + while (my ($key, $value) = each %$db) { + print "$key: $value\n"; + } Please note that when using C, you should always pass a direct hash reference, not a lookup. 
Meaning, you should B do this: - # NEVER DO THIS - while (my ($key, $value) = each %{$db->{foo}}) { # BAD + # NEVER DO THIS + while (my ($key, $value) = each %{$db->{foo}}) { # BAD This causes an infinite loop, because for each iteration, Perl is calling FETCH() on the $db handle, resulting in a "new" hash for foo every time, so @@ -948,27 +907,28 @@ and the C, C, C, C and C functions. The object must have first been created using type CTYPE_ARRAY>, or simply be a nested array reference inside a hash. Example: - my $db = DBM::Deep->new( - file => "foo-array.db", - type => DBM::Deep->TYPE_ARRAY - ); + my $db = DBM::Deep->new( + file => "foo-array.db", + type => DBM::Deep->TYPE_ARRAY + ); - $db->[0] = "foo"; - push @$db, "bar", "baz"; - unshift @$db, "bah"; + $db->[0] = "foo"; + push @$db, "bar", "baz"; + unshift @$db, "bah"; - my $last_elem = pop @$db; # baz - my $first_elem = shift @$db; # bah - my $second_elem = $db->[1]; # bar + my $last_elem = pop @$db; # baz + my $first_elem = shift @$db; # bah + my $second_elem = $db->[1]; # bar - my $num_elements = scalar @$db; + my $num_elements = scalar @$db; =head1 OO INTERFACE In addition to the I interface, you can also use a standard OO interface to manipulate all aspects of DBM::Deep databases. Each type of object (hash or array) has its own methods, but both types share the following common methods: -C, C, C, C and C. +C, C, C, C and C. C and +C are aliases to C and C, respectively. =over @@ -982,8 +942,8 @@ Stores a new hash key/value pair, or sets an array element value. Takes two arguments, the hash key or array index, and the new value. The value can be a scalar, hash ref or array ref. Returns true on success, false on failure. - $db->put("foo", "bar"); # for hashes - $db->put(1, "bar"); # for arrays + $db->put("foo", "bar"); # for hashes + $db->put(1, "bar"); # for arrays =item * get() / fetch() @@ -991,16 +951,16 @@ Fetches the value of a hash key or array element. 
Takes one argument: the hash key or array index. Returns a scalar, hash ref or array ref, depending on the data type stored. - my $value = $db->get("foo"); # for hashes - my $value = $db->get(1); # for arrays + my $value = $db->get("foo"); # for hashes + my $value = $db->get(1); # for arrays =item * exists() Checks if a hash key or array index exists. Takes one argument: the hash key or array index. Returns true if it exists, false if not. - if ($db->exists("foo")) { print "yay!\n"; } # for hashes - if ($db->exists(1)) { print "yay!\n"; } # for arrays + if ($db->exists("foo")) { print "yay!\n"; } # for hashes + if ($db->exists(1)) { print "yay!\n"; } # for arrays =item * delete() @@ -1012,8 +972,8 @@ internal arrays work. Please note that the space occupied by the deleted key/value or element is B reused again -- see L below for details and workarounds. - $db->delete("foo"); # for hashes - $db->delete(1); # for arrays + $db->delete("foo"); # for hashes + $db->delete(1); # for arrays =item * clear() @@ -1022,7 +982,7 @@ value. Please note that the space occupied by the deleted keys/values or elements is B reused again -- see L below for details and workarounds. - $db->clear(); # hashes or arrays + $db->clear(); # hashes or arrays =item * lock() / unlock() @@ -1030,16 +990,13 @@ q.v. Locking. =item * optimize() -Recover lost disk space. +Recover lost disk space. This is important to do, especially if you use +transactions. =item * import() / export() Data going in and out. -=item * set_digest() / set_pack() / set_filter() - -q.v. adjusting the interal parameters. - =back =head2 HASHES @@ -1055,35 +1012,35 @@ Returns the "first" key in the hash. As with built-in Perl hashes, keys are fetched in an undefined order (which appears random). Takes no arguments, returns the key as a scalar value. - my $key = $db->first_key(); + my $key = $db->first_key(); =item * next_key() Returns the "next" key in the hash, given the previous one as the sole argument. 
Returns undef if there are no more keys to be fetched. - $key = $db->next_key($key); + $key = $db->next_key($key); =back Here are some examples of using hashes: - my $db = DBM::Deep->new( "foo.db" ); + my $db = DBM::Deep->new( "foo.db" ); - $db->put("foo", "bar"); - print "foo: " . $db->get("foo") . "\n"; + $db->put("foo", "bar"); + print "foo: " . $db->get("foo") . "\n"; - $db->put("baz", {}); # new child hash ref - $db->get("baz")->put("buz", "biz"); - print "buz: " . $db->get("baz")->get("buz") . "\n"; + $db->put("baz", {}); # new child hash ref + $db->get("baz")->put("buz", "biz"); + print "buz: " . $db->get("baz")->get("buz") . "\n"; - my $key = $db->first_key(); - while ($key) { - print "$key: " . $db->get($key) . "\n"; - $key = $db->next_key($key); - } + my $key = $db->first_key(); + while ($key) { + print "$key: " . $db->get($key) . "\n"; + $key = $db->next_key($key); + } - if ($db->exists("foo")) { $db->delete("foo"); } + if ($db->exists("foo")) { $db->delete("foo"); } =head2 ARRAYS @@ -1097,21 +1054,21 @@ C and C. Returns the number of elements in the array. Takes no arguments. - my $len = $db->length(); + my $len = $db->length(); =item * push() Adds one or more elements onto the end of the array. Accepts scalars, hash refs or array refs. No return value. - $db->push("foo", "bar", {}); + $db->push("foo", "bar", {}); =item * pop() Fetches the last element in the array, and deletes it. Takes no arguments. Returns undef if array is empty. Returns the element value. - my $elem = $db->pop(); + my $elem = $db->pop(); =item * shift() @@ -1120,7 +1077,7 @@ remaining elements over to take up the space. Returns the element value. This method is not recommended with large arrays -- see L below for details. - my $elem = $db->shift(); + my $elem = $db->shift(); =item * unshift() @@ -1129,7 +1086,7 @@ existing elements over to make room. Accepts scalars, hash refs or array refs. No return value. This method is not recommended with large arrays -- see below for details. 
- $db->unshift("foo", "bar", {}); + $db->unshift("foo", "bar", {}); =item * splice() @@ -1141,37 +1098,37 @@ not recommended with large arrays -- see L below for details. Here are some examples of using arrays: - my $db = DBM::Deep->new( - file => "foo.db", - type => DBM::Deep->TYPE_ARRAY - ); + my $db = DBM::Deep->new( + file => "foo.db", + type => DBM::Deep->TYPE_ARRAY + ); - $db->push("bar", "baz"); - $db->unshift("foo"); - $db->put(3, "buz"); + $db->push("bar", "baz"); + $db->unshift("foo"); + $db->put(3, "buz"); - my $len = $db->length(); - print "length: $len\n"; # 4 + my $len = $db->length(); + print "length: $len\n"; # 4 - for (my $k=0; $k<$len; $k++) { - print "$k: " . $db->get($k) . "\n"; - } + for (my $k=0; $k<$len; $k++) { + print "$k: " . $db->get($k) . "\n"; + } - $db->splice(1, 2, "biz", "baf"); + $db->splice(1, 2, "biz", "baf"); - while (my $elem = shift @$db) { - print "shifted: $elem\n"; - } + while (my $elem = shift @$db) { + print "shifted: $elem\n"; + } =head1 LOCKING Enable automatic file locking by passing a true value to the C parameter when constructing your DBM::Deep object (see L above). - my $db = DBM::Deep->new( - file => "foo.db", - locking => 1 - ); + my $db = DBM::Deep->new( + file => "foo.db", + locking => 1 + ); This causes DBM::Deep to C the underlying filehandle with exclusive mode for writes, and shared mode for reads. This is required if you have @@ -1187,26 +1144,27 @@ optional lock mode argument (defaults to exclusive mode). This is particularly useful for things like counters, where the current value needs to be fetched, then incremented, then stored again. - $db->lock(); - my $counter = $db->get("counter"); - $counter++; - $db->put("counter", $counter); - $db->unlock(); + $db->lock(); + my $counter = $db->get("counter"); + $counter++; + $db->put("counter", $counter); + $db->unlock(); - # or... + # or... 
- $db->lock(); - $db->{counter}++; - $db->unlock(); + $db->lock(); + $db->{counter}++; + $db->unlock(); You can pass C an optional argument, which specifies which mode to use -(exclusive or shared). Use one of these two constants: CLOCK_EX> -or CLOCK_SH>. These are passed directly to C, and are the -same as the constants defined in Perl's C module. +(exclusive or shared). Use one of these two constants: +CLOCK_EX> or CLOCK_SH>. These are passed +directly to C, and are the same as the constants defined in Perl's +L module. - $db->lock( DBM::Deep->LOCK_SH ); - # something here - $db->unlock(); + $db->lock( $db->LOCK_SH ); + # something here + $db->unlock(); =head1 IMPORTING/EXPORTING @@ -1221,20 +1179,20 @@ walking the structure and adding keys/elements to the database as you go, simply pass a reference to the C method. This recursively adds everything to an existing DBM::Deep object for you. Here is an example: - my $struct = { - key1 => "value1", - key2 => "value2", - array1 => [ "elem0", "elem1", "elem2" ], - hash1 => { - subkey1 => "subvalue1", - subkey2 => "subvalue2" - } - }; + my $struct = { + key1 => "value1", + key2 => "value2", + array1 => [ "elem0", "elem1", "elem2" ], + hash1 => { + subkey1 => "subvalue1", + subkey2 => "subvalue2" + } + }; - my $db = DBM::Deep->new( "foo.db" ); - $db->import( $struct ); + my $db = DBM::Deep->new( "foo.db" ); + $db->import( $struct ); - print $db->{key1} . "\n"; # prints "value1" + print $db->{key1} . "\n"; # prints "value1" This recursively imports the entire C<$struct> object into C<$db>, including all nested hashes and arrays. If the DBM::Deep object contains exsiting data, @@ -1243,7 +1201,8 @@ The C method can be called on any database level (not just the base level), and works with both hash and array DB types. B Make sure your existing structure has no circular references in it. -These will cause an infinite loop when importing. +These will cause an infinite loop when importing. 
There are plans to fix this +in a later release. =head2 EXPORTING @@ -1252,17 +1211,17 @@ a reference to a new in-memory copy of the database. The export is done recursively, so all nested hashes/arrays are all exported to standard Perl objects. Here is an example: - my $db = DBM::Deep->new( "foo.db" ); + my $db = DBM::Deep->new( "foo.db" ); - $db->{key1} = "value1"; - $db->{key2} = "value2"; - $db->{hash1} = {}; - $db->{hash1}->{subkey1} = "subvalue1"; - $db->{hash1}->{subkey2} = "subvalue2"; + $db->{key1} = "value1"; + $db->{key2} = "value2"; + $db->{hash1} = {}; + $db->{hash1}->{subkey1} = "subvalue1"; + $db->{hash1}->{subkey2} = "subvalue2"; - my $struct = $db->export(); + my $struct = $db->export(); - print $struct->{key1} . "\n"; # prints "value1" + print $struct->{key1} . "\n"; # prints "value1" This makes a complete copy of the database in memory, and returns a reference to it. The C method can be called on any database level (not just @@ -1271,7 +1230,8 @@ large databases -- you can store a lot more data in a DBM::Deep object than an in-memory Perl structure. B Make sure your database has no circular references in it. -These will cause an infinite loop when exporting. +These will cause an infinite loop when exporting. There are plans to fix this +in a later release. =head1 FILTERS @@ -1310,16 +1270,16 @@ It is passed the transformed value, and expected to return the plain value. Here are the two ways to setup a filter hook: - my $db = DBM::Deep->new( - file => "foo.db", - filter_store_value => \&my_filter_store, - filter_fetch_value => \&my_filter_fetch - ); + my $db = DBM::Deep->new( + file => "foo.db", + filter_store_value => \&my_filter_store, + filter_fetch_value => \&my_filter_fetch + ); - # or... + # or... 
- $db->set_filter( "filter_store_value", \&my_filter_store ); - $db->set_filter( "filter_fetch_value", \&my_filter_fetch ); + $db->set_filter( "filter_store_value", \&my_filter_store ); + $db->set_filter( "filter_fetch_value", \&my_filter_fetch ); Your filter function will be called only when dealing with SCALAR keys or values. When nested hashes and arrays are being stored/fetched, filtering @@ -1327,7 +1287,7 @@ is bypassed. Filters are called as static functions, passed a single SCALAR argument, and expected to return a single SCALAR value. If you want to remove a filter, set the function reference to C: - $db->set_filter( "filter_store_value", undef ); + $db->set_filter( "filter_store_value", undef ); =head2 REAL-TIME ENCRYPTION EXAMPLE @@ -1336,41 +1296,41 @@ do real-time encryption / decryption of keys & values with DBM::Deep Filters. Please visit L for more on I. You'll also need the I module. - use DBM::Deep; - use Crypt::Blowfish; - use Crypt::CBC; - - my $cipher = Crypt::CBC->new({ - 'key' => 'my secret key', - 'cipher' => 'Blowfish', - 'iv' => '$KJh#(}q', - 'regenerate_key' => 0, - 'padding' => 'space', - 'prepend_iv' => 0 - }); - - my $db = DBM::Deep->new( - file => "foo-encrypt.db", - filter_store_key => \&my_encrypt, - filter_store_value => \&my_encrypt, - filter_fetch_key => \&my_decrypt, - filter_fetch_value => \&my_decrypt, - ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - print "key1: " . $db->{key1} . "\n"; - print "key2: " . $db->{key2} . 
"\n"; - - undef $db; - exit; - - sub my_encrypt { - return $cipher->encrypt( $_[0] ); - } - sub my_decrypt { - return $cipher->decrypt( $_[0] ); - } + use DBM::Deep; + use Crypt::Blowfish; + use Crypt::CBC; + + my $cipher = Crypt::CBC->new({ + 'key' => 'my secret key', + 'cipher' => 'Blowfish', + 'iv' => '$KJh#(}q', + 'regenerate_key' => 0, + 'padding' => 'space', + 'prepend_iv' => 0 + }); + + my $db = DBM::Deep->new( + file => "foo-encrypt.db", + filter_store_key => \&my_encrypt, + filter_store_value => \&my_encrypt, + filter_fetch_key => \&my_decrypt, + filter_fetch_value => \&my_decrypt, + ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; + + undef $db; + exit; + + sub my_encrypt { + return $cipher->encrypt( $_[0] ); + } + sub my_decrypt { + return $cipher->decrypt( $_[0] ); + } =head2 REAL-TIME COMPRESSION EXAMPLE @@ -1379,31 +1339,31 @@ compression / decompression of keys & values with DBM::Deep Filters. Please visit L for more on I. - use DBM::Deep; - use Compress::Zlib; - - my $db = DBM::Deep->new( - file => "foo-compress.db", - filter_store_key => \&my_compress, - filter_store_value => \&my_compress, - filter_fetch_key => \&my_decompress, - filter_fetch_value => \&my_decompress, - ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - print "key1: " . $db->{key1} . "\n"; - print "key2: " . $db->{key2} . "\n"; - - undef $db; - exit; - - sub my_compress { - return Compress::Zlib::memGzip( $_[0] ) ; - } - sub my_decompress { - return Compress::Zlib::memGunzip( $_[0] ) ; - } + use DBM::Deep; + use Compress::Zlib; + + my $db = DBM::Deep->new( + file => "foo-compress.db", + filter_store_key => \&my_compress, + filter_store_value => \&my_compress, + filter_fetch_key => \&my_decompress, + filter_fetch_value => \&my_decompress, + ); + + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . 
"\n"; + + undef $db; + exit; + + sub my_compress { + return Compress::Zlib::memGzip( $_[0] ) ; + } + sub my_decompress { + return Compress::Zlib::memGunzip( $_[0] ) ; + } B Filtering of keys only applies to hashes. Array "keys" are actually numerical index numbers, and are not filtered. @@ -1413,48 +1373,54 @@ actually numerical index numbers, and are not filtered. Most DBM::Deep methods return a true value for success, and call die() on failure. You can wrap calls in an eval block to catch the die. - my $db = DBM::Deep->new( "foo.db" ); # create hash - eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call + my $db = DBM::Deep->new( "foo.db" ); # create hash + eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call - print $@; # prints error message + print $@; # prints error message =head1 LARGEFILE SUPPORT If you have a 64-bit system, and your Perl is compiled with both LARGEFILE and 64-bit support, you I be able to create databases larger than 2 GB. DBM::Deep by default uses 32-bit file offset tags, but these can be changed -by calling the static C method before you do anything else. +by specifying the 'pack_size' parameter when constructing the file. - DBM::Deep::set_pack(8, 'Q'); + DBM::Deep->new( + filename => $filename, + pack_size => 'large', + ); This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words instead of 32-bit longs. After setting these values your DB files have a theoretical maximum size of 16 XB (exabytes). +You can also use C 'small'> in order to use 16-bit file +offsets. + B Changing these values will B work for existing database files. -Only change this for new files, and make sure it stays set consistently -throughout the file's life. If you do set these values, you can no longer -access 32-bit DB files. You can, however, call C to change -back to 32-bit mode. +Only change this for new files. 
Once the value has been set, it is stored in +the file's header and cannot be changed for the life of the file. These +parameters are per-file, meaning you can access 32-bit and 64-bit files, as +you chose. -B I have not personally tested files > 2 GB -- all my systems have -only a 32-bit Perl. However, I have received user reports that this does -indeed work! +B We have not personally tested files larger than 2 GB -- all my +systems have only a 32-bit Perl. However, I have received user reports that +this does indeed work! =head1 LOW-LEVEL ACCESS If you require low-level access to the underlying filehandle that DBM::Deep uses, you can call the C<_fh()> method, which returns the handle: - my $fh = $db->_fh(); + my $fh = $db->_fh(); This method can be called on the root level of the datbase, or any child hashes or arrays. All levels share a I structure, which contains things like the filehandle, a reference counter, and all the options specified -when you created the object. You can get access to this root structure by -calling the C method. +when you created the object. You can get access to this file object by +calling the C<_storage()> method. - my $root = $db->_root(); + my $file_obj = $db->_storage(); This is useful for changing options after the object has already been created, such as enabling/disabling locking. You can also store your own temporary user @@ -1466,41 +1432,44 @@ any child hash or array. DBM::Deep by default uses the I (MD5) algorithm for hashing keys. However you can override this, and use another algorithm (such as SHA-256) or even write your own. But please note that DBM::Deep currently expects zero -collisions, so your algorithm has to be I, so to speak. -Collision detection may be introduced in a later version. - +collisions, so your algorithm has to be I, so to speak. Collision +detection may be introduced in a later version. 
- -You can specify a custom digest algorithm by calling the static C -function, passing a reference to a subroutine, and the length of the algorithm's -hashes (in bytes). This is a global static function, which affects ALL DBM::Deep -objects. Here is a working example that uses a 256-bit hash from the +You can specify a custom digest algorithm by passing it into the parameter +list for new(), passing a reference to a subroutine as the 'digest' parameter, +and the length of the algorithm's hashes (in bytes) as the 'hash_size' +parameter. Here is a working example that uses a 256-bit hash from the I module. Please see -L for more. +L for more information. - use DBM::Deep; - use Digest::SHA256; + use DBM::Deep; + use Digest::SHA256; - my $context = Digest::SHA256::new(256); + my $context = Digest::SHA256::new(256); - DBM::Deep::set_digest( \&my_digest, 32 ); + my $db = DBM::Deep->new( + filename => "foo-sha.db", + digest => \&my_digest, + hash_size => 32, + ); - my $db = DBM::Deep->new( "foo-sha.db" ); + $db->{key1} = "value1"; + $db->{key2} = "value2"; + print "key1: " . $db->{key1} . "\n"; + print "key2: " . $db->{key2} . "\n"; - $db->{key1} = "value1"; - $db->{key2} = "value2"; - print "key1: " . $db->{key1} . "\n"; - print "key2: " . $db->{key2} . "\n"; + undef $db; + exit; - undef $db; - exit; - - sub my_digest { - return substr( $context->hash($_[0]), 0, 32 ); - } + sub my_digest { + return substr( $context->hash($_[0]), 0, 32 ); + } B Your returned digest strings must be B the number -of bytes you specify in the C function (in this case 32). +of bytes you specify in the hash_size parameter (in this case 32). + +B If you do choose to use a custom digest algorithm, you must set it +every time you access this file. Otherwise, the default (MD5) will be used. =head1 CIRCULAR REFERENCES @@ -1509,23 +1478,53 @@ can have a nested hash key or array element that points to a parent object. This relationship is stored in the DB file, and is preserved between sessions. 
Here is an example: - my $db = DBM::Deep->new( "foo.db" ); + my $db = DBM::Deep->new( "foo.db" ); - $db->{foo} = "bar"; - $db->{circle} = $db; # ref to self + $db->{foo} = "bar"; + $db->{circle} = $db; # ref to self - print $db->{foo} . "\n"; # prints "foo" - print $db->{circle}->{foo} . "\n"; # prints "foo" again + print $db->{foo} . "\n"; # prints "bar" + print $db->{circle}->{foo} . "\n"; # prints "bar" again -One catch is, passing the object to a function that recursively walks the +B: Passing the object to a function that recursively walks the object tree (such as I or even the built-in C or -C methods) will result in an infinite loop. The other catch is, -if you fetch the I of a circular reference (i.e. using the C -or C methods), you will get the I, not the -ref's key. This gets even more interesting with the above example, where -the I key points to the base DB object, which technically doesn't -have a key. So I made DBM::Deep return "[base]" as the key name in that -special case. +C methods) will result in an infinite loop. This will be fixed in +a future release. + +=head1 AUDITING + +New in 0.99_01 is the ability to audit your databases actions. By passing in +audit_file (or audit_fh) to the constructor, all actions will be logged to +that file. The format is one that is suitable for eval'ing against the +database to replay the actions. Please see t/33_audit_trail.t for an example +of how to do this. + +=head1 TRANSACTIONS + +New in 0.99_01 is ACID transactions. Every DBM::Deep object is completely +transaction-ready - it is not an option you have to turn on. Three new methods +have been added to support them. They are: + +=over 4 + +=item * begin_work() + +This starts a transaction. + +=item * commit() + +This applies the changes done within the transaction to the mainline and ends +the transaction. + +=item * rollback() + +This discards the changes done within the transaction to the mainline and ends +the transaction. 
+ +=back + +Transactions in DBM::Deep are done using the MVCC method, the same method used +by the InnoDB MySQL table type. =head1 CAVEATS / ISSUES / BUGS @@ -1540,7 +1539,7 @@ and adding new keys, your file will continuously grow. I am working on this, but in the meantime you can call the built-in C method from time to time (perhaps in a crontab or something) to recover all your unused space. - $db->optimize(); # returns true on success + $db->optimize(); # returns true on success This rebuilds the ENTIRE database into a new file, then moves it on top of the original. The new file will have no unused space, thus it will take up as @@ -1555,25 +1554,55 @@ B Only call optimize() on the top-level node of the database, and make sure there are no child references lying around. DBM::Deep keeps a reference counter, and if it is greater than 1, optimize() will abort and return undef. -=head2 AUTOVIVIFICATION +=head2 REFERENCES + +(The reasons given assume a high level of Perl understanding, specifically of +references. You can safely skip this section.) + +Currently, the only references supported are HASH and ARRAY. The other reference +types (SCALAR, CODE, GLOB, and REF) cannot be supported for various reasons. + +=over 4 + +=item * GLOB + +These are things like filehandles and other sockets. They can't be supported +because it's completely unclear how DBM::Deep should serialize them. + +=item * SCALAR / REF + +The discussion here refers to the following type of example: -Unfortunately, autovivification doesn't work with tied hashes. This appears to -be a bug in Perl's tie() system, as I encountered the very same -issue with his I module (see L), -and it is also mentioned in the BUGS section for the I module ). Basically, on a new db file, -this does not work: + my $x = 25; + $db->{key1} = \$x; - $db->{foo}->{bar} = "hello"; + $x = 50; -Since "foo" doesn't exist, you cannot add "bar" to it. You end up with "foo" -being an empty hash. 
Try this instead, which works fine: + # In some other process ... - $db->{foo} = { bar => "hello" }; + my $val = ${ $db->{key1} }; -As of Perl 5.8.7, this bug still exists. I have walked very carefully through -the execution path, and Perl indeed passes an empty hash to the STORE() method. -Probably a bug in Perl. + is( $val, 50, "What actually gets stored in the DB file?" ); + +The problem is one of synchronization. When the variable being referred to +changes value, the reference isn't notified. This means that the new value won't +be stored in the datafile for other processes to read. There is no TIEREF. + +It is theoretically possible to store references to values already within a +DBM::Deep object because everything already is synchronized, but the change to +the internals would be quite large. Specifically, DBM::Deep would have to tie +every single value that is stored. This would bloat the RAM footprint of +DBM::Deep at least twofold (if not more) and be a significant performance drain, +all to support a feature that has never been requested. + +=item * CODE + +L provides a mechanism for serializing coderefs, +including saving off all closure state. However, just as for SCALAR and REF, +that closure state may change without notifying the DBM::Deep object storing +the reference. + +=back =head2 FILE CORRUPTION @@ -1587,13 +1616,13 @@ be addressed in a later version of DBM::Deep. =head2 DB OVER NFS -Beware of using DB files over NFS. DBM::Deep uses flock(), which works well on local -filesystems, but will NOT protect you from file corruption over NFS. I've heard -about setting up your NFS server with a locking daemon, then using lockf() to -lock your files, but your mileage may vary there as well. From what I -understand, there is no real way to do it. However, if you need access to the -underlying filehandle in DBM::Deep for using some other kind of locking scheme like -lockf(), see the L section above. +Beware of using DBM::Deep files over NFS. 
DBM::Deep uses flock(), which works +well on local filesystems, but will NOT protect you from file corruption over +NFS. I've heard about setting up your NFS server with a locking daemon, then +using lockf() to lock your files, but your mileage may vary there as well. +From what I understand, there is no real way to do it. However, if you need +access to the underlying filehandle in DBM::Deep for using some other kind of +locking scheme like lockf(), see the L section above. =head2 COPYING OBJECTS @@ -1601,10 +1630,10 @@ Beware of copying tied objects in Perl. Very strange things can happen. Instead, use DBM::Deep's C method which safely copies the object and returns a new, blessed, tied hash or array to the same level in the DB. - my $copy = $db->clone(); + my $copy = $db->clone(); B: Since clone() here is cloning the object, not the database location, any -modifications to either $db or $copy will be visible in both. +modifications to either $db or $copy will be visible to both. =head2 LARGE ARRAYS @@ -1620,226 +1649,36 @@ writeonly mode. STORE will verify that the filehandle is writable. However, ther doesn't seem to be a good way to determine if a filehandle is readable. And, if the filehandle isn't readable, it's not clear what will happen. So, don't do that. -=head1 PERFORMANCE - -This section discusses DBM::Deep's speed and memory usage. - -=head2 SPEED - -Obviously, DBM::Deep isn't going to be as fast as some C-based DBMs, such as -the almighty I. But it makes up for it in features like true -multi-level hash/array support, and cross-platform FTPable files. Even so, -DBM::Deep is still pretty fast, and the speed stays fairly consistent, even -with huge databases. Here is some test data: - - Adding 1,000,000 keys to new DB file... - - At 100 keys, avg. speed is 2,703 keys/sec - At 200 keys, avg. speed is 2,642 keys/sec - At 300 keys, avg. speed is 2,598 keys/sec - At 400 keys, avg. speed is 2,578 keys/sec - At 500 keys, avg. 
speed is 2,722 keys/sec - At 600 keys, avg. speed is 2,628 keys/sec - At 700 keys, avg. speed is 2,700 keys/sec - At 800 keys, avg. speed is 2,607 keys/sec - At 900 keys, avg. speed is 2,190 keys/sec - At 1,000 keys, avg. speed is 2,570 keys/sec - At 2,000 keys, avg. speed is 2,417 keys/sec - At 3,000 keys, avg. speed is 1,982 keys/sec - At 4,000 keys, avg. speed is 1,568 keys/sec - At 5,000 keys, avg. speed is 1,533 keys/sec - At 6,000 keys, avg. speed is 1,787 keys/sec - At 7,000 keys, avg. speed is 1,977 keys/sec - At 8,000 keys, avg. speed is 2,028 keys/sec - At 9,000 keys, avg. speed is 2,077 keys/sec - At 10,000 keys, avg. speed is 2,031 keys/sec - At 20,000 keys, avg. speed is 1,970 keys/sec - At 30,000 keys, avg. speed is 2,050 keys/sec - At 40,000 keys, avg. speed is 2,073 keys/sec - At 50,000 keys, avg. speed is 1,973 keys/sec - At 60,000 keys, avg. speed is 1,914 keys/sec - At 70,000 keys, avg. speed is 2,091 keys/sec - At 80,000 keys, avg. speed is 2,103 keys/sec - At 90,000 keys, avg. speed is 1,886 keys/sec - At 100,000 keys, avg. speed is 1,970 keys/sec - At 200,000 keys, avg. speed is 2,053 keys/sec - At 300,000 keys, avg. speed is 1,697 keys/sec - At 400,000 keys, avg. speed is 1,838 keys/sec - At 500,000 keys, avg. speed is 1,941 keys/sec - At 600,000 keys, avg. speed is 1,930 keys/sec - At 700,000 keys, avg. speed is 1,735 keys/sec - At 800,000 keys, avg. speed is 1,795 keys/sec - At 900,000 keys, avg. speed is 1,221 keys/sec - At 1,000,000 keys, avg. speed is 1,077 keys/sec - -This test was performed on a PowerMac G4 1gHz running Mac OS X 10.3.2 & Perl -5.8.1, with an 80GB Ultra ATA/100 HD spinning at 7200RPM. The hash keys and -values were between 6 - 12 chars in length. The DB file ended up at 210MB. -Run time was 12 min 3 sec. - -=head2 MEMORY USAGE - -One of the great things about DBM::Deep is that it uses very little memory. -Even with huge databases (1,000,000+ keys) you will not see much increased -memory on your process. 
DBM::Deep relies solely on the filesystem for storing -and fetching data. Here is output from I before even opening a -database handle: - - PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND - 22831 root 11 0 2716 2716 1296 R 0.0 0.2 0:07 perl - -Basically the process is taking 2,716K of memory. And here is the same -process after storing and fetching 1,000,000 keys: - - PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND - 22831 root 14 0 2772 2772 1328 R 0.0 0.2 13:32 perl - -Notice the memory usage increased by only 56K. Test was performed on a 700mHz -x86 box running Linux RedHat 7.2 & Perl 5.6.1. - -=head1 DB FILE FORMAT - -In case you were interested in the underlying DB file format, it is documented -here in this section. You don't need to know this to use the module, it's just -included for reference. - -=head2 SIGNATURE - -DBM::Deep files always start with a 32-bit signature to identify the file type. -This is at offset 0. The signature is "DPDB" in network byte order. This is -checked for when the file is opened and an error will be thrown if it's not found. - -=head2 TAG - -The DBM::Deep file is in a I, meaning each section of the file -has a standard header containing the type of data, the length of data, and then -the data itself. The type is a single character (1 byte), the length is a -32-bit unsigned long in network byte order, and the data is, well, the data. -Here is how it unfolds: - -=head2 MASTER INDEX - -Immediately after the 32-bit file signature is the I record. -This is a standard tag header followed by 1024 bytes (in 32-bit mode) or 2048 -bytes (in 64-bit mode) of data. The type is I for hash or I for array, -depending on how the DBM::Deep object was constructed. - -The index works by looking at a I of the hash key (or array index -number). The first 8-bit char of the MD5 signature is the offset into the -index, multipled by 4 in 32-bit mode, or 8 in 64-bit mode. 
The value of the -index element is a file offset of the next tag for the key/element in question, -which is usually a I tag (see below). - -The next tag I be another index, depending on how many keys/elements -exist. See L below for details. - -=head2 BUCKET LIST - -A I is a collection of 16 MD5 hashes for keys/elements, plus -file offsets to where the actual data is stored. It starts with a standard -tag header, with type I, and a data size of 320 bytes in 32-bit mode, or -384 bytes in 64-bit mode. Each MD5 hash is stored in full (16 bytes), plus -the 32-bit or 64-bit file offset for the I containing the actual data. -When the list fills up, a I operation is performed (See -L below). - -=head2 BUCKET - -A I is a tag containing a key/value pair (in hash mode), or a -index/value pair (in array mode). It starts with a standard tag header with -type I for scalar data (string, binary, etc.), or it could be a nested -hash (type I) or array (type I). The value comes just after the tag -header. The size reported in the tag header is only for the value, but then, -just after the value is another size (32-bit unsigned long) and then the plain -key itself. Since the value is likely to be fetched more often than the plain -key, I figured it would be I faster to store the value first. - -If the type is I (hash) or I (array), the value is another I -record for the nested structure, where the process begins all over again. - -=head2 RE-INDEXING - -After a I grows to 16 records, its allocated space in the file is -exhausted. Then, when another key/element comes in, the list is converted to a -new index record. However, this index will look at the next char in the MD5 -hash, and arrange new Bucket List pointers accordingly. This process is called -I. Basically, a new index tag is created at the file EOF, and all -17 (16 + new one) keys/elements are removed from the old Bucket List and -inserted into the new index. 
Several new Bucket Lists are created in the -process, as a new MD5 char from the key is being examined (it is unlikely that -the keys will all share the same next char of their MD5s). - -Because of the way the I algorithm works, it is impossible to tell exactly -when the Bucket Lists will turn into indexes, but the first round tends to -happen right around 4,000 keys. You will see a I decrease in -performance here, but it picks back up pretty quick (see L above). Then -it takes B more keys to exhaust the next level of Bucket Lists. It's -right around 900,000 keys. This process can continue nearly indefinitely -- -right up until the point the I signatures start colliding with each other, -and this is B rare -- like winning the lottery 5 times in a row AND -getting struck by lightning while you are walking to cash in your tickets. -Theoretically, since I hashes are 128-bit values, you I have up to -340,282,366,921,000,000,000,000,000,000,000,000,000 keys/elements (I believe -this is 340 unodecillion, but don't quote me). - -=head2 STORING - -When a new key/element is stored, the key (or index number) is first run through -I to get a 128-bit signature (example, in hex: -b05783b0773d894396d475ced9d2f4f6). Then, the I record is checked -for the first char of the signature (in this case I). If it does not exist, -a new I is created for our key (and the next 15 future keys that -happen to also have I as their first MD5 char). The entire MD5 is written -to the I along with the offset of the new I record (EOF at -this point, unless we are replacing an existing I), where the actual -data will be stored. - -=head2 FETCHING - -Fetching an existing key/element involves getting a I of the key -(or index number), then walking along the indexes. If there are enough -keys/elements in this DB level, there might be nested indexes, each linked to -a particular char of the MD5. Finally, a I is pointed to, which -contains up to 16 full MD5 hashes. 
Each is checked for equality to the key in
-question. If we found a match, the I<Bucket> tag is loaded, where the value and
-plain key are stored.
-
-Fetching the plain key occurs when calling the I<first_key()> and I<next_key()>
-methods. In this process the indexes are walked systematically, and each key
-fetched in increasing MD5 order (which is why it appears random). Once the
-I<Bucket> is found, the value is skipped and the plain key returned instead.
-B<Please Note:> Do not count on keys being fetched as if the MD5 hashes were
-alphabetically sorted. This only happens on an index-level -- as soon as the
-I<Bucket Lists> are hit, the keys will come out in the order they went in --
-so it's pretty much undefined how the keys will come out -- just like Perl's
-built-in hashes.
-
 =head1 CODE COVERAGE
 
-We use B<Devel::Cover> to test the code coverage of our tests, below is the
-B<Devel::Cover> report on this module's test suite.
+B<Devel::Cover> is used to test the code coverage of the tests. Below is the
+B<Devel::Cover> report on this distribution's test suite.
 
   ---------------------------- ------ ------ ------ ------ ------ ------ ------
   File                           stmt   bran   cond    sub    pod   time  total
   ---------------------------- ------ ------ ------ ------ ------ ------ ------
-  blib/lib/DBM/Deep.pm           95.1   81.6   70.3  100.0  100.0   33.4   91.0
-  blib/lib/DBM/Deep/Array.pm    100.0   91.1  100.0  100.0    n/a   27.8   98.0
-  blib/lib/DBM/Deep/Engine.pm    97.8   85.6   75.0  100.0    0.0   25.8   90.8
-  blib/lib/DBM/Deep/Hash.pm     100.0   87.5  100.0  100.0    n/a   13.0   97.2
-  Total                          97.5   85.4   76.6  100.0   46.9  100.0   92.5
+  blib/lib/DBM/Deep.pm           96.2   89.0   75.0   95.8   89.5   36.0   92.9
+  blib/lib/DBM/Deep/Array.pm     96.1   88.3  100.0   96.4  100.0   15.9   94.7
+  blib/lib/DBM/Deep/Engine.pm    96.6   86.6   89.5  100.0    0.0   20.0   91.0
+  blib/lib/DBM/Deep/File.pm      99.4   88.3   55.6  100.0    0.0   19.6   89.5
+  blib/lib/DBM/Deep/Hash.pm      98.5   83.3  100.0  100.0  100.0    8.5   96.3
+  Total                          96.9   87.4   81.2   98.0   38.5  100.0   92.1
   ---------------------------- ------ ------ ------ ------ ------ ------ ------
 
 =head1 MORE INFORMATION
 
 Check out the DBM::Deep Google Group at L<http://groups.google.com/group/DBM-Deep>
-or send email to L<DBM-Deep@googlegroups.com>. 
You can also visit #dbm-deep on
+irc.perl.org
 
-=head1 AUTHORS
+The source code repository is at L<http://svn.perl.org/modules/DBM-Deep>
 
-Joseph Huckaby, L<jhuckaby@cpan.org>
+=head1 MAINTAINERS
 
 Rob Kinyon, L<rkinyon@cpan.org>
 
+Originally written by Joseph Huckaby, L<jhuckaby@cpan.org>
+
 Special thanks to Adam Sah and Rich Gaushell!  You know why :-)
 
 =head1 SEE ALSO