X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=lib%2FDBM%2FDeep.pm;h=d34e67513e651ae661bead39dec381e0c8354791;hb=4301e8795213becc8854d413062d49746cdf5a31;hp=8862aea690e92ccaba8eea58e5470280c2a9c0c1;hpb=ec1bce6bdd7722112c8726ef4ec05b40fa1e9893;p=dbsrgits%2FDBM-Deep.git diff --git a/lib/DBM/Deep.pm b/lib/DBM/Deep.pm index 8862aea..d34e675 100644 --- a/lib/DBM/Deep.pm +++ b/lib/DBM/Deep.pm @@ -1,66 +1,32 @@ package DBM::Deep; -## -# DBM::Deep -# -# Description: -# Multi-level database module for storing hash trees, arrays and simple -# key/value pairs into FTP-able, cross-platform binary database files. -# -# Type `perldoc DBM::Deep` for complete documentation. -# -# Usage Examples: -# my %db; -# tie %db, 'DBM::Deep', 'my_database.db'; # standard tie() method -# -# my $db = new DBM::Deep( 'my_database.db' ); # preferred OO method -# -# $db->{my_scalar} = 'hello world'; -# $db->{my_hash} = { larry => 'genius', hashes => 'fast' }; -# $db->{my_array} = [ 1, 2, 3, time() ]; -# $db->{my_complex} = [ 'hello', { perl => 'rules' }, 42, 99 ]; -# push @{$db->{my_array}}, 'another value'; -# my @key_list = keys %{$db->{my_hash}}; -# print "This module " . $db->{my_complex}->[1]->{perl} . "!\n"; -# -# Copyright: -# (c) 2002-2006 Joseph Huckaby. All Rights Reserved. -# This program is free software; you can redistribute it and/or -# modify it under the same terms as Perl itself. -## +use 5.006_000; use strict; +use warnings; + +our $VERSION = q(1.0006); + +use Fcntl qw( :flock ); -use Fcntl qw( :DEFAULT :flock :seek ); use Digest::MD5 (); +use FileHandle::Fmode (); use Scalar::Util (); use DBM::Deep::Engine; +use DBM::Deep::File; -use vars qw( $VERSION ); -$VERSION = q(0.99_01); - - -## -# Setup file and tag signatures. These should never change. -## -sub SIG_FILE () { 'DPDB' } -sub SIG_HASH () { 'H' } -sub SIG_ARRAY () { 'A' } -sub SIG_SCALAR () { 'S' } -sub SIG_NULL () { 'N' } -sub SIG_DATA () { 'D' } -sub SIG_INDEX () { 'I' } -sub SIG_BLIST () { 'B' } -sub SIG_SIZE () { 1 } +use overload + '""' => sub { overload::StrVal( $_[0] ) }, + fallback => 1; ## # Setup constants for users to pass to new() ## -sub TYPE_HASH () { SIG_HASH } -sub TYPE_ARRAY () { SIG_ARRAY } -sub TYPE_SCALAR () { SIG_SCALAR } +sub TYPE_HASH () { DBM::Deep::Engine->SIG_HASH } +sub TYPE_ARRAY () { DBM::Deep::Engine->SIG_ARRAY } +# This is used in all the children of this class in their TIE methods. sub _get_args { my $proto = shift; @@ -71,13 +37,13 @@ sub _get_args { } $args = {@_}; } - elsif ( ref $_[0] ) { + elsif ( ref $_[0] ) { unless ( eval { local $SIG{'__DIE__'}; %{$_[0]} || 1 } ) { $proto->_throw_error( "Not a hashref in args to " . (caller(1))[2] ); } $args = $_[0]; } - else { + else { $args = { file => shift }; } @@ -85,59 +51,75 @@ sub _get_args { } sub new { - ## - # Class constructor method for Perl OO interface. - # Calls tie() and returns blessed reference to tied hash or array, - # providing a hybrid OO/tie interface. - ## - my $class = shift; - my $args = $class->_get_args( @_ ); - - ## - # Check if we want a tied hash or array. - ## - my $self; - if (defined($args->{type}) && $args->{type} eq TYPE_ARRAY) { + ## + # Class constructor method for Perl OO interface. + # Calls tie() and returns blessed reference to tied hash or array, + # providing a hybrid OO/tie interface. + ## + my $class = shift; + my $args = $class->_get_args( @_ ); + + ## + # Check if we want a tied hash or array. + ## + my $self; + if (defined($args->{type}) && $args->{type} eq TYPE_ARRAY) { $class = 'DBM::Deep::Array'; require DBM::Deep::Array; - tie @$self, $class, %$args; - } - else { + tie @$self, $class, %$args; + } + else { $class = 'DBM::Deep::Hash'; require DBM::Deep::Hash; - tie %$self, $class, %$args; - } + tie %$self, $class, %$args; + } - return bless $self, $class; + return bless $self, $class; } +# This initializer is called from the various TIE* methods. new() calls tie(), +# which allows for a single point of entry. sub _init { - ## - # Setup $self and bless into this class. - ## my $class = shift; - my $args = shift; + my ($args) = @_; + + $args->{storage} = DBM::Deep::File->new( $args ) + unless exists $args->{storage}; + + # locking implicitly enables autoflush + if ($args->{locking}) { $args->{autoflush} = 1; } # These are the defaults to be optionally overridden below my $self = bless { type => TYPE_HASH, - base_offset => length(SIG_FILE), - engine => DBM::Deep::Engine->new, + base_offset => undef, + staleness => undef, + + storage => undef, + engine => undef, }, $class; + $args->{engine} = DBM::Deep::Engine->new( { %{$args}, obj => $self } ) + unless exists $args->{engine}; + + # Grab the parameters we want to use foreach my $param ( keys %$self ) { next unless exists $args->{$param}; - $self->{$param} = delete $args->{$param} + $self->{$param} = $args->{$param}; } - - # locking implicitly enables autoflush - if ($args->{locking}) { $args->{autoflush} = 1; } - - $self->{root} = exists $args->{root} - ? $args->{root} - : DBM::Deep::_::Root->new( $args ); - $self->{engine}->setup_fh( $self ); + eval { + local $SIG{'__DIE__'}; + + $self->lock; + $self->_engine->setup_fh( $self ); + $self->_storage->set_inode; + $self->unlock; + }; if ( $@ ) { + my $e = $@; + eval { local $SIG{'__DIE__'}; $self->unlock; }; + die $e; + } return $self; } @@ -154,66 +136,14 @@ sub TIEARRAY { return DBM::Deep::Array->TIEARRAY( @_ ); } -#XXX Unneeded now ... -#sub DESTROY { -#} - sub lock { - ## - # If db locking is set, flock() the db file. If called multiple - # times before unlock(), then the same number of unlocks() must - # be called before the lock is released. - ## - my $self = $_[0]->_get_self; - my $type = $_[1]; - $type = LOCK_EX unless defined $type; - - if (!defined($self->_fh)) { return; } - - if ($self->_root->{locking}) { - if (!$self->_root->{locked}) { - flock($self->_fh, $type); - - # refresh end counter in case file has changed size - my @stats = stat($self->_root->{file}); - $self->_root->{end} = $stats[7]; - - # double-check file inode, in case another process - # has optimize()d our file while we were waiting. - if ($stats[1] != $self->_root->{inode}) { - $self->{engine}->close( $self ); - $self->{engine}->setup_fh( $self ); - flock($self->_fh, $type); # re-lock - - # This may not be necessary after re-opening - $self->_root->{end} = (stat($self->_fh))[7]; # re-end - } - } - $self->_root->{locked}++; - - return 1; - } - - return; + my $self = shift->_get_self; + return $self->_storage->lock( $self, @_ ); } sub unlock { - ## - # If db locking is set, unlock the db file. See note in lock() - # regarding calling lock() multiple times. - ## - my $self = $_[0]->_get_self; - - if (!defined($self->_fh)) { return; } - - if ($self->_root->{locking} && $self->_root->{locked} > 0) { - $self->_root->{locked}--; - if (!$self->_root->{locked}) { flock($self->_fh, LOCK_UN); } - - return 1; - } - - return; + my $self = shift->_get_self; + return $self->_storage->unlock( $self, @_ ); } sub _copy_value { @@ -224,8 +154,7 @@ sub _copy_value { ${$spot} = $value; } elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep' ) } ) { - my $type = $value->_type; - ${$spot} = $type eq TYPE_HASH ? {} : []; + ${$spot} = $value->_repr; $value->_copy_node( ${$spot} ); } else { @@ -244,157 +173,204 @@ sub _copy_value { return 1; } -sub _copy_node { - ## - # Copy single level of keys or elements to new DB handle. - # Recurse for nested structures - ## +#sub _copy_node { +# die "Must be implemented in a child class\n"; +#} +# +#sub _repr { +# die "Must be implemented in a child class\n"; +#} + +sub export { + ## + # Recursively export into standard Perl hashes and arrays. + ## my $self = shift->_get_self; - my ($db_temp) = @_; - - if ($self->_type eq TYPE_HASH) { - my $key = $self->first_key(); - while ($key) { - my $value = $self->get($key); - $self->_copy_value( \$db_temp->{$key}, $value ); - $key = $self->next_key($key); - } - } - else { - my $length = $self->length(); - for (my $index = 0; $index < $length; $index++) { - my $value = $self->get($index); - $self->_copy_value( \$db_temp->[$index], $value ); - } - } - return 1; + my $temp = $self->_repr; + + $self->lock(); + $self->_copy_node( $temp ); + $self->unlock(); + + my $classname = $self->_engine->get_classname( $self ); + if ( defined $classname ) { + bless $temp, $classname; + } + + return $temp; } -sub export { - ## - # Recursively export into standard Perl hashes and arrays. - ## - my $self = $_[0]->_get_self; - - my $temp; - if ($self->_type eq TYPE_HASH) { $temp = {}; } - elsif ($self->_type eq TYPE_ARRAY) { $temp = []; } - - $self->lock(); - $self->_copy_node( $temp ); - $self->unlock(); - - return $temp; +sub _check_legality { + my $self = shift; + my ($val) = @_; + + my $r = Scalar::Util::reftype( $val ); + + return $r if !defined $r || '' eq $r; + return $r if 'HASH' eq $r; + return $r if 'ARRAY' eq $r; + + DBM::Deep->_throw_error( + "Storage of references of type '$r' is not supported." + ); } sub import { - ## - # Recursively import Perl hash/array structure - ## - #XXX This use of ref() seems to be ok - if (!ref($_[0])) { return; } # Perl calls import() on use -- ignore - - my $self = $_[0]->_get_self; - my $struct = $_[1]; - - #XXX This use of ref() seems to be ok - if (!ref($struct)) { - ## - # struct is not a reference, so just import based on our type - ## - shift @_; - - if ($self->_type eq TYPE_HASH) { $struct = {@_}; } - elsif ($self->_type eq TYPE_ARRAY) { $struct = [@_]; } - } - - my $r = Scalar::Util::reftype($struct) || ''; - if ($r eq "HASH" && $self->_type eq TYPE_HASH) { - foreach my $key (keys %$struct) { $self->put($key, $struct->{$key}); } - } - elsif ($r eq "ARRAY" && $self->_type eq TYPE_ARRAY) { - $self->push( @$struct ); - } - else { - return $self->_throw_error("Cannot import: type mismatch"); - } - - return 1; + # Perl calls import() on use -- ignore + return if !ref $_[0]; + + my $self = shift->_get_self; + my ($struct) = @_; + + my $type = $self->_check_legality( $struct ); + if ( !$type ) { + DBM::Deep->_throw_error( "Cannot import a scalar" ); + } + + if ( substr( $type, 0, 1 ) ne $self->_type ) { + DBM::Deep->_throw_error( + "Cannot import " . ('HASH' eq $type ? 'a hash' : 'an array') + . " into " . ('HASH' eq $type ? 'an array' : 'a hash') + ); + } + + my %seen; + my $recurse; + $recurse = sub { + my ($db, $val) = @_; + + my $obj = 'HASH' eq Scalar::Util::reftype( $db ) ? tied(%$db) : tied(@$db); + $obj ||= $db; + + my $r = $self->_check_legality( $val ); + if ( 'HASH' eq $r ) { + while ( my ($k, $v) = each %$val ) { + my $r = $self->_check_legality( $v ); + if ( $r ) { + my $temp = 'HASH' eq $r ? {} : []; + if ( my $c = Scalar::Util::blessed( $v ) ) { + bless $temp, $c; + } + $obj->put( $k, $temp ); + $recurse->( $temp, $v ); + } + else { + $obj->put( $k, $v ); + } + } + } + elsif ( 'ARRAY' eq $r ) { + foreach my $k ( 0 .. $#$val ) { + my $v = $val->[$k]; + my $r = $self->_check_legality( $v ); + if ( $r ) { + my $temp = 'HASH' eq $r ? {} : []; + if ( my $c = Scalar::Util::blessed( $v ) ) { + bless $temp, $c; + } + $obj->put( $k, $temp ); + $recurse->( $temp, $v ); + } + else { + $obj->put( $k, $v ); + } + } + } + }; + $recurse->( $self, $struct ); + + return 1; } +#XXX Need to keep track of who has a fh to this file in order to +#XXX close them all prior to optimize on Win32/cygwin sub optimize { - ## - # Rebuild entire database into new file, then move - # it back on top of original. - ## - my $self = $_[0]->_get_self; + ## + # Rebuild entire database into new file, then move + # it back on top of original. + ## + my $self = shift->_get_self; #XXX Need to create a new test for this -# if ($self->_root->{links} > 1) { -# return $self->_throw_error("Cannot optimize: reference count is greater than 1"); -# } - - my $db_temp = DBM::Deep->new( - file => $self->_root->{file} . '.tmp', - type => $self->_type - ); - if (!$db_temp) { - return $self->_throw_error("Cannot optimize: failed to open temp file: $!"); - } - - $self->lock(); - $self->_copy_node( $db_temp ); - undef $db_temp; - - ## - # Attempt to copy user, group and permissions over to new file - ## - my @stats = stat($self->_fh); - my $perms = $stats[2] & 07777; - my $uid = $stats[4]; - my $gid = $stats[5]; - chown( $uid, $gid, $self->_root->{file} . '.tmp' ); - chmod( $perms, $self->_root->{file} . '.tmp' ); - +# if ($self->_storage->{links} > 1) { +# $self->_throw_error("Cannot optimize: reference count is greater than 1"); +# } + + #XXX Do we have to lock the tempfile? + + #XXX Should we use tempfile() here instead of a hard-coded name? + my $db_temp = DBM::Deep->new( + file => $self->_storage->{file} . '.tmp', + type => $self->_type, + + # Bring over all the parameters that we need to bring over + ( map { $_ => $self->_engine->$_ } qw( + byte_size max_buckets data_sector_size num_txns + )), + ); + + $self->lock(); + $self->_engine->clear_cache; + $self->_copy_node( $db_temp ); + undef $db_temp; + + ## + # Attempt to copy user, group and permissions over to new file + ## + my @stats = stat($self->_fh); + my $perms = $stats[2] & 07777; + my $uid = $stats[4]; + my $gid = $stats[5]; + chown( $uid, $gid, $self->_storage->{file} . '.tmp' ); + chmod( $perms, $self->_storage->{file} . '.tmp' ); + # q.v. perlport for more information on this variable if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) { - ## - # Potential race condition when optmizing on Win32 with locking. - # The Windows filesystem requires that the filehandle be closed - # before it is overwritten with rename(). This could be redone - # with a soft copy. - ## - $self->unlock(); - $self->{engine}->close( $self ); - } - - if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) { - unlink $self->_root->{file} . '.tmp'; - $self->unlock(); - return $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!"); - } - - $self->unlock(); - $self->{engine}->close( $self ); - $self->{engine}->setup_fh( $self ); - - return 1; + ## + # Potential race condition when optmizing on Win32 with locking. + # The Windows filesystem requires that the filehandle be closed + # before it is overwritten with rename(). This could be redone + # with a soft copy. + ## + $self->unlock(); + $self->_storage->close; + } + + if (!rename $self->_storage->{file} . '.tmp', $self->_storage->{file}) { + unlink $self->_storage->{file} . '.tmp'; + $self->unlock(); + $self->_throw_error("Optimize failed: Cannot copy temp file over original: $!"); + } + + $self->unlock(); + $self->_storage->close; + + $self->_storage->open; + $self->lock(); + $self->_engine->setup_fh( $self ); + $self->unlock(); + + return 1; } sub clone { - ## - # Make copy of object and return - ## - my $self = $_[0]->_get_self; - - return DBM::Deep->new( - type => $self->_type, - base_offset => $self->_base_offset, - root => $self->_root - ); + ## + # Make copy of object and return + ## + my $self = shift->_get_self; + + return DBM::Deep->new( + type => $self->_type, + base_offset => $self->_base_offset, + staleness => $self->_staleness, + storage => $self->_storage, + engine => $self->_engine, + ); } +#XXX Migrate this to the engine, where it really belongs and go through some +# API - stop poking in the innards of someone else.. { my %is_legal_filter = map { $_ => ~~1, @@ -404,1484 +380,240 @@ sub clone { ); sub set_filter { - ## - # Setup filter function for storing or fetching the key or value - ## - my $self = $_[0]->_get_self; - my $type = lc $_[1]; - my $func = $_[2] ? $_[2] : undef; - + my $self = shift->_get_self; + my $type = lc shift; + my $func = shift; + if ( $is_legal_filter{$type} ) { - $self->_root->{"filter_$type"} = $func; + $self->_storage->{"filter_$type"} = $func; return 1; } return; } -} -## -# Accessor methods -## - -sub _root { - ## - # Get access to the root structure - ## - my $self = $_[0]->_get_self; - return $self->{root}; + sub filter_store_key { $_[0]->set_filter( store_key => $_[1] ); } + sub filter_store_value { $_[0]->set_filter( store_value => $_[1] ); } + sub filter_fetch_key { $_[0]->set_filter( fetch_key => $_[1] ); } + sub filter_fetch_value { $_[0]->set_filter( fetch_value => $_[1] ); } } -sub _fh { - ## - # Get access to the raw fh - ## - #XXX It will be useful, though, when we split out HASH and ARRAY - my $self = $_[0]->_get_self; - return $self->_root->{fh}; +sub begin_work { + my $self = shift->_get_self; + return $self->_engine->begin_work( $self, @_ ); } -sub _type { - ## - # Get type of current node (TYPE_HASH or TYPE_ARRAY) - ## - my $self = $_[0]->_get_self; - return $self->{type}; +sub rollback { + my $self = shift->_get_self; + return $self->_engine->rollback( $self, @_ ); } -sub _base_offset { - ## - # Get base_offset of current node (TYPE_HASH or TYPE_ARRAY) - ## - my $self = $_[0]->_get_self; - return $self->{base_offset}; +sub commit { + my $self = shift->_get_self; + return $self->_engine->commit( $self, @_ ); } ## -# Utility methods +# Accessor methods ## -sub _throw_error { - die "DBM::Deep: $_[1]\n"; -} - -sub _is_writable { - my $fh = shift; - (O_WRONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0); +sub _engine { + my $self = $_[0]->_get_self; + return $self->{engine}; } -#sub _is_readable { -# my $fh = shift; -# (O_RDONLY | O_RDWR) & fcntl( $fh, F_GETFL, my $slush = 0); -#} - -## -# tie() methods (hashes and arrays) -## - -sub STORE { - ## - # Store single hash key/value or array element in database. - ## +sub _storage { my $self = $_[0]->_get_self; - my $key = $_[1]; - - # User may be storing a hash, in which case we do not want it run - # through the filtering system - my $value = ($self->_root->{filter_store_value} && !ref($_[2])) - ? $self->_root->{filter_store_value}->($_[2]) - : $_[2]; - - my $md5 = $self->{engine}{digest}->($key); - - unless ( _is_writable( $self->_fh ) ) { - $self->_throw_error( 'Cannot write to a readonly filehandle' ); - } - - ## - # Request exclusive lock for writing - ## - $self->lock( LOCK_EX ); - - my $fh = $self->_fh; - - ## - # Locate offset for bucket list using digest index system - ## - my $tag = $self->{engine}->load_tag($self, $self->_base_offset); - if (!$tag) { - $tag = $self->{engine}->create_tag($self, $self->_base_offset, SIG_INDEX, chr(0) x $self->{engine}{index_size}); - } - - my $ch = 0; - while ($tag->{signature} ne SIG_BLIST) { - my $num = ord(substr($md5, $ch, 1)); - - my $ref_loc = $tag->{offset} + ($num * $DBM::Deep::Engine::LONG_SIZE); - my $new_tag = $self->{engine}->index_lookup($self, $tag, $num); - - if (!$new_tag) { - seek($fh, $ref_loc + $self->_root->{file_offset}, SEEK_SET); - print( $fh pack($DBM::Deep::Engine::LONG_PACK, $self->_root->{end}) ); - - $tag = $self->{engine}->create_tag($self, $self->_root->{end}, SIG_BLIST, chr(0) x $DBM::Deep::Engine::BUCKET_LIST_SIZE); - - $tag->{ref_loc} = $ref_loc; - $tag->{ch} = $ch; - - last; - } - else { - $tag = $new_tag; - - $tag->{ref_loc} = $ref_loc; - $tag->{ch} = $ch; - } - $ch++; - } - - ## - # Add key/value to bucket list - ## - my $result = $self->{engine}->add_bucket( $self, $tag, $md5, $key, $value ); - - $self->unlock(); - - return $result; + return $self->{storage}; } -sub FETCH { - ## - # Fetch single value or element given plain key or array index - ## - my $self = shift->_get_self; - my $key = shift; - - my $md5 = $self->{engine}{digest}->($key); - - ## - # Request shared lock for reading - ## - $self->lock( LOCK_SH ); - - my $tag = $self->{engine}->find_bucket_list( $self, $md5 ); - if (!$tag) { - $self->unlock(); - return; - } - - ## - # Get value from bucket list - ## - my $result = $self->{engine}->get_bucket_value( $self, $tag, $md5 ); - - $self->unlock(); - - #XXX What is ref() checking here? - #YYY Filters only apply on scalar values, so the ref check is making - #YYY sure the fetched bucket is a scalar, not a child hash or array. - return ($result && !ref($result) && $self->_root->{filter_fetch_value}) - ? $self->_root->{filter_fetch_value}->($result) - : $result; +sub _type { + my $self = $_[0]->_get_self; + return $self->{type}; } -sub DELETE { - ## - # Delete single key/value pair or element given plain key or array index - ## +sub _base_offset { my $self = $_[0]->_get_self; - my $key = $_[1]; - - my $md5 = $self->{engine}{digest}->($key); - - ## - # Request exclusive lock for writing - ## - $self->lock( LOCK_EX ); - - my $tag = $self->{engine}->find_bucket_list( $self, $md5 ); - if (!$tag) { - $self->unlock(); - return; - } - - ## - # Delete bucket - ## - my $value = $self->{engine}->get_bucket_value($self, $tag, $md5 ); - if ($value && !ref($value) && $self->_root->{filter_fetch_value}) { - $value = $self->_root->{filter_fetch_value}->($value); - } - - my $result = $self->{engine}->delete_bucket( $self, $tag, $md5 ); - - ## - # If this object is an array and the key deleted was on the end of the stack, - # decrement the length variable. - ## - - $self->unlock(); - - return $value; + return $self->{base_offset}; } -sub EXISTS { - ## - # Check if a single key or element exists given plain key or array index - ## +sub _staleness { my $self = $_[0]->_get_self; - my $key = $_[1]; - - my $md5 = $self->{engine}{digest}->($key); - - ## - # Request shared lock for reading - ## - $self->lock( LOCK_SH ); - - my $tag = $self->{engine}->find_bucket_list( $self, $md5 ); - - ## - # For some reason, the built-in exists() function returns '' for false - ## - if (!$tag) { - $self->unlock(); - return ''; - } - - ## - # Check if bucket exists and return 1 or '' - ## - my $result = $self->{engine}->bucket_exists( $self, $tag, $md5 ) || ''; - - $self->unlock(); - - return $result; + return $self->{staleness}; } -sub CLEAR { - ## - # Clear all keys from hash, or all elements from array. - ## +sub _fh { my $self = $_[0]->_get_self; - - ## - # Request exclusive lock for writing - ## - $self->lock( LOCK_EX ); - - my $fh = $self->_fh; - - seek($fh, $self->_base_offset + $self->_root->{file_offset}, SEEK_SET); - if (eof $fh) { - $self->unlock(); - return; - } - - $self->{engine}->create_tag($self, $self->_base_offset, $self->_type, chr(0) x $self->{engine}{index_size}); - - $self->unlock(); - - return 1; + return $self->_storage->{fh}; } ## -# Public method aliases +# Utility methods ## -sub put { (shift)->STORE( @_ ) } -sub store { (shift)->STORE( @_ ) } -sub get { (shift)->FETCH( @_ ) } -sub fetch { (shift)->FETCH( @_ ) } -sub delete { (shift)->DELETE( @_ ) } -sub exists { (shift)->EXISTS( @_ ) } -sub clear { (shift)->CLEAR( @_ ) } - -package DBM::Deep::_::Root; -sub new { - my $class = shift; - my ($args) = @_; - - my $self = bless { - file => undef, - fh => undef, - file_offset => 0, - end => 0, - autoflush => undef, - locking => undef, - locked => 0, - filter_store_key => undef, - filter_store_value => undef, - filter_fetch_key => undef, - filter_fetch_value => undef, - autobless => undef, - %$args, - }, $class; +sub _throw_error { + my $n = 0; + while( 1 ) { + my @caller = caller( ++$n ); + next if $caller[0] =~ m/^DBM::Deep/; - if ( $self->{fh} && !$self->{file_offset} ) { - $self->{file_offset} = tell( $self->{fh} ); + die "DBM::Deep: $_[1] at $0 line $caller[2]\n"; } - - return $self; -} - -sub DESTROY { - my $self = shift; - return unless $self; - - close $self->{fh} if $self->{fh}; - - return; } -1; - -__END__ - -=head1 NAME - -DBM::Deep - A pure perl multi-level hash/array DBM - -=head1 SYNOPSIS - - use DBM::Deep; - my $db = DBM::Deep->new( "foo.db" ); - - $db->{key} = 'value'; # tie() style - print $db->{key}; - - $db->put('key' => 'value'); # OO style - print $db->get('key'); - - # true multi-level support - $db->{my_complex} = [ - 'hello', { perl => 'rules' }, - 42, 99, - ]; - -=head1 DESCRIPTION - -A unique flat-file database module, written in pure perl. True -multi-level hash/array support (unlike MLDBM, which is faked), hybrid -OO / tie() interface, cross-platform FTPable files, and quite fast. Can -handle millions of keys and unlimited hash levels without significant -slow-down. Written from the ground-up in pure perl -- this is NOT a -wrapper around a C-based DBM. Out-of-the-box compatibility with Unix, -Mac OS X and Windows. - -=head1 INSTALLATION - -Hopefully you are using Perl's excellent CPAN module, which will download -and install the module for you. If not, get the tarball, and run these -commands: - - tar zxf DBM-Deep-* - cd DBM-Deep-* - perl Makefile.PL - make - make test - make install - -=head1 SETUP - -Construction can be done OO-style (which is the recommended way), or using -Perl's tie() function. Both are examined here. - -=head2 OO CONSTRUCTION - -The recommended way to construct a DBM::Deep object is to use the new() -method, which gets you a blessed, tied hash or array reference. - - my $db = DBM::Deep->new( "foo.db" ); - -This opens a new database handle, mapped to the file "foo.db". If this -file does not exist, it will automatically be created. DB files are -opened in "r+" (read/write) mode, and the type of object returned is a -hash, unless otherwise specified (see L below). - -You can pass a number of options to the constructor to specify things like -locking, autoflush, etc. This is done by passing an inline hash: - - my $db = DBM::Deep->new( - file => "foo.db", - locking => 1, - autoflush => 1 - ); - -Notice that the filename is now specified I the hash with -the "file" parameter, as opposed to being the sole argument to the -constructor. This is required if any options are specified. -See L below for the complete list. - - - -You can also start with an array instead of a hash. For this, you must -specify the C parameter: - - my $db = DBM::Deep->new( - file => "foo.db", - type => DBM::Deep->TYPE_ARRAY - ); - -B Specifing the C parameter only takes effect when beginning -a new DB file. If you create a DBM::Deep object with an existing file, the -C will be loaded from the file header, and an error will be thrown if -the wrong type is passed in. - -=head2 TIE CONSTRUCTION - -Alternately, you can create a DBM::Deep handle by using Perl's built-in -tie() function. The object returned from tie() can be used to call methods, -such as lock() and unlock(), but cannot be used to assign to the DBM::Deep -file (as expected with most tie'd objects). - - my %hash; - my $db = tie %hash, "DBM::Deep", "foo.db"; - - my @array; - my $db = tie @array, "DBM::Deep", "bar.db"; - -As with the OO constructor, you can replace the DB filename parameter with -a hash containing one or more options (see L just below for the -complete list). - - tie %hash, "DBM::Deep", { - file => "foo.db", - locking => 1, - autoflush => 1 - }; - -=head2 OPTIONS - -There are a number of options that can be passed in when constructing your -DBM::Deep objects. These apply to both the OO- and tie- based approaches. - -=over - -=item * file - -Filename of the DB file to link the handle to. You can pass a full absolute -filesystem path, partial path, or a plain filename if the file is in the -current working directory. This is a required parameter (though q.v. fh). - -=item * fh - -If you want, you can pass in the fh instead of the file. This is most useful for doing -something like: - - my $db = DBM::Deep->new( { fh => \*DATA } ); - -You are responsible for making sure that the fh has been opened appropriately for your -needs. If you open it read-only and attempt to write, an exception will be thrown. If you -open it write-only or append-only, an exception will be thrown immediately as DBM::Deep -needs to read from the fh. - -=item * file_offset - -This is the offset within the file that the DBM::Deep db starts. Most of the time, you will -not need to set this. However, it's there if you want it. - -If you pass in fh and do not set this, it will be set appropriately. - -=item * type - -This parameter specifies what type of object to create, a hash or array. Use -one of these two constants: CTYPE_HASH> or CTYPE_ARRAY>. -This only takes effect when beginning a new file. This is an optional -parameter, and defaults to CTYPE_HASH>. - -=item * locking - -Specifies whether locking is to be enabled. DBM::Deep uses Perl's Fnctl flock() -function to lock the database in exclusive mode for writes, and shared mode for -reads. Pass any true value to enable. This affects the base DB handle I that use the same DB file. This is an optional -parameter, and defaults to 0 (disabled). See L below for more. - -=item * autoflush - -Specifies whether autoflush is to be enabled on the underlying filehandle. -This obviously slows down write operations, but is required if you may have -multiple processes accessing the same DB file (also consider enable I). -Pass any true value to enable. This is an optional parameter, and defaults to 0 -(disabled). - -=item * autobless - -If I mode is enabled, DBM::Deep will preserve blessed hashes, and -restore them when fetched. This is an B feature, and does have -side-effects. Basically, when hashes are re-blessed into their original -classes, they are no longer blessed into the DBM::Deep class! So you won't be -able to call any DBM::Deep methods on them. You have been warned. -This is an optional parameter, and defaults to 0 (disabled). - -=item * filter_* - -See L below. - -=back - -=head1 TIE INTERFACE - -With DBM::Deep you can access your databases using Perl's standard hash/array -syntax. Because all DBM::Deep objects are I to hashes or arrays, you can -treat them as such. DBM::Deep will intercept all reads/writes and direct them -to the right place -- the DB file. This has nothing to do with the -L section above. This simply tells you how to use DBM::Deep -using regular hashes and arrays, rather than calling functions like C -and C (although those work too). It is entirely up to you how to want -to access your databases. - -=head2 HASHES - -You can treat any DBM::Deep object like a normal Perl hash reference. Add keys, -or even nested hashes (or arrays) using standard Perl syntax: - - my $db = DBM::Deep->new( "foo.db" ); - - $db->{mykey} = "myvalue"; - $db->{myhash} = {}; - $db->{myhash}->{subkey} = "subvalue"; - - print $db->{myhash}->{subkey} . "\n"; - -You can even step through hash keys using the normal Perl C function: - - foreach my $key (keys %$db) { - print "$key: " . $db->{$key} . "\n"; - } - -Remember that Perl's C function extracts I key from the hash and -pushes them onto an array, all before the loop even begins. If you have an -extra large hash, this may exhaust Perl's memory. Instead, consider using -Perl's C function, which pulls keys/values one at a time, using very -little memory: - - while (my ($key, $value) = each %$db) { - print "$key: $value\n"; - } - -Please note that when using C, you should always pass a direct -hash reference, not a lookup. Meaning, you should B do this: - - # NEVER DO THIS - while (my ($key, $value) = each %{$db->{foo}}) { # BAD - -This causes an infinite loop, because for each iteration, Perl is calling -FETCH() on the $db handle, resulting in a "new" hash for foo every time, so -it effectively keeps returning the first key over and over again. Instead, -assign a temporary variable to C<$db->{foo}>, then pass that to each(). - -=head2 ARRAYS - -As with hashes, you can treat any DBM::Deep object like a normal Perl array -reference. This includes inserting, removing and manipulating elements, -and the C, C, C, C and C functions. -The object must have first been created using type CTYPE_ARRAY>, -or simply be a nested array reference inside a hash. Example: - - my $db = DBM::Deep->new( - file => "foo-array.db", - type => DBM::Deep->TYPE_ARRAY - ); - - $db->[0] = "foo"; - push @$db, "bar", "baz"; - unshift @$db, "bah"; - - my $last_elem = pop @$db; # baz - my $first_elem = shift @$db; # bah - my $second_elem = $db->[1]; # bar - - my $num_elements = scalar @$db; - -=head1 OO INTERFACE - -In addition to the I interface, you can also use a standard OO interface -to manipulate all aspects of DBM::Deep databases. Each type of object (hash or -array) has its own methods, but both types share the following common methods: -C, C, C, C and C. - -=over - -=item * new() / clone() - -These are the constructor and copy-functions. - -=item * put() / store() - -Stores a new hash key/value pair, or sets an array element value. Takes two -arguments, the hash key or array index, and the new value. The value can be -a scalar, hash ref or array ref. Returns true on success, false on failure. - - $db->put("foo", "bar"); # for hashes - $db->put(1, "bar"); # for arrays - -=item * get() / fetch() - -Fetches the value of a hash key or array element. Takes one argument: the hash -key or array index. Returns a scalar, hash ref or array ref, depending on the -data type stored. - - my $value = $db->get("foo"); # for hashes - my $value = $db->get(1); # for arrays - -=item * exists() - -Checks if a hash key or array index exists. Takes one argument: the hash key -or array index. Returns true if it exists, false if not. - - if ($db->exists("foo")) { print "yay!\n"; } # for hashes - if ($db->exists(1)) { print "yay!\n"; } # for arrays - -=item * delete() - -Deletes one hash key/value pair or array element. Takes one argument: the hash -key or array index. Returns true on success, false if not found. For arrays, -the remaining elements located after the deleted element are NOT moved over. -The deleted element is essentially just undefined, which is exactly how Perl's -internal arrays work. Please note that the space occupied by the deleted -key/value or element is B reused again -- see L -below for details and workarounds. - - $db->delete("foo"); # for hashes - $db->delete(1); # for arrays - -=item * clear() - -Deletes B hash keys or array elements. Takes no arguments. No return -value. Please note that the space occupied by the deleted keys/values or -elements is B reused again -- see L below for -details and workarounds. - - $db->clear(); # hashes or arrays - -=item * lock() / unlock() - -q.v. Locking. - -=item * optimize() - -Recover lost disk space. - -=item * import() / export() - -Data going in and out. - -=item * set_digest() / set_pack() / set_filter() - -q.v. adjusting the interal parameters. - -=back - -=head2 HASHES - -For hashes, DBM::Deep supports all the common methods described above, and the -following additional methods: C and C. - -=over - -=item * first_key() +sub STORE { + ## + # Store single hash key/value or array element in database. + ## + my $self = shift->_get_self; + my ($key, $value) = @_; -Returns the "first" key in the hash. As with built-in Perl hashes, keys are -fetched in an undefined order (which appears random). Takes no arguments, -returns the key as a scalar value. + if ( !FileHandle::Fmode::is_W( $self->_fh ) ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } - my $key = $db->first_key(); + ## + # Request exclusive lock for writing + ## + $self->lock( LOCK_EX ); -=item * next_key() + # User may be storing a complex value, in which case we do not want it run + # through the filtering system. + if ( !ref($value) && $self->_storage->{filter_store_value} ) { + $value = $self->_storage->{filter_store_value}->( $value ); + } -Returns the "next" key in the hash, given the previous one as the sole argument. -Returns undef if there are no more keys to be fetched. + $self->_engine->write_value( $self, $key, $value); - $key = $db->next_key($key); + $self->unlock(); -=back + return 1; +} -Here are some examples of using hashes: +sub FETCH { + ## + # Fetch single value or element given plain key or array index + ## + my $self = shift->_get_self; + my ($key) = @_; - my $db = DBM::Deep->new( "foo.db" ); - - $db->put("foo", "bar"); - print "foo: " . $db->get("foo") . "\n"; - - $db->put("baz", {}); # new child hash ref - $db->get("baz")->put("buz", "biz"); - print "buz: " . $db->get("baz")->get("buz") . "\n"; - - my $key = $db->first_key(); - while ($key) { - print "$key: " . $db->get($key) . "\n"; - $key = $db->next_key($key); - } - - if ($db->exists("foo")) { $db->delete("foo"); } + ## + # Request shared lock for reading + ## + $self->lock( LOCK_SH ); -=head2 ARRAYS + my $result = $self->_engine->read_value( $self, $key); -For arrays, DBM::Deep supports all the common methods described above, and the -following additional methods: C, C, C, C, -C and C. + $self->unlock(); -=over + # Filters only apply to scalar values, so the ref check is making + # sure the fetched bucket is a scalar, not a child hash or array. + return ($result && !ref($result) && $self->_storage->{filter_fetch_value}) + ? $self->_storage->{filter_fetch_value}->($result) + : $result; +} -=item * length() +sub DELETE { + ## + # Delete single key/value pair or element given plain key or array index + ## + my $self = shift->_get_self; + my ($key) = @_; -Returns the number of elements in the array. Takes no arguments. + if ( !FileHandle::Fmode::is_W( $self->_fh ) ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } - my $len = $db->length(); + ## + # Request exclusive lock for writing + ## + $self->lock( LOCK_EX ); -=item * push() + ## + # Delete bucket + ## + my $value = $self->_engine->delete_key( $self, $key); -Adds one or more elements onto the end of the array. Accepts scalars, hash -refs or array refs. No return value. + if (defined $value && !ref($value) && $self->_storage->{filter_fetch_value}) { + $value = $self->_storage->{filter_fetch_value}->($value); + } - $db->push("foo", "bar", {}); + $self->unlock(); -=item * pop() + return $value; +} -Fetches the last element in the array, and deletes it. Takes no arguments. -Returns undef if array is empty. Returns the element value. +sub EXISTS { + ## + # Check if a single key or element exists given plain key or array index + ## + my $self = shift->_get_self; + my ($key) = @_; - my $elem = $db->pop(); + ## + # Request shared lock for reading + ## + $self->lock( LOCK_SH ); -=item * shift() + my $result = $self->_engine->key_exists( $self, $key ); -Fetches the first element in the array, deletes it, then shifts all the -remaining elements over to take up the space. Returns the element value. This -method is not recommended with large arrays -- see L below for -details. + $self->unlock(); - my $elem = $db->shift(); + return $result; +} -=item * unshift() +sub CLEAR { + ## + # Clear all keys from hash, or all elements from array. + ## + my $self = shift->_get_self; -Inserts one or more elements onto the beginning of the array, shifting all -existing elements over to make room. Accepts scalars, hash refs or array refs. -No return value. This method is not recommended with large arrays -- see - below for details. + if ( !FileHandle::Fmode::is_W( $self->_fh ) ) { + $self->_throw_error( 'Cannot write to a readonly filehandle' ); + } - $db->unshift("foo", "bar", {}); + ## + # Request exclusive lock for writing + ## + $self->lock( LOCK_EX ); + + #XXX Rewrite this dreck to do it in the engine as a tight loop vs. + # iterating over keys - such a WASTE - is this required for transactional + # clearning?! Surely that can be detected in the engine ... + if ( $self->_type eq TYPE_HASH ) { + my $key = $self->first_key; + while ( $key ) { + # Retrieve the key before deleting because we depend on next_key + my $next_key = $self->next_key( $key ); + $self->_engine->delete_key( $self, $key, $key ); + $key = $next_key; + } + } + else { + my $size = $self->FETCHSIZE; + for my $key ( 0 .. $size - 1 ) { + $self->_engine->delete_key( $self, $key, $key ); + } + $self->STORESIZE( 0 ); + } -=item * splice() + $self->unlock(); -Performs exactly like Perl's built-in function of the same name. See L for usage -- it is too complicated to document here. This method is -not recommended with large arrays -- see L below for details. + return 1; +} -=back +## +# Public method aliases +## +sub put { (shift)->STORE( @_ ) } +sub store { (shift)->STORE( @_ ) } +sub get { (shift)->FETCH( @_ ) } +sub fetch { (shift)->FETCH( @_ ) } +sub delete { (shift)->DELETE( @_ ) } +sub exists { (shift)->EXISTS( @_ ) } +sub clear { (shift)->CLEAR( @_ ) } -Here are some examples of using arrays: +sub _dump_file {shift->_get_self->_engine->_dump_file;} - my $db = DBM::Deep->new( - file => "foo.db", - type => DBM::Deep->TYPE_ARRAY - ); - - $db->push("bar", "baz"); - $db->unshift("foo"); - $db->put(3, "buz"); - - my $len = $db->length(); - print "length: $len\n"; # 4 - - for (my $k=0; $k<$len; $k++) { - print "$k: " . $db->get($k) . "\n"; - } - - $db->splice(1, 2, "biz", "baf"); - - while (my $elem = shift @$db) { - print "shifted: $elem\n"; - } - -=head1 LOCKING - -Enable automatic file locking by passing a true value to the C -parameter when constructing your DBM::Deep object (see L above). - - my $db = DBM::Deep->new( - file => "foo.db", - locking => 1 - ); - -This causes DBM::Deep to C the underlying filehandle with exclusive -mode for writes, and shared mode for reads. This is required if you have -multiple processes accessing the same database file, to avoid file corruption. -Please note that C does NOT work for files over NFS. See L below for more. - -=head2 EXPLICIT LOCKING - -You can explicitly lock a database, so it remains locked for multiple -transactions. This is done by calling the C method, and passing an -optional lock mode argument (defaults to exclusive mode). This is particularly -useful for things like counters, where the current value needs to be fetched, -then incremented, then stored again. - - $db->lock(); - my $counter = $db->get("counter"); - $counter++; - $db->put("counter", $counter); - $db->unlock(); - - # or... - - $db->lock(); - $db->{counter}++; - $db->unlock(); - -You can pass C an optional argument, which specifies which mode to use -(exclusive or shared). Use one of these two constants: CLOCK_EX> -or CLOCK_SH>. These are passed directly to C, and are the -same as the constants defined in Perl's C module. - - $db->lock( DBM::Deep->LOCK_SH ); - # something here - $db->unlock(); - -=head1 IMPORTING/EXPORTING - -You can import existing complex structures by calling the C method, -and export an entire database into an in-memory structure using the C -method. Both are examined here. - -=head2 IMPORTING - -Say you have an existing hash with nested hashes/arrays inside it. Instead of -walking the structure and adding keys/elements to the database as you go, -simply pass a reference to the C method. This recursively adds -everything to an existing DBM::Deep object for you. Here is an example: - - my $struct = { - key1 => "value1", - key2 => "value2", - array1 => [ "elem0", "elem1", "elem2" ], - hash1 => { - subkey1 => "subvalue1", - subkey2 => "subvalue2" - } - }; - - my $db = DBM::Deep->new( "foo.db" ); - $db->import( $struct ); - - print $db->{key1} . "\n"; # prints "value1" - -This recursively imports the entire C<$struct> object into C<$db>, including -all nested hashes and arrays. If the DBM::Deep object contains exsiting data, -keys are merged with the existing ones, replacing if they already exist. -The C method can be called on any database level (not just the base -level), and works with both hash and array DB types. - -B Make sure your existing structure has no circular references in it. -These will cause an infinite loop when importing. - -=head2 EXPORTING - -Calling the C method on an existing DBM::Deep object will return -a reference to a new in-memory copy of the database. The export is done -recursively, so all nested hashes/arrays are all exported to standard Perl -objects. Here is an example: - - my $db = DBM::Deep->new( "foo.db" ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - $db->{hash1} = {}; - $db->{hash1}->{subkey1} = "subvalue1"; - $db->{hash1}->{subkey2} = "subvalue2"; - - my $struct = $db->export(); - - print $struct->{key1} . "\n"; # prints "value1" - -This makes a complete copy of the database in memory, and returns a reference -to it. The C method can be called on any database level (not just -the base level), and works with both hash and array DB types. Be careful of -large databases -- you can store a lot more data in a DBM::Deep object than an -in-memory Perl structure. - -B Make sure your database has no circular references in it. -These will cause an infinite loop when exporting. - -=head1 FILTERS - -DBM::Deep has a number of hooks where you can specify your own Perl function -to perform filtering on incoming or outgoing data. This is a perfect -way to extend the engine, and implement things like real-time compression or -encryption. Filtering applies to the base DB level, and all child hashes / -arrays. Filter hooks can be specified when your DBM::Deep object is first -constructed, or by calling the C method at any time. There are -four available filter hooks, described below: - -=over - -=item * filter_store_key - -This filter is called whenever a hash key is stored. It -is passed the incoming key, and expected to return a transformed key. - -=item * filter_store_value - -This filter is called whenever a hash key or array element is stored. It -is passed the incoming value, and expected to return a transformed value. - -=item * filter_fetch_key - -This filter is called whenever a hash key is fetched (i.e. via -C or C). It is passed the transformed key, -and expected to return the plain key. - -=item * filter_fetch_value - -This filter is called whenever a hash key or array element is fetched. -It is passed the transformed value, and expected to return the plain value. - -=back - -Here are the two ways to setup a filter hook: - - my $db = DBM::Deep->new( - file => "foo.db", - filter_store_value => \&my_filter_store, - filter_fetch_value => \&my_filter_fetch - ); - - # or... - - $db->set_filter( "filter_store_value", \&my_filter_store ); - $db->set_filter( "filter_fetch_value", \&my_filter_fetch ); - -Your filter function will be called only when dealing with SCALAR keys or -values. When nested hashes and arrays are being stored/fetched, filtering -is bypassed. Filters are called as static functions, passed a single SCALAR -argument, and expected to return a single SCALAR value. If you want to -remove a filter, set the function reference to C: - - $db->set_filter( "filter_store_value", undef ); - -=head2 REAL-TIME ENCRYPTION EXAMPLE - -Here is a working example that uses the I module to -do real-time encryption / decryption of keys & values with DBM::Deep Filters. -Please visit L for more -on I. You'll also need the I module. - - use DBM::Deep; - use Crypt::Blowfish; - use Crypt::CBC; - - my $cipher = Crypt::CBC->new({ - 'key' => 'my secret key', - 'cipher' => 'Blowfish', - 'iv' => '$KJh#(}q', - 'regenerate_key' => 0, - 'padding' => 'space', - 'prepend_iv' => 0 - }); - - my $db = DBM::Deep->new( - file => "foo-encrypt.db", - filter_store_key => \&my_encrypt, - filter_store_value => \&my_encrypt, - filter_fetch_key => \&my_decrypt, - filter_fetch_value => \&my_decrypt, - ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - print "key1: " . $db->{key1} . "\n"; - print "key2: " . $db->{key2} . "\n"; - - undef $db; - exit; - - sub my_encrypt { - return $cipher->encrypt( $_[0] ); - } - sub my_decrypt { - return $cipher->decrypt( $_[0] ); - } - -=head2 REAL-TIME COMPRESSION EXAMPLE - -Here is a working example that uses the I module to do real-time -compression / decompression of keys & values with DBM::Deep Filters. -Please visit L for -more on I. - - use DBM::Deep; - use Compress::Zlib; - - my $db = DBM::Deep->new( - file => "foo-compress.db", - filter_store_key => \&my_compress, - filter_store_value => \&my_compress, - filter_fetch_key => \&my_decompress, - filter_fetch_value => \&my_decompress, - ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - print "key1: " . $db->{key1} . "\n"; - print "key2: " . $db->{key2} . "\n"; - - undef $db; - exit; - - sub my_compress { - return Compress::Zlib::memGzip( $_[0] ) ; - } - sub my_decompress { - return Compress::Zlib::memGunzip( $_[0] ) ; - } - -B Filtering of keys only applies to hashes. Array "keys" are -actually numerical index numbers, and are not filtered. - -=head1 ERROR HANDLING - -Most DBM::Deep methods return a true value for success, and call die() on -failure. You can wrap calls in an eval block to catch the die. - - my $db = DBM::Deep->new( "foo.db" ); # create hash - eval { $db->push("foo"); }; # ILLEGAL -- push is array-only call - - print $@; # prints error message - -=head1 LARGEFILE SUPPORT - -If you have a 64-bit system, and your Perl is compiled with both LARGEFILE -and 64-bit support, you I be able to create databases larger than 2 GB. -DBM::Deep by default uses 32-bit file offset tags, but these can be changed -by calling the static C method before you do anything else. - - DBM::Deep::set_pack(8, 'Q'); - -This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words -instead of 32-bit longs. After setting these values your DB files have a -theoretical maximum size of 16 XB (exabytes). - -B Changing these values will B work for existing database files. -Only change this for new files, and make sure it stays set consistently -throughout the file's life. If you do set these values, you can no longer -access 32-bit DB files. You can, however, call C to change -back to 32-bit mode. - -B I have not personally tested files > 2 GB -- all my systems have -only a 32-bit Perl. However, I have received user reports that this does -indeed work! - -=head1 LOW-LEVEL ACCESS - -If you require low-level access to the underlying filehandle that DBM::Deep uses, -you can call the C<_fh()> method, which returns the handle: - - my $fh = $db->_fh(); - -This method can be called on the root level of the datbase, or any child -hashes or arrays. All levels share a I structure, which contains things -like the filehandle, a reference counter, and all the options specified -when you created the object. You can get access to this root structure by -calling the C method. - - my $root = $db->_root(); - -This is useful for changing options after the object has already been created, -such as enabling/disabling locking. You can also store your own temporary user -data in this structure (be wary of name collision), which is then accessible from -any child hash or array. - -=head1 CUSTOM DIGEST ALGORITHM - -DBM::Deep by default uses the I (MD5) algorithm for hashing -keys. However you can override this, and use another algorithm (such as SHA-256) -or even write your own. But please note that DBM::Deep currently expects zero -collisions, so your algorithm has to be I, so to speak. -Collision detection may be introduced in a later version. - - - -You can specify a custom digest algorithm by calling the static C -function, passing a reference to a subroutine, and the length of the algorithm's -hashes (in bytes). This is a global static function, which affects ALL DBM::Deep -objects. Here is a working example that uses a 256-bit hash from the -I module. Please see -L for more. - - use DBM::Deep; - use Digest::SHA256; - - my $context = Digest::SHA256::new(256); - - DBM::Deep::set_digest( \&my_digest, 32 ); - - my $db = DBM::Deep->new( "foo-sha.db" ); - - $db->{key1} = "value1"; - $db->{key2} = "value2"; - print "key1: " . $db->{key1} . "\n"; - print "key2: " . $db->{key2} . "\n"; - - undef $db; - exit; - - sub my_digest { - return substr( $context->hash($_[0]), 0, 32 ); - } - -B Your returned digest strings must be B the number -of bytes you specify in the C function (in this case 32). - -=head1 CIRCULAR REFERENCES - -DBM::Deep has B support for circular references. Meaning you -can have a nested hash key or array element that points to a parent object. -This relationship is stored in the DB file, and is preserved between sessions. -Here is an example: - - my $db = DBM::Deep->new( "foo.db" ); - - $db->{foo} = "bar"; - $db->{circle} = $db; # ref to self - - print $db->{foo} . "\n"; # prints "foo" - print $db->{circle}->{foo} . "\n"; # prints "foo" again - -One catch is, passing the object to a function that recursively walks the -object tree (such as I or even the built-in C or -C methods) will result in an infinite loop. The other catch is, -if you fetch the I of a circular reference (i.e. using the C -or C methods), you will get the I, not the -ref's key. This gets even more interesting with the above example, where -the I key points to the base DB object, which technically doesn't -have a key. So I made DBM::Deep return "[base]" as the key name in that -special case. - -=head1 CAVEATS / ISSUES / BUGS - -This section describes all the known issues with DBM::Deep. It you have found -something that is not listed here, please send e-mail to L. - -=head2 UNUSED SPACE RECOVERY - -One major caveat with DBM::Deep is that space occupied by existing keys and -values is not recovered when they are deleted. Meaning if you keep deleting -and adding new keys, your file will continuously grow. I am working on this, -but in the meantime you can call the built-in C method from time to -time (perhaps in a crontab or something) to recover all your unused space. - - $db->optimize(); # returns true on success - -This rebuilds the ENTIRE database into a new file, then moves it on top of -the original. The new file will have no unused space, thus it will take up as -little disk space as possible. Please note that this operation can take -a long time for large files, and you need enough disk space to temporarily hold -2 copies of your DB file. The temporary file is created in the same directory -as the original, named with a ".tmp" extension, and is deleted when the -operation completes. Oh, and if locking is enabled, the DB is automatically -locked for the entire duration of the copy. - -B Only call optimize() on the top-level node of the database, and -make sure there are no child references lying around. DBM::Deep keeps a reference -counter, and if it is greater than 1, optimize() will abort and return undef. - -=head2 AUTOVIVIFICATION - -Unfortunately, autovivification doesn't work with tied hashes. This appears to -be a bug in Perl's tie() system, as I encountered the very same -issue with his I module (see L), -and it is also mentioned in the BUGS section for the I module ). Basically, on a new db file, -this does not work: - - $db->{foo}->{bar} = "hello"; - -Since "foo" doesn't exist, you cannot add "bar" to it. You end up with "foo" -being an empty hash. Try this instead, which works fine: - - $db->{foo} = { bar => "hello" }; - -As of Perl 5.8.7, this bug still exists. I have walked very carefully through -the execution path, and Perl indeed passes an empty hash to the STORE() method. -Probably a bug in Perl. - -=head2 FILE CORRUPTION - -The current level of error handling in DBM::Deep is minimal. Files I checked -for a 32-bit signature when opened, but other corruption in files can cause -segmentation faults. DBM::Deep may try to seek() past the end of a file, or get -stuck in an infinite loop depending on the level of corruption. File write -operations are not checked for failure (for speed), so if you happen to run -out of disk space, DBM::Deep will probably fail in a bad way. These things will -be addressed in a later version of DBM::Deep. - -=head2 DB OVER NFS - -Beware of using DB files over NFS. DBM::Deep uses flock(), which works well on local -filesystems, but will NOT protect you from file corruption over NFS. I've heard -about setting up your NFS server with a locking daemon, then using lockf() to -lock your files, but your mileage may vary there as well. From what I -understand, there is no real way to do it. However, if you need access to the -underlying filehandle in DBM::Deep for using some other kind of locking scheme like -lockf(), see the L section above. - -=head2 COPYING OBJECTS - -Beware of copying tied objects in Perl. Very strange things can happen. -Instead, use DBM::Deep's C method which safely copies the object and -returns a new, blessed, tied hash or array to the same level in the DB. - - my $copy = $db->clone(); - -B: Since clone() here is cloning the object, not the database location, any -modifications to either $db or $copy will be visible in both. - -=head2 LARGE ARRAYS - -Beware of using C, C or C with large arrays. -These functions cause every element in the array to move, which can be murder -on DBM::Deep, as every element has to be fetched from disk, then stored again in -a different location. This will be addressed in the forthcoming version 1.00. - -=head2 WRITEONLY FILES - -If you pass in a filehandle to new(), you may have opened it in either a readonly or -writeonly mode. STORE will verify that the filehandle is writable. However, there -doesn't seem to be a good way to determine if a filehandle is readable. And, if the -filehandle isn't readable, it's not clear what will happen. So, don't do that. - -=head1 PERFORMANCE - -This section discusses DBM::Deep's speed and memory usage. - -=head2 SPEED - -Obviously, DBM::Deep isn't going to be as fast as some C-based DBMs, such as -the almighty I. But it makes up for it in features like true -multi-level hash/array support, and cross-platform FTPable files. Even so, -DBM::Deep is still pretty fast, and the speed stays fairly consistent, even -with huge databases. Here is some test data: - - Adding 1,000,000 keys to new DB file... - - At 100 keys, avg. speed is 2,703 keys/sec - At 200 keys, avg. speed is 2,642 keys/sec - At 300 keys, avg. speed is 2,598 keys/sec - At 400 keys, avg. speed is 2,578 keys/sec - At 500 keys, avg. speed is 2,722 keys/sec - At 600 keys, avg. speed is 2,628 keys/sec - At 700 keys, avg. speed is 2,700 keys/sec - At 800 keys, avg. speed is 2,607 keys/sec - At 900 keys, avg. speed is 2,190 keys/sec - At 1,000 keys, avg. speed is 2,570 keys/sec - At 2,000 keys, avg. speed is 2,417 keys/sec - At 3,000 keys, avg. speed is 1,982 keys/sec - At 4,000 keys, avg. speed is 1,568 keys/sec - At 5,000 keys, avg. speed is 1,533 keys/sec - At 6,000 keys, avg. speed is 1,787 keys/sec - At 7,000 keys, avg. speed is 1,977 keys/sec - At 8,000 keys, avg. speed is 2,028 keys/sec - At 9,000 keys, avg. speed is 2,077 keys/sec - At 10,000 keys, avg. speed is 2,031 keys/sec - At 20,000 keys, avg. speed is 1,970 keys/sec - At 30,000 keys, avg. speed is 2,050 keys/sec - At 40,000 keys, avg. speed is 2,073 keys/sec - At 50,000 keys, avg. speed is 1,973 keys/sec - At 60,000 keys, avg. speed is 1,914 keys/sec - At 70,000 keys, avg. speed is 2,091 keys/sec - At 80,000 keys, avg. speed is 2,103 keys/sec - At 90,000 keys, avg. speed is 1,886 keys/sec - At 100,000 keys, avg. speed is 1,970 keys/sec - At 200,000 keys, avg. speed is 2,053 keys/sec - At 300,000 keys, avg. speed is 1,697 keys/sec - At 400,000 keys, avg. speed is 1,838 keys/sec - At 500,000 keys, avg. speed is 1,941 keys/sec - At 600,000 keys, avg. speed is 1,930 keys/sec - At 700,000 keys, avg. speed is 1,735 keys/sec - At 800,000 keys, avg. speed is 1,795 keys/sec - At 900,000 keys, avg. speed is 1,221 keys/sec - At 1,000,000 keys, avg. speed is 1,077 keys/sec - -This test was performed on a PowerMac G4 1gHz running Mac OS X 10.3.2 & Perl -5.8.1, with an 80GB Ultra ATA/100 HD spinning at 7200RPM. The hash keys and -values were between 6 - 12 chars in length. The DB file ended up at 210MB. -Run time was 12 min 3 sec. - -=head2 MEMORY USAGE - -One of the great things about DBM::Deep is that it uses very little memory. -Even with huge databases (1,000,000+ keys) you will not see much increased -memory on your process. DBM::Deep relies solely on the filesystem for storing -and fetching data. Here is output from I before even opening a -database handle: - - PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND - 22831 root 11 0 2716 2716 1296 R 0.0 0.2 0:07 perl - -Basically the process is taking 2,716K of memory. And here is the same -process after storing and fetching 1,000,000 keys: - - PID USER PRI NI SIZE RSS SHARE STAT %CPU %MEM TIME COMMAND - 22831 root 14 0 2772 2772 1328 R 0.0 0.2 13:32 perl - -Notice the memory usage increased by only 56K. Test was performed on a 700mHz -x86 box running Linux RedHat 7.2 & Perl 5.6.1. - -=head1 DB FILE FORMAT - -In case you were interested in the underlying DB file format, it is documented -here in this section. You don't need to know this to use the module, it's just -included for reference. - -=head2 SIGNATURE - -DBM::Deep files always start with a 32-bit signature to identify the file type. -This is at offset 0. The signature is "DPDB" in network byte order. This is -checked for when the file is opened and an error will be thrown if it's not found. - -=head2 TAG - -The DBM::Deep file is in a I, meaning each section of the file -has a standard header containing the type of data, the length of data, and then -the data itself. The type is a single character (1 byte), the length is a -32-bit unsigned long in network byte order, and the data is, well, the data. -Here is how it unfolds: - -=head2 MASTER INDEX - -Immediately after the 32-bit file signature is the I record. -This is a standard tag header followed by 1024 bytes (in 32-bit mode) or 2048 -bytes (in 64-bit mode) of data. The type is I for hash or I for array, -depending on how the DBM::Deep object was constructed. - -The index works by looking at a I of the hash key (or array index -number). The first 8-bit char of the MD5 signature is the offset into the -index, multipled by 4 in 32-bit mode, or 8 in 64-bit mode. The value of the -index element is a file offset of the next tag for the key/element in question, -which is usually a I tag (see below). - -The next tag I be another index, depending on how many keys/elements -exist. See L below for details. - -=head2 BUCKET LIST - -A I is a collection of 16 MD5 hashes for keys/elements, plus -file offsets to where the actual data is stored. It starts with a standard -tag header, with type I, and a data size of 320 bytes in 32-bit mode, or -384 bytes in 64-bit mode. Each MD5 hash is stored in full (16 bytes), plus -the 32-bit or 64-bit file offset for the I containing the actual data. -When the list fills up, a I operation is performed (See -L below). - -=head2 BUCKET - -A I is a tag containing a key/value pair (in hash mode), or a -index/value pair (in array mode). It starts with a standard tag header with -type I for scalar data (string, binary, etc.), or it could be a nested -hash (type I) or array (type I). The value comes just after the tag -header. The size reported in the tag header is only for the value, but then, -just after the value is another size (32-bit unsigned long) and then the plain -key itself. Since the value is likely to be fetched more often than the plain -key, I figured it would be I faster to store the value first. - -If the type is I (hash) or I (array), the value is another I -record for the nested structure, where the process begins all over again. - -=head2 RE-INDEXING - -After a I grows to 16 records, its allocated space in the file is -exhausted. Then, when another key/element comes in, the list is converted to a -new index record. However, this index will look at the next char in the MD5 -hash, and arrange new Bucket List pointers accordingly. This process is called -I. Basically, a new index tag is created at the file EOF, and all -17 (16 + new one) keys/elements are removed from the old Bucket List and -inserted into the new index. Several new Bucket Lists are created in the -process, as a new MD5 char from the key is being examined (it is unlikely that -the keys will all share the same next char of their MD5s). - -Because of the way the I algorithm works, it is impossible to tell exactly -when the Bucket Lists will turn into indexes, but the first round tends to -happen right around 4,000 keys. You will see a I decrease in -performance here, but it picks back up pretty quick (see L above). Then -it takes B more keys to exhaust the next level of Bucket Lists. It's -right around 900,000 keys. This process can continue nearly indefinitely -- -right up until the point the I signatures start colliding with each other, -and this is B rare -- like winning the lottery 5 times in a row AND -getting struck by lightning while you are walking to cash in your tickets. -Theoretically, since I hashes are 128-bit values, you I have up to -340,282,366,921,000,000,000,000,000,000,000,000,000 keys/elements (I believe -this is 340 unodecillion, but don't quote me). - -=head2 STORING - -When a new key/element is stored, the key (or index number) is first run through -I to get a 128-bit signature (example, in hex: -b05783b0773d894396d475ced9d2f4f6). Then, the I record is checked -for the first char of the signature (in this case I). If it does not exist, -a new I is created for our key (and the next 15 future keys that -happen to also have I as their first MD5 char). The entire MD5 is written -to the I along with the offset of the new I record (EOF at -this point, unless we are replacing an existing I), where the actual -data will be stored. - -=head2 FETCHING - -Fetching an existing key/element involves getting a I of the key -(or index number), then walking along the indexes. If there are enough -keys/elements in this DB level, there might be nested indexes, each linked to -a particular char of the MD5. Finally, a I is pointed to, which -contains up to 16 full MD5 hashes. Each is checked for equality to the key in -question. If we found a match, the I tag is loaded, where the value and -plain key are stored. - -Fetching the plain key occurs when calling the I and I -methods. In this process the indexes are walked systematically, and each key -fetched in increasing MD5 order (which is why it appears random). Once the -I is found, the value is skipped and the plain key returned instead. -B Do not count on keys being fetched as if the MD5 hashes were -alphabetically sorted. This only happens on an index-level -- as soon as the -I are hit, the keys will come out in the order they went in -- -so it's pretty much undefined how the keys will come out -- just like Perl's -built-in hashes. - -=head1 CODE COVERAGE - -We use B to test the code coverage of our tests, below is the -B report on this module's test suite. - - ---------------------------- ------ ------ ------ ------ ------ ------ ------ - File stmt bran cond sub pod time total - ---------------------------- ------ ------ ------ ------ ------ ------ ------ - blib/lib/DBM/Deep.pm 95.2 83.8 70.0 98.2 100.0 58.0 91.0 - blib/lib/DBM/Deep/Array.pm 100.0 91.1 100.0 100.0 n/a 26.7 98.0 - blib/lib/DBM/Deep/Hash.pm 95.3 80.0 100.0 100.0 n/a 15.3 92.4 - Total 96.2 84.8 74.4 98.8 100.0 100.0 92.4 - ---------------------------- ------ ------ ------ ------ ------ ------ ------ - -=head1 MORE INFORMATION - -Check out the DBM::Deep Google Group at L -or send email to L. - -=head1 AUTHORS - -Joseph Huckaby, L - -Rob Kinyon, L - -Special thanks to Adam Sah and Rich Gaushell! You know why :-) - -=head1 SEE ALSO - -perltie(1), Tie::Hash(3), Digest::MD5(3), Fcntl(3), flock(2), lockf(3), nfs(5), -Digest::SHA256(3), Crypt::Blowfish(3), Compress::Zlib(3) - -=head1 LICENSE - -Copyright (c) 2002-2006 Joseph Huckaby. All Rights Reserved. -This is free software, you may use it and distribute it under the -same terms as Perl itself. - -=cut +1; +__END__