# modify it under the same terms as Perl itself.
##
+use 5.6.0;
+
use strict;
+use warnings;
use Fcntl qw( :DEFAULT :flock :seek );
use Digest::MD5 ();
use Scalar::Util ();
use DBM::Deep::Engine;
+use DBM::Deep::File;
use vars qw( $VERSION );
$VERSION = q(0.99_01);
return bless $self, $class;
}
+# This initializer is called from the various TIE* methods. new() calls tie(),
+# which allows for a single point of entry.
sub _init {
- ##
- # Setup $self and bless into this class.
- ##
my $class = shift;
my ($args) = @_;
+ $args->{fileobj} = DBM::Deep::File->new( $args )
+ unless exists $args->{fileobj};
+
+ # locking implicitly enables autoflush
+ if ($args->{locking}) { $args->{autoflush} = 1; }
+
# These are the defaults to be optionally overridden below
my $self = bless {
type => TYPE_HASH,
- engine => DBM::Deep::Engine->new,
+ engine => DBM::Deep::Engine->new( $args ),
base_offset => undef,
+ fileobj => undef,
}, $class;
+ # Grab the parameters we want to use
foreach my $param ( keys %$self ) {
next unless exists $args->{$param};
- $self->{$param} = delete $args->{$param}
+ $self->{$param} = $args->{$param};
}
- # locking implicitly enables autoflush
- if ($args->{locking}) { $args->{autoflush} = 1; }
-
- $self->{root} = exists $args->{root}
- ? $args->{root}
- : DBM::Deep::_::Root->new( $args );
-
$self->{engine}->setup_fh( $self );
return $self;
if (!defined($self->_fh)) { return; }
- if ($self->_root->{locking}) {
- if (!$self->_root->{locked}) {
+ if ($self->_fileobj->{locking}) {
+ if (!$self->_fileobj->{locked}) {
flock($self->_fh, $type);
# refresh end counter in case file has changed size
my @stats = stat($self->_fh);
- $self->_root->{end} = $stats[7];
+ $self->_fileobj->{end} = $stats[7];
# double-check file inode, in case another process
# has optimize()d our file while we were waiting.
- if ($stats[1] != $self->_root->{inode}) {
- $self->{engine}->close_fh( $self );
+ if ($stats[1] != $self->_fileobj->{inode}) {
+ $self->_fileobj->close;
+ $self->_fileobj->open;
$self->{engine}->setup_fh( $self );
flock($self->_fh, $type); # re-lock
# This may not be necessary after re-opening
- $self->_root->{end} = (stat($self->_fh))[7]; # re-end
+ $self->_fileobj->{end} = (stat($self->_fh))[7]; # re-end
}
}
- $self->_root->{locked}++;
+ $self->_fileobj->{locked}++;
return 1;
}
if (!defined($self->_fh)) { return; }
- if ($self->_root->{locking} && $self->_root->{locked} > 0) {
- $self->_root->{locked}--;
- if (!$self->_root->{locked}) { flock($self->_fh, LOCK_UN); }
+ if ($self->_fileobj->{locking} && $self->_fileobj->{locked} > 0) {
+ $self->_fileobj->{locked}--;
+ if (!$self->_fileobj->{locked}) { flock($self->_fh, LOCK_UN); }
return 1;
}
${$spot} = $value;
}
elsif ( eval { local $SIG{__DIE__}; $value->isa( 'DBM::Deep' ) } ) {
- my $type = $value->_type;
- ${$spot} = $type eq TYPE_HASH ? {} : [];
+ ${$spot} = $value->_repr;
$value->_copy_node( ${$spot} );
}
else {
}
sub _copy_node {
- ##
- # Copy single level of keys or elements to new DB handle.
- # Recurse for nested structures
- ##
- my $self = shift->_get_self;
- my ($db_temp) = @_;
-
- if ($self->_type eq TYPE_HASH) {
- my $key = $self->first_key();
- while ($key) {
- my $value = $self->get($key);
- $self->_copy_value( \$db_temp->{$key}, $value );
- $key = $self->next_key($key);
- }
- }
- else {
- my $length = $self->length();
- for (my $index = 0; $index < $length; $index++) {
- my $value = $self->get($index);
- $self->_copy_value( \$db_temp->[$index], $value );
- }
- }
+ die "Must be implemented in a child class\n";
+}
- return 1;
+sub _repr {
+ die "Must be implemented in a child class\n";
}
sub export {
##
my $self = shift->_get_self;
- my $temp;
- if ($self->_type eq TYPE_HASH) { $temp = {}; }
- elsif ($self->_type eq TYPE_ARRAY) { $temp = []; }
+ my $temp = $self->_repr;
$self->lock();
$self->_copy_node( $temp );
# struct is not a reference, so just import based on our type
if (!ref($struct)) {
- if ($self->_type eq TYPE_HASH) { $struct = {@_}; }
- elsif ($self->_type eq TYPE_ARRAY) { $struct = [@_]; }
+ $struct = $self->_repr( @_ );
}
- my $r = Scalar::Util::reftype($struct) || '';
- if ($r eq "HASH" && $self->_type eq TYPE_HASH) {
- foreach my $key (keys %$struct) { $self->put($key, $struct->{$key}); }
- }
- elsif ($r eq "ARRAY" && $self->_type eq TYPE_ARRAY) {
- $self->push( @$struct );
- }
- else {
- $self->_throw_error("Cannot import: type mismatch");
- }
-
- return 1;
+ return $self->_import( $struct );
}
sub optimize {
my $self = shift->_get_self;
#XXX Need to create a new test for this
-# if ($self->_root->{links} > 1) {
+# if ($self->_fileobj->{links} > 1) {
# $self->_throw_error("Cannot optimize: reference count is greater than 1");
# }
my $db_temp = DBM::Deep->new(
- file => $self->_root->{file} . '.tmp',
+ file => $self->_fileobj->{file} . '.tmp',
type => $self->_type
);
my $perms = $stats[2] & 07777;
my $uid = $stats[4];
my $gid = $stats[5];
- chown( $uid, $gid, $self->_root->{file} . '.tmp' );
- chmod( $perms, $self->_root->{file} . '.tmp' );
+ chown( $uid, $gid, $self->_fileobj->{file} . '.tmp' );
+ chmod( $perms, $self->_fileobj->{file} . '.tmp' );
# q.v. perlport for more information on this variable
if ( $^O eq 'MSWin32' || $^O eq 'cygwin' ) {
# with a soft copy.
##
$self->unlock();
- $self->{engine}->close_fh( $self );
+ $self->_fileobj->close;
}
- if (!rename $self->_root->{file} . '.tmp', $self->_root->{file}) {
- unlink $self->_root->{file} . '.tmp';
+ if (!rename $self->_fileobj->{file} . '.tmp', $self->_fileobj->{file}) {
+ unlink $self->_fileobj->{file} . '.tmp';
$self->unlock();
$self->_throw_error("Optimize failed: Cannot copy temp file over original: $!");
}
$self->unlock();
- $self->{engine}->close_fh( $self );
+ $self->_fileobj->close;
+ $self->_fileobj->open;
$self->{engine}->setup_fh( $self );
return 1;
return DBM::Deep->new(
type => $self->_type,
base_offset => $self->_base_offset,
- root => $self->_root
+ fileobj => $self->_fileobj,
);
}
my $func = shift;
if ( $is_legal_filter{$type} ) {
- $self->_root->{"filter_$type"} = $func;
+ $self->_fileobj->{"filter_$type"} = $func;
return 1;
}
}
}
+sub begin_work {
+ my $self = shift->_get_self;
+ $self->_fileobj->begin_transaction;
+ return 1;
+}
+
+sub rollback {
+ my $self = shift->_get_self;
+ $self->_fileobj->end_transaction;
+ return 1;
+}
+
+#sub commit {
+# my $self = shift->_get_self;
+#}
+
##
# Accessor methods
##
-sub _root {
+sub _fileobj {
##
# Get access to the root structure
##
my $self = $_[0]->_get_self;
- return $self->{root};
+ return $self->{fileobj};
}
sub _type {
# Get access to the raw fh
##
my $self = $_[0]->_get_self;
- return $self->_root->{fh};
+ return $self->_fileobj->{fh};
}
##
my $self = shift->_get_self;
my ($key, $value) = @_;
- unless ( _is_writable( $self->_fh ) ) {
+ if ( $^O ne 'MSWin32' && !_is_writable( $self->_fh ) ) {
$self->_throw_error( 'Cannot write to a readonly filehandle' );
}
my $md5 = $self->{engine}{digest}->($key);
- my $tag = $self->{engine}->find_bucket_list( $self, $md5, { create => 1 } );
+ my $tag = $self->{engine}->find_bucket_list( $self->_base_offset, $md5, { create => 1 } );
# User may be storing a hash, in which case we do not want it run
# through the filtering system
- if ( !ref($value) && $self->_root->{filter_store_value} ) {
- $value = $self->_root->{filter_store_value}->( $value );
+ if ( !ref($value) && $self->_fileobj->{filter_store_value} ) {
+ $value = $self->_fileobj->{filter_store_value}->( $value );
}
##
# Add key/value to bucket list
##
- my $result = $self->{engine}->add_bucket( $self, $tag, $md5, $key, $value );
+ my $result = $self->{engine}->add_bucket( $tag, $md5, $key, $value );
$self->unlock();
##
$self->lock( LOCK_SH );
- my $tag = $self->{engine}->find_bucket_list( $self, $md5 );
+ my $tag = $self->{engine}->find_bucket_list( $self->_base_offset, $md5 );
if (!$tag) {
$self->unlock();
return;
##
# Get value from bucket list
##
- my $result = $self->{engine}->get_bucket_value( $self, $tag, $md5 );
+ my $result = $self->{engine}->get_bucket_value( $tag, $md5 );
$self->unlock();
# Filters only apply to scalar values, so the ref check is making
# sure the fetched bucket is a scalar, not a child hash or array.
- return ($result && !ref($result) && $self->_root->{filter_fetch_value})
- ? $self->_root->{filter_fetch_value}->($result)
+ return ($result && !ref($result) && $self->_fileobj->{filter_fetch_value})
+ ? $self->_fileobj->{filter_fetch_value}->($result)
: $result;
}
my $self = $_[0]->_get_self;
my $key = $_[1];
- unless ( _is_writable( $self->_fh ) ) {
+ if ( $^O ne 'MSWin32' && !_is_writable( $self->_fh ) ) {
$self->_throw_error( 'Cannot write to a readonly filehandle' );
}
my $md5 = $self->{engine}{digest}->($key);
- my $tag = $self->{engine}->find_bucket_list( $self, $md5 );
+ my $tag = $self->{engine}->find_bucket_list( $self->_base_offset, $md5 );
if (!$tag) {
$self->unlock();
return;
##
# Delete bucket
##
- my $value = $self->{engine}->get_bucket_value($self, $tag, $md5 );
+ my $value = $self->{engine}->get_bucket_value( $tag, $md5 );
- if (defined $value && !ref($value) && $self->_root->{filter_fetch_value}) {
- $value = $self->_root->{filter_fetch_value}->($value);
+ if (defined $value && !ref($value) && $self->_fileobj->{filter_fetch_value}) {
+ $value = $self->_fileobj->{filter_fetch_value}->($value);
}
- my $result = $self->{engine}->delete_bucket( $self, $tag, $md5 );
+ my $result = $self->{engine}->delete_bucket( $tag, $md5 );
##
# If this object is an array and the key deleted was on the end of the stack,
##
$self->lock( LOCK_SH );
- my $tag = $self->{engine}->find_bucket_list( $self, $md5 );
+ my $tag = $self->{engine}->find_bucket_list( $self->_base_offset, $md5 );
if (!$tag) {
$self->unlock();
##
# Check if bucket exists and return 1 or ''
##
- my $result = $self->{engine}->bucket_exists( $self, $tag, $md5 ) || '';
+ my $result = $self->{engine}->bucket_exists( $tag, $md5 ) || '';
$self->unlock();
##
my $self = $_[0]->_get_self;
- unless ( _is_writable( $self->_fh ) ) {
+ if ( $^O ne 'MSWin32' && !_is_writable( $self->_fh ) ) {
$self->_throw_error( 'Cannot write to a readonly filehandle' );
}
my $fh = $self->_fh;
- seek($fh, $self->_base_offset + $self->_root->{file_offset}, SEEK_SET);
+ seek($fh, $self->_base_offset + $self->_fileobj->{file_offset}, SEEK_SET);
if (eof $fh) {
$self->unlock();
return;
}
- $self->{engine}->create_tag(
- $self, $self->_base_offset, $self->_type,
+#XXX This needs updating to use _release_space
+ $self->{engine}->write_tag(
+ $self->_base_offset, $self->_type,
chr(0)x$self->{engine}{index_size},
);
sub exists { (shift)->EXISTS( @_ ) }
sub clear { (shift)->CLEAR( @_ ) }
-package DBM::Deep::_::Root;
-
-sub new {
- my $class = shift;
- my ($args) = @_;
-
- my $self = bless {
- autobless => undef,
- autoflush => undef,
- end => 0,
- fh => undef,
- file => undef,
- file_offset => 0,
- locking => undef,
- locked => 0,
- filter_store_key => undef,
- filter_store_value => undef,
- filter_fetch_key => undef,
- filter_fetch_value => undef,
- %$args,
- }, $class;
-
- if ( $self->{fh} && !$self->{file_offset} ) {
- $self->{file_offset} = tell( $self->{fh} );
- }
-
- return $self;
-}
-
-sub DESTROY {
- my $self = shift;
- return unless $self;
-
- close $self->{fh} if $self->{fh};
-
- return;
-}
-
1;
__END__
Data going in and out.
-=item * set_digest() / set_pack() / set_filter()
-
-q.v. adjusting the interal parameters.
-
=back
=head2 HASHES
If you have a 64-bit system, and your Perl is compiled with both LARGEFILE
and 64-bit support, you I<may> be able to create databases larger than 2 GB.
DBM::Deep by default uses 32-bit file offset tags, but these can be changed
-by calling the static C<set_pack()> method before you do anything else.
+by specifying the 'pack_size' parameter when constructing the file.
- DBM::Deep::set_pack(8, 'Q');
+ DBM::Deep->new(
+ filename => $filename,
+ pack_size => 'large',
+ );
This tells DBM::Deep to pack all file offsets with 8-byte (64-bit) quad words
instead of 32-bit longs. After setting these values your DB files have a
theoretical maximum size of 16 XB (exabytes).
+You can also use C<pack_size =E<gt> 'small'> in order to use 16-bit file
+offsets.
+
B<Note:> Changing these values will B<NOT> work for existing database files.
-Only change this for new files, and make sure it stays set consistently
-throughout the file's life. If you do set these values, you can no longer
-access 32-bit DB files. You can, however, call C<set_pack(4, 'N')> to change
-back to 32-bit mode.
+Only change this for new files. Once the value has been set, it is stored in
+the file's header and cannot be changed for the life of the file. These
+parameters are per-file, meaning you can access 32-bit and 64-bit files, as
+you chose.
-B<Note:> I have not personally tested files > 2 GB -- all my systems have
-only a 32-bit Perl. However, I have received user reports that this does
-indeed work!
+B<Note:> We have not personally tested files larger than 2 GB -- all my
+systems have only a 32-bit Perl. However, I have received user reports that
+this does indeed work!
=head1 LOW-LEVEL ACCESS
This method can be called on the root level of the datbase, or any child
hashes or arrays. All levels share a I<root> structure, which contains things
like the filehandle, a reference counter, and all the options specified
-when you created the object. You can get access to this root structure by
-calling the C<root()> method.
+when you created the object. You can get access to this file object by
+calling the C<_fileobj()> method.
- my $root = $db->_root();
+ my $file_obj = $db->_fileobj();
This is useful for changing options after the object has already been created,
such as enabling/disabling locking. You can also store your own temporary user
DBM::Deep by default uses the I<Message Digest 5> (MD5) algorithm for hashing
keys. However you can override this, and use another algorithm (such as SHA-256)
or even write your own. But please note that DBM::Deep currently expects zero
-collisions, so your algorithm has to be I<perfect>, so to speak.
-Collision detection may be introduced in a later version.
+collisions, so your algorithm has to be I<perfect>, so to speak. Collision
+detection may be introduced in a later version.
-
-
-You can specify a custom digest algorithm by calling the static C<set_digest()>
-function, passing a reference to a subroutine, and the length of the algorithm's
-hashes (in bytes). This is a global static function, which affects ALL DBM::Deep
-objects. Here is a working example that uses a 256-bit hash from the
+You can specify a custom digest algorithm by passing it into the parameter
+list for new(), passing a reference to a subroutine as the 'digest' parameter,
+and the length of the algorithm's hashes (in bytes) as the 'hash_size'
+parameter. Here is a working example that uses a 256-bit hash from the
I<Digest::SHA256> module. Please see
-L<http://search.cpan.org/search?module=Digest::SHA256> for more.
+L<http://search.cpan.org/search?module=Digest::SHA256> for more information.
use DBM::Deep;
use Digest::SHA256;
my $context = Digest::SHA256::new(256);
- DBM::Deep::set_digest( \&my_digest, 32 );
-
- my $db = DBM::Deep->new( "foo-sha.db" );
+ my $db = DBM::Deep->new(
+ filename => "foo-sha.db",
+ digest => \&my_digest,
+ hash_size => 32,
+ );
$db->{key1} = "value1";
$db->{key2} = "value2";
}
B<Note:> Your returned digest strings must be B<EXACTLY> the number
-of bytes you specify in the C<set_digest()> function (in this case 32).
+of bytes you specify in the hash_size parameter (in this case 32).
+
+B<Note:> If you do choose to use a custom digest algorithm, you must set it
+every time you access this file. Otherwise, the default (MD5) will be used.
=head1 CIRCULAR REFERENCES
make sure there are no child references lying around. DBM::Deep keeps a reference
counter, and if it is greater than 1, optimize() will abort and return undef.
-=head2 AUTOVIVIFICATION
-
-Unfortunately, autovivification doesn't work with tied hashes. This appears to
-be a bug in Perl's tie() system, as I<Jakob Schmidt> encountered the very same
-issue with his I<DWH_FIle> module (see L<http://search.cpan.org/search?module=DWH_File>),
-and it is also mentioned in the BUGS section for the I<MLDBM> module <see
-L<http://search.cpan.org/search?module=MLDBM>). Basically, on a new db file,
-this does not work:
-
- $db->{foo}->{bar} = "hello";
-
-Since "foo" doesn't exist, you cannot add "bar" to it. You end up with "foo"
-being an empty hash. Try this instead, which works fine:
-
- $db->{foo} = { bar => "hello" };
-
-As of Perl 5.8.7, this bug still exists. I have walked very carefully through
-the execution path, and Perl indeed passes an empty hash to the STORE() method.
-Probably a bug in Perl.
-
=head2 REFERENCES
(The reasons given assume a high level of Perl understanding, specifically of