#XXX This isn't the best solution. Better would be to use Data::Walker,
#XXX but that's a lot more thinking than I want to do right now.
eval {
+ local $SIG{'__DIE__'};
$self->begin_work;
$self->_import( _clone_data( $struct ) );
$self->commit;
- }; if ( $@ ) {
+ }; if ( my $e = $@ ) {
$self->rollback;
- die $@;
+ die $e;
}
return 1;
}
+#XXX Migrate this to the engine, where it really belongs, and go through some
+# API - stop poking in the innards of someone else's object.
{
my %is_legal_filter = map {
$_ => ~~1,
##
$self->lock( LOCK_EX );
+ #XXX Rewrite this dreck to do it in the engine as a tight loop vs.
+ # iterating over keys - such a WASTE - is this required for transactional
+ # clearing?! Surely that can be detected in the engine ...
if ( $self->_type eq TYPE_HASH ) {
my $key = $self->first_key;
        while ( $key ) {
            # Grab the next key before deleting, so the deletion doesn't
            # disturb the iteration.
            my $next_key = $self->next_key( $key );
            $self->delete( $key );
            $key = $next_key;
        }
    }
    else {
        $self->STORESIZE( 0 );
    }
-#XXX This needs updating to use _release_space
-# $self->_engine->write_tag(
-# $self->_base_offset, $self->_type,
-# chr(0)x$self->_engine->{index_size},
-# );
$self->unlock();
Transactions in DBM::Deep are done using MVCC (multi-version concurrency
control), the same method used by MySQL's InnoDB storage engine.
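+
+A minimal sketch of the transactional API (C<foo.db> is a made-up filename;
+the method names are the ones DBM::Deep itself exposes):
+
+    my $db = DBM::Deep->new( file => 'foo.db', locking => 1, autoflush => 1 );
+
+    $db->begin_work;        # start a transaction
+    eval {
+        $db->{key} = 'value';
+        $db->commit;        # publish the changes
+    }; if ( my $e = $@ ) {
+        $db->rollback;      # discard the changes on error
+        die $e;
+    }
+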
+=head1 PERFORMANCE
+
+Because DBM::Deep is a concurrent datastore, every change is flushed to disk
+immediately and every read goes to disk. This means that DBM::Deep functions
+at the speed of disk (generally 10-20ms) vs. the speed of RAM (generally
+50-70ns), or at least 150,000x slower than the comparable in-memory
+datastructure in Perl.
+
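+To see the gap on your own hardware, a quick (and unscientific) comparison
+using the core Benchmark module might look like this (C<bench.db> is a
+made-up filename):
+
+    use Benchmark qw( cmpthese );
+    my %mem;
+    my $db = DBM::Deep->new( file => 'bench.db' );
+    cmpthese( -3, {                            # run each for ~3 CPU seconds
+        memory   => sub { $mem{ rand() } = 1 },
+        dbm_deep => sub { $db->{ rand() } = 1 },
+    } );
+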
+There are several techniques you can use to speed up how DBM::Deep functions.
+
+=over 4
+
+=item * Put it on a ramdisk
+
+The easiest and quickest way to make DBM::Deep run faster is to create a
+ramdisk and locate the DBM::Deep file there. Doing this as an option may
+become a feature of DBM::Deep, assuming there is a good ramdisk wrapper on CPAN.
+
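+For instance, on Linux you might simply point the file at a tmpfs mount (the
+C</dev/shm> path here is an assumption - any RAM-backed mount point will do):
+
+    # /dev/shm is a tmpfs (RAM-backed) filesystem on most Linux systems
+    my $db = DBM::Deep->new( file => '/dev/shm/my_app.db' );
+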
+=item * Work at the tightest level possible
+
+It is much faster to assign the level of your database that you are working
+with to an intermediate variable than to look it up again every time. Thus:
+
+ # BAD
+ while ( my ($k, $v) = each %{$db->{foo}{bar}{baz}} ) {
+ ...
+ }
+
+ # GOOD
+ my $x = $db->{foo}{bar}{baz};
+ while ( my ($k, $v) = each %$x ) {
+ ...
+ }
+
+=item * Make your file as tight as possible
+
+If you know that you are not going to use more than 65K of data in your
+database, consider using the C<pack_size =E<gt> 'small'> option. This will
+instruct DBM::Deep to use 16-bit addresses, meaning that the seek times will
+be shorter. The same goes for the number of transactions. C<num_txns>
+defaults to 16. If you can set that to 1 or 2, that will reduce the file
+size considerably, thus reducing seek times.
+
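+For example (a sketch only - C<small.db> is a made-up filename):
+
+    my $db = DBM::Deep->new(
+        file      => 'small.db',
+        pack_size => 'small',   # 16-bit offsets; file tops out around 65K
+        num_txns  => 1,         # no concurrent transactions needed
+    );
+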
+=back
+
=head1 CAVEATS / ISSUES / BUGS
This section describes all the known issues with DBM::Deep. If you have found
---------------------------- ------ ------ ------ ------ ------ ------ ------
File stmt bran cond sub pod time total
---------------------------- ------ ------ ------ ------ ------ ------ ------
- blib/lib/DBM/Deep.pm 88.1 81.0 81.0 97.9 89.5 4.7 87.9
- blib/lib/DBM/Deep/Array.pm 99.5 90.0 100.0 100.0 100.0 5.8 97.6
- blib/lib/DBM/Deep/Engine.pm 95.6 84.6 78.0 99.1 0.0 58.8 89.3
- blib/lib/DBM/Deep/File.pm 92.6 80.0 45.5 100.0 0.0 28.8 82.6
- blib/lib/DBM/Deep/Hash.pm 98.5 83.3 100.0 100.0 100.0 2.0 96.3
- Total 94.7 84.4 77.5 99.1 32.1 100.0 89.9
+ blib/lib/DBM/Deep.pm 96.7 87.9 90.5 100.0 89.5 4.5 95.1
+ blib/lib/DBM/Deep/Array.pm 100.0 91.4 100.0 100.0 100.0 4.9 98.3
+ blib/lib/DBM/Deep/Engine.pm 95.6 85.1 78.0 99.1 0.0 57.4 89.4
+ blib/lib/DBM/Deep/File.pm 94.3 86.1 55.6 100.0 0.0 30.7 85.7
+ blib/lib/DBM/Deep/Hash.pm 100.0 100.0 100.0 100.0 100.0 2.4 100.0
+ Total 96.5 86.9 81.0 99.5 32.1 100.0 91.8
---------------------------- ------ ------ ------ ------ ------ ------ ------
+
=head1 MORE INFORMATION
Check out the DBM::Deep Google Group at L<http://groups.google.com/group/DBM-Deep>
my $self = shift;
my ($struct) = @_;
- eval {
- local $SIG{'__DIE__'};
- $self->push( @$struct );
- }; if ($@) {
- $self->_throw_error("Cannot import: type mismatch");
- }
+ $self->push( @$struct );
return 1;
}
$args->{byte_size} = 8;
}
else {
- die "Unknown pack_size value: '$args->{pack_size}'\n";
+ DBM::Deep->_throw_error( "Unknown pack_size value: '$args->{pack_size}'" );
}
}
# This will be a Reference sector
my $sector = $self->_load_sector( $obj->_base_offset )
- or die "How did get_classname fail (no sector for '$obj')?!\n";
+ or DBM::Deep->_throw_error( "How did get_classname fail (no sector for '$obj')?!" );
if ( $sector->staleness != $obj->_staleness ) {
return;
# This will be a Reference sector
my $sector = $self->_load_sector( $obj->_base_offset )
- or die "Cannot write to a deleted spot in DBM::Deep.\n";
+ or DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep." );
if ( $sector->staleness != $obj->_staleness ) {
- die "Cannot write to a deleted spot in DBM::Deep.\n";
+ DBM::Deep->_throw_error( "Cannot write to a deleted spot in DBM::Deep.n" );
}
# Create this after loading the reference sector in case something bad happens.
@{$self}{qw(byte_size max_buckets)} = @values;
my $header_var = 1 + 1 + 4 + 4 * $self->num_txns + 3 * $self->byte_size;
- unless ( $size eq $header_var ) {
+ unless ( $size == $header_var ) {
$self->storage->close;
DBM::Deep->_throw_error( "Unexpected size found ($size <-> $header_var)." );
}
return;
}
- die "'$offset': Don't know what to do with type '$type'\n";
+ DBM::Deep->_throw_error( "'$offset': Don't know what to do with type '$type'" );
}
sub _apply_digest {
sector => $sector,
});
}
- else {
- die "Why did $loc make a $sector?";
- }
+
+ DBM::Deep->_throw_error( "get_sector_iterator(): Why did $loc make a $sector?" );
}
sub get_next_key {
if ( defined $key ) {
return $key;
}
+ #XXX else { $iterator->set_to_end() } ?
# We hit the end of the bucketlist iterator, so redo
redo FIND_NEXT_KEY;
return if $self->at_end;
+ my $idx = $self->{curr_index}++;
+
my $data_loc = $self->{sector}->get_data_location_for({
allow_head => 1,
- idx => $self->{curr_index}++,
+ idx => $idx,
}) or return;
- my $key_sector = $self->{sector}->get_key_for( $self->{curr_index} - 1 );
-
- #XXX Is this check necessary now?
- return unless $key_sector;
-
- return $key_sector->data;
+ #XXX Do we want to add corruption checks here?
+ return $self->{sector}->get_key_for( $idx )->data;
}
package DBM::Deep::Engine::Sector;
$self->_init;
return $self;
}
+
#sub _init {}
-#sub clone { die "Must be implemented in the child class" }
+#sub clone { DBM::Deep->_throw_error( "Must be implemented in the child class" ); }
sub engine { $_[0]{engine} }
sub offset { $_[0]{offset} }
key_md5 => $args->{key_md5},
key => $args->{key},
create => 1,
- }) or die "How did write_data fail (no blist)?!\n";
+ }) or DBM::Deep->_throw_error( "How did write_data fail (no blist)?!" );
# Handle any transactional bookkeeping.
if ( $self->engine->trans_id ) {
else {
if ( @trans_ids ) {
foreach my $other_trans_id ( @trans_ids ) {
+ #XXX This doesn't seem to be possible to ever happen . . .
next if $blist->get_data_location_for({ trans_id => $other_trans_id, allow_head => 0 });
$blist->mark_deleted({
trans_id => $other_trans_id,
# XXX What should happen if this fails?
my $blist = $self->get_bucket_list({
key_md5 => $args->{key_md5},
- }) or die "How did delete_key fail (no blist)?!\n";
+ }) or DBM::Deep->_throw_error( "How did delete_key fail (no blist)?!" );
# Save the location so that we can free the data
my $location = $blist->get_data_location_for({
return $blist;
}
- # Add searching here through the index layers, if any
my $sector = $engine->_load_sector( $blist_loc )
- or die "Cannot read sector at $blist_loc in get_bucket_list()";
+ or DBM::Deep->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" );
my $i = 0;
my $last_sector = undef;
while ( $sector->isa( 'DBM::Deep::Engine::Sector::Index' ) ) {
$last_sector = $sector;
if ( $blist_loc ) {
$sector = $engine->_load_sector( $blist_loc )
- or die "Cannot read sector at $blist_loc in get_bucket_list()";
+ or DBM::Deep->_throw_error( "Cannot read sector at $blist_loc in get_bucket_list()" );
}
else {
$sector = undef;
unless ( $sector ) {
return unless $args->{create};
- die "No last_sector when attempting to build a new entry"
+ DBM::Deep->_throw_error( "No last_sector when attempting to build a new entry" )
unless $last_sector;
my $blist = DBM::Deep::Engine::Sector::BucketList->new({
return $self->engine->_load_sector( $class_offset )->data;
}
+#XXX Add singleton handling here
sub data {
my $self = shift;
my $spot = $self->offset + $self->base_size + $idx * $self->bucket_size;
my $md5 = $e->storage->read_at( $spot, $e->hash_size );
+ #XXX If we're chopping, why would we ever have the blank_md5?
last if $md5 eq $e->blank_md5;
my $rest = $e->storage->read_at( undef, $self->bucket_size - $e->hash_size );
$self->engine->byte_size,
);
$location = unpack( $StP{$self->engine->byte_size}, $location );
- return unless $location;
+ DBM::Deep->_throw_error( "get_key_for: No location?" ) unless $location;
+
return $self->engine->_load_sector( $location );
}
my $e = $self->engine;
- die "get_entry: Out of range ($idx)"
+ DBM::Deep->_throw_error( "get_entry: Out of range ($idx)" )
if $idx < 0 || $idx >= $e->hash_chars;
return unpack(
my $e = $self->engine;
- die "set_entry: Out of range ($idx)"
+ DBM::Deep->_throw_error( "set_entry: Out of range ($idx)" )
if $idx < 0 || $idx >= $e->hash_chars;
$self->engine->storage->print_at(
file_offset => 0,
locking => undef,
locked => 0,
+#XXX Migrate this to the engine, where it really belongs.
filter_store_key => undef,
filter_store_value => undef,
filter_fetch_key => undef,
sub read_at {
my $self = shift;
my ($loc, $size) = @_;
- print join(":",map{$_||''}caller) . " - read_at(@{[$loc || 'undef']}, $size)\n" if $::DEBUG;
local ($/,$\);
my $self = shift;
my ($struct) = @_;
- eval {
- local $SIG{'__DIE__'};
- foreach my $key (keys %$struct) {
- $self->put($key, $struct->{$key});
- }
- }; if ($@) {
- $self->_throw_error("Cannot import: type mismatch");
+ foreach my $key (keys %$struct) {
+ $self->put($key, $struct->{$key});
}
return 1;
use Test::Deep;
use t::common qw( new_fh );
-plan tests => 5;
+plan tests => 9;
use_ok( 'DBM::Deep' );
my @control = sort map { "hello $_" } 0 .. $max_keys;
cmp_deeply( \@keys, \@control, "Correct keys are there" );
+ok( !exists $foo->{does_not_exist}, "EXISTS works on large hashes for non-existent keys" );
+is( $foo->{does_not_exist}, undef, "autovivification works on large hashes" );
+ok( exists $foo->{does_not_exist}, "EXISTS works on large hashes for newly-existent keys" );
+cmp_ok( scalar(keys %$foo), '==', $max_keys + 2, "Number of keys after autovivify is correct" );
+
$db->clear;
cmp_ok( scalar(keys %$db), '==', 0, "Number of keys after clear() is correct" );
is( $db->{key1}, 'value1', "key1's value is still there after optimize" );
is( $db->{a}{c}, 'value2', "key2's value is still there after optimize" );
-#print keys %{$db->{a}}, $/;
-
##
# now for the tricky one -- try to store a new key while file is being
# optimized and locked by another process. filehandle should be invalidated,
# first things first, get us about 1000 keys so the optimize() will take
# at least a few seconds on any machine, and re-open db with locking
##
- for (11..11) { $db->STORE( $_, $_ +1 ); }
+ for (1..1000) { $db->STORE( $_, $_ +1 ); }
undef $db;
##
exit( 0 );
}
-=pod
# parent fork
ok( defined($pid), "fork was successful" ); # make sure fork was successful
# see if it was stored successfully
is( $db->{parentfork}, "hello", "stored key while optimize took place" );
-# undef $db;
-# $db = DBM::Deep->new(
-# file => $filename,
-# autoflush => 1,
-# locking => 1
-# );
+ undef $db;
+ $db = DBM::Deep->new(
+ file => $filename,
+ autoflush => 1,
+ locking => 1
+ );
# now check some existing values from before
is( $db->{key1}, 'value1', "key1's value is still there after optimize" );
is( $db->{a}{c}, 'value2', "key2's value is still there after optimize" );
-=cut
}
# DBM::Deep Test
##
use strict;
-use Test::More tests => 17;
+use Test::More tests => 21;
+use Test::Deep;
use t::common qw( new_fh );
use_ok( 'DBM::Deep' );
##
# Try fetching keys as well as values
##
-my $first_key = $db->first_key();
-my $next_key = $db->next_key($first_key);
+cmp_bag( [ keys %$db ], [qw( key1 key2 )], "DB keys correct" );
-ok(
- (($first_key eq "key1") || ($first_key eq "key2")) &&
- (($next_key eq "key1") || ($next_key eq "key2"))
-);
+# Exists and delete tests
+ok( exists $db->{key1}, "Key1 exists" );
+ok( exists $db->{key2}, "Key2 exists" );
+
+is( delete $db->{key1}, 'value1', "Delete returns the right value" );
+
+ok( !exists $db->{key1}, "Key1 no longer exists" );
+ok( exists $db->{key2}, "Key2 exists" );
##
# Now clear all filters, and make sure all is unfiltered
ok( $db->set_filter( 'fetch_key', undef ), "Unset fetch_key filter" );
ok( $db->set_filter( 'fetch_value', undef ), "Unset fetch_value filter" );
-is($db->{MYFILTERkey1}, "MYFILTERvalue1");
-is($db->{MYFILTERkey2}, "MYFILTERvalue2");
+is( $db->{MYFILTERkey2}, "MYFILTERvalue2", "We get the right unfiltered value" );
sub my_filter_store_key { return 'MYFILTER' . $_[0]; }
sub my_filter_store_value { return 'MYFILTER' . $_[0]; }
# DBM::Deep Test
##
use strict;
-use Test::More tests => 9;
+use Test::More tests => 11;
use Test::Deep;
use t::common qw( new_fh );
}
};
-##
-# Import entire thing
-##
$db->import( $struct );
cmp_deeply(
{ foo => [ 2 .. 4 ] },
];
-##
-# Import entire thing
-##
$db->import( $struct );
cmp_deeply(
ok( $db->[-1], "\$db doesn't have the 'bar' value at the end, so \$struct is not tied" );
}
+# Failure case to verify that rollback occurs
+{
+ my ($fh, $filename) = new_fh();
+ my $db = DBM::Deep->new({
+ file => $filename,
+ autobless => 1,
+ });
+
+ $db->{foo} = 'bar';
+
+ my $struct = {
+ key1 => [
+ 2, sub {}, 3,
+ ],
+ };
+
+ eval {
+ $db->import( $struct );
+ };
+ like( $@, qr/Storage of references of type 'CODE' is not supported/, 'Error message correct' );
+
+ cmp_deeply(
+ $db,
+ noclass({
+ foo => 'bar',
+ }),
+ "Everything matches",
+ );
+}
+
__END__
Need to add tests for:
use strict;
-use Test::More tests => 36;
+use Test::More tests => 40;
use Test::Deep;
use t::common qw( new_fh );
my @keys = keys %$db;
cmp_ok( scalar(@keys), '==', 17, "Right number of keys returned" );
+
+ok( !exists $db->{does_not_exist}, "EXISTS works on large hashes for non-existent keys" );
+is( $db->{does_not_exist}, undef, "autovivification works on large hashes" );
+ok( exists $db->{does_not_exist}, "EXISTS works on large hashes for newly-existent keys" );
+cmp_ok( scalar(keys %$db), '==', 18, "Number of keys after autovivify is correct" );
+
use_ok( 'DBM::Deep' );
my ($fh, $filename) = new_fh();
-my $db1 = DBM::Deep->new(
+my $db = DBM::Deep->new(
file => $filename,
locking => 1,
autoflush => 1,
);
-my $x_outer = { a => 'b' };
-my $x_inner = { a => 'c' };
+my $outer = { a => 'b' };
+my $inner = { a => 'c' };
-$db1->{x} = $x_outer;
-is( $db1->{x}{a}, 'b', "BEFORE: We're looking at the right value from outer" );
+$db->{x} = $outer;
+is( $db->{x}{a}, 'b', "BEFORE: We're looking at the right value from outer" );
-$db1->begin_work;
+$db->begin_work;
- $db1->{x} = $x_inner;
- is( $db1->{x}{a}, 'c', "WITHIN: We're looking at the right value from inner" );
+ $db->{x} = $inner;
+ is( $db->{x}{a}, 'c', "WITHIN: We're looking at the right value from inner" );
TODO: {
local $TODO = "Transactions not done yet";
- is( $x_outer->{a}, 'c', "WITHIN: We're looking at the right value from outer" );
+ is( $outer->{a}, 'b', "WITHIN: We're looking at the right value from outer" );
}
-$db1->commit;
+$db->commit;
-is( $db1->{x}{a}, 'c', "AFTER: Commit means x_inner is still correct" );
+is( $db->{x}{a}, 'c', "AFTER: Commit means inner is still correct" );
TODO: {
local $TODO = "Transactions not done yet";
-is( $x_outer->{a}, 'c', "AFTER: outer made the move" );
-is( $x_inner->{a}, 'c', "AFTER: inner made the move" );
+is( $outer->{a}, undef, "AFTER: outer made the move" );
}
+is( $inner->{a}, 'c', "AFTER: inner made the move" );
$db->begin_work;
- $db->{foo} = $obj;
- $db->{foo}{bar} = 1;
+ $db->{foo} = $obj;
+ $db->{foo}{bar} = 1;
- cmp_ok( $db->{foo}{bar}, '==', 1, "The value is visible within the transaction" );
- cmp_ok( $obj->{bar}, '==', 1, "The value is visible within the object" );
+ cmp_ok( $db->{foo}{bar}, '==', 1, "The value is visible within the transaction" );
+ cmp_ok( $obj->{bar}, '==', 1, "The value is visible within the object" );
$db->rollback;
$db->begin_work;
- $db->{foo} = $obj;
- $db->{foo}{bar} = 1;
+ $db->{foo} = $obj;
+ $db->{foo}{bar} = 1;
- cmp_ok( $db->{foo}{bar}, '==', 1, "The value is visible within the transaction" );
- cmp_ok( $obj->{bar}, '==', 1, "The value is visible within the object" );
+ cmp_ok( $db->{foo}{bar}, '==', 1, "The value is visible within the transaction" );
+ cmp_ok( $obj->{bar}, '==', 1, "The value is visible within the object" );
$db->commit;
is( delete $x->{foo}, undef, "Deleting floober makes \$x empty (delete)" );
eval { $x->{foo} = 'bar'; };
-is( $@, "Cannot write to a deleted spot in DBM::Deep.\n", "Exception thrown when writing" );
+like( $@, qr/Cannot write to a deleted spot in DBM::Deep/, "Exception thrown when writing" );
cmp_ok( scalar( keys %$x ), '==', 0, "Keys returns nothing after deletion" );
is( delete $x->{foo}, undef, "Even after the space has been reused, \$x is still empty" );
eval { $x->{foo} = 'bar'; };
-is( $@, "Cannot write to a deleted spot in DBM::Deep.\n", "Exception thrown when writing" );
+like( $@, qr/Cannot write to a deleted spot in DBM::Deep/, "Exception thrown when writing" );
cmp_ok( scalar( keys %$x ), '==', 0, "Keys returns nothing after space reuse" );
--- /dev/null
+=head1 NAME
+
+Testing TODO
+
+=head1 PURPOSE
+
+This file is to detail the tests, in a general sense, that have yet to be
+written so that I don't forget them.
+
+=head1 MISSING TESTS
+
+=over 4
+
+=item * Readonly filehandles
+
+=over 4
+
+=item * Mutations on readonly filehandles
+
+This is to verify that the appropriate errors are thrown; a sketch follows
+this list.
+
+=item * Run an optimize on a readonly FH
+
+=back
+
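+A sketch of what the mutation check might look like (the C<fh> option and the
+exact error text are assumptions to verify against the real API):
+
+    # assumes the usual Test::More + t::common scaffolding used elsewhere
+    my ($fh, $filename) = new_fh();
+    my $db = DBM::Deep->new( file => $filename );
+    $db->{key} = 'value';
+    undef $db;
+
+    open my $ro_fh, '<', $filename or die "Cannot open: $!";
+    my $ro_db = DBM::Deep->new( fh => $ro_fh );
+    is( $ro_db->{key}, 'value', "Reads still work through a readonly fh" );
+
+    eval { $ro_db->{key2} = 'value2' };
+    like( $@, qr/write/i, "Mutating through a readonly fh throws" );
+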
+=item * _copy_value()
+
+For some reason, $c doesn't seem to be undefinable in _copy_value. Maybe this
+means that the bless()ing should occur iff C<!$c-E<gt>isa('DBM::Deep')>?
+
+=item * OO Array access with illegal keys
+
+There's a ton of tests that can be written here to verify the gatekeepers in
+the array methods.
+
+=item * Splice
+
+=over 4
+
+=item * Undefined initial offset
+
+=item * splicing in a group that's equal to the target
+
+=back
+
+=item * Passing in a fh without a file_offset
+
+=item * Do I ever use print_at() without passing in offset?
+
+=item * How should the inode check for locking happen?
+
+=item * Attempt to unlock an unlocked fh
+
+=item * medium and large pack_sizes
+
+Need to make sure I only run the large pack_size test on 64-bit Perls
+
+=item * max_buckets check
+
+=item * get_classname() on a deleted sector
+
+How should this be triggered?!
+
+=item * Open a corrupted file that has a header, but not initial reference
+
+=item * Max out the number of transactions
+
+=item * What happens when commit/rollback are called immediately after begin_work?
+
+=item * Delete something in the head that has its own value in a transaction
+
+=item * Run an import within a transaction
+
+=over 4
+
+=item * Should all assignments happen within a sub-transaction?
+
+=item * Does this mean that sub-transactions should just be done right now?
+
+It shouldn't be too hard to variablize which transaction is the base instead
+of hard-coding 0 . . .
+
+=back
+
+=item * Delete something within a transaction, then commit.
+
+Verify that the space is reusable by assigning more to the DB.
+
+=back
+
+=cut